Add wav->ogg (opus) transcoding

This commit is contained in:
Elijah R 2024-07-14 16:05:33 -04:00
parent 1ad9ee14fe
commit 9e0f472251
7 changed files with 57 additions and 3 deletions

View file

@ -42,6 +42,8 @@ enabled = true
server = "http://127.0.0.1:3001"
voice = "Microsoft Sam"
tempDir = "/tmp/msac-tts"
# Transcode the SAPI5 WAV output to Opus (in an Ogg container). This greatly reduces bandwidth usage. Requires ffmpeg to be installed and available in PATH.
transcodeOpus = true
wavExpirySeconds = 60
[[agents]]

View file

@ -6,6 +6,7 @@
},
"packageManager": "yarn@4.2.2",
"devDependencies": {
"@types/fluent-ffmpeg": "^2.1.24",
"@types/node": "^20.14.10",
"@types/ws": "^8.5.10",
"typescript": "5.4.5"
@ -15,6 +16,7 @@
"@fastify/websocket": "^10.0.1",
"discord.js": "^14.15.3",
"fastify": "^4.28.1",
"fluent-ffmpeg": "^2.1.3",
"html-entities": "^2.5.2",
"mysql2": "^3.10.2",
"toml": "^3.0.0",

View file

@ -17,6 +17,7 @@ export interface TTSConfig {
server: string;
voice: string;
tempDir: string;
transcodeOpus: boolean;
wavExpirySeconds: number;
}

View file

@ -78,6 +78,8 @@ export class MSAgentChatRoom {
}
});
client.on('talk', async (message) => {
message = message.trim();
if (message.length === 0) return;
let msg: MSAgentChatMessage = {
op: MSAgentProtocolMessageType.Chat,
data: {

View file

@ -4,6 +4,7 @@ import { TTSConfig } from './config.js';
import { Readable } from 'node:stream';
import { ReadableStream } from 'node:stream/web';
import { finished } from 'node:stream/promises';
import ffmpeg from 'fluent-ffmpeg';
export class TTSClient {
private config: TTSConfig;
@ -42,7 +43,7 @@ export class TTSClient {
async synthesizeToFile(text: string, id: string): Promise<string> {
this.ensureDirectoryExists();
let wavFilename = id + '.wav';
let wavFilename = id + (this.config.transcodeOpus ? '.ogg' : '.wav');
let wavPath = path.join(this.config.tempDir, wavFilename);
try {
await fs.unlink(wavPath);
@ -59,7 +60,14 @@ export class TTSClient {
voice: this.config.voice
})
});
await finished(Readable.fromWeb(res.body as ReadableStream<any>).pipe(stream));
if (this.config.transcodeOpus) {
let coder = ffmpeg(Readable.fromWeb(res.body as ReadableStream<any>))
.audioCodec('opus')
.outputFormat('ogg');
await finished(coder.pipe(stream));
} else {
await finished(Readable.fromWeb(res.body as ReadableStream<any>).pipe(stream));
}
await file.close();
this.deleteOps.set(
wavPath,

View file

@ -1,4 +1,4 @@
/**
 * Client-side configuration for the webapp.
 */
export const Config = {
  // The server address for the webapp to connect to. The default below is the
  // origin (protocol and host) the webapp itself is served from, so no edit is
  // needed when the backend and the webapp share an address. Replace it with an
  // explicit URL such as `http://127.0.0.1:3000` only when the backend is
  // hosted somewhere else (e.g. during local development).
  serverAddress: `${window.location.protocol}//${window.location.host}`
};

View file

@ -308,10 +308,12 @@ __metadata:
dependencies:
"@fastify/static": "npm:^7.0.4"
"@fastify/websocket": "npm:^10.0.1"
"@types/fluent-ffmpeg": "npm:^2.1.24"
"@types/node": "npm:^20.14.10"
"@types/ws": "npm:^8.5.10"
discord.js: "npm:^14.15.3"
fastify: "npm:^4.28.1"
fluent-ffmpeg: "npm:^2.1.3"
html-entities: "npm:^2.5.2"
mysql2: "npm:^3.10.2"
toml: "npm:^3.0.0"
@ -1458,6 +1460,15 @@ __metadata:
languageName: node
linkType: hard
"@types/fluent-ffmpeg@npm:^2.1.24":
version: 2.1.24
resolution: "@types/fluent-ffmpeg@npm:2.1.24"
dependencies:
"@types/node": "npm:*"
checksum: 10c0/73a32f2313a0225452b5acdf7a341b9860ffcb88490ee726a1241887111cf4f2f7259d7491cf4e9b6be8316ababdd9064e085437f0b768adcfb0becc0733973c
languageName: node
linkType: hard
"@types/node@npm:*, @types/node@npm:^20.14.10":
version: 20.14.10
resolution: "@types/node@npm:20.14.10"
@ -1628,6 +1639,13 @@ __metadata:
languageName: node
linkType: hard
"async@npm:^0.2.9":
version: 0.2.10
resolution: "async@npm:0.2.10"
checksum: 10c0/714d284dc6c3ae59f3e8b347083e32c7657ba4ffc4ff945eb152ad4fb08def27e768992fcd4d9fd3b411c6b42f1541862ac917446bf2a1acfa0f302d1001f7d2
languageName: node
linkType: hard
"atomic-sleep@npm:^1.0.0":
version: 1.0.0
resolution: "atomic-sleep@npm:1.0.0"
@ -2403,6 +2421,16 @@ __metadata:
languageName: node
linkType: hard
"fluent-ffmpeg@npm:^2.1.3":
version: 2.1.3
resolution: "fluent-ffmpeg@npm:2.1.3"
dependencies:
async: "npm:^0.2.9"
which: "npm:^1.1.1"
checksum: 10c0/0397379ec3237c10b2389edeef26fdaf93f36d1b20b0f28f8945fe6d9121dcee9b0c615bf7d44edb7abd37233e0d24f0db39389668d3c86a1a2a0d59e3f4457b
languageName: node
linkType: hard
"foreground-child@npm:^3.1.0":
version: 3.2.1
resolution: "foreground-child@npm:3.2.1"
@ -4472,6 +4500,17 @@ __metadata:
languageName: node
linkType: hard
"which@npm:^1.1.1":
version: 1.3.1
resolution: "which@npm:1.3.1"
dependencies:
isexe: "npm:^2.0.0"
bin:
which: ./bin/which
checksum: 10c0/e945a8b6bbf6821aaaef7f6e0c309d4b615ef35699576d5489b4261da9539f70393c6b2ce700ee4321c18f914ebe5644bc4631b15466ffbaad37d83151f6af59
languageName: node
linkType: hard
"which@npm:^2.0.1":
version: 2.0.2
resolution: "which@npm:2.0.2"