diff --git a/server/config.example.toml b/server/config.example.toml index 6a9d999..6d41b59 100644 --- a/server/config.example.toml +++ b/server/config.example.toml @@ -42,6 +42,8 @@ enabled = true server = "http://127.0.0.1:3001" voice = "Microsoft Sam" tempDir = "/tmp/msac-tts" +# Transcode SAPI5 Waveform files to Opus. Greatly reduces bandwidth usage. Requires ffmpeg to be installed and in PATH. +transcodeOpus = true wavExpirySeconds = 60 [[agents]] diff --git a/server/package.json b/server/package.json index 6902e4d..2aea996 100644 --- a/server/package.json +++ b/server/package.json @@ -6,6 +6,7 @@ }, "packageManager": "yarn@4.2.2", "devDependencies": { + "@types/fluent-ffmpeg": "^2.1.24", "@types/node": "^20.14.10", "@types/ws": "^8.5.10", "typescript": "5.4.5" @@ -15,6 +16,7 @@ "@fastify/websocket": "^10.0.1", "discord.js": "^14.15.3", "fastify": "^4.28.1", + "fluent-ffmpeg": "^2.1.3", "html-entities": "^2.5.2", "mysql2": "^3.10.2", "toml": "^3.0.0", diff --git a/server/src/config.ts b/server/src/config.ts index c9b197b..c407344 100644 --- a/server/src/config.ts +++ b/server/src/config.ts @@ -17,6 +17,7 @@ export interface TTSConfig { server: string; voice: string; tempDir: string; + transcodeOpus: boolean; wavExpirySeconds: number; } diff --git a/server/src/room.ts b/server/src/room.ts index 484c4f5..08721e4 100644 --- a/server/src/room.ts +++ b/server/src/room.ts @@ -78,6 +78,8 @@ export class MSAgentChatRoom { } }); client.on('talk', async (message) => { + message = message.trim(); + if (message.length === 0) return; let msg: MSAgentChatMessage = { op: MSAgentProtocolMessageType.Chat, data: { diff --git a/server/src/tts.ts b/server/src/tts.ts index 9d74137..7605b3c 100644 --- a/server/src/tts.ts +++ b/server/src/tts.ts @@ -4,6 +4,7 @@ import { TTSConfig } from './config.js'; import { Readable } from 'node:stream'; import { ReadableStream } from 'node:stream/web'; import { finished } from 'node:stream/promises'; +import ffmpeg from 'fluent-ffmpeg'; export class TTSClient { private config: TTSConfig; @@ -42,7 +43,7 @@ export class TTSClient { async synthesizeToFile(text: string, id: string): Promise { this.ensureDirectoryExists(); - let wavFilename = id + '.wav'; + let wavFilename = id + (this.config.transcodeOpus ? '.ogg' : '.wav'); let wavPath = path.join(this.config.tempDir, wavFilename); try { await fs.unlink(wavPath); @@ -59,7 +60,14 @@ export class TTSClient { voice: this.config.voice }) }); - await finished(Readable.fromWeb(res.body as ReadableStream).pipe(stream)); + if (this.config.transcodeOpus) { + let coder = ffmpeg(Readable.fromWeb(res.body as ReadableStream)) + .audioCodec('opus') + .outputFormat('ogg'); + await finished(coder.pipe(stream)); + } else { + await finished(Readable.fromWeb(res.body as ReadableStream).pipe(stream)); + } await file.close(); this.deleteOps.set( wavPath, diff --git a/webapp/config.ts b/webapp/config.ts index 98439ed..1207cd5 100644 --- a/webapp/config.ts +++ b/webapp/config.ts @@ -1,4 +1,4 @@ export const Config = { // The server address for the webapp to connect to. The below default is the same address the webapp is hosted at. - serverAddress: `${window.location.protocol}//${window.location.host}` + serverAddress: `http://127.0.0.1:3000` }; diff --git a/yarn.lock b/yarn.lock index 9f95c91..05b9ab2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -308,10 +308,12 @@ __metadata: dependencies: "@fastify/static": "npm:^7.0.4" "@fastify/websocket": "npm:^10.0.1" + "@types/fluent-ffmpeg": "npm:^2.1.24" "@types/node": "npm:^20.14.10" "@types/ws": "npm:^8.5.10" discord.js: "npm:^14.15.3" fastify: "npm:^4.28.1" + fluent-ffmpeg: "npm:^2.1.3" html-entities: "npm:^2.5.2" mysql2: "npm:^3.10.2" toml: "npm:^3.0.0" @@ -1458,6 +1460,15 @@ __metadata: languageName: node linkType: hard +"@types/fluent-ffmpeg@npm:^2.1.24": + version: 2.1.24 + resolution: "@types/fluent-ffmpeg@npm:2.1.24" + dependencies: + "@types/node": "npm:*" + checksum: 10c0/73a32f2313a0225452b5acdf7a341b9860ffcb88490ee726a1241887111cf4f2f7259d7491cf4e9b6be8316ababdd9064e085437f0b768adcfb0becc0733973c + languageName: node + linkType: hard + "@types/node@npm:*, @types/node@npm:^20.14.10": version: 20.14.10 resolution: "@types/node@npm:20.14.10" @@ -1628,6 +1639,13 @@ __metadata: languageName: node linkType: hard +"async@npm:^0.2.9": + version: 0.2.10 + resolution: "async@npm:0.2.10" + checksum: 10c0/714d284dc6c3ae59f3e8b347083e32c7657ba4ffc4ff945eb152ad4fb08def27e768992fcd4d9fd3b411c6b42f1541862ac917446bf2a1acfa0f302d1001f7d2 + languageName: node + linkType: hard + "atomic-sleep@npm:^1.0.0": version: 1.0.0 resolution: "atomic-sleep@npm:1.0.0" @@ -2403,6 +2421,16 @@ __metadata: languageName: node linkType: hard +"fluent-ffmpeg@npm:^2.1.3": + version: 2.1.3 + resolution: "fluent-ffmpeg@npm:2.1.3" + dependencies: + async: "npm:^0.2.9" + which: "npm:^1.1.1" + checksum: 10c0/0397379ec3237c10b2389edeef26fdaf93f36d1b20b0f28f8945fe6d9121dcee9b0c615bf7d44edb7abd37233e0d24f0db39389668d3c86a1a2a0d59e3f4457b + languageName: node + linkType: hard + "foreground-child@npm:^3.1.0": version: 3.2.1 resolution: "foreground-child@npm:3.2.1" @@ -4472,6 +4500,17 @@ __metadata: languageName: node linkType: hard +"which@npm:^1.1.1": + version: 1.3.1 + resolution: "which@npm:1.3.1" + dependencies: + isexe: "npm:^2.0.0" + bin: + which: ./bin/which + checksum: 10c0/e945a8b6bbf6821aaaef7f6e0c309d4b615ef35699576d5489b4261da9539f70393c6b2ce700ee4321c18f914ebe5644bc4631b15466ffbaad37d83151f6af59 + languageName: node + linkType: hard + "which@npm:^2.0.1": version: 2.0.2 resolution: "which@npm:2.0.2"