Add wav->ogg (opus) transcoding

This commit is contained in:
Elijah R 2024-07-14 16:05:33 -04:00
parent 1ad9ee14fe
commit 9e0f472251
7 changed files with 57 additions and 3 deletions

View file

@ -42,6 +42,8 @@ enabled = true
server = "http://127.0.0.1:3001" server = "http://127.0.0.1:3001"
voice = "Microsoft Sam" voice = "Microsoft Sam"
tempDir = "/tmp/msac-tts" tempDir = "/tmp/msac-tts"
# Transcode the SAPI5 WAV output to Opus (in an Ogg container) before serving it. Greatly reduces bandwidth usage. Requires ffmpeg to be installed and available in PATH.
transcodeOpus = true
wavExpirySeconds = 60 wavExpirySeconds = 60
[[agents]] [[agents]]

View file

@ -6,6 +6,7 @@
}, },
"packageManager": "yarn@4.2.2", "packageManager": "yarn@4.2.2",
"devDependencies": { "devDependencies": {
"@types/fluent-ffmpeg": "^2.1.24",
"@types/node": "^20.14.10", "@types/node": "^20.14.10",
"@types/ws": "^8.5.10", "@types/ws": "^8.5.10",
"typescript": "5.4.5" "typescript": "5.4.5"
@ -15,6 +16,7 @@
"@fastify/websocket": "^10.0.1", "@fastify/websocket": "^10.0.1",
"discord.js": "^14.15.3", "discord.js": "^14.15.3",
"fastify": "^4.28.1", "fastify": "^4.28.1",
"fluent-ffmpeg": "^2.1.3",
"html-entities": "^2.5.2", "html-entities": "^2.5.2",
"mysql2": "^3.10.2", "mysql2": "^3.10.2",
"toml": "^3.0.0", "toml": "^3.0.0",

View file

@ -17,6 +17,7 @@ export interface TTSConfig {
server: string; server: string;
voice: string; voice: string;
tempDir: string; tempDir: string;
transcodeOpus: boolean;
wavExpirySeconds: number; wavExpirySeconds: number;
} }

View file

@ -78,6 +78,8 @@ export class MSAgentChatRoom {
} }
}); });
client.on('talk', async (message) => { client.on('talk', async (message) => {
message = message.trim();
if (message.length === 0) return;
let msg: MSAgentChatMessage = { let msg: MSAgentChatMessage = {
op: MSAgentProtocolMessageType.Chat, op: MSAgentProtocolMessageType.Chat,
data: { data: {

View file

@ -4,6 +4,7 @@ import { TTSConfig } from './config.js';
import { Readable } from 'node:stream'; import { Readable } from 'node:stream';
import { ReadableStream } from 'node:stream/web'; import { ReadableStream } from 'node:stream/web';
import { finished } from 'node:stream/promises'; import { finished } from 'node:stream/promises';
import ffmpeg from 'fluent-ffmpeg';
export class TTSClient { export class TTSClient {
private config: TTSConfig; private config: TTSConfig;
@ -42,7 +43,7 @@ export class TTSClient {
async synthesizeToFile(text: string, id: string): Promise<string> { async synthesizeToFile(text: string, id: string): Promise<string> {
this.ensureDirectoryExists(); this.ensureDirectoryExists();
let wavFilename = id + '.wav'; let wavFilename = id + (this.config.transcodeOpus ? '.ogg' : '.wav');
let wavPath = path.join(this.config.tempDir, wavFilename); let wavPath = path.join(this.config.tempDir, wavFilename);
try { try {
await fs.unlink(wavPath); await fs.unlink(wavPath);
@ -59,7 +60,14 @@ export class TTSClient {
voice: this.config.voice voice: this.config.voice
}) })
}); });
if (this.config.transcodeOpus) {
let coder = ffmpeg(Readable.fromWeb(res.body as ReadableStream<any>))
.audioCodec('opus')
.outputFormat('ogg');
await finished(coder.pipe(stream));
} else {
await finished(Readable.fromWeb(res.body as ReadableStream<any>).pipe(stream)); await finished(Readable.fromWeb(res.body as ReadableStream<any>).pipe(stream));
}
await file.close(); await file.close();
this.deleteOps.set( this.deleteOps.set(
wavPath, wavPath,

View file

@ -1,4 +1,4 @@
export const Config = { export const Config = {
// The server address for the webapp to connect to. The below default is the same address the webapp is hosted at. // The server address for the webapp to connect to. The below default is the same address the webapp is hosted at.
serverAddress: `${window.location.protocol}//${window.location.host}` serverAddress: `http://127.0.0.1:3000`
}; };

View file

@ -308,10 +308,12 @@ __metadata:
dependencies: dependencies:
"@fastify/static": "npm:^7.0.4" "@fastify/static": "npm:^7.0.4"
"@fastify/websocket": "npm:^10.0.1" "@fastify/websocket": "npm:^10.0.1"
"@types/fluent-ffmpeg": "npm:^2.1.24"
"@types/node": "npm:^20.14.10" "@types/node": "npm:^20.14.10"
"@types/ws": "npm:^8.5.10" "@types/ws": "npm:^8.5.10"
discord.js: "npm:^14.15.3" discord.js: "npm:^14.15.3"
fastify: "npm:^4.28.1" fastify: "npm:^4.28.1"
fluent-ffmpeg: "npm:^2.1.3"
html-entities: "npm:^2.5.2" html-entities: "npm:^2.5.2"
mysql2: "npm:^3.10.2" mysql2: "npm:^3.10.2"
toml: "npm:^3.0.0" toml: "npm:^3.0.0"
@ -1458,6 +1460,15 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@types/fluent-ffmpeg@npm:^2.1.24":
version: 2.1.24
resolution: "@types/fluent-ffmpeg@npm:2.1.24"
dependencies:
"@types/node": "npm:*"
checksum: 10c0/73a32f2313a0225452b5acdf7a341b9860ffcb88490ee726a1241887111cf4f2f7259d7491cf4e9b6be8316ababdd9064e085437f0b768adcfb0becc0733973c
languageName: node
linkType: hard
"@types/node@npm:*, @types/node@npm:^20.14.10": "@types/node@npm:*, @types/node@npm:^20.14.10":
version: 20.14.10 version: 20.14.10
resolution: "@types/node@npm:20.14.10" resolution: "@types/node@npm:20.14.10"
@ -1628,6 +1639,13 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"async@npm:^0.2.9":
version: 0.2.10
resolution: "async@npm:0.2.10"
checksum: 10c0/714d284dc6c3ae59f3e8b347083e32c7657ba4ffc4ff945eb152ad4fb08def27e768992fcd4d9fd3b411c6b42f1541862ac917446bf2a1acfa0f302d1001f7d2
languageName: node
linkType: hard
"atomic-sleep@npm:^1.0.0": "atomic-sleep@npm:^1.0.0":
version: 1.0.0 version: 1.0.0
resolution: "atomic-sleep@npm:1.0.0" resolution: "atomic-sleep@npm:1.0.0"
@ -2403,6 +2421,16 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"fluent-ffmpeg@npm:^2.1.3":
version: 2.1.3
resolution: "fluent-ffmpeg@npm:2.1.3"
dependencies:
async: "npm:^0.2.9"
which: "npm:^1.1.1"
checksum: 10c0/0397379ec3237c10b2389edeef26fdaf93f36d1b20b0f28f8945fe6d9121dcee9b0c615bf7d44edb7abd37233e0d24f0db39389668d3c86a1a2a0d59e3f4457b
languageName: node
linkType: hard
"foreground-child@npm:^3.1.0": "foreground-child@npm:^3.1.0":
version: 3.2.1 version: 3.2.1
resolution: "foreground-child@npm:3.2.1" resolution: "foreground-child@npm:3.2.1"
@ -4472,6 +4500,17 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"which@npm:^1.1.1":
version: 1.3.1
resolution: "which@npm:1.3.1"
dependencies:
isexe: "npm:^2.0.0"
bin:
which: ./bin/which
checksum: 10c0/e945a8b6bbf6821aaaef7f6e0c309d4b615ef35699576d5489b4261da9539f70393c6b2ce700ee4321c18f914ebe5644bc4631b15466ffbaad37d83151f6af59
languageName: node
linkType: hard
"which@npm:^2.0.1": "which@npm:^2.0.1":
version: 2.0.2 version: 2.0.2
resolution: "which@npm:2.0.2" resolution: "which@npm:2.0.2"