diff --git a/Justfile b/Justfile index 4752347..2550619 100644 --- a/Justfile +++ b/Justfile @@ -8,9 +8,9 @@ build: build-debug: dotnet build -c Debug - make -C speech2 CONFIG=Debug -j$(nproc) + make -C speech2 CONFIG=Release -j$(nproc) - cp speech2/bin/x86/Debug/speech2.dll SAPIServer/bin/Debug/net40/windows-x86 + cp speech2/bin/x86/Release/speech2.dll SAPIServer/bin/Debug/net40/windows-x86 cp /usr/i686-w64-mingw32/bin/libgcc_s_dw2-1.dll SAPIServer/bin/Debug/net40/windows-x86/ cp /usr/i686-w64-mingw32/bin/libstdc++-6.dll SAPIServer/bin/Debug/net40/windows-x86/ diff --git a/SAPIServer/Program.cs b/SAPIServer/Program.cs index 29deb43..d260058 100644 --- a/SAPIServer/Program.cs +++ b/SAPIServer/Program.cs @@ -15,13 +15,21 @@ namespace SAPIServer { public SpeechServer() { // Test out creating a speech2 api object apis["sapi4"] = new SpeechAPI(EngineType.ET_SAPI4); +#if true + foreach(var voice in apis["sapi4"].GetVoices()) { + Console.WriteLine($"ggg {voice.name}"); + } +#endif httpServer.Get("/api/voices", ctx => { + /* using(var synth = new SpeechSynthesizer()) { ctx.Response.ContentType = "application/json"; return Encoding.UTF8.GetBytes( JsonConvert.SerializeObject(new VoicesResponse { voices = synth.GetInstalledVoices().Select(v => v.VoiceInfo.Name).ToArray() })); - } + }*/ + + return new byte[]{0}; }); httpServer.Post("/api/synthesize", ctx => { diff --git a/SAPIServer/SAPIServer.csproj b/SAPIServer/SAPIServer.csproj index 68058e6..0eb5fcb 100644 --- a/SAPIServer/SAPIServer.csproj +++ b/SAPIServer/SAPIServer.csproj @@ -6,7 +6,7 @@ false Exe net40 - + true diff --git a/SAPIServer/SpeechDLL.cs b/SAPIServer/SpeechDLL.cs index 29674f1..39465bb 100644 --- a/SAPIServer/SpeechDLL.cs +++ b/SAPIServer/SpeechDLL.cs @@ -1,13 +1,39 @@ using System; +using System.Collections.Generic; using System.Runtime.InteropServices; +using System.Security; namespace SAPIServer { // Sync with C++ code. public enum EngineType : int { ET_SAPI4, ET_SAPI5, ET_DECTALK } + public class VoiceDef { + public Guid guid; + public string name; + } + + [StructLayout(LayoutKind.Sequential, Pack = 1)] + internal struct VoiceDefInternal { + Guid guid; + + [MarshalAs(UnmanagedType.LPStr)] + string name; + + public VoiceDef Voicify() { + Console.WriteLine($"FUCK ${name}"); + //var str = Marshal.PtrToStringAnsi(name); + VoiceDef def = new(); + + def.guid = guid; + def.name = name; + return def; + } + } + + // Speech2 DLL API. Sync with c++ code. internal class SpeechDLL { [DllImport("speech2.dll")] @@ -15,6 +41,12 @@ namespace SAPIServer { [DllImport("speech2.dll")] public static extern void speech2_destroy_api(IntPtr pAPI); + + [DllImport("speech2.dll")] + public static extern int speech2_api_get_voiceinfo_count(IntPtr pAPI); + + [DllImport("speech2.dll")] + public static extern IntPtr speech2_api_get_voiceinfo_index(IntPtr pAPI, int index); } // A speech API. This is generic for all speech2 supported speech APIs, so @@ -28,6 +60,18 @@ namespace SAPIServer { throw new InvalidOperationException("Failed to create speech API"); } + public List GetVoices() { + var count = SpeechDLL.speech2_api_get_voiceinfo_count(handle); + Console.WriteLine($"count {count}"); + var list = new List(); + for(var i = 0; i < count; ++i) { + var ptr = SpeechDLL.speech2_api_get_voiceinfo_index(handle, i); + var obj = (VoiceDefInternal)Marshal.PtrToStructure(ptr, typeof(VoiceDefInternal)); + list.Add(obj.Voicify()); + } + return list; + } + void IDisposable.Dispose() { if(handle != IntPtr.Zero) SpeechDLL.speech2_destroy_api(handle); diff --git a/speech2/build/configs.mk b/speech2/build/configs.mk index bcc3dbe..e793758 100644 --- a/speech2/build/configs.mk +++ b/speech2/build/configs.mk @@ -1,6 +1,6 @@ # Base compiler flags. Only change if you *explicitly* know what you're doing. -BASE_CCFLAGS = -MMD -fvisibility=hidden -std=gnu17 -fpermissive -fno-ident -msse -Iinclude -Isrc -D_UCRT -D_WIN32_WINNT=0x0501 -BASE_CXXFLAGS = -MMD -fvisibility=hidden -std=c++20 -fpermissive -fno-ident -msse -Iinclude -Isrc -Ithird_party -D_UCRT -D_WIN32_WINNT=0x0501 +BASE_CCFLAGS = -MMD -fvisibility=hidden -std=gnu17 -fpermissive -fno-pic -fno-pie -fno-ident -msse -Iinclude -Isrc -D_UCRT -D_WIN32_WINNT=0x0501 +BASE_CXXFLAGS = -MMD -fvisibility=hidden -std=c++20 -fpermissive -fno-pic -fno-pie -fno-ident -msse -Iinclude -Isrc -Ithird_party -D_UCRT -D_WIN32_WINNT=0x0501 BASE_LDFLAGS = -Wl,--subsystem=windows -fvisibility=hidden -shared -lkernel32 -lshell32 -luser32 -luuid -lole32 Release_Valid = yes diff --git a/speech2/src/bindings.cpp b/speech2/src/bindings.cpp index c28efc9..5e71c1e 100644 --- a/speech2/src/bindings.cpp +++ b/speech2/src/bindings.cpp @@ -9,10 +9,20 @@ enum class EngineType : int { ET_SAPI4, ET_SAPI5, ET_DECTALK }; extern "C" { SP2_EXPORT void* speech2_create_api(EngineType type) { + ISpeechAPI* api = nullptr; switch(type) { - case EngineType::ET_SAPI4: return static_cast(ISpeechAPI::CreateSapi4()); + case EngineType::ET_SAPI4: + api = ISpeechAPI::CreateSapi4(); + break; default: return nullptr; } + + if(auto hr = api->Initialize(); FAILED(hr)) { + delete api; + return nullptr; + } + + return static_cast(api); } SP2_EXPORT void speech2_destroy_api(void* engine) { @@ -22,4 +32,20 @@ SP2_EXPORT void speech2_destroy_api(void* engine) { // API bindings TODO +SP2_EXPORT int speech2_api_get_voiceinfo_count(void* engine) { + if(engine) { + auto* api = static_cast(engine); + return api->GetVoices().size(); + } + return -1; +} + +SP2_EXPORT const ISpeechAPI::VoiceInfo* speech2_api_get_voiceinfo_index(void* engine, int index) { + if(engine) { + auto* api = static_cast(engine); + return &api->GetVoices()[index]; + } + return nullptr; +} + } diff --git a/speech2/src/dllmain.cpp b/speech2/src/dllmain.cpp index 129073d..a1db439 100644 --- a/speech2/src/dllmain.cpp +++ b/speech2/src/dllmain.cpp @@ -1,11 +1,14 @@ #include +#include BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { // N.B: Should initalize COM if it's not initalized. // Note that with .NET Framework, *all* managed threads (incl. ThreadPool threads) // have COM initalized by default, so we don't need to do so there. switch(fdwReason) { - case DLL_PROCESS_ATTACH: break; + case DLL_PROCESS_ATTACH: + CoInitialize(nullptr); + break; case DLL_THREAD_ATTACH: break; @@ -15,6 +18,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { if(lpvReserved != nullptr) { break; // do not do cleanup if process termination scenario } + CoUninitialize(); break; } return TRUE; diff --git a/speech2/src/sapi4/api_sapi4.cpp b/speech2/src/sapi4/api_sapi4.cpp index 532ce01..bd70c0b 100644 --- a/speech2/src/sapi4/api_sapi4.cpp +++ b/speech2/src/sapi4/api_sapi4.cpp @@ -19,23 +19,40 @@ struct SpeechAPI_SAPI4 : public ISpeechAPI { HRESULT Initialize() override { HRESULT hRes; + printf("speech2: SpeechAPI_Sapi4::Initalize() begin\n"); + hRes = pEnum.CreateInstance(CLSID_TTSEnumerator, CLSCTX_INPROC); if(FAILED(hRes)) return hRes; + + pEnum->Reset(); + + printf("speech2: SpeechAPI_Sapi4::Initalize() created enum\n"); + + // Fill out voices + EnumVoices(); + + + printf("speech2: SpeechAPI_Sapi4::Initalize() filled out voices! Yay\n"); + return S_OK; } - std::vector GetVoices() override { - TTSMODEINFO found {}; - std::vector ret; + void EnumVoices() { + static TTSMODEINFO found {}; while(!pEnum->Next(1, &found, nullptr)) { - ret.push_back({ .guid = found.gModeID, .voiceName = found.szModeName }); + //auto ptr = strdup(found.szModeName); + printf("EnumVoices() voice %p\n", &found.szModeName); + //voices.push_back(VoiceInfo { .guid = found.gModeID, .voiceName = ptr }); } pEnum->Reset(); - return ret; + } + + const std::vector& GetVoices() override { + return voices; } HRESULT SelectVoiceImpl(const GUID& guid) { @@ -77,6 +94,8 @@ struct SpeechAPI_SAPI4 : public ISpeechAPI { // The above comment is also why this isn't a ComPtr. AudioOutBuffer* pAudioOut { nullptr }; + + std::vector voices{}; }; ISpeechAPI* ISpeechAPI::CreateSapi4() { diff --git a/speech2/src/speechapi.hpp b/speech2/src/speechapi.hpp index 854d6a7..88ed6e7 100644 --- a/speech2/src/speechapi.hpp +++ b/speech2/src/speechapi.hpp @@ -7,7 +7,15 @@ struct ISpeechAPI { struct VoiceInfo { GUID guid{}; // Optional. May not be filled out if th e - std::string voiceName; + char* voiceName; + + //VoiceInfo(const VoiceInfo&) = delete; + //VoiceInfo(VoiceInfo&&) = delete; + + ~VoiceInfo() { + if(voiceName) + free(voiceName); + } }; virtual ~ISpeechAPI() = default; @@ -19,7 +27,7 @@ struct ISpeechAPI { /// Performs the bare level of initalization required to use the speech API virtual HRESULT Initialize() = 0; - virtual std::vector GetVoices() = 0; + virtual const std::vector& GetVoices() = 0; /// Selects a voice. virtual HRESULT SelectVoice(std::string_view voiceName) = 0;