eSpeak has been dead for me on Windows and Mac. In my current fork I've fixed it for Mac. There were two things stopping it on Mac: 1. When eSpeak is built from scratch, the normal DLL file can't be found, so I updated that (I also refactored the DLL-finding logic, as it was annoying me), and 2. it had no way to play the temp file. aplay is a Linux-only thing and the ffmpeg line was never called, so we needed to output to the OS-native alternative (afplay on Mac).
Now on Windows it's still broken, and I can't figure out why. The code below works (it's standalone), but I haven't yet figured out why it hangs when I integrate the onSynth logic into the main code.
import os
import platform
import subprocess
import wave
from ctypes import (
    CFUNCTYPE,
    POINTER,
    Structure,
    Union,
    c_char,
    c_char_p,
    c_int,
    c_short,
    c_uint,
    c_void_p,
    cast,
    cdll,
)
from tempfile import NamedTemporaryFile

# Conditional import for winsound on Windows
if platform.system() == 'Windows':
    import winsound
# Handle to the loaded eSpeak shared library; assigned by load_library().
dll = None


def load_library(paths=None):
    """Load the first eSpeak / eSpeak NG shared library that can be opened.

    On success the loaded library is stored in the module-level ``dll``.

    Args:
        paths: Optional iterable of library names or absolute paths to try,
            in order. When omitted, a built-in list of common macOS, Linux
            and Windows install locations is used.

    Returns:
        True if a library was loaded, False if every candidate failed.
    """
    global dll
    if paths is None:
        paths = [
            # macOS paths
            '/usr/local/lib/libespeak-ng.1.dylib',
            '/usr/local/lib/libespeak.dylib',
            # Linux paths
            'libespeak-ng.so.1',
            '/usr/local/lib/libespeak-ng.so.1',
            'libespeak.so.1',
            # Windows paths
            r'C:\Program Files\eSpeak NG\libespeak-ng.dll',
            r'C:\Program Files (x86)\eSpeak NG\libespeak-ng.dll',
        ]
    for path in paths:
        try:
            dll = cdll.LoadLibrary(path)
        except OSError as e:
            # The loader reports a missing or incompatible library as
            # OSError; anything else is a programming error and should
            # surface instead of being swallowed.
            print(f"Failed to load: {path}, Exception: {str(e)}")
        else:
            print(f"Successfully loaded: {path}")
            return True
    return False
class _EspeakEventId(Union):
    """The ``id`` union that ends the C ``espeak_EVENT`` struct."""

    _fields_ = [
        ("number", c_int),
        ("name", c_char_p),
        ("string", c_char * 8),
    ]


class ESPEAK_EVENT(Structure):
    """ctypes mirror of the ``espeak_EVENT`` struct passed to the synth callback.

    The trailing ``id`` union is part of the C struct, so it is declared here
    too. Without it ``sizeof(ESPEAK_EVENT)`` is too small, and indexing into
    the events array handed to the callback would read from the wrong offsets.
    """

    _fields_ = [
        ("type", c_int),
        ("unique_identifier", c_uint),
        ("text_position", c_int),
        ("length", c_int),
        ("audio_position", c_int),
        ("sample", c_int),
        ("user_data", c_void_p),
        ("id", _EspeakEventId),
    ]
# Define the synthesis callback function
def synth_callback(wav, numsamples, events):
    """Write one chunk of synthesized audio to a temporary WAV file and play it.

    The chunk is saved as a mono, 16-bit WAV file, played with the platform's
    player (``afplay`` on macOS, ``aplay`` on Linux, ``winsound`` on Windows)
    and the file is then deleted. Playback blocks until the player returns.

    Args:
        wav: Pointer to ``numsamples`` signed 16-bit samples, or NULL.
        numsamples: Number of samples available at ``wav``.
        events: Pointer to the event list supplied by eSpeak (unused here).

    Returns:
        Always 0. Errors while writing or playing are printed, not raised,
        so an exception never propagates back through the C caller.
    """
    print("Synthesis callback called")
    if not wav or numsamples <= 0:
        print("No samples to process")
        return 0  # Return 0 to indicate success

    wav_path = None
    try:
        # The context managers guarantee the file handle is closed even when
        # writing fails; an open handle would make os.remove fail on Windows.
        with NamedTemporaryFile(delete=False, suffix='.wav') as stream:
            wav_path = stream.name
            with wave.open(stream, 'wb') as f:
                f.setnchannels(1)
                f.setsampwidth(2)
                # NOTE(review): assumes the synthesizer runs at 22050 Hz --
                # confirm against the rate the library actually reports.
                f.setframerate(22050)
                # View the raw pointer as a fixed-size array of samples and
                # serialize it as little-endian, as the WAV format requires.
                audio_data = cast(wav, POINTER(c_short * numsamples)).contents
                f.writeframes(b''.join(
                    sample.to_bytes(2, byteorder='little', signed=True)
                    for sample in audio_data
                ))
        print(f"Temporary WAV file created at: {wav_path}")

        system = platform.system()
        if system == 'Darwin':  # macOS
            # Argument lists avoid shell interpretation of the file path.
            subprocess.run(['afplay', wav_path], check=False)
        elif system == 'Linux':
            subprocess.run(['aplay', wav_path, '-q'], check=False)
        elif system == 'Windows':
            print(f"Playing sound on Windows... {wav_path}")
            winsound.PlaySound(wav_path, winsound.SND_FILENAME)
        else:
            raise RuntimeError("Unsupported operating system for audio playback")
    except Exception as e:
        # Top-level boundary of a C callback: report and carry on.
        print(f"Error during playback: {e}")
    finally:
        if wav_path is not None:
            try:
                os.remove(wav_path)
                print(f"Temporary WAV file deleted: {wav_path}")
            except OSError as e:
                print(f"Error deleting temporary WAV file: {e}")
    return 0  # Return 0 to indicate success
def main():
    """Speak a fixed test sentence through eSpeak using the synth callback.

    Loads the library, initializes it, selects the ``en`` voice, registers
    ``synth_callback``, queues the text and waits for synthesis to finish.

    Raises:
        RuntimeError: If the library cannot be loaded or any eSpeak call
            reports failure.
    """
    if not load_library():
        raise RuntimeError("This means you probably do not have eSpeak or eSpeak-ng installed!")

    # Initialize eSpeak
    # NOTE(review): per speak_lib.h the first argument (1) selects retrieval
    # mode, where audio is delivered to the callback, and the second is the
    # buffer length in milliseconds rather than a sample rate -- confirm that
    # 22050 is intended.
    dll.espeak_Initialize.restype = c_int
    if dll.espeak_Initialize(c_int(1), c_int(22050), c_void_p(0), c_int(0)) == -1:
        raise RuntimeError("Failed to initialize eSpeak")
    print("eSpeak initialized")

    # Set the voice
    dll.espeak_SetVoiceByName.restype = c_int
    if dll.espeak_SetVoiceByName(c_char_p(b'en')) != 0:
        raise RuntimeError("Failed to set voice")
    print("Voice set")

    # Define the synthesis callback
    CALLBACK = CFUNCTYPE(c_int, POINTER(c_short), c_int, POINTER(ESPEAK_EVENT))
    # Keep a reference for as long as the library may call back; ctypes does
    # not keep the wrapper alive on its own.
    callback = CALLBACK(synth_callback)

    # Set the callback function
    dll.espeak_SetSynthCallback(callback)
    print("Synthesis callback set")

    # Send text to eSpeak
    text = "Hello World, this is a test."
    encoded = text.encode('utf-8')
    # The size argument is a byte count (at least the size of the text data),
    # so measure the encoded bytes plus the terminating NUL rather than the
    # number of characters.
    size = len(encoded) + 1
    dll.espeak_Synth.restype = c_int
    if dll.espeak_Synth(c_char_p(encoded), c_int(size), c_int(0), c_int(0), c_int(0), c_uint(0), c_void_p(0), c_void_p(0)) != 0:
        raise RuntimeError("Failed to synthesize text")
    print("Text synthesized")

    # Wait for synthesis to complete
    # espeak_Synchronize returns an integer status code, like the calls above.
    dll.espeak_Synchronize.restype = c_int
    if dll.espeak_Synchronize() != 0:
        raise RuntimeError("Failed to synchronize synthesis")
    print("Synthesis synchronized")
if __name__ == "__main__":
    # Script entry point: run the demo, echo any failure to stdout, then let
    # the exception propagate so the traceback and exit status are preserved.
    try:
        main()
    except Exception as error:
        print("Exception: " + str(error) + "\n")
        raise
eSpeak has been dead for me on Windows and Mac. In my current fork I've fixed it for Mac. There were two things stopping it on Mac: 1. When eSpeak is built from scratch, the normal DLL file can't be found, so I updated that (I also refactored the DLL-finding logic, as it was annoying me), and 2. it had no way to play the temp file. aplay is a Linux-only thing and the ffmpeg line was never called, so we needed to output to the OS-native alternative (afplay on Mac).
Now on Windows it's still broken, and I can't figure out why. The code below works (it's standalone), but I haven't yet figured out why it hangs when I integrate the onSynth logic into the main code.