Skip to content

eSpeak on Windows and Mac #4

@willwade

Description

@willwade

eSpeak has been dead for me on Windows and Mac. In my current fork I've fixed it for Mac. There were two things stopping it on Mac: 1. When building it from scratch, the normal dll file can't be found, so I updated that (I also refactored the dll-finding logic, as it was annoying me); and 2. it had no way to play the temp file. aplay is a Linux-only thing and the ffmpeg line was never called, so we needed to output to the OS-native alternative (afplay on Mac).

Now on Windows it's still broken, and I can't figure out why. The code below works (it's standalone), but I haven't yet figured out why it hangs when I integrate the onSynth logic into the main code.

import os
import platform
import struct
import subprocess
import wave
from ctypes import (
    CFUNCTYPE,
    POINTER,
    Structure,
    Union,
    c_char,
    c_char_p,
    c_int,
    c_short,
    c_size_t,
    c_uint,
    c_void_p,
    cast,
    cdll,
)
from tempfile import NamedTemporaryFile

# Conditional import for winsound on Windows: the module is only imported
# there, and synth_callback only references it in its Windows playback branch.
if platform.system() == 'Windows':
    import winsound

# Handle to the loaded eSpeak shared library; stays None until load_library()
# succeeds and assigns it.
dll = None

def load_library():
    """Load the first eSpeak / eSpeak NG shared library that can be opened.

    Candidate locations are tried in a fixed order (macOS, then Linux, then
    Windows). Each attempt reports its outcome on stdout.

    Returns:
        True if a library was loaded and stored in the module-level ``dll``;
        False if every candidate failed (``dll`` is then left unchanged).
    """
    global dll

    macos_candidates = (
        '/usr/local/lib/libespeak-ng.1.dylib',
        '/usr/local/lib/libespeak.dylib',
    )
    linux_candidates = (
        'libespeak-ng.so.1',
        '/usr/local/lib/libespeak-ng.so.1',
        'libespeak.so.1',
    )
    windows_candidates = (
        r'C:\Program Files\eSpeak NG\libespeak-ng.dll',
        r'C:\Program Files (x86)\eSpeak NG\libespeak-ng.dll',
    )

    for candidate in (*macos_candidates, *linux_candidates, *windows_candidates):
        try:
            handle = cdll.LoadLibrary(candidate)
        except Exception as err:
            print(f"Failed to load: {candidate}, Exception: {str(err)}")
            continue

        # First library that opens wins; later candidates are not tried.
        dll = handle
        print(f"Successfully loaded: {candidate}")
        return True

    return False

class ESPEAK_EVENT_ID(Union):
    """The ``id`` union that closes the C ``espeak_EVENT`` structure."""

    _fields_ = [
        ("number", c_int),       # numeric payload of an event
        ("name", c_char_p),      # pointer to a UTF-8 name
        ("string", c_char * 8),  # short inline name, up to 8 bytes
    ]


class ESPEAK_EVENT(Structure):
    """ctypes mirror of the C ``espeak_EVENT`` structure.

    The trailing ``id`` union is part of the C layout. Leaving it out makes
    ``sizeof(ESPEAK_EVENT)`` too small, so stepping through the event array
    handed to the synthesis callback (``events[1]``, ``events[2]``, ...) would
    read from the wrong offsets.
    """

    _fields_ = [
        ("type", c_int),
        ("unique_identifier", c_uint),
        ("text_position", c_int),
        ("length", c_int),
        ("audio_position", c_int),
        ("sample", c_int),
        ("user_data", c_void_p),
        ("id", ESPEAK_EVENT_ID),
    ]

# Define the synthesis callback function
def synth_callback(wav, numsamples, events):
    """Write one chunk of synthesized audio to a temporary WAV file and play it.

    Meant to be wrapped in a ctypes ``CFUNCTYPE`` and registered through
    ``espeak_SetSynthCallback``. Every failure is reported on stdout rather
    than raised, so no Python exception escapes into the calling C code.

    Args:
        wav: Pointer to ``numsamples`` signed 16-bit mono samples; may be NULL.
        numsamples: Number of samples available behind ``wav``.
        events: Pointer to the accompanying event list (unused here).

    Returns:
        Always 0.
    """
    print("Synthesis callback called")

    if not wav or numsamples <= 0:
        print("No samples to process")
        return 0  # Return 0 to indicate success

    stream = NamedTemporaryFile(delete=False, suffix='.wav')

    try:
        try:
            with wave.open(stream, 'wb') as f:
                f.setnchannels(1)
                f.setsampwidth(2)
                f.setframerate(22050)
                # View the C buffer as an array of shorts and serialize it in
                # one call; '<' forces the little-endian layout WAV requires.
                samples = cast(wav, POINTER(c_short * numsamples)).contents
                f.writeframes(struct.pack(f'<{numsamples}h', *samples))
        finally:
            # wave.open() does not close a file object it was handed. Close it
            # even when writing failed, otherwise the removal below fails on
            # Windows and the temp file is leaked.
            stream.close()
        print(f"Temporary WAV file created at: {stream.name}")

        system = platform.system()
        # Argument lists avoid passing the path through a shell.
        if system == 'Darwin':  # macOS
            subprocess.run(['afplay', stream.name], check=False)
        elif system == 'Linux':
            subprocess.run(['aplay', stream.name, '-q'], check=False)
        elif system == 'Windows':
            print(f"Playing sound on Windows... {stream.name}")
            winsound.PlaySound(stream.name, winsound.SND_FILENAME)
        else:
            raise RuntimeError("Unsupported operating system for audio playback")

    except Exception as e:
        print(f"Error during playback: {e}")

    finally:
        try:
            os.remove(stream.name)
            print(f"Temporary WAV file deleted: {stream.name}")
        except Exception as e:
            print(f"Error deleting temporary WAV file: {e}")

    return 0  # Return 0 to indicate success

def main():
    """Speak a fixed test sentence through eSpeak using the synthesis callback.

    Loads the library, initializes eSpeak, selects the ``en`` voice, registers
    ``synth_callback``, submits the text and waits for synthesis to finish.
    Progress is reported on stdout.

    Raises:
        RuntimeError: If the library cannot be loaded or any eSpeak call
            reports failure.
    """
    if not load_library():
        raise RuntimeError("This means you probably do not have eSpeak or eSpeak-ng installed!")

    # Initialize eSpeak
    # NOTE(review): the eSpeak header documents the second argument as the
    # callback buffer length in milliseconds, not a sample rate. The value is
    # left as in the original; confirm 22050 is what is intended.
    dll.espeak_Initialize.restype = c_int
    if dll.espeak_Initialize(c_int(1), c_int(22050), c_void_p(0), c_int(0)) == -1:
        raise RuntimeError("Failed to initialize eSpeak")
    print("eSpeak initialized")

    # Set the voice
    dll.espeak_SetVoiceByName.restype = c_int
    if dll.espeak_SetVoiceByName(c_char_p(b'en')) != 0:
        raise RuntimeError("Failed to set voice")
    print("Voice set")

    # Define the synthesis callback. The wrapper object must stay referenced
    # for as long as eSpeak may call it; this local lives until main() returns.
    CALLBACK = CFUNCTYPE(c_int, POINTER(c_short), c_int, POINTER(ESPEAK_EVENT))
    callback = CALLBACK(synth_callback)

    # Set the callback function
    dll.espeak_SetSynthCallback(callback)
    print("Synthesis callback set")

    # Send text to eSpeak. The size argument is a byte count (size_t) of the
    # encoded text including its terminating NUL, not the number of characters;
    # the two differ as soon as the text contains non-ASCII characters.
    text = "Hello World, this is a test."
    encoded = text.encode('utf-8')
    dll.espeak_Synth.restype = c_int
    status = dll.espeak_Synth(
        c_char_p(encoded),
        c_size_t(len(encoded) + 1),
        c_int(0),
        c_int(0),
        c_int(0),
        c_uint(0),
        c_void_p(0),
        c_void_p(0),
    )
    if status != 0:
        raise RuntimeError("Failed to synthesize text")
    print("Text synthesized")

    # Wait for synthesis to complete. The call returns an integer status code,
    # so declare it as such and check it instead of discarding it.
    dll.espeak_Synchronize.restype = c_int
    if dll.espeak_Synchronize() != 0:
        raise RuntimeError("Failed to synchronize synthesis")
    print("Synthesis synchronized")

if __name__ == "__main__":
    # Echo the failure message on stdout, then let the original exception
    # propagate so the traceback and non-zero exit status are preserved.
    try:
        main()
    except Exception as error:
        print(f"Exception: {error!s}\n")
        raise

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions