MockFlow-AI/audio_cache.py at main · PranavMishra17/MockFlow-AI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
Audio Cache Module

Handles pre-generated welcome audio playback for each interview track.
Audio files are pre-committed to static/audio/ and played at session start
instead of generating TTS each time.

If a file is missing, falls back to in-session TTS generation (logs WARNING).
"""

import os
import logging
from typing import Optional

logger = logging.getLogger(__name__)

# Pre-generated welcome clip for each interview track. Every file follows the
# same naming pattern, so the table is derived from the list of track names.
WELCOME_AUDIO_FILES = {
    track: f'static/audio/welcome_{track}.mp3'
    for track in ('intro', 'behavioral', 'technical_voice', 'coding')
}

# Opening sentences shared by every track; only the interview name varies.
_GREETING = "Welcome to your {interview}. I'm Alex, your AI interviewer. "

# Spoken welcome text per track. This is the text sent to TTS when a welcome
# clip has to be generated (either ahead of time or as a fallback).
WELCOME_SCRIPTS = {
    'intro': _GREETING.format(interview='mock interview') + (
        "We'll go through a few stages: you'll introduce yourself, "
        "discuss your past experience, explore how you fit the role, "
        "and wrap up. Let's get started!"
    ),
    'behavioral': _GREETING.format(interview='behavioral mock interview') + (
        "In this session, I'll ask you behavioral questions "
        "about your past experiences. Think about specific situations, "
        "your actions, and the results you achieved. Ready when you are!"
    ),
    'technical_voice': _GREETING.format(interview='technical mock interview') + (
        "We'll be exploring your technical knowledge through conceptual "
        "questions. No coding today - just explain your understanding of "
        "the topics we'll cover. Let's begin!"
    ),
    'coding': _GREETING.format(interview='technical coding interview') + (
        "You'll be solving coding problems while thinking aloud. "
        "I'll be here if you need a nudge. Good luck!"
    ),
}


def get_welcome_audio_bytes(track_type: str) -> Optional[bytes]:
    """
    Return the bytes of the pre-generated welcome audio for the given track.

    The file is opened directly instead of being probed with
    os.path.exists() first, so a file that disappears between a check and
    the read is still reported as missing rather than as a read failure.

    Args:
        track_type: 'intro', 'behavioral', 'technical_voice', or 'coding'

    Returns:
        Audio bytes, or None if no file is configured for the track, the
        file does not exist, or it cannot be read (caller should fall back
        to TTS).
    """
    file_path = WELCOME_AUDIO_FILES.get(track_type)
    if not file_path:
        logger.warning(f"[AUDIO] No welcome audio file configured for track: {track_type}")
        return None

    # Configured paths are relative to the directory containing this module.
    abs_path = os.path.join(os.path.dirname(__file__), file_path)

    try:
        with open(abs_path, 'rb') as f:
            audio_bytes = f.read()
    except (FileNotFoundError, NotADirectoryError):
        # The path (or one of its parent components) is absent: this is the
        # expected "not pre-generated" case, so it is only a warning.
        logger.warning(f"[AUDIO] Welcome audio file missing: {abs_path}. Will use TTS fallback.")
        return None
    except OSError as e:
        # The path exists but could not be read (permissions, is a directory,
        # I/O error, ...).
        logger.error(f"[AUDIO] Failed to read welcome audio file {abs_path}: {e}")
        return None

    logger.info(f"[AUDIO] Loaded welcome audio for track '{track_type}': {len(audio_bytes)} bytes")
    return audio_bytes


def get_welcome_script(track_type: str) -> str:
    """
    Look up the welcome speech text for a track.

    This is the text handed to TTS when no pre-generated audio file is
    available for the track.

    Args:
        track_type: Track type string

    Returns:
        The script registered for track_type, or the 'intro' script when the
        track type has no entry of its own
    """
    # Resolved up front so an unrecognised track always has a script to use.
    default_script = WELCOME_SCRIPTS['intro']
    if track_type in WELCOME_SCRIPTS:
        return WELCOME_SCRIPTS[track_type]
    return default_script


def generate_and_cache_welcome_audio(track_type: str, openai_api_key: str) -> bool:
    """
    Generate welcome audio via OpenAI TTS and save to static/audio/.
    Used as a one-time setup script or runtime fallback.

    The audio is first written to a temporary sibling file and then moved
    into place with os.replace(), so a failed or interrupted write never
    leaves a truncated file at the path get_welcome_audio_bytes() reads.

    Args:
        track_type: Track type string (must be a key of WELCOME_AUDIO_FILES)
        openai_api_key: OpenAI API key for TTS

    Returns:
        True if the audio was generated and saved; False for an unknown
        track type or on any failure (the error is logged, not raised)
    """
    # Path of the partially written file, if any, so it can be cleaned up.
    tmp_path = None
    try:
        # Validate the track before importing or constructing the client, so
        # an unknown track is reported as such even if openai is unavailable.
        file_path = WELCOME_AUDIO_FILES.get(track_type)
        if not file_path:
            logger.error(f"[AUDIO] Unknown track type: {track_type}")
            return False

        # Imported here so this module can be loaded without the openai
        # package installed.
        from openai import OpenAI
        client = OpenAI(api_key=openai_api_key)

        script = get_welcome_script(track_type)
        abs_path = os.path.join(os.path.dirname(__file__), file_path)
        os.makedirs(os.path.dirname(abs_path), exist_ok=True)

        response = client.audio.speech.create(
            model='tts-1',
            voice='alloy',
            input=script,
        )

        # The process id in the name keeps concurrent processes from writing
        # to the same temporary file.
        tmp_path = f"{abs_path}.{os.getpid()}.tmp"
        with open(tmp_path, 'wb') as f:
            f.write(response.content)

        # Same-directory rename: the destination is swapped in one step.
        os.replace(tmp_path, abs_path)
        tmp_path = None  # Nothing left to clean up.

        logger.info(f"[AUDIO] Generated and cached welcome audio for track '{track_type}': {abs_path}")
        return True

    except Exception as e:
        # Deliberately broad: network, API, import and filesystem errors all
        # mean "no cached audio", which callers handle via the False return.
        logger.error(f"[AUDIO] Failed to generate welcome audio for track '{track_type}': {e}")
        return False

    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; the temp file may never have been
                # created, and a leftover .tmp file is never read.
                pass