Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 189 additions & 0 deletions voice/google/google_voice2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import os
import time
import uuid
from google.cloud import speech
from google.cloud import texttospeech
from google.api_core.exceptions import GoogleAPIError
from pydub import AudioSegment
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice

# Configure Google Cloud credentials: point the SDK at the service-account
# key file expected to sit in the same directory as this module.
# NOTE(review): this runs at import time and overwrites any value of
# GOOGLE_APPLICATION_CREDENTIALS that was already set for the process.
_module_dir = os.path.dirname(__file__)
cred_path = os.path.join(_module_dir, "google-credentials.json")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cred_path
Comment on lines +14 to +15
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caution

🚨 导入模块即设置 GOOGLE_APPLICATION_CREDENTIALS,且强依赖仓库目录内凭据文件,存在安全/部署风险与副作用

模块顶层拼接 google-credentials.json 路径并写入 os.environ["GOOGLE_APPLICATION_CREDENTIALS"]:1) 导入即产生全局副作用,影响进程内其他 Google SDK 客户端/模块;2) 强依赖代码目录存在凭据文件,容器/线上环境/只读文件系统下易失效;3) 诱导将密钥文件放入仓库目录,凭据泄露风险高。更合理的是使用 Application Default Credentials(ADC);如需指定 key file,应通过配置/参数显式传入,并在初始化时用显式凭据创建 client,而非改全局环境变量。

建议: 移除模块顶层环境变量写入;在 `__init__` 中支持通过环境变量/配置传入 key file 路径,并使用 service_account.Credentials.from_service_account_file 创建客户端;未提供则走默认凭据(ADC)。

Suggested change
cred_path = os.path.join(os.path.dirname(__file__), "google-credentials.json")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cred_path
import os
import time
import uuid
from google.cloud import speech
from google.cloud import texttospeech
from google.api_core.exceptions import GoogleAPIError
from google.oauth2 import service_account
from pydub import AudioSegment
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
class GoogleVoice(Voice):
    """Voice backend backed by Google Cloud Speech-to-Text and Text-to-Speech."""

    def __init__(self, credentials_path: str | None = None):
        """Create the Speech-to-Text and Text-to-Speech clients.

        Args:
            credentials_path: Path to a service-account key file. When omitted,
                the GOOGLE_APPLICATION_CREDENTIALS environment variable is
                consulted; when that is empty too, both clients are built with
                no explicit credentials.
        """
        super().__init__()
        key_file = credentials_path or os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

        # Build the keyword arguments once so both clients share one code path:
        # explicit credentials when a key file is known, nothing otherwise.
        client_kwargs = {}
        if key_file:
            client_kwargs["credentials"] = (
                service_account.Credentials.from_service_account_file(key_file)
            )

        self.speech_client = speech.SpeechClient(**client_kwargs)
        self.tts_client = texttospeech.TextToSpeechClient(**client_kwargs)


class GoogleVoice(Voice):
def __init__(self):
    """Set up the two Google Cloud API clients used by this voice backend.

    Both clients are constructed without arguments.
    NOTE(review): presumably they pick up credentials from the
    GOOGLE_APPLICATION_CREDENTIALS variable this module sets at import
    time -- confirm against the Google client library behaviour.
    """
    super().__init__()
    # Speech-to-Text client; voiceToText calls recognize() on it.
    self.speech_client = speech.SpeechClient()
    # Text-to-Speech client; textToVoice calls synthesize_speech() on it.
    self.tts_client = texttospeech.TextToSpeechClient()

def convert_audio_to_wav(self, input_file_path, output_file_path="temp_audio.wav"):
    """Convert an audio file to a 16 kHz, mono, 16-bit PCM WAV file.

    The output format is pinned on purpose: voiceToText sends the result to
    the recognizer with a LINEAR16 / 16000 Hz config, so the file written
    here has to match those parameters exactly.

    Args:
        input_file_path: Path of the audio file to convert (AMR or MP3 in
            the existing caller).
        output_file_path: Path where the WAV file is written.

    Returns:
        ``output_file_path`` on success, or ``None`` when decoding or
        exporting fails (the error is logged, not raised).
    """
    try:
        audio = AudioSegment.from_file(input_file_path)
        # Resampling and down-mixing alone do not fix the sample width; force
        # 2 bytes per sample so the export really is 16-bit PCM (LINEAR16).
        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
        audio.export(output_file_path, format="wav")
        return output_file_path
    except Exception as e:
        # Best-effort conversion: callers treat None as "conversion failed".
        logger.error(f"音频转换失败: {e}")
        return None

def voiceToText(self, voice_file):
    """Transcribe a Mandarin audio file (AMR, MP3 or WAV) to text.

    Every supported input is first converted to a temporary WAV file via
    convert_audio_to_wav, then sent to Google Speech-to-Text with a
    LINEAR16 / 16000 Hz / cmn-CN config. The temporary file is created in
    the current working directory and is removed on every exit path.

    Args:
        voice_file: Path of the input audio file; the extension decides
            whether the format is supported.

    Returns:
        Reply: ``ReplyType.TEXT`` carrying the transcript on success, or
        ``ReplyType.ERROR`` with a message when the format is unsupported,
        conversion fails, nothing is recognized, or the API call fails.
    """
    # Kept separate from voice_file so the original path stays available for
    # logging and so cleanup never touches the caller's own file.
    temp_wav_path = None
    try:
        file_ext = os.path.splitext(voice_file)[1].lower()
        if file_ext not in (".amr", ".mp3", ".wav"):
            logger.error("不支持的音频格式,仅支持 AMR、MP3 和 WAV")
            return Reply(ReplyType.ERROR, "不支持的音频格式,仅支持 AMR、MP3 和 WAV")

        # WAV input is normalised as well: an arbitrary WAV may use another
        # sample rate, channel count or encoding than the config below
        # declares, which makes recognition fail or degrade.
        temp_wav_path = f"temp_audio_{uuid.uuid4().hex}.wav"
        if not self.convert_audio_to_wav(voice_file, temp_wav_path):
            logger.error("音频转换失败")
            return Reply(ReplyType.ERROR, "音频转换失败")

        with open(temp_wav_path, "rb") as audio_file:
            audio_content = audio_file.read()

        # Audio payload and recognition settings (Mandarin Chinese).
        audio = speech.RecognitionAudio(content=audio_content)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code="cmn-CN",
        )

        response = self.speech_client.recognize(config=config, audio=audio)

        # Use the first alternative of each result; results that carry no
        # alternative are skipped instead of raising IndexError.
        transcript = " ".join(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        ).strip()
        if not transcript:
            logger.error("语音识别失败:无法理解音频内容")
            return Reply(ReplyType.ERROR, "抱歉,我听不懂")

        logger.info(f"[Google] voiceToText text={transcript} voice file name={voice_file}")
        return Reply(ReplyType.TEXT, transcript)

    except GoogleAPIError as e:
        logger.error(f"语音识别失败:无法连接到 Google 语音识别服务;{e}")
        return Reply(ReplyType.ERROR, f"抱歉,无法连接到 Google 语音识别服务;{e}")
    except Exception as e:
        logger.error(f"发生错误: {e}")
        return Reply(ReplyType.ERROR, f"抱歉,我听不懂或发生错误:{e}")
    finally:
        # Runs on success, early return and exception alike, so the
        # temporary WAV can no longer be left behind.
        if temp_wav_path is not None:
            try:
                os.remove(temp_wav_path)
            except FileNotFoundError:
                # Conversion failed before anything was written.
                pass
            except OSError as e:
                logger.error(f"临时文件清理失败: {e}")

def textToVoice(self, text):
    """Synthesize Mandarin speech for ``text`` and store it as an MP3 file.

    Args:
        text: The Chinese text to speak.

    Returns:
        Reply: ``ReplyType.VOICE`` carrying the path of the written MP3
        file, or ``ReplyType.ERROR`` with a message when synthesis or
        writing the file fails.
    """
    try:
        # A timestamp plus a random token keeps output names unique.
        token = uuid.uuid4().hex
        out_path = f"{TmpDir().path()}reply-{int(time.time())}-{token}.mp3"

        # Assemble the request: the text, a Mandarin WaveNet voice, MP3 output.
        request = dict(
            input=texttospeech.SynthesisInput(text=text),
            voice=texttospeech.VoiceSelectionParams(
                language_code="cmn-CN",
                name="cmn-CN-Wavenet-A",
            ),
            audio_config=texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3
            ),
        )
        result = self.tts_client.synthesize_speech(**request)

        # Persist the returned audio bytes.
        with open(out_path, "wb") as fh:
            fh.write(result.audio_content)
        logger.info(f"[Google] textToVoice text={text} voice file name={out_path}")

        return Reply(ReplyType.VOICE, out_path)

    except GoogleAPIError as err:
        logger.error(f"文字转语音失败: {err}")
        return Reply(ReplyType.ERROR, f"抱歉,无法连接到 Google 文字转语音服务;{err}")
    except Exception as err:
        logger.error(f"发生错误: {err}")
        return Reply(ReplyType.ERROR, f"发生错误:{err}")


"""
语言代码: cmn-CN
名称: cmn-CN-Chirp3-HD-Achernar, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Achird, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Algenib, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Algieba, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Alnilam, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Aoede, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Autonoe, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Callirrhoe, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Charon, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Despina, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Enceladus, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Erinome, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Fenrir, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Gacrux, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Iapetus, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Kore, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Laomedeia, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Leda, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Orus, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Puck, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Pulcherrima, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Rasalgethi, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Sadachbia, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Sadaltager, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Schedar, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Sulafat, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Umbriel, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Vindemiatrix, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Zephyr, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Chirp3-HD-Zubenelgenubi, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Standard-A, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Standard-B, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Standard-C, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Standard-D, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Wavenet-A, 性别: FEMALE, 采样率: 24000Hz
名称: cmn-CN-Wavenet-B, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Wavenet-C, 性别: MALE, 采样率: 24000Hz
名称: cmn-CN-Wavenet-D, 性别: FEMALE, 采样率: 24000Hz
"""

1 change: 1 addition & 0 deletions voice/google/note.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
需要在 Google Cloud 控制台创建授权项目,分配 IAM 角色和权限,下载自己的密钥文件,把密钥文件命名为 google-credentials.json 放在本目录(请勿将该密钥文件提交到代码仓库)。
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tip

💡 缺少安全指引:密钥文件放入代码目录有泄漏风险

note.txt 指导将密钥文件放在本目录并使用固定文件名,容易被误提交到仓库或打包进镜像,导致凭据泄漏。

建议: 补充安全指引:通过环境变量/Secret Manager/挂载方式提供凭据并确保被 .gitignore 忽略;推荐使用 ADC 或工作负载身份。