-
Notifications
You must be signed in to change notification settings - Fork 10k
add a method to access Google Cloud voice API with credential file #2598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,189 @@ | ||
| import os | ||
| import time | ||
| import uuid | ||
| from google.cloud import speech | ||
| from google.cloud import texttospeech | ||
| from google.api_core.exceptions import GoogleAPIError | ||
| from pydub import AudioSegment | ||
| from bridge.reply import Reply, ReplyType | ||
| from common.log import logger | ||
| from common.tmp_dir import TmpDir | ||
| from voice.voice import Voice | ||
|
|
||
# Configure Google Cloud credentials from a service-account key file that
# may be placed next to this module.
cred_path = os.path.join(os.path.dirname(__file__), "google-credentials.json")
# Only point the SDK at the bundled key when it actually exists. Writing a
# non-existent path into GOOGLE_APPLICATION_CREDENTIALS would clobber any
# value the deployment already configured and make client creation fail
# instead of falling back to Application Default Credentials.
if os.path.isfile(cred_path):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cred_path
|
|
||
class GoogleVoice(Voice):
    """Voice backend using Google Cloud Speech-to-Text and Text-to-Speech.

    Both directions are configured for Mandarin Chinese (``cmn-CN``).
    """

    def __init__(self):
        """Create the Speech-to-Text and Text-to-Speech API clients."""
        super().__init__()
        self.speech_client = speech.SpeechClient()
        self.tts_client = texttospeech.TextToSpeechClient()

    def convert_audio_to_wav(self, input_file_path, output_file_path="temp_audio.wav"):
        """Convert an audio file to 16 kHz, mono, 16-bit PCM WAV.

        Args:
            input_file_path: Path of the source audio file (e.g. AMR, MP3, WAV).
            output_file_path: Path the converted WAV file is written to.

        Returns:
            ``output_file_path`` on success, or ``None`` if decoding or
            exporting failed (the error is logged).
        """
        try:
            audio = AudioSegment.from_file(input_file_path)
            # The recognizer is configured for LINEAR16 at 16000 Hz, so force
            # the sample rate, channel count and 16-bit (2-byte) sample width
            # instead of inheriting whatever the source file used.
            audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
            audio.export(output_file_path, format="wav")
            return output_file_path
        except Exception as e:
            logger.error(f"音频转换失败: {e}")
            return None

    def voiceToText(self, voice_file):
        """Transcribe a Mandarin audio file (AMR, MP3 or WAV) to text.

        Every supported input, including WAV, is first normalised to a
        temporary 16 kHz mono 16-bit WAV so the audio always matches the
        recognition config. The temporary file is removed on every exit path.

        Args:
            voice_file: Path of the input audio file.

        Returns:
            A ``Reply`` of type ``TEXT`` carrying the transcript, or of type
            ``ERROR`` carrying a user-facing error message.
        """
        # Kept separate from ``voice_file`` so the caller's file is never
        # mistaken for the temporary one during cleanup.
        temp_wav_path = None
        try:
            file_ext = os.path.splitext(voice_file)[1].lower()
            if file_ext not in (".amr", ".mp3", ".wav"):
                logger.error("不支持的音频格式,仅支持 AMR、MP3 和 WAV")
                return Reply(ReplyType.ERROR, "不支持的音频格式,仅支持 AMR、MP3 和 WAV")

            temp_wav_path = os.path.join(
                TmpDir().path(), f"temp_audio_{uuid.uuid4().hex}.wav"
            )
            if not self.convert_audio_to_wav(voice_file, temp_wav_path):
                logger.error("音频转换失败")
                return Reply(ReplyType.ERROR, "音频转换失败")

            with open(temp_wav_path, "rb") as audio_file:
                audio_content = audio_file.read()

            # Audio payload and recognition settings (Mandarin Chinese).
            audio = speech.RecognitionAudio(content=audio_content)
            config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=16000,
                language_code="cmn-CN",
            )

            # Run speech recognition.
            response = self.speech_client.recognize(config=config, audio=audio)

            # Join the top alternative of each result; skip results that
            # carry no alternatives rather than failing on index 0.
            transcript = " ".join(
                result.alternatives[0].transcript
                for result in response.results
                if result.alternatives
            ).strip()
            if not transcript:
                logger.error("语音识别失败:无法理解音频内容")
                return Reply(ReplyType.ERROR, "抱歉,我听不懂")

            logger.info(f"[Google] voiceToText text={transcript} voice file name={voice_file}")
            return Reply(ReplyType.TEXT, transcript)

        except GoogleAPIError as e:
            logger.error(f"语音识别失败:无法连接到 Google 语音识别服务;{e}")
            return Reply(ReplyType.ERROR, f"抱歉,无法连接到 Google 语音识别服务;{e}")
        except Exception as e:
            logger.error(f"发生错误: {e}")
            return Reply(ReplyType.ERROR, f"抱歉,我听不懂或发生错误:{e}")
        finally:
            # Remove the temporary WAV on success, early return and error
            # alike; a failed removal must not mask the reply.
            if temp_wav_path and os.path.exists(temp_wav_path):
                try:
                    os.remove(temp_wav_path)
                except OSError as e:
                    logger.warning(f"临时文件清理失败: {e}")

    def textToVoice(self, text):
        """Synthesize Mandarin speech for ``text`` and save it as an MP3 file.

        Args:
            text: The Chinese text to convert to speech.

        Returns:
            A ``Reply`` of type ``VOICE`` carrying the MP3 file path, or of
            type ``ERROR`` carrying a user-facing error message.
        """
        try:
            # Timestamp plus random id gives a unique output file name.
            unique_id = uuid.uuid4().hex
            mp3_file = f"{TmpDir().path()}reply-{int(time.time())}-{unique_id}.mp3"

            # Text to synthesize.
            synthesis_input = texttospeech.SynthesisInput(text=text)

            # Voice selection (Mandarin Chinese).
            voice = texttospeech.VoiceSelectionParams(
                language_code="cmn-CN",
                name="cmn-CN-Wavenet-A",
            )

            # Output audio format.
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3
            )

            # Run text-to-speech.
            response = self.tts_client.synthesize_speech(
                input=synthesis_input, voice=voice, audio_config=audio_config
            )

            # Persist the synthesized audio.
            with open(mp3_file, "wb") as out:
                out.write(response.audio_content)
            logger.info(f"[Google] textToVoice text={text} voice file name={mp3_file}")

            return Reply(ReplyType.VOICE, mp3_file)

        except GoogleAPIError as e:
            logger.error(f"文字转语音失败: {e}")
            return Reply(ReplyType.ERROR, f"抱歉,无法连接到 Google 文字转语音服务;{e}")
        except Exception as e:
            logger.error(f"发生错误: {e}")
            return Reply(ReplyType.ERROR, f"发生错误:{e}")
|
|
||
|
|
||
| """ | ||
| 语言代码: cmn-CN | ||
| 名称: cmn-CN-Chirp3-HD-Achernar, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Achird, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Algenib, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Algieba, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Alnilam, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Aoede, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Autonoe, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Callirrhoe, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Charon, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Despina, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Enceladus, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Erinome, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Fenrir, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Gacrux, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Iapetus, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Kore, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Laomedeia, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Leda, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Orus, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Puck, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Pulcherrima, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Rasalgethi, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Sadachbia, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Sadaltager, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Schedar, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Sulafat, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Umbriel, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Vindemiatrix, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Zephyr, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Chirp3-HD-Zubenelgenubi, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Standard-A, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Standard-B, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Standard-C, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Standard-D, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Wavenet-A, 性别: FEMALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Wavenet-B, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Wavenet-C, 性别: MALE, 采样率: 24000Hz | ||
| 名称: cmn-CN-Wavenet-D, 性别: FEMALE, 采样率: 24000Hz | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 需要在 Google Cloud 控制台创建授权项目,分配 IAM 角色和权限,下载自己的密钥文件,把密钥文件命名为 google-credentials.json 放在本目录。请勿将该密钥文件提交到代码仓库(建议加入 .gitignore)。 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tip 💡 缺少安全指引:密钥文件放入代码目录有泄漏风险 note.txt 指导将密钥文件放在本目录并使用固定文件名,容易被误提交到仓库或打包进镜像,导致凭据泄漏。 建议: 补充安全指引:通过环境变量/Secret Manager/挂载方式提供凭据并确保被 .gitignore 忽略;推荐使用 ADC 或工作负载身份。 |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Caution
🚨 导入模块即设置 GOOGLE_APPLICATION_CREDENTIALS,且强依赖仓库目录内凭据文件,存在安全/部署风险与副作用
模块顶层拼接 google-credentials.json 路径并写入 os.environ["GOOGLE_APPLICATION_CREDENTIALS"]:1) 导入即产生全局副作用,影响进程内其他 Google SDK 客户端/模块;2) 强依赖代码目录存在凭据文件,容器/线上环境/只读文件系统下易失效;3) 诱导将密钥文件放入仓库目录,凭据泄露风险高。更合理的是使用 Application Default Credentials(ADC);如需指定 key file,应通过配置/参数显式传入,并在初始化时用显式凭据创建 client,而非改全局环境变量。
建议: 移除模块顶层环境变量写入;在 init 支持通过环境变量/配置传入 key file 路径,并使用 service_account.Credentials.from_service_account_file 创建客户端;未提供则走默认凭据(ADC)。