Spaces:
Runtime error
Runtime error
| import spaces | |
| import sys | |
| import torch | |
| import gradio as gr | |
| import opencc | |
| # 添加第三方库路径 | |
| sys.path.append('third_party/Matcha-TTS') | |
| from cosyvoice.cli.cosyvoice import CosyVoice2 | |
| from cosyvoice.utils.file_utils import load_wav | |
| from huggingface_hub import hf_hub_download | |
# Text converter built from OpenCC's 's2t.json' configuration
# (Simplified -> Traditional Chinese). tts_inference runs the input text
# through it before synthesis.
converter = opencc.OpenCC('s2t.json')

# Both checkpoints are constructed with the same settings: every `load_*`
# flag and `fp16` are set to False.
_COSYVOICE_OPTIONS = {
    'load_jit': False,
    'load_trt': False,
    'load_vllm': False,
    'fp16': False,
}

# Load the models eagerly at import time; the prints mark progress in the
# application log.
cosyvoice_base = CosyVoice2('ASLP-lab/Cosyvoice2-Yue', **_COSYVOICE_OPTIONS)
print('load model 1')

cosyvoice_zjg = CosyVoice2(
    'ASLP-lab/Cosyvoice2-Yue-ZoengJyutGaai', **_COSYVOICE_OPTIONS
)
print('load model 2')

# A third checkpoint is currently disabled:
# cosyvoice_biaobei = CosyVoice2(
#     'pretrained_models/CosyVoice2-yue-biaobei',
#     load_jit=False, load_trt=False, load_vllm=False, fp16=False
# )
def tts_inference(model_choice, text, prompt_audio):
    """Synthesize speech for ``text`` with the selected CosyVoice2 model.

    Args:
        model_choice: Label chosen in the model dropdown.
        text: Text to synthesize. It is passed through the module-level
            OpenCC ``converter`` before inference.
        prompt_audio: File path of the uploaded reference audio. Only used
            by "CosyVoice2-base"; the other choices replace it with a
            bundled asset file.

    Returns:
        A ``(audio, status)`` pair. On success ``audio`` is
        ``(sample_rate, waveform)`` with ``waveform`` a NumPy array and
        ``status`` a success message containing the converted text. When
        the request is rejected (empty text, unknown model, missing
        reference audio) or the model produces no audio, ``audio`` is
        ``None`` and ``status`` explains why.

    NOTE(review): ``spaces`` is imported by this file but ``spaces.GPU`` is
    not applied anywhere in the visible code; if this runs on ZeroGPU
    hardware this function presumably needs that decorator -- confirm.
    """
    # Reject empty input up front: with nothing to synthesize the model
    # yields no chunks and torch.cat would raise on an empty list.
    if text is None or not text.strip():
        return None, "请输入文本"

    # Pick the model and, for the fixed-voice choices, the bundled prompt.
    if model_choice == "CosyVoice2-张悦楷粤语评书":
        model = cosyvoice_zjg
        prompt_audio = "asset/sg_017_090.wav"
    elif model_choice == "CosyVoice2-精品女音":
        model = cosyvoice_base
        prompt_audio = "asset/F01_中立_20054.wav"
    elif model_choice == "CosyVoice2-base":
        # The base model has no bundled voice, so an upload is mandatory.
        if prompt_audio is None:
            return None, "请上传参考音频"
        model = cosyvoice_base
    else:
        return None, "未知模型"

    # NOTE(review): relies on the wrapped model object exposing `.cuda()`;
    # verify against the CosyVoice version vendored with this app.
    model.model.cuda()

    # Convert the text with the module-level OpenCC converter.
    text = converter.convert(text)
    prompt_speech_16k = load_wav(prompt_audio, 16000)

    # Non-streaming inference may still yield several chunks; collect all.
    all_speech = [
        chunk['tts_speech']
        for chunk in model.inference_instruct2(
            text, "用粤语说这句话", prompt_speech_16k, stream=False
        )
    ]
    if not all_speech:
        return None, "未生成任何音频"

    # Chunks are joined along dim 1 (presumably the sample axis of a
    # (1, samples) tensor -- confirm), then the leading axis is dropped.
    concatenated_speech = torch.cat(all_speech, dim=1)
    audio_numpy = concatenated_speech.squeeze(0).cpu().numpy()
    sample_rate = model.sample_rate
    return (sample_rate, audio_numpy), f"生成成功:{text}"
# ---- Gradio Interface ----
# Input components, in the positional order of tts_inference's parameters
# (model_choice, text, prompt_audio). The dropdown offers two of the three
# labels tts_inference recognises; "CosyVoice2-精品女音" is not exposed here.
_demo_inputs = [
    gr.Dropdown(
        ["CosyVoice2-base", "CosyVoice2-张悦楷粤语评书"],
        label="选择模型",
        value="CosyVoice2-base",
    ),
    gr.Textbox(lines=2, label="输入文本"),
    # Upload-only audio input, handed to the callback as a file path.
    gr.Audio(
        sources=["upload"],
        type="filepath",
        label="上传参考音频(仅 CosyVoice2-base 必需)",
    ),
]

# Output components, matching the (audio, status) pair the callback returns.
_demo_outputs = [
    gr.Audio(type="numpy", label="生成的语音"),
    gr.Textbox(label="状态信息"),
]

demo = gr.Interface(
    fn=tts_inference,
    inputs=_demo_inputs,
    outputs=_demo_outputs,
)

# Start the web app as soon as the script runs.
demo.launch()