feat: 支持音色切换

This commit is contained in:
2025-07-01 01:27:29 +08:00
parent faa4ca20b1
commit ec6bd7db88
14 changed files with 308 additions and 57 deletions

View File

@@ -340,55 +340,51 @@ async def save_audio_file(audio_data: bytes, filename: str) -> str:
# 处理单个TTS任务
async def process_tts_task(websocket, message_id: str, text: str):
async def process_tts_task(websocket, message_id: str, text: str, speaker: str = None):
"""处理单个TTS任务独立协程"""
tts_state = None
try:
print(f"开始处理TTS任务 [{message_id}]: {text}")
# 使用传入的speaker如果没有则使用默认的
selected_speaker = speaker if speaker else SPEAKER
try:
print(f"开始处理TTS任务 [{message_id}]: {text}, 使用说话人: {selected_speaker}")
# 获取TTS状态
tts_state = tts_manager.get_tts_state(websocket, message_id)
if not tts_state:
raise Exception(f"找不到TTS状态: {message_id}")
tts_state.is_processing = True
# 生成音频文件名
tts_state.audio_filename = generate_audio_filename()
# 创建独立的TTS连接
tts_state.volc_ws = await create_tts_connection()
# 创建会话
tts_state.session_id = uuid.uuid4().__str__().replace('-', '')
tts_manager.register_session(tts_state.session_id, message_id)
print(f"创建TTS会话 [{message_id}]: {tts_state.session_id}")
header = Header(message_type=FULL_CLIENT_REQUEST,
message_type_specific_flags=MsgTypeFlagWithEvent,
serial_method=JSON).as_bytes()
optional = Optional(event=EVENT_StartSession, sessionId=tts_state.session_id).as_bytes()
payload = get_payload_bytes(event=EVENT_StartSession, speaker=SPEAKER)
# 使用选择的speaker
payload = get_payload_bytes(event=EVENT_StartSession, speaker=selected_speaker)
await send_event(tts_state.volc_ws, header, optional, payload)
raw_data = await tts_state.volc_ws.recv()
res = parser_response(raw_data)
if res.optional.event != EVENT_SessionStarted:
raise Exception("TTS会话启动失败")
print(f"TTS会话创建成功 [{message_id}]: {tts_state.session_id}")
# 发送文本到TTS服务
print(f"发送文本到TTS服务 [{message_id}]...")
header = Header(message_type=FULL_CLIENT_REQUEST,
message_type_specific_flags=MsgTypeFlagWithEvent,
serial_method=JSON).as_bytes()
optional = Optional(event=EVENT_TaskRequest, sessionId=tts_state.session_id).as_bytes()
payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=SPEAKER)
# 使用选择的speaker
payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=selected_speaker)
await send_event(tts_state.volc_ws, header, optional, payload)
# 接收TTS响应并发送到前端
print(f"开始接收TTS响应 [{message_id}]...")
audio_count = 0
try:
while True:
raw_data = await asyncio.wait_for(
@@ -396,17 +392,13 @@ async def process_tts_task(websocket, message_id: str, text: str):
timeout=30
)
res = parser_response(raw_data)
print(f"收到TTS事件 [{message_id}]: {res.optional.event}")
if res.optional.event == EVENT_TTSSentenceEnd:
print(f"句子结束事件 [{message_id}] - 直接完成")
break
elif res.optional.event == EVENT_SessionFinished:
print(f"收到会话结束事件 [{message_id}]")
break
elif res.optional.event == EVENT_TTSResponse:
audio_count += 1
print(f"收到音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}")
@@ -423,10 +415,8 @@ async def process_tts_task(websocket, message_id: str, text: str):
})
else:
print(f"未知TTS事件 [{message_id}]: {res.optional.event}")
except asyncio.TimeoutError:
print(f"TTS响应超时 [{message_id}],强制结束")
# 异步保存音频文件
if tts_state.audio_data:
file_path = await save_audio_file(
@@ -435,15 +425,15 @@ async def process_tts_task(websocket, message_id: str, text: str):
)
print(f"音频文件已保存 [{message_id}]: {file_path}")
# 发送完成消息,包含文件路径
# 发送完成消息,包含文件路径和使用的speaker
await websocket.send_json({
"type": "tts_audio_complete",
"messageId": message_id,
"audioFile": tts_state.audio_filename,
"audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None
"audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None,
"speaker": selected_speaker
})
print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包")
print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包,使用说话人: {selected_speaker}")
except asyncio.CancelledError:
print(f"TTS任务被取消 [{message_id}]")
await websocket.send_json({
@@ -474,14 +464,14 @@ async def process_tts_task(websocket, message_id: str, text: str):
# 启动TTS文本转换
async def handle_tts_text(websocket, message_id: str, text: str, speaker: str = None):
    """Start text-to-speech conversion for one message as a background task.

    Registers a new TTS state for ``(websocket, message_id)`` through
    ``tts_manager.add_tts_state`` and schedules ``process_tts_task`` with
    ``asyncio.create_task``. The coroutine returns as soon as the task has
    been scheduled; it does not await the synthesis itself.

    Args:
        websocket: Client connection, passed to the state manager and to the
            worker task.
        message_id: Identifier of the message being synthesized.
        text: Text to convert to speech.
        speaker: Voice to synthesize with. When ``None`` or empty,
            ``process_tts_task`` falls back to the module-level ``SPEAKER``.
    """
    # Create a fresh TTS state for this message.
    tts_state = tts_manager.add_tts_state(websocket, message_id)

    # Schedule the worker and forward the requested speaker. The task handle
    # is stored on the state object so it stays referenced while it runs
    # (presumably also so it can be cancelled later; confirm against callers).
    tts_state.task = asyncio.create_task(
        process_tts_task(websocket, message_id, text, speaker)
    )