feat: 完成大部分功能开发

This commit is contained in:
2025-07-01 00:00:31 +08:00
parent ac5e68f5a5
commit ac549bd939
17 changed files with 278 additions and 382 deletions

View File

@@ -374,8 +374,9 @@ async def process_tts_task(websocket, message_id: str, text: str):
elif res.optional.event == EVENT_TTSResponse:
audio_count += 1
print(f"发送音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}")
# 发送音频数据包含消息ID
# 发送音频数据
await websocket.send_json({
"id": audio_count,
"type": "tts_audio_data",
"messageId": message_id,
"audioData": res.payload.hex() # 转为hex字符串

View File

@@ -0,0 +1,105 @@
import json
import aiohttp
import asyncio
from fastapi.encoders import jsonable_encoder
from starlette.websockets import WebSocket
from . import tts
from app.constants.model_data import tip_message, base_url, headers
async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str):
try:
print(f"开始处理语音对话 [{message_id}]: {asr_text}")
# 1. 发送ASR识别结果到前端
await websocket.send_json({
"type": "asr_result",
"messageId": message_id,
"result": asr_text
})
# 2. 构建LLM请求
messages = [
tip_message,
{"role": "user", "content": asr_text}
]
payload = {
"model": "gpt-4o",
"messages": messages,
"stream": True
}
print(f"发送LLM请求 [{message_id}]: {json.dumps(payload, ensure_ascii=False)}")
# 3. 流式处理LLM响应
full_response = ""
llm_completed = False
async with aiohttp.ClientSession() as session:
async with session.post(
base_url,
headers=headers,
json=jsonable_encoder(payload)
) as resp:
if resp.status != 200:
error_text = await resp.text()
raise Exception(f"LLM API请求失败: {resp.status} - {error_text}")
# 读取流式响应
async for line in resp.content:
if line:
line = line.decode('utf-8').strip()
if line.startswith('data: '):
data = line[6:].strip()
if data == '[DONE]':
llm_completed = True
print(f"LLM响应完成 [{message_id}]")
break
try:
result = json.loads(data)
# 提取内容
choices = result.get("choices", [])
if not choices:
# 跳过空choices数据包
continue
delta = choices[0].get("delta", {})
content = delta.get("content")
if content:
full_response += content
except json.JSONDecodeError as e:
print(f"JSON解析错误 [{message_id}]: {e}, 数据: {data}")
continue
except Exception as e:
print(f"处理数据包异常 [{message_id}]: {e}, 数据: {data}")
continue
# 4. LLM生成完成后启动完整的TTS处理
if llm_completed and full_response:
print(f"LLM生成完成 [{message_id}], 总内容长度: {len(full_response)}")
print(f"完整内容: {full_response}")
# 发送完成消息
await websocket.send_json({
"type": "llm_complete_response",
"messageId": message_id,
"content": full_response
})
# 启动TTS处理完整内容
print(f"启动完整TTS处理 [{message_id}]: {full_response}")
await tts.handle_tts_text(websocket, message_id, full_response)
except Exception as e:
print(f"语音对话处理异常 [{message_id}]: {e}")
import traceback
traceback.print_exc()
await websocket.send_json({
"type": "voice_conversation_error",
"messageId": message_id,
"message": f"处理失败: {str(e)}"
})

View File

@@ -1,4 +1,6 @@
# websocket_service.py
import uuid
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from typing import Set
from aip import AipSpeech
@@ -6,6 +8,7 @@ from app.constants.asr import APP_ID, API_KEY, SECRET_KEY
import json
from . import tts
from .voice_conversation import process_voice_conversation
router = APIRouter()
active_connections: Set[WebSocket] = set()
@@ -58,7 +61,12 @@ async def websocket_online_count(websocket: WebSocket):
elif msg_type == "asr_end":
asr_text = await asr_buffer(temp_buffer)
await websocket.send_json({"type": "asr_result", "result": asr_text})
# 从data中获取messageId如果不存在则生成一个新的ID
message_id = data.get("messageId", "voice_" + str(uuid.uuid4()))
if data.get("voiceConversation"):
await process_voice_conversation(websocket, asr_text, message_id)
else:
await websocket.send_json({"type": "asr_result", "result": asr_text})
temp_buffer = bytes()
# TTS处理