feat: 完成大部分功能开发

2025-07-01 00:00:31 +08:00
parent ac5e68f5a5
commit ac549bd939
17 changed files with 278 additions and 382 deletions
--- a/backend/app/api/v1/endpoints/tts.py
+++ b/backend/app/api/v1/endpoints/tts.py
@@ -374,8 +374,9 @@ async def process_tts_task(websocket, message_id: str, text: str):
                elif res.optional.event == EVENT_TTSResponse:
                    audio_count += 1
                    print(f"发送音频数据 [{message_id}] #{audio_count}，大小: {len(res.payload)}")
-                    # 发送音频数据，包含消息ID
+                    # 发送音频数据
                    await websocket.send_json({
+                        "id": audio_count,
                        "type": "tts_audio_data",
                        "messageId": message_id,
                        "audioData": res.payload.hex()  # 转为hex字符串
--- a/backend/app/api/v1/endpoints/voice_conversation.py
+++ b/backend/app/api/v1/endpoints/voice_conversation.py
@@ -0,0 +1,105 @@
+import json
+import aiohttp
+import asyncio
+from fastapi.encoders import jsonable_encoder
+from starlette.websockets import WebSocket
+
+from . import tts
+from app.constants.model_data import tip_message, base_url, headers
+
+
+async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str):
+    try:
+        print(f"开始处理语音对话 [{message_id}]: {asr_text}")
+
+        # 1. 发送ASR识别结果到前端
+        await websocket.send_json({
+            "type": "asr_result",
+            "messageId": message_id,
+            "result": asr_text
+        })
+
+        # 2. 构建LLM请求
+        messages = [
+            tip_message,
+            {"role": "user", "content": asr_text}
+        ]
+        payload = {
+            "model": "gpt-4o",
+            "messages": messages,
+            "stream": True
+        }
+
+        print(f"发送LLM请求 [{message_id}]: {json.dumps(payload, ensure_ascii=False)}")
+
+        # 3. 流式处理LLM响应
+        full_response = ""
+        llm_completed = False
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                    base_url,
+                    headers=headers,
+                    json=jsonable_encoder(payload)
+            ) as resp:
+                if resp.status != 200:
+                    error_text = await resp.text()
+                    raise Exception(f"LLM API请求失败: {resp.status} - {error_text}")
+
+                # 读取流式响应
+                async for line in resp.content:
+                    if line:
+                        line = line.decode('utf-8').strip()
+                        if line.startswith('data: '):
+                            data = line[6:].strip()
+                            if data == '[DONE]':
+                                llm_completed = True
+                                print(f"LLM响应完成 [{message_id}]")
+                                break
+
+                            try:
+                                result = json.loads(data)
+                                # 提取内容
+                                choices = result.get("choices", [])
+                                if not choices:
+                                    # 跳过空choices数据包
+                                    continue
+
+                                delta = choices[0].get("delta", {})
+                                content = delta.get("content")
+
+                                if content:
+                                    full_response += content
+
+                            except json.JSONDecodeError as e:
+                                print(f"JSON解析错误 [{message_id}]: {e}, 数据: {data}")
+                                continue
+                            except Exception as e:
+                                print(f"处理数据包异常 [{message_id}]: {e}, 数据: {data}")
+                                continue
+
+        # 4. LLM生成完成后，启动完整的TTS处理
+        if llm_completed and full_response:
+            print(f"LLM生成完成 [{message_id}], 总内容长度: {len(full_response)}")
+            print(f"完整内容: {full_response}")
+
+            # 发送完成消息
+            await websocket.send_json({
+                "type": "llm_complete_response",
+                "messageId": message_id,
+                "content": full_response
+            })
+
+            # 启动TTS处理完整内容
+            print(f"启动完整TTS处理 [{message_id}]: {full_response}")
+            await tts.handle_tts_text(websocket, message_id, full_response)
+
+    except Exception as e:
+        print(f"语音对话处理异常 [{message_id}]: {e}")
+        import traceback
+        traceback.print_exc()
+        await websocket.send_json({
+            "type": "voice_conversation_error",
+            "messageId": message_id,
+            "message": f"处理失败: {str(e)}"
+        })
--- a/backend/app/api/v1/endpoints/websocket_service.py
+++ b/backend/app/api/v1/endpoints/websocket_service.py
@@ -1,4 +1,6 @@
 # websocket_service.py
+import uuid
+
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 from typing import Set
 from aip import AipSpeech
@@ -6,6 +8,7 @@ from app.constants.asr import APP_ID, API_KEY, SECRET_KEY
 import json

 from . import tts
+from .voice_conversation import process_voice_conversation

 router = APIRouter()
 active_connections: Set[WebSocket] = set()
@@ -58,7 +61,12 @@ async def websocket_online_count(websocket: WebSocket):

                    elif msg_type == "asr_end":
                        asr_text = await asr_buffer(temp_buffer)
-                        await websocket.send_json({"type": "asr_result", "result": asr_text})
+                        # 从data中获取messageId，如果不存在则生成一个新的ID
+                        message_id = data.get("messageId", "voice_" + str(uuid.uuid4()))
+                        if data.get("voiceConversation"):
+                            await process_voice_conversation(websocket, asr_text, message_id)
+                        else:
+                            await websocket.send_json({"type": "asr_result", "result": asr_text})
                        temp_buffer = bytes()

                    # TTS处理