# Practical_Training_Assignment/backend/app/api/v1/endpoints/voice_conversation.py

import json
import aiohttp
import asyncio
from fastapi.encoders import jsonable_encoder
from starlette.websockets import WebSocket

from . import tts
from app.constants.model_data import tip_message, base_url, headers


def _extract_delta_content(data: str, message_id: str) -> str:
    """Return the text fragment carried by one streamed chat-completion packet.

    Args:
        data: The JSON payload of a single SSE ``data:`` line.
        message_id: Conversation message id, used only to tag log output.

    Returns:
        The ``choices[0].delta.content`` string, or ``""`` when the packet has
        no choices, carries no textual content, or cannot be parsed. Bad
        packets are logged and skipped so that a single malformed packet does
        not abort the whole stream.
    """
    try:
        result = json.loads(data)
        choices = result.get("choices", [])
        if not choices:
            # Skip packets whose choices list is empty.
            return ""
        content = choices[0].get("delta", {}).get("content")
    except json.JSONDecodeError as e:
        print(f"JSON解析错误 [{message_id}]: {e}, 数据: {data}")
        return ""
    except Exception as e:
        print(f"处理数据包异常 [{message_id}]: {e}, 数据: {data}")
        return ""
    # Only textual content can be appended to the reply.
    return content if isinstance(content, str) else ""


async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str):
    """Run one voice-conversation turn: ASR text -> LLM reply -> TTS.

    Steps:
        1. Echo the ASR text to the client as an ``asr_result`` message.
        2. POST a streaming chat-completion request built from ``tip_message``
           and the ASR text to ``base_url``.
        3. Accumulate the streamed ``delta.content`` fragments until the
           ``[DONE]`` marker is received.
        4. Send the full reply as ``llm_complete_response`` and pass it to
           ``tts.handle_tts_text``.

    Any failure (non-200 status, a stream that ends without ``[DONE]``, an
    empty reply, or an error raised by a later step) is logged and reported
    to the client as a ``voice_conversation_error`` message instead of being
    raised to the caller.

    Args:
        websocket: Client connection that receives the progress messages.
        asr_text: Recognized user speech, used as the user message.
        message_id: Identifier echoed back in every message sent to the client.
    """
    try:
        print(f"开始处理语音对话 [{message_id}]: {asr_text}")

        # 1. Send the ASR recognition result to the front end.
        await websocket.send_json({
            "type": "asr_result",
            "messageId": message_id,
            "result": asr_text
        })

        # 2. Build the LLM request.
        messages = [
            tip_message,
            {"role": "user", "content": asr_text}
        ]
        payload = {
            "model": "gpt-4o",
            "messages": messages,
            "stream": True
        }

        print(f"发送LLM请求 [{message_id}]: {json.dumps(payload, ensure_ascii=False)}")

        # 3. Consume the streamed LLM response.
        fragments = []
        llm_completed = False

        async with aiohttp.ClientSession() as session:
            async with session.post(
                    base_url,
                    headers=headers,
                    json=jsonable_encoder(payload)
            ) as resp:
                if resp.status != 200:
                    error_text = await resp.text()
                    raise Exception(f"LLM API请求失败: {resp.status} - {error_text}")

                async for raw_line in resp.content:
                    line = raw_line.decode('utf-8').strip()
                    # The space after "data:" is optional in the SSE format,
                    # so accept both "data: {...}" and "data:{...}".
                    if not line.startswith('data:'):
                        continue

                    data = line[5:].strip()
                    if data == '[DONE]':
                        llm_completed = True
                        print(f"LLM响应完成 [{message_id}]")
                        break

                    fragments.append(_extract_delta_content(data, message_id))

        full_response = "".join(fragments)

        # A stream that stops before [DONE], or a reply with no text, must be
        # reported; otherwise the client would wait for a response forever.
        if not llm_completed:
            raise RuntimeError("LLM响应流在收到[DONE]之前结束")
        if not full_response:
            raise RuntimeError("LLM未返回任何内容")

        # 4. The LLM reply is complete: publish it and start TTS on the full text.
        print(f"LLM生成完成 [{message_id}], 总内容长度: {len(full_response)}")
        print(f"完整内容: {full_response}")

        await websocket.send_json({
            "type": "llm_complete_response",
            "messageId": message_id,
            "content": full_response
        })

        print(f"启动完整TTS处理 [{message_id}]: {full_response}")
        await tts.handle_tts_text(websocket, message_id, full_response)

    except Exception as e:
        print(f"语音对话处理异常 [{message_id}]: {e}")
        import traceback
        traceback.print_exc()
        try:
            await websocket.send_json({
                "type": "voice_conversation_error",
                "messageId": message_id,
                "message": f"处理失败: {str(e)}"
            })
        except Exception as send_error:
            # The socket may already be closed (possibly the cause of the
            # original failure); reporting is best-effort and must not raise.
            print(f"发送错误消息失败 [{message_id}]: {send_error}")