feat: partial voice chat functionality

2025-06-30 10:49:24 +08:00
parent 06e6b4a8c9
commit ac5e68f5a5
10 changed files with 594 additions and 25 deletions

View File

@@ -9,7 +9,7 @@ from typing import Dict, Any, Optional as OptionalType
from app.constants.tts import APP_ID, TOKEN, SPEAKER
# Protocol constants remain unchanged...
# Protocol constants
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001
FULL_CLIENT_REQUEST = 0b0001
@@ -35,7 +35,7 @@ EVENT_TTSSentenceEnd = 351
EVENT_TTSResponse = 352
# All class definitions remain unchanged...
# All class definitions
class Header:
def __init__(self,
protocol_version=PROTOCOL_VERSION,
@@ -93,7 +93,7 @@ class Response:
self.payload_json = None
# Utility functions remain unchanged...
# Utility functions
def gen_log_id():
"""生成logID"""
ts = int(time.time() * 1000)
@@ -191,7 +191,7 @@ async def send_event(ws, header, optional=None, payload=None):
await ws.send(full_client_request)
# Changed: the TTS state management class now tracks a message ID and its task
# TTS state management class with message ID and task tracking
class TTSState:
def __init__(self, message_id: str):
self.message_id = message_id

View File

@@ -5,7 +5,6 @@ from aip import AipSpeech
from app.constants.asr import APP_ID, API_KEY, SECRET_KEY
import json
# Import the modified TTS module
from . import tts
router = APIRouter()
@@ -62,7 +61,7 @@ async def websocket_online_count(websocket: WebSocket):
await websocket.send_json({"type": "asr_result", "result": asr_text})
temp_buffer = bytes()
# Changed: TTS handling supports a message ID
# TTS handling
elif msg_type == "tts_text":
message_id = data.get("messageId")
text = data.get("text", "")

View File

@@ -34,13 +34,15 @@ const handleClick = () => {
ttsStore.convertText(text, messageId);
}
};
// Clean up the previous audio when the text changes
// Clean up previous audio on text change
watch(
() => text,
() => {
ttsStore.clearAudio(messageId);
}
);
onUnmounted(() => {
ttsStore.clearAudio(messageId);
});

View File

@@ -45,7 +45,20 @@ const { hiddenLeftSidebar, simpleMode } = storeToRefs(layoutStore);
"
to="/"
>
Chat
Conversation
</router-link>
<router-link
class="w-full h-[52px] px-8 flex items-center cursor-pointer"
:class="
$route.path === '/voice'
? [
'bg-[rgba(37,99,235,0.04)] text-[#0094c5] border-r-2 border-[#0094c5]'
]
: []
"
to="/voice"
>
Voice Chat
</router-link>
<div class="w-full h-full flex flex-col items-center text-[#0094c5]">

View File

@@ -2,7 +2,8 @@ import { createRouter, createWebHistory } from "vue-router";
import BasicLayout from "@/layouts/BasicLayout.vue";
import { resetDescription, setTitle } from "@/utils";
import community from "@/views/CommunityView.vue";
import ChatLLM from "@/views/ChatLLMView.vue";
import VoiceView from "@/views/VoiceView.vue";
const router = createRouter({
history: createWebHistory(import.meta.env.BASE_URL),
@@ -13,11 +14,19 @@ const router = createRouter({
children: [
{
path: "",
name: "community",
component: community,
name: "ChatLLM",
component: ChatLLM,
meta: {
title: "对话"
}
},
{
path: "/voice",
name: "Voice",
component: VoiceView,
meta: {
title: "语音对话"
}
}
]
}

View File

@@ -1,4 +1,4 @@
import { useChatStore, useTtsStore } from "@/stores";
import { useChatStore, useTtsStore, useVoiceStore } from "@/stores";
// WebSocket
export const useWebSocketStore = defineStore("websocket", () => {
@@ -6,6 +6,8 @@ export const useWebSocketStore = defineStore("websocket", () => {
const connected = ref(false);
const chatStore = useChatStore();
const ttsStore = useTtsStore();
const voiceStore = useVoiceStore();
const router = useRouter();
const { onlineCount } = storeToRefs(chatStore);
@@ -31,7 +33,14 @@ export const useWebSocketStore = defineStore("websocket", () => {
onlineCount.value = data.online_count;
break;
case "asr_result":
chatStore.addMessageToHistory(data.result);
if (router.currentRoute.value.path === "/") {
chatStore.addMessageToHistory(data.result);
} else if (router.currentRoute.value.path === "/voice") {
// On the voice page, hand the result to the VoiceStore
voiceStore.handleASRResult(data.result);
} else {
console.warn(data);
}
break;
// Handling for the new TTS message format
@@ -76,7 +85,6 @@ export const useWebSocketStore = defineStore("websocket", () => {
ttsStore.finishConversion(data.messageId);
} else {
console.log("TTS音频传输完成无messageId");
// 兜底处理,可能是旧格式
ttsStore.finishConversion(data.messageId);
}
break;
@@ -85,7 +93,6 @@ export const useWebSocketStore = defineStore("websocket", () => {
// TTS session ended
if (data.messageId) {
console.log(`TTS session ended [${data.messageId}]`);
// additional cleanup logic could go here
} else {
console.log("TTS会话结束");
}
@@ -98,19 +105,10 @@ export const useWebSocketStore = defineStore("websocket", () => {
ttsStore.handleError(data.message, data.messageId);
} else {
console.error("TTS错误:", data.message);
// 兜底处理,可能是旧格式
ttsStore.handleError(data.message, data.messageId || "unknown");
}
break;
// Keep the legacy message types as a fallback
case "tts_audio_complete_legacy":
case "tts_complete_legacy":
case "tts_error_legacy":
console.log("收到旧格式TTS消息:", data.type);
// 可以选择处理或忽略
break;
default:
console.log("未知消息类型:", data.type, data);
}
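For orientation, here is a minimal TypeScript sketch of the incoming message shape that the handler above dispatches on, reconstructed only from the fields it reads (type, online_count, result, messageId, message). Any other fields, and the exact case names of the non-legacy TTS events, are not visible in this diff and are deliberately not assumed.

// Hypothetical shape of a server-to-client WebSocket message, inferred from the handler above
interface ServerMessage {
  type: string;          // discriminator used by the switch statement
  online_count?: number; // present on online-count updates
  result?: string;       // present on "asr_result" messages
  messageId?: string;    // correlates TTS audio/completion/error events with a chat message
  message?: string;      // error text on TTS error messages
}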

View File

@@ -2,3 +2,4 @@ export * from "./asr_store";
export * from "./chat_store";
export * from "./layout_store";
export * from "./tts_store";
export * from "./voice_store";

View File

@@ -0,0 +1,293 @@
import { useWebSocketStore } from "@/services";
import { useChatStore, useTtsStore } from "@/stores";
export const useVoiceStore = defineStore("voice", () => {
// State
const isListening = ref(false); // whether voice input is currently being captured
const isProcessing = ref(false); // whether the full ASR, LLM, and TTS pipeline is in progress
const currentSessionId = ref<string | null>(null); // current session ID
// Other stores this one depends on
const chatStore = useChatStore();
const ttsStore = useTtsStore();
const wsStore = useWebSocketStore();
// Voice message history
const voiceMessages = ref<
{
id: string;
type: "user" | "assistant";
text: string;
audioId?: string;
timestamp: number;
isProcessing?: boolean;
}[]
>([]);
// ASR buffer state
const isRecording = ref(false);
const recordingStartTime = ref<number | null>(null);
const recordingMaxDuration = 60 * 1000; // maximum recording duration: 60 seconds
/**
* Start voice input
*/
const startListening = async () => {
if (isListening.value) return;
try {
await wsStore.connect();
// Create a new session ID
currentSessionId.value = new Date().getTime().toString();
isListening.value = true;
isRecording.value = true;
recordingStartTime.value = Date.now();
// Start recording - assumes a startRecording method is available
// (typically backed by the browser MediaRecorder API; see the sketch after this store)
await startRecording();
console.log("Voice input started");
} catch (error) {
console.error("启动语音输入失败:", error);
stopListening();
}
};
/**
* Stop voice input
*/
const stopListening = async () => {
if (!isListening.value) return;
try {
// Stop recording
if (isRecording.value) {
await stopRecording();
isRecording.value = false;
}
isListening.value = false;
recordingStartTime.value = null;
// Send the end-of-input signal
wsStore.send(JSON.stringify({ type: "asr_end" }));
console.log("Voice input stopped, waiting for the ASR result");
} catch (error) {
console.error("停止语音输入失败:", error);
}
};
/**
* Recording duration check
*/
const checkRecordingTime = () => {
if (isRecording.value && recordingStartTime.value) {
const currentTime = Date.now();
const duration = currentTime - recordingStartTime.value;
if (duration >= recordingMaxDuration) {
console.log("录音达到最大时长,自动停止");
stopListening();
}
}
};
// Periodically check the recording duration
let recordingTimer: any = null;
watch(isRecording, (newVal) => {
if (newVal) {
recordingTimer = setInterval(checkRecordingTime, 1000);
} else if (recordingTimer) {
clearInterval(recordingTimer);
recordingTimer = null;
}
});
/**
* Handle an ASR result
*/
const handleASRResult = async (text: string) => {
if (!text.trim()) return;
console.log("收到ASR结果:", text);
isProcessing.value = true;
// Add the user message
const userMessageId = new Date().getTime().toString();
voiceMessages.value.push({
id: userMessageId,
type: "user",
text,
timestamp: Date.now()
});
// Add a placeholder assistant message; offset the ID so it cannot collide with the user message created in the same millisecond
const assistantMessageId = (Date.now() + 1).toString();
voiceMessages.value.push({
id: assistantMessageId,
type: "assistant",
text: "",
timestamp: Date.now(),
isProcessing: true
});
// Call the LLM to generate a reply
await generateLLMResponse(text, assistantMessageId);
};
/**
* Generate the LLM reply
*/
const generateLLMResponse = async (userInput: string, responseId: string) => {
try {
console.log("生成LLM回复...");
// 构建消息历史
const messages = [
...voiceMessages.value
.filter((msg) => !msg.isProcessing)
.map((msg) => ({
role: msg.type === "user" ? "user" : "assistant",
content: msg.text
})),
{ role: "user", content: userInput }
];
let responseText = "";
// Call the ChatStore chat method
await chatStore.chatWithLLM(
{
messages,
model: chatStore.modelInfo?.model_id || ""
},
// Handle the streaming reply
(content) => {
responseText = content;
// Update the assistant message
const index = voiceMessages.value.findIndex(
(msg) => msg.id === responseId
);
if (index !== -1) {
voiceMessages.value[index].text = content;
}
}
);
// LLM generation complete; convert the reply to speech
console.log("LLM reply generated:", responseText);
await synthesizeSpeech(responseText, responseId);
} catch (error) {
console.error("生成LLM回复失败:", error);
const index = voiceMessages.value.findIndex(
(msg) => msg.id === responseId
);
if (index !== -1) {
voiceMessages.value[index].text = "抱歉,生成回复时出错";
voiceMessages.value[index].isProcessing = false;
}
isProcessing.value = false;
}
};
/**
* Convert text to speech
*/
const synthesizeSpeech = async (text: string, messageId: string) => {
try {
console.log("转换文本为语音...");
// 调用TTS生成语音
await ttsStore.convertText(text, messageId);
// Note: the TTS audio plays automatically once generation completes;
// that logic is handled in the TTS store's finishConversion method
// Update the message state
const index = voiceMessages.value.findIndex(
(msg) => msg.id === messageId
);
if (index !== -1) {
voiceMessages.value[index].audioId = messageId;
voiceMessages.value[index].isProcessing = false;
}
} catch (error) {
console.error("转换文本为语音失败:", error);
const index = voiceMessages.value.findIndex(
(msg) => msg.id === messageId
);
if (index !== -1) {
voiceMessages.value[index].isProcessing = false;
}
} finally {
isProcessing.value = false;
}
};
/**
* Clear all messages
*/
const clearMessages = () => {
voiceMessages.value = [];
};
/**
* Play the audio for a specific message
*/
const playMessageAudio = async (messageId: string) => {
const message = voiceMessages.value.find((msg) => msg.id === messageId);
if (message && message.audioId) {
await ttsStore.play(message.audioId);
}
};
/**
* Pause the currently playing audio
*/
const pauseAudio = () => {
ttsStore.pauseAll();
};
// Recording helpers - to be implemented against the actual setup,
// typically with the MediaRecorder API (see the sketch after this store)
const startRecording = async () => {
// Recording start logic goes here:
// 1. Request microphone permission
// 2. Create a MediaRecorder
// 3. On each dataavailable event, send the audio chunk over the WebSocket
console.log("Recording started...");
};
const stopRecording = async () => {
// Recording stop logic goes here
console.log("Recording stopped...");
};
// Clean up resources when the component unmounts
onUnmounted(() => {
if (isRecording.value) {
stopRecording();
}
if (recordingTimer) {
clearInterval(recordingTimer);
}
});
return {
// State
isListening,
isProcessing,
isRecording,
voiceMessages,
// Methods
startListening,
stopListening,
handleASRResult,
clearMessages,
playMessageAudio,
pauseAudio
};
});
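For reference, a minimal sketch of how the startRecording/stopRecording stubs above could be filled in with the MediaRecorder API, following the three steps listed in their comments. The module-level recorder variable, the 250 ms timeslice, and the assumption that wsStore.send accepts binary audio chunks are illustrative only and are not part of this commit.

// Hypothetical MediaRecorder-based implementation of the two stubs above
let mediaRecorder: MediaRecorder | null = null;

const startRecording = async () => {
  // 1. Request microphone permission and obtain an audio stream
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // 2. Create a MediaRecorder on that stream
  mediaRecorder = new MediaRecorder(stream);
  // 3. Forward each audio chunk to the backend as it becomes available
  mediaRecorder.ondataavailable = async (event) => {
    if (event.data.size > 0) {
      wsStore.send(await event.data.arrayBuffer()); // assumes binary frames are accepted
    }
  };
  mediaRecorder.start(250); // emit a chunk roughly every 250 ms
};

const stopRecording = async () => {
  // Stop the recorder and release the microphone
  mediaRecorder?.stop();
  mediaRecorder?.stream.getTracks().forEach((track) => track.stop());
  mediaRecorder = null;
};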

web/src/views/VoiceView.vue (new file, 254 lines)
View File

@@ -0,0 +1,254 @@
<script setup lang="ts">
import type { SelectGroupOption, SelectOption } from "naive-ui";
import type { Message } from "@/interfaces";
import { throttle } from "lodash-es";
import AIAvatar from "@/assets/ai_avatar.png";
import { ExclamationTriangleIcon, microphone, TrashIcon } from "@/assets/Icons";
import UserAvatar from "@/assets/user_avatar.jpg";
import markdown from "@/components/markdown.vue";
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
const chatStore = useChatStore();
const { historyMessages, completing, modelList, modelInfo, thinking } =
storeToRefs(chatStore);
const asrStore = useAsrStore();
const { isRecording } = storeToRefs(asrStore);
const layoutStore = useLayoutStore();
const { hiddenLeftSidebar, simpleMode } = storeToRefs(layoutStore);
const scrollbarRef = ref<HTMLElement | null>(null);
const options = ref<Array<SelectGroupOption | SelectOption>>([]);
// Expanded state for the NCollapse component
const collapseActive = ref<string[]>(
historyMessages.value.map((msg, idx) => String(msg.id ?? idx))
);
const getName = (msg: Message, idx: number) => String(msg.id ?? idx);
// TODO: bugfix: items do not expand correctly
watch(
historyMessages,
(newVal, oldVal) => {
// Collect all names
const newNames = newVal.map((msg, idx) => getName(msg, idx));
const oldNames = oldVal ? oldVal.map((msg, idx) => getName(msg, idx)) : [];
// Find the newly added names
const addedNames = newNames.filter((name) => !oldNames.includes(name));
// Keep the items that were already expanded
const currentActive = collapseActive.value.filter((name) =>
newNames.includes(name)
);
// Expand new items by default
collapseActive.value = [...currentActive, ...addedNames];
},
{ immediate: true, deep: true }
);
// Handle clicks on collapse item headers and toggle their expanded state
const handleItemHeaderClick = (name: string) => {
if (collapseActive.value.includes(name)) {
collapseActive.value = collapseActive.value.filter((n) => n !== name);
} else {
collapseActive.value.push(name);
}
};
// ID of the currently selected model
const selectedModelId = computed({
get: () => modelInfo.value?.model_id ?? null,
set: (id: string | null) => {
for (const vendor of modelList.value) {
const found = vendor.models.find((model) => model.model_id === id);
if (found) {
modelInfo.value = found;
return;
}
}
modelInfo.value = null;
}
});
// Watch the model list and update the select options
watch(
() => modelList.value,
(newVal) => {
if (newVal) {
options.value = newVal.map((vendor) => ({
type: "group",
label: vendor.vendor,
key: vendor.vendor,
children: vendor.models.map((model) => ({
label: model.model_name,
value: model.model_id,
type: model.model_type
}))
}));
if (newVal.length > 0 && newVal[0].models.length > 0) {
modelInfo.value = newVal[0].models[0];
}
}
},
{ immediate: true, deep: true }
);
// Toggle voice input
const toggleRecording = throttle(() => {
if (isRecording.value) {
asrStore.stopRecording();
} else {
asrStore.startRecording();
}
}, 500);
watch(completing, (newVal) => {
if (newVal) {
nextTick(() => {
scrollbarRef.value?.scrollTo({ top: 99999, behavior: "smooth" });
});
}
});
onMounted(() => {
chatStore.getModelList();
});
</script>
<template>
<div
class="p-8 !pr-4 h-full w-full flex flex-col gap-4 border-l-[24px] border-l-[#FAFAFA] transition-all ease-in-out text-base"
:class="{ '!border-l-0': hiddenLeftSidebar || simpleMode }"
>
<!-- History message area -->
<NScrollbar ref="scrollbarRef" class="flex-1 pr-4 relative">
<div class="flex items-start mb-4">
<span class="rounded-lg overflow-hidden !w-16 !min-w-16 !h-16">
<avatar :avatar="AIAvatar" />
</span>
<div class="text-base w-full max-w-full ml-2 flex flex-col items-start">
<span class="text-base font-bold mb-4">助手</span>
<span class="text-base"
>Hello! I'm your smart assistant. How can I help you?</span
>
<NDivider />
</div>
</div>
<!-- Default message and history messages -->
<div
v-for="(msg, idx) in historyMessages"
:key="idx"
class="flex items-start mb-4"
>
<!-- Avatar -->
<span
v-if="msg.role === 'user'"
class="rounded-lg overflow-hidden !w-16 !min-w-16 !h-16"
>
<avatar :avatar="UserAvatar" />
</span>
<span v-else class="rounded-lg overflow-hidden">
<avatar :avatar="AIAvatar" />
</span>
<!-- Avatar and name -->
<div class="text-base w-full max-w-full ml-2 flex flex-col items-start">
<span class="text-base font-bold">{{
msg.role === "user" ? "你:" : "助手:"
}}</span>
<!-- Usage info -->
<div
v-if="msg.role !== 'user'"
class="text-[12px] text-[#7A7A7A] mb-[2px]"
>
Tokens: <span class="mr-1">{{ msg.usage?.total_tokens }}</span>
</div>
<div class="w-full max-w-full">
<NCollapse
v-if="msg.thinking?.trim()"
:expanded-names="collapseActive[idx]"
>
<NCollapseItem
:title="
thinking && idx === historyMessages.length - 1
? 'Thinking...'
: 'Finished thinking'
"
:name="getName(msg, idx)"
@item-header-click="
() => handleItemHeaderClick(getName(msg, idx))
"
>
<div
class="text-[#7A7A7A] mb-4 border-l-2 border-[#E5E5E5] ml-2 pl-2"
>
<markdown :content="msg.thinking || ''" />
</div>
</NCollapseItem>
</NCollapse>
<!-- Chain of thought above, content below -->
<markdown :content="msg.content || ''" />
<div v-if="msg.role !== 'user'" class="mt-2">
<tts :text="msg.content || ''" :message-id="msg.id!" />
</div>
<NDivider />
</div>
</div>
</div>
<div
v-if="isRecording"
class="absolute inset-0 pointer-events-none flex items-center justify-center text-[#7A7A7A] text-2xl bg-white/80"
>
Listening for voice input...
</div>
</NScrollbar>
<!-- Action bar -->
<div class="flex justify-between items-center gap-2">
<div class="flex items-center gap-2">
<NSelect
v-model:value="selectedModelId"
label-field="label"
value-field="value"
children-field="children"
filterable
:options="options"
/>
</div>
<div class="flex items-center gap-2">
<NPopconfirm
:positive-button-props="{ type: 'error' }"
positive-text="清除"
negative-text="取消"
@positive-click="chatStore.clearHistoryMessages"
@negative-click="() => {}"
>
<template #icon>
<ExclamationTriangleIcon class="!w-6 !h-6 text-[#d03050]" />
</template>
<template #trigger>
<NButton :disabled="isRecording || completing" type="warning">
<template v-if="!simpleMode"> 清除历史 </template>
<TrashIcon
class="!w-4 !h-4"
:class="{
'ml-1': !simpleMode
}"
/>
</NButton>
</template>
<span>Are you sure you want to clear the message history?</span>
</NPopconfirm>
<NButton :disabled="completing" @click="toggleRecording">
<template v-if="!simpleMode">
{{ isRecording ? "Stop Input" : "Voice Input" }}
</template>
<microphone
class="!w-4 !h-4"
:class="{
'ml-1': !simpleMode
}"
/>
</NButton>
</div>
</div>
</div>
</template>