feat: partial voice chat functionality

2025-06-30 10:49:24 +08:00
parent 06e6b4a8c9
commit ac5e68f5a5
10 changed files with 594 additions and 25 deletions

View File

@@ -9,7 +9,7 @@ from typing import Dict, Any, Optional as OptionalType
from app.constants.tts import APP_ID, TOKEN, SPEAKER
# Protocol constants remain unchanged...
# Protocol constants
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001
FULL_CLIENT_REQUEST = 0b0001
@@ -35,7 +35,7 @@ EVENT_TTSSentenceEnd = 351
EVENT_TTSResponse = 352
# All class definitions remain unchanged...
# All class definitions
class Header:
def __init__(self,
protocol_version=PROTOCOL_VERSION,
@@ -93,7 +93,7 @@ class Response:
self.payload_json = None
# Utility functions remain unchanged...
# Utility functions
def gen_log_id():
"""生成logID"""
ts = int(time.time() * 1000)
@@ -191,7 +191,7 @@ async def send_event(ws, header, optional=None, payload=None):
await ws.send(full_client_request)
# Changed: the TTS state management class now tracks a message ID and its task
# TTS state management class with message ID and task tracking
class TTSState:
def __init__(self, message_id: str):
self.message_id = message_id

View File

@@ -5,7 +5,6 @@ from aip import AipSpeech
from app.constants.asr import APP_ID, API_KEY, SECRET_KEY
import json
# Import the modified TTS module
from . import tts
router = APIRouter()
@@ -62,7 +61,7 @@ async def websocket_online_count(websocket: WebSocket):
await websocket.send_json({"type": "asr_result", "result": asr_text})
temp_buffer = bytes()
# Changed: TTS handling supports a message ID
# TTS handling
elif msg_type == "tts_text":
message_id = data.get("messageId")
text = data.get("text", "")

View File

@@ -34,13 +34,15 @@ const handleClick = () => {
ttsStore.convertText(text, messageId);
}
};
// Clean up the previous audio when the text changes
// Clean up previous audio on text change
watch(
() => text,
() => {
ttsStore.clearAudio(messageId);
}
);
onUnmounted(() => {
ttsStore.clearAudio(messageId);
});

View File

@@ -45,7 +45,20 @@ const { hiddenLeftSidebar, simpleMode } = storeToRefs(layoutStore);
"
to="/"
>
Chat
Conversation
</router-link>
<router-link
class="w-full h-[52px] px-8 flex items-center cursor-pointer"
:class="
$route.path === '/voice'
? [
'bg-[rgba(37,99,235,0.04)] text-[#0094c5] border-r-2 border-[#0094c5]'
]
: []
"
to="/voice"
>
Voice Chat
</router-link>
<div class="w-full h-full flex flex-col items-center text-[#0094c5]">

View File

@@ -2,7 +2,8 @@ import { createRouter, createWebHistory } from "vue-router";
import BasicLayout from "@/layouts/BasicLayout.vue";
import { resetDescription, setTitle } from "@/utils";
import community from "@/views/CommunityView.vue";
import ChatLLM from "@/views/ChatLLMView.vue";
import VoiceView from "@/views/VoiceView.vue";
const router = createRouter({
history: createWebHistory(import.meta.env.BASE_URL),
@@ -13,11 +14,19 @@ const router = createRouter({
children: [
{
path: "",
name: "community",
component: community,
name: "ChatLLM",
component: ChatLLM,
meta: {
title: "对话"
}
},
{
path: "/voice",
name: "Voice",
component: VoiceView,
meta: {
title: "语音对话"
}
}
]
}

View File

@@ -1,4 +1,4 @@
import { useChatStore, useTtsStore } from "@/stores";
import { useChatStore, useTtsStore, useVoiceStore } from "@/stores";
// WebSocket
export const useWebSocketStore = defineStore("websocket", () => {
@@ -6,6 +6,8 @@ export const useWebSocketStore = defineStore("websocket", () => {
const connected = ref(false);
const chatStore = useChatStore();
const ttsStore = useTtsStore();
const voiceStore = useVoiceStore();
const router = useRouter();
const { onlineCount } = storeToRefs(chatStore);
@@ -31,7 +33,14 @@ export const useWebSocketStore = defineStore("websocket", () => {
onlineCount.value = data.online_count;
break;
case "asr_result":
chatStore.addMessageToHistory(data.result);
if (router.currentRoute.value.path === "/") {
chatStore.addMessageToHistory(data.result);
} else if (router.currentRoute.value.path === "/voice") {
// On the voice page, hand the result to the VoiceStore
voiceStore.handleASRResult(data.result);
} else {
console.warn(data);
}
break;
// Handling for the new TTS message format
@@ -76,7 +85,6 @@ export const useWebSocketStore = defineStore("websocket", () => {
ttsStore.finishConversion(data.messageId);
} else {
console.log("TTS音频传输完成无messageId");
// 兜底处理,可能是旧格式
ttsStore.finishConversion(data.messageId);
}
break;
@@ -85,7 +93,6 @@ export const useWebSocketStore = defineStore("websocket", () => {
// TTS session ended
if (data.messageId) {
console.log(`TTS session ended [${data.messageId}]`);
// additional cleanup logic could go here
} else {
console.log("TTS会话结束");
}
@@ -98,19 +105,10 @@ export const useWebSocketStore = defineStore("websocket", () => {
ttsStore.handleError(data.message, data.messageId);
} else {
console.error("TTS错误:", data.message);
// 兜底处理,可能是旧格式
ttsStore.handleError(data.message, data.messageId || "unknown");
}
break;
// Keep the legacy message types as a fallback
case "tts_audio_complete_legacy":
case "tts_complete_legacy":
case "tts_error_legacy":
console.log("收到旧格式TTS消息:", data.type);
// 可以选择处理或忽略
break;
default:
console.log("未知消息类型:", data.type, data);
}
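For orientation, here is a minimal TypeScript sketch of the incoming message shape that the handler above dispatches on, reconstructed only from the fields it reads (type, online_count, result, messageId, message). Any other fields, and the exact case names of the non-legacy TTS events, are not visible in this diff and are deliberately not assumed.

// Hypothetical shape of a server-to-client WebSocket message, inferred from the handler above
interface ServerMessage {
  type: string;          // discriminator used by the switch statement
  online_count?: number; // present on online-count updates
  result?: string;       // present on "asr_result" messages
  messageId?: string;    // correlates TTS audio/completion/error events with a chat message
  message?: string;      // error text on TTS error messages
}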

View File

@@ -2,3 +2,4 @@ export * from "./asr_store";
export * from "./chat_store";
export * from "./layout_store";
export * from "./tts_store";
export * from "./voice_store";

View File

@@ -0,0 +1,293 @@
import { useWebSocketStore } from "@/services";
import { useChatStore, useTtsStore } from "@/stores";
export const useVoiceStore = defineStore("voice", () => {
// State
const isListening = ref(false); // whether voice input is currently being captured
const isProcessing = ref(false); // whether the full ASR, LLM, and TTS pipeline is in progress
const currentSessionId = ref<string | null>(null); // current session ID
// Other stores this one depends on
const chatStore = useChatStore();
const ttsStore = useTtsStore();
const wsStore = useWebSocketStore();
// Voice message history
const voiceMessages = ref<
{
id: string;
type: "user" | "assistant";
text: string;
audioId?: string;
timestamp: number;
isProcessing?: boolean;
}[]
>([]);
// ASR buffer state
const isRecording = ref(false);
const recordingStartTime = ref<number | null>(null);
const recordingMaxDuration = 60 * 1000; // maximum recording duration: 60 seconds
/**
* Start voice input
*/
const startListening = async () => {
if (isListening.value) return;
try {
await wsStore.connect();
// Create a new session ID
currentSessionId.value = new Date().getTime().toString();
isListening.value = true;
isRecording.value = true;
recordingStartTime.value = Date.now();
// Start recording - assumes a startRecording method is available
// (typically backed by the browser MediaRecorder API; see the sketch after this store)
await startRecording();
console.log("Voice input started");
} catch (error) {
console.error("启动语音输入失败:", error);
stopListening();
}
};
/**
* Stop voice input
*/
const stopListening = async () => {
if (!isListening.value) return;
try {
// Stop recording
if (isRecording.value) {
await stopRecording();
isRecording.value = false;
}
isListening.value = false;
recordingStartTime.value = null;
// Send the end-of-input signal
wsStore.send(JSON.stringify({ type: "asr_end" }));
console.log("Voice input stopped, waiting for the ASR result");
} catch (error) {
console.error("停止语音输入失败:", error);
}
};
/**
* Recording duration check
*/
const checkRecordingTime = () => {
if (isRecording.value && recordingStartTime.value) {
const currentTime = Date.now();
const duration = currentTime - recordingStartTime.value;
if (duration >= recordingMaxDuration) {
console.log("录音达到最大时长,自动停止");
stopListening();
}
}
};
// Periodically check the recording duration
let recordingTimer: any = null;
watch(isRecording, (newVal) => {
if (newVal) {
recordingTimer = setInterval(checkRecordingTime, 1000);
} else if (recordingTimer) {
clearInterval(recordingTimer);
recordingTimer = null;
}
});
/**
* Handle an ASR result
*/
const handleASRResult = async (text: string) => {
if (!text.trim()) return;
console.log("收到ASR结果:", text);
isProcessing.value = true;
// Add the user message
const userMessageId = new Date().getTime().toString();
voiceMessages.value.push({
id: userMessageId,
type: "user",
text,
timestamp: Date.now()
});
// Add a placeholder assistant message; offset the ID so it cannot collide with the user message created in the same millisecond
const assistantMessageId = (Date.now() + 1).toString();
voiceMessages.value.push({
id: assistantMessageId,
type: "assistant",
text: "",
timestamp: Date.now(),
isProcessing: true
});
// Call the LLM to generate a reply
await generateLLMResponse(text, assistantMessageId);
};
/**
* Generate the LLM reply
*/
const generateLLMResponse = async (userInput: string, responseId: string) => {
try {
console.log("生成LLM回复...");
// 构建消息历史
const messages = [
...voiceMessages.value
.filter((msg) => !msg.isProcessing)
.map((msg) => ({
role: msg.type === "user" ? "user" : "assistant",
content: msg.text
})),
{ role: "user", content: userInput }
];
let responseText = "";
// Call the ChatStore chat method
await chatStore.chatWithLLM(
{
messages,
model: chatStore.modelInfo?.model_id || ""
},
// Handle the streaming reply
(content) => {
responseText = content;
// Update the assistant message
const index = voiceMessages.value.findIndex(
(msg) => msg.id === responseId
);
if (index !== -1) {
voiceMessages.value[index].text = content;
}
}
);
// LLM generation complete; convert the reply to speech
console.log("LLM reply generated:", responseText);
await synthesizeSpeech(responseText, responseId);
} catch (error) {
console.error("生成LLM回复失败:", error);
const index = voiceMessages.value.findIndex(
(msg) => msg.id === responseId
);
if (index !== -1) {
voiceMessages.value[index].text = "抱歉,生成回复时出错";
voiceMessages.value[index].isProcessing = false;
}
isProcessing.value = false;
}
};
/**
* Convert text to speech
*/
const synthesizeSpeech = async (text: string, messageId: string) => {
try {
console.log("转换文本为语音...");
// 调用TTS生成语音
await ttsStore.convertText(text, messageId);
// Note: the TTS audio plays automatically once generation completes;
// that logic is handled in the TTS store's finishConversion method
// Update the message state
const index = voiceMessages.value.findIndex(
(msg) => msg.id === messageId
);
if (index !== -1) {
voiceMessages.value[index].audioId = messageId;
voiceMessages.value[index].isProcessing = false;
}
} catch (error) {
console.error("转换文本为语音失败:", error);
const index = voiceMessages.value.findIndex(
(msg) => msg.id === messageId
);
if (index !== -1) {
voiceMessages.value[index].isProcessing = false;
}
} finally {
isProcessing.value = false;
}
};
/**
* Clear all messages
*/
const clearMessages = () => {
voiceMessages.value = [];
};
/**
* Play the audio for a specific message
*/
const playMessageAudio = async (messageId: string) => {
const message = voiceMessages.value.find((msg) => msg.id === messageId);
if (message && message.audioId) {
await ttsStore.play(message.audioId);
}
};
/**
* Pause the currently playing audio
*/
const pauseAudio = () => {
ttsStore.pauseAll();
};
// Recording helpers - to be implemented against the actual setup,
// typically with the MediaRecorder API (see the sketch after this store)
const startRecording = async () => {
// Recording start logic goes here:
// 1. Request microphone permission
// 2. Create a MediaRecorder
// 3. On each dataavailable event, send the audio chunk over the WebSocket
console.log("Recording started...");
};
const stopRecording = async () => {
// Recording stop logic goes here
console.log("Recording stopped...");
};
// Clean up resources when the component unmounts
onUnmounted(() => {
if (isRecording.value) {
stopRecording();
}
if (recordingTimer) {
clearInterval(recordingTimer);
}
});
return {
// State
isListening,
isProcessing,
isRecording,
voiceMessages,
// Methods
startListening,
stopListening,
handleASRResult,
clearMessages,
playMessageAudio,
pauseAudio
};
});
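For reference, a minimal sketch of how the startRecording/stopRecording stubs above could be filled in with the MediaRecorder API, following the three steps listed in their comments. The module-level recorder variable, the 250 ms timeslice, and the assumption that wsStore.send accepts binary audio chunks are illustrative only and are not part of this commit.

// Hypothetical MediaRecorder-based implementation of the two stubs above
let mediaRecorder: MediaRecorder | null = null;

const startRecording = async () => {
  // 1. Request microphone permission and obtain an audio stream
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // 2. Create a MediaRecorder on that stream
  mediaRecorder = new MediaRecorder(stream);
  // 3. Forward each audio chunk to the backend as it becomes available
  mediaRecorder.ondataavailable = async (event) => {
    if (event.data.size > 0) {
      wsStore.send(await event.data.arrayBuffer()); // assumes binary frames are accepted
    }
  };
  mediaRecorder.start(250); // emit a chunk roughly every 250 ms
};

const stopRecording = async () => {
  // Stop the recorder and release the microphone
  mediaRecorder?.stop();
  mediaRecorder?.stream.getTracks().forEach((track) => track.stop());
  mediaRecorder = null;
};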

web/src/views/VoiceView.vue (new file, 254 lines)
View File

@@ -0,0 +1,254 @@
<script setup lang="ts">
import type { SelectGroupOption, SelectOption } from "naive-ui";
import type { Message } from "@/interfaces";
import { throttle } from "lodash-es";
import AIAvatar from "@/assets/ai_avatar.png";
import { ExclamationTriangleIcon, microphone, TrashIcon } from "@/assets/Icons";
import UserAvatar from "@/assets/user_avatar.jpg";
import markdown from "@/components/markdown.vue";
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
const chatStore = useChatStore();
const { historyMessages, completing, modelList, modelInfo, thinking } =
storeToRefs(chatStore);
const asrStore = useAsrStore();
const { isRecording } = storeToRefs(asrStore);
const layoutStore = useLayoutStore();
const { hiddenLeftSidebar, simpleMode } = storeToRefs(layoutStore);
const scrollbarRef = ref<HTMLElement | null>(null);
const options = ref<Array<SelectGroupOption | SelectOption>>([]);
// Expanded state for the NCollapse component
const collapseActive = ref<string[]>(
historyMessages.value.map((msg, idx) => String(msg.id ?? idx))
);
const getName = (msg: Message, idx: number) => String(msg.id ?? idx);
// TODO: bugfix: items do not expand correctly
watch(
historyMessages,
(newVal, oldVal) => {
// Collect all names
const newNames = newVal.map((msg, idx) => getName(msg, idx));
const oldNames = oldVal ? oldVal.map((msg, idx) => getName(msg, idx)) : [];
// Find the newly added names
const addedNames = newNames.filter((name) => !oldNames.includes(name));
// Keep the items that were already expanded
const currentActive = collapseActive.value.filter((name) =>
newNames.includes(name)
);
// Expand new items by default
collapseActive.value = [...currentActive, ...addedNames];
},
{ immediate: true, deep: true }
);
// Handle clicks on collapse item headers and toggle their expanded state
const handleItemHeaderClick = (name: string) => {
if (collapseActive.value.includes(name)) {
collapseActive.value = collapseActive.value.filter((n) => n !== name);
} else {
collapseActive.value.push(name);
}
};
// ID of the currently selected model
const selectedModelId = computed({
get: () => modelInfo.value?.model_id ?? null,
set: (id: string | null) => {
for (const vendor of modelList.value) {
const found = vendor.models.find((model) => model.model_id === id);
if (found) {
modelInfo.value = found;
return;
}
}
modelInfo.value = null;
}
});
// Watch the model list and update the select options
watch(
() => modelList.value,
(newVal) => {
if (newVal) {
options.value = newVal.map((vendor) => ({
type: "group",
label: vendor.vendor,
key: vendor.vendor,
children: vendor.models.map((model) => ({
label: model.model_name,
value: model.model_id,
type: model.model_type
}))
}));
if (newVal.length > 0 && newVal[0].models.length > 0) {
modelInfo.value = newVal[0].models[0];
}
}
},
{ immediate: true, deep: true }
);
// Toggle voice input
const toggleRecording = throttle(() => {
if (isRecording.value) {
asrStore.stopRecording();
} else {
asrStore.startRecording();
}
}, 500);
watch(completing, (newVal) => {
if (newVal) {
nextTick(() => {
scrollbarRef.value?.scrollTo({ top: 99999, behavior: "smooth" });
});
}
});
onMounted(() => {
chatStore.getModelList();
});
</script>
<template>
<div
class="p-8 !pr-4 h-full w-full flex flex-col gap-4 border-l-[24px] border-l-[#FAFAFA] transition-all ease-in-out text-base"
:class="{ '!border-l-0': hiddenLeftSidebar || simpleMode }"
>
<!-- History message area -->
<NScrollbar ref="scrollbarRef" class="flex-1 pr-4 relative">
<div class="flex items-start mb-4">
<span class="rounded-lg overflow-hidden !w-16 !min-w-16 !h-16">
<avatar :avatar="AIAvatar" />
</span>
<div class="text-base w-full max-w-full ml-2 flex flex-col items-start">
<span class="text-base font-bold mb-4">助手</span>
<span class="text-base"
>Hello! I'm your smart assistant. How can I help you?</span
>
<NDivider />
</div>
</div>
<!-- Default message and history messages -->
<div
v-for="(msg, idx) in historyMessages"
:key="idx"
class="flex items-start mb-4"
>
<!-- Avatar -->
<span
v-if="msg.role === 'user'"
class="rounded-lg overflow-hidden !w-16 !min-w-16 !h-16"
>
<avatar :avatar="UserAvatar" />
</span>
<span v-else class="rounded-lg overflow-hidden">
<avatar :avatar="AIAvatar" />
</span>
<!-- Avatar and name -->
<div class="text-base w-full max-w-full ml-2 flex flex-col items-start">
<span class="text-base font-bold">{{
msg.role === "user" ? "你:" : "助手:"
}}</span>
<!-- Usage info -->
<div
v-if="msg.role !== 'user'"
class="text-[12px] text-[#7A7A7A] mb-[2px]"
>
Tokens: <span class="mr-1">{{ msg.usage?.total_tokens }}</span>
</div>
<div class="w-full max-w-full">
<NCollapse
v-if="msg.thinking?.trim()"
:expanded-names="collapseActive[idx]"
>
<NCollapseItem
:title="
thinking && idx === historyMessages.length - 1
? 'Thinking...'
: 'Finished thinking'
"
:name="getName(msg, idx)"
@item-header-click="
() => handleItemHeaderClick(getName(msg, idx))
"
>
<div
class="text-[#7A7A7A] mb-4 border-l-2 border-[#E5E5E5] ml-2 pl-2"
>
<markdown :content="msg.thinking || ''" />
</div>
</NCollapseItem>
</NCollapse>
<!-- Chain of thought above, content below -->
<markdown :content="msg.content || ''" />
<div v-if="msg.role !== 'user'" class="mt-2">
<tts :text="msg.content || ''" :message-id="msg.id!" />
</div>
<NDivider />
</div>
</div>
</div>
<div
v-if="isRecording"
class="absolute inset-0 pointer-events-none flex items-center justify-center text-[#7A7A7A] text-2xl bg-white/80"
>
Listening for voice input...
</div>
</NScrollbar>
<!-- Action bar -->
<div class="flex justify-between items-center gap-2">
<div class="flex items-center gap-2">
<NSelect
v-model:value="selectedModelId"
label-field="label"
value-field="value"
children-field="children"
filterable
:options="options"
/>
</div>
<div class="flex items-center gap-2">
<NPopconfirm
:positive-button-props="{ type: 'error' }"
positive-text="清除"
negative-text="取消"
@positive-click="chatStore.clearHistoryMessages"
@negative-click="() => {}"
>
<template #icon>
<ExclamationTriangleIcon class="!w-6 !h-6 text-[#d03050]" />
</template>
<template #trigger>
<NButton :disabled="isRecording || completing" type="warning">
<template v-if="!simpleMode"> 清除历史 </template>
<TrashIcon
class="!w-4 !h-4"
:class="{
'ml-1': !simpleMode
}"
/>
</NButton>
</template>
<span>Are you sure you want to clear the message history?</span>
</NPopconfirm>
<NButton :disabled="completing" @click="toggleRecording">
<template v-if="!simpleMode">
{{ isRecording ? "Stop Input" : "Voice Input" }}
</template>
<microphone
class="!w-4 !h-4"
:class="{
'ml-1': !simpleMode
}"
/>
</NButton>
</div>
</div>
</div>
</template>