feat: tts语音生成

This commit is contained in:
2025-06-30 09:50:44 +08:00
parent 51e7239c71
commit 06e6b4a8c9
20 changed files with 1135 additions and 30 deletions

2
web/components.d.ts vendored
View File

@@ -18,10 +18,12 @@ declare module 'vue' {
NInput: typeof import('naive-ui')['NInput']
NMessageProvider: typeof import('naive-ui')['NMessageProvider']
NPopconfirm: typeof import('naive-ui')['NPopconfirm']
NPopover: typeof import('naive-ui')['NPopover']
NScrollbar: typeof import('naive-ui')['NScrollbar']
NSelect: typeof import('naive-ui')['NSelect']
NTag: typeof import('naive-ui')['NTag']
RouterLink: typeof import('vue-router')['RouterLink']
RouterView: typeof import('vue-router')['RouterView']
Tts: typeof import('./src/components/tts.vue')['default']
}
}

View File

@@ -31,6 +31,7 @@ export default antfu({
"antfu/top-level-function": "off",
"ts/no-unsafe-function-type": "off",
"no-console": "off",
"unused-imports/no-unused-vars": "warn"
"unused-imports/no-unused-vars": "warn",
"ts/no-use-before-define": "off"
}
});

View File

@@ -2,4 +2,5 @@ export { default as ChevronLeftIcon } from "./svg/heroicons/ChevronLeftIcon.svg?
export { default as ExclamationTriangleIcon } from "./svg/heroicons/ExclamationTriangleIcon.svg?component";
export { default as microphone } from "./svg/heroicons/MicrophoneIcon.svg?component";
export { default as PaperAirplaneIcon } from "./svg/heroicons/PaperAirplaneIcon.svg?component";
export { default as SpeakerWaveIcon } from "./svg/heroicons/SpeakerWaveIcon.svg?component";
export { default as TrashIcon } from "./svg/heroicons/TrashIcon.svg?component";

View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="size-6">
<path stroke-linecap="round" stroke-linejoin="round" d="M19.114 5.636a9 9 0 0 1 0 12.728M16.463 8.288a5.25 5.25 0 0 1 0 7.424M6.75 8.25l4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z" />
</svg>

After

Width:  |  Height:  |  Size: 472 B

View File

@@ -0,0 +1,81 @@
<script setup lang="ts">
import { SpeakerWaveIcon } from "@/assets/Icons";
import { useLayoutStore, useTtsStore } from "@/stores";
const { text, messageId } = defineProps<{
text: string;
messageId: string;
}>();
const ttsStore = useTtsStore();
const layoutStore = useLayoutStore();
const { simpleMode } = storeToRefs(layoutStore);
// Reactive views of this message's TTS state, each read from the shared store
const isPlaying = computed(() => {
  return ttsStore.isPlaying(messageId);
});
const isLoading = computed(() => {
  return ttsStore.isLoading(messageId);
});
const hasAudio = computed(() => {
  return ttsStore.hasAudio(messageId);
});
// Click handler for the speaker button
const handleClick = () => {
  // Clicks are ignored while synthesis is still running
  if (isLoading.value) return;

  // Nothing synthesized yet: request TTS for this message's text
  if (!hasAudio.value) {
    ttsStore.convertText(text, messageId);
    return;
  }

  // Audio is ready: toggle between pause and play
  if (isPlaying.value) ttsStore.pause(messageId);
  else ttsStore.play(messageId);
};
// Drops whatever audio the store currently holds for this message
const releaseAudio = () => {
  ttsStore.clearAudio(messageId);
};
// Audio synthesized for the old text is discarded as soon as the text changes
watch(() => text, releaseAudio);
// ...and also when this component goes away
onUnmounted(releaseAudio);
</script>
<!--
  Speaker button for a single chat message.
  - The button is in its loading state while `isLoading` is true and is disabled when `text` is blank.
  - The icon is hidden while loading and gets the `animate-pulse` class while `isPlaying` is true.
  - The hover popover text follows the current state: synthesizing / click to pause / click to play / synthesize speech.
-->
<template>
<NPopover trigger="hover">
<template #trigger>
<NButton
:loading="isLoading"
@click="handleClick"
quaternary
circle
:disabled="!text.trim()"
>
<SpeakerWaveIcon
v-if="!isLoading"
class="!w-4 !h-4"
:class="{
'': !simpleMode,
'animate-pulse': isPlaying
}"
/>
</NButton>
</template>
<span>
{{
isLoading
? "合成中..."
: isPlaying
? "点击暂停"
: hasAudio
? "点击播放"
: "语音合成"
}}
</span>
</NPopover>
</template>

View File

@@ -11,6 +11,7 @@ export interface Message {
thinking?: string;
role?: string;
usage?: UsageInfo;
id?: string;
[property: string]: any;
}

View File

@@ -16,7 +16,7 @@ const router = createRouter({
name: "community",
component: community,
meta: {
title: "社区"
title: "对话"
}
}
]

View File

@@ -0,0 +1,30 @@
import { useWebSocketStore } from "@/services";
/**
 * Small helper around the shared WebSocket store for audio/TTS traffic.
 *
 * @returns
 *  - `sendMessage`: sends a text or binary frame if the socket is connected.
 *  - `ensureConnection`: resolves once the socket reports `connected`,
 *    starting a connection attempt first when necessary.
 */
export const useAudioWebSocket = () => {
  const webSocketStore = useWebSocketStore();

  // How often ensureConnection re-checks the connection flag
  const pollIntervalMs = 100;

  /**
   * Send a frame over the shared socket.
   * Strings go through the store's `send`; binary data is written to the raw
   * socket. Nothing is sent while disconnected, but the drop is logged so a
   * request that never reaches the server is visible in the console.
   */
  const sendMessage = (data: string | Uint8Array) => {
    if (!webSocketStore.connected) {
      console.warn("WebSocket未连接,无法发送音频消息");
      return;
    }
    if (typeof data === "string") {
      webSocketStore.send(data);
    } else {
      webSocketStore.websocket?.send(data);
    }
  };

  /**
   * Wait until the socket is connected.
   *
   * @param timeoutMs - Maximum time to wait before giving up. Without a limit
   *   the poll loop would run forever when the server is unreachable and the
   *   caller's error handling could never run.
   * @throws Error when the socket is still not connected after `timeoutMs`.
   */
  const ensureConnection = async (timeoutMs = 10 * 1000): Promise<void> => {
    if (webSocketStore.connected) return;

    webSocketStore.connect();
    const deadline = Date.now() + timeoutMs;

    await new Promise<void>((resolve, reject) => {
      const check = () => {
        if (webSocketStore.connected) {
          resolve();
        } else if (Date.now() >= deadline) {
          reject(new Error(`WebSocket连接超时 (${timeoutMs}ms)`));
        } else {
          setTimeout(check, pollIntervalMs);
        }
      };
      check();
    });
  };

  return { sendMessage, ensureConnection };
};

View File

@@ -1,3 +1,4 @@
export * from "./audio_websocket";
export * from "./base_service";
export * from "./chat_service";
export * from "./websocket";

View File

@@ -1,28 +1,143 @@
import { useChatStore } from "@/stores";
import { useChatStore, useTtsStore } from "@/stores";
// WebSocket
export const useWebSocketStore = defineStore("websocket", () => {
const websocket = ref<WebSocket>();
const connected = ref(false);
const chatStore = useChatStore();
const ttsStore = useTtsStore();
const { onlineCount } = storeToRefs(chatStore);
const onmessage = (e: MessageEvent) => {
const data = JSON.parse(e.data);
switch (data.type) {
case "count":
onlineCount.value = data.online_count;
break;
case "asr_result":
chatStore.addMessageToHistory(data.result);
// 检查消息类型
if (e.data instanceof ArrayBuffer) {
// 处理二进制音频数据(兜底处理,新版本应该不会用到)
console.log("收到二进制音频数据,大小:", e.data.byteLength);
console.warn("收到旧格式的二进制数据无法确定messageId");
// 可以选择忽略或者作为兜底处理
} else if (e.data instanceof Blob) {
// 如果是Blob转换为ArrayBuffer兜底处理
e.data.arrayBuffer().then((buffer: ArrayBuffer) => {
console.log("收到Blob音频数据大小:", buffer.byteLength);
console.warn("收到旧格式的Blob数据无法确定messageId");
});
} else if (typeof e.data === "string") {
// 处理文本JSON消息
try {
const data = JSON.parse(e.data);
switch (data.type) {
case "count":
onlineCount.value = data.online_count;
break;
case "asr_result":
chatStore.addMessageToHistory(data.result);
break;
// 新的TTS消息格式处理
case "tts_audio_data":
// 新的音频数据格式包含messageId和hex格式的音频数据
if (data.messageId && data.audioData) {
console.log(
`收到TTS音频数据 [${data.messageId}]hex长度:`,
data.audioData.length
);
try {
// 将hex字符串转换为ArrayBuffer
const bytes = data.audioData
.match(/.{1,2}/g)
?.map((byte: string) => Number.parseInt(byte, 16));
if (bytes) {
const buffer = new Uint8Array(bytes).buffer;
console.log(
`转换后的音频数据大小 [${data.messageId}]:`,
buffer.byteLength
);
ttsStore.handleAudioData(buffer, data.messageId);
} else {
console.error(`音频数据格式错误 [${data.messageId}]`);
}
} catch (error) {
console.error(`音频数据转换失败 [${data.messageId}]:`, error);
ttsStore.handleError(
`音频数据转换失败: ${error}`,
data.messageId
);
}
} else {
console.error("tts_audio_data消息格式错误:", data);
}
break;
case "tts_audio_complete":
// TTS音频传输完成
if (data.messageId) {
console.log(`TTS音频传输完成 [${data.messageId}]`);
ttsStore.finishConversion(data.messageId);
} else {
console.log("TTS音频传输完成无messageId");
// 兜底处理,可能是旧格式
ttsStore.finishConversion(data.messageId);
}
break;
case "tts_complete":
// TTS会话结束
if (data.messageId) {
console.log(`TTS会话结束 [${data.messageId}]`);
// 可以添加额外的清理逻辑
} else {
console.log("TTS会话结束");
}
break;
case "tts_error":
// TTS错误
if (data.messageId) {
console.error(`TTS错误 [${data.messageId}]:`, data.message);
ttsStore.handleError(data.message, data.messageId);
} else {
console.error("TTS错误:", data.message);
// 兜底处理,可能是旧格式
ttsStore.handleError(data.message, data.messageId || "unknown");
}
break;
// 保留旧的消息类型作为兜底处理
case "tts_audio_complete_legacy":
case "tts_complete_legacy":
case "tts_error_legacy":
console.log("收到旧格式TTS消息:", data.type);
// 可以选择处理或忽略
break;
default:
console.log("未知消息类型:", data.type, data);
}
} catch (error) {
console.error("JSON解析错误:", error, "原始数据:", e.data);
}
} else {
console.warn("收到未知格式的消息:", typeof e.data, e.data);
}
};
const send = (data: string) => {
if (websocket.value && websocket.value.readyState === WebSocket.OPEN)
if (websocket.value && websocket.value.readyState === WebSocket.OPEN) {
websocket.value?.send(data);
} else {
console.warn("WebSocket未连接无法发送消息:", data);
}
};
// Send a binary frame; only possible while the socket is open
const sendBinary = (data: ArrayBuffer | Uint8Array) => {
  const socket = websocket.value;
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    console.warn("WebSocket未连接无法发送二进制数据");
    return;
  }
  socket.send(data);
};
// Close the current socket, if there is one
const close = () => {
  const socket = websocket.value;
  if (socket) socket.close();
};
@@ -33,11 +148,15 @@ export const useWebSocketStore = defineStore("websocket", () => {
websocket.value.onopen = () => {
connected.value = true;
console.log("WebSocket连接成功");
let pingIntervalId: NodeJS.Timeout | undefined;
if (pingIntervalId) clearInterval(pingIntervalId);
pingIntervalId = setInterval(() => send("ping"), 30 * 1000);
pingIntervalId = setInterval(() => {
// 修改ping格式为JSON格式与后端保持一致
send(JSON.stringify({ type: "ping" }));
}, 30 * 1000);
if (websocket.value) {
websocket.value.onmessage = onmessage;
@@ -45,20 +164,28 @@ export const useWebSocketStore = defineStore("websocket", () => {
websocket.value.onerror = (e: Event) => {
console.error(`WebSocket错误:${(e as ErrorEvent).message}`);
};
websocket.value.onclose = () => {
websocket.value.onclose = (e: CloseEvent) => {
connected.value = false;
console.log(`WebSocket连接关闭: ${e.code} ${e.reason}`);
setTimeout(() => {
console.log("尝试重新连接WebSocket...");
connect(); // 尝试重新连接
}, 1000); // 1秒后重试连接
};
}
};
websocket.value.onerror = (e: Event) => {
console.error("WebSocket连接错误:", e);
};
};
return {
websocket,
connected,
send,
sendBinary,
close,
connect
};

View File

@@ -118,7 +118,10 @@ export const useChatStore = defineStore("chat", () => {
historyMessages.value[historyMessages.value.length - 1].thinking =
thinkingContent;
}
);
).then(() => {
historyMessages.value[historyMessages.value.length - 1].id =
new Date().getTime().toString();
});
}
}
},

View File

@@ -1,3 +1,4 @@
export * from "./asr_store";
export * from "./chat_store";
export * from "./layout_store";
export * from "./layout_store";
export * from "./tts_store";

302
web/src/stores/tts_store.ts Normal file
View File

@@ -0,0 +1,302 @@
import { onScopeDispose } from "vue";
import { useAudioWebSocket } from "@/services";
import { createAudioUrl, mergeAudioChunks } from "@/utils";
/** Per-message TTS state kept by the store. */
interface AudioState {
  // --- synthesis ---
  /** True while a conversion for this message is in progress. */
  isLoading: boolean;
  /** Raw audio chunks received so far for this message. */
  audioChunks: ArrayBuffer[];

  // --- playback ---
  /** True while this message's audio is playing. */
  isPlaying: boolean;
  /** Player for the merged audio, or null when no audio is ready. */
  audioElement: HTMLAudioElement | null;
  /** Object URL backing `audioElement`, or null when none has been created. */
  audioUrl: string | null;

  // --- error reporting ---
  /** True when the last operation for this message failed. */
  hasError: boolean;
  /** Description of the last failure; empty when there is none. */
  errorMessage: string;
}
export const useTtsStore = defineStore("tts", () => {
// 多音频状态管理 - 以消息ID为key
const audioStates = ref<Map<string, AudioState>>(new Map());
// 当前活跃的转换请求(保留用于兼容性)
const activeConversion = ref<string | null>(null);
// 会话状态
const hasActiveSession = ref(false);
// WebSocket连接
const { sendMessage, ensureConnection } = useAudioWebSocket();
/**
 * Return the audio state stored for `messageId`, inserting a blank one first
 * when the message has no entry yet.
 */
const getAudioState = (messageId: string): AudioState => {
  const states = audioStates.value;
  if (states.has(messageId)) {
    return states.get(messageId)!;
  }
  const blankState: AudioState = {
    isPlaying: false,
    isLoading: false,
    audioElement: null,
    audioUrl: null,
    audioChunks: [],
    hasError: false,
    errorMessage: ""
  };
  states.set(messageId, blankState);
  // Read the entry back through the map rather than returning the literal
  return states.get(messageId)!;
};
/**
 * Ask the TTS service to synthesize `text` for the message `messageId`.
 * Any failure raised along the way is reported through `handleError`.
 */
const convertText = async (text: string, messageId: string) => {
  try {
    await ensureConnection();

    // Stop whatever is currently playing
    pauseAll();

    // Reset this message's state: old audio and errors are discarded
    const target = getAudioState(messageId);
    clearAudioState(target);
    target.isLoading = true;
    target.audioChunks = [];

    // Record which message is being converted
    activeConversion.value = messageId;
    hasActiveSession.value = true;

    // Hand the text over to the TTS service
    const request = JSON.stringify({ type: "tts_text", text, messageId });
    sendMessage(request);
  } catch (error) {
    handleError(`连接失败: ${error}`, messageId);
  }
};
/**
 * Buffer one chunk of received audio.
 *
 * @param data - Raw audio bytes.
 * @param messageId - Message the chunk belongs to; when omitted (or empty)
 *   the currently active conversion is used instead.
 */
const handleAudioData = (data: ArrayBuffer, messageId?: string) => {
  const targetMessageId = messageId || activeConversion.value;
  if (targetMessageId) {
    console.log(`接收音频数据 [${targetMessageId}],大小:`, data.byteLength);
    getAudioState(targetMessageId).audioChunks.push(data);
    return;
  }
  // Neither an explicit id nor an active conversion: nowhere to store the chunk
  console.warn("handleAudioData: 没有有效的messageId");
};
/**
 * Finish a TTS conversion: merge the buffered chunks into one clip, create an
 * audio element for it and start playback.
 *
 * @param messageId - Message whose conversion finished; when omitted (or
 *   empty) the currently active conversion is used instead.
 */
const finishConversion = async (messageId?: string) => {
  const targetMessageId = messageId || activeConversion.value;
  if (!targetMessageId) {
    console.warn("finishConversion: 没有有效的messageId");
    return;
  }

  const state = getAudioState(targetMessageId);
  console.log(
    `完成TTS转换 [${targetMessageId}],音频片段数量:`,
    state.audioChunks.length
  );
  if (state.audioChunks.length === 0) {
    handleError("没有接收到音频数据", targetMessageId);
    return;
  }

  try {
    // Merge the received chunks into a single buffer
    const mergedAudio = mergeAudioChunks(state.audioChunks);
    console.log(
      `合并后音频大小 [${targetMessageId}]:`,
      mergedAudio.byteLength
    );

    // If a completion arrives again for a message that already has a player,
    // stop that player and release its object URL before replacing them.
    // Otherwise the old clip could keep playing and its blob URL would leak.
    if (state.audioElement) {
      state.audioElement.pause();
      state.isPlaying = false;
    }
    if (state.audioUrl) {
      URL.revokeObjectURL(state.audioUrl);
    }

    // Create the object URL and the player for the merged clip
    state.audioUrl = createAudioUrl(mergedAudio);
    state.audioElement = new Audio(state.audioUrl);
    setupAudioEvents(state, targetMessageId);
    state.isLoading = false;

    // This conversion is done; clear the marker if it still points here
    if (activeConversion.value === targetMessageId) {
      activeConversion.value = null;
    }

    console.log(`TTS音频准备完成 [${targetMessageId}],开始自动播放`);
    // Start playback right away
    await play(targetMessageId);
  } catch (error) {
    handleError(`音频处理失败: ${error}`, targetMessageId);
  }
};
/**
 * Attach the playback listeners ("ended", "error", "canplaythrough") to the
 * state's audio element. Does nothing when the state has no element.
 */
const setupAudioEvents = (state: AudioState, messageId: string) => {
  const player = state.audioElement;
  if (!player) return;

  // Playback ran to the end: the clip is no longer playing
  const onEnded = () => {
    state.isPlaying = false;
    console.log(`音频播放结束 [${messageId}]`);
  };
  // The element reported an error: log it and record the failure
  const onError = (e: Event) => {
    console.error(`音频播放错误 [${messageId}]:`, e);
    handleError("音频播放失败", messageId);
  };
  // Purely informational
  const onCanPlayThrough = () => {
    console.log(`音频可以播放 [${messageId}]`);
  };

  player.addEventListener("ended", onEnded);
  player.addEventListener("error", onError);
  player.addEventListener("canplaythrough", onCanPlayThrough);
};
/**
 * Start playback of the audio belonging to `messageId`.
 * Reports an error through `handleError` when no audio element exists yet or
 * when starting playback fails.
 */
const play = async (messageId: string) => {
  const audioState = getAudioState(messageId);
  const player = audioState.audioElement;
  if (!player) {
    handleError("音频未准备好", messageId);
    return;
  }

  try {
    // Every other clip is paused first; this one is excluded
    pauseAll(messageId);
    await player.play();

    // Playback started: mark as playing and clear any earlier error
    audioState.isPlaying = true;
    audioState.hasError = false;
    audioState.errorMessage = "";
    console.log(`开始播放音频 [${messageId}]`);
  } catch (error) {
    handleError(`播放失败: ${error}`, messageId);
  }
};
/**
 * Pause the audio belonging to `messageId`; a no-op when it has no player.
 */
const pause = (messageId: string) => {
  const audioState = getAudioState(messageId);
  const player = audioState.audioElement;
  if (player) {
    player.pause();
    audioState.isPlaying = false;
    console.log(`暂停音频 [${messageId}]`);
  }
};
/**
 * Pause every clip that is currently playing.
 *
 * @param excludeMessageId - Optional message to leave untouched.
 */
const pauseAll = (excludeMessageId?: string) => {
  for (const [id, audioState] of audioStates.value) {
    const isExcluded = Boolean(excludeMessageId) && id === excludeMessageId;
    if (isExcluded || !audioState.isPlaying || !audioState.audioElement) {
      continue;
    }
    audioState.audioElement.pause();
    audioState.isPlaying = false;
  }
};
/**
 * Record a TTS failure.
 *
 * @param errorMsg - Description of the failure.
 * @param messageId - Message the failure belongs to; when omitted (or empty)
 *   the currently active conversion is used instead.
 */
const handleError = (errorMsg: string, messageId?: string) => {
  const targetMessageId = messageId || activeConversion.value;

  if (targetMessageId) {
    console.error(`TTS错误 [${targetMessageId}]: ${errorMsg}`);

    // Store the failure on the message and stop its loading indicator
    const failed = getAudioState(targetMessageId);
    failed.hasError = true;
    failed.errorMessage = errorMsg;
    failed.isLoading = false;

    // If this was the active conversion, the session is over
    if (activeConversion.value === targetMessageId) {
      activeConversion.value = null;
      hasActiveSession.value = false;
    }
    return;
  }

  // No message to attach the failure to: it can only be logged
  console.error(`TTS错误 (无messageId): ${errorMsg}`);
};
/**
 * Release the audio resources held for `messageId` and forget its state.
 * Does nothing when the message has no stored state.
 */
const clearAudio = (messageId: string) => {
  // Look the entry up directly: getAudioState would insert a default entry
  // (and trigger reactive updates) only for it to be deleted again below.
  const state = audioStates.value.get(messageId);
  if (state) {
    clearAudioState(state);
    audioStates.value.delete(messageId);
  }

  // Do not leave the active-conversion marker pointing at a removed message;
  // otherwise data arriving without an explicit id would be attributed to it.
  if (activeConversion.value === messageId) {
    activeConversion.value = null;
    hasActiveSession.value = false;
  }
};
/**
 * Reset a state object: stop and drop its player, revoke its object URL and
 * clear the playing flag, buffered chunks and error fields.
 * `isLoading` is left as it is.
 */
const clearAudioState = (state: AudioState) => {
  const { audioElement, audioUrl } = state;

  if (audioElement) {
    audioElement.pause();
    state.audioElement = null;
  }

  if (audioUrl) {
    URL.revokeObjectURL(audioUrl);
    state.audioUrl = null;
  }

  Object.assign(state, {
    isPlaying: false,
    audioChunks: [],
    hasError: false,
    errorMessage: ""
  });
};
// State queries.
// These are read from computed getters, so they must not modify anything.
// They look the entry up with Map#get instead of getAudioState, which would
// insert a default entry into the reactive map during a read. A message with
// no entry reports the same values a freshly created state would have.
const isPlaying = (messageId: string) =>
  audioStates.value.get(messageId)?.isPlaying ?? false;
const isLoading = (messageId: string) =>
  audioStates.value.get(messageId)?.isLoading ?? false;
const hasAudio = (messageId: string) =>
  !!audioStates.value.get(messageId)?.audioElement;
const hasError = (messageId: string) =>
  audioStates.value.get(messageId)?.hasError ?? false;
const getErrorMessage = (messageId: string) =>
  audioStates.value.get(messageId)?.errorMessage ?? "";
// Release every audio resource when the store's effect scope is disposed.
// onUnmounted is a component lifecycle hook: inside a store it would attach to
// whichever component happened to create the store first (or to none at all),
// so all audio would be wiped when that unrelated component unmounts.
onScopeDispose(() => {
  audioStates.value.forEach((state) => clearAudioState(state));
  audioStates.value.clear();
});
return {
// 状态查询方法
isPlaying,
isLoading,
hasAudio,
hasError,
getErrorMessage,
// 核心方法
convertText,
handleAudioData,
finishConversion,
play,
pause,
pauseAll,
clearAudio,
handleError
};
});

20
web/src/utils/audio.ts Normal file
View File

@@ -0,0 +1,20 @@
/**
 * Concatenate audio chunks, in order, into one contiguous byte array.
 *
 * @param chunks - Chunks to join; an empty list yields an empty array.
 * @returns A new Uint8Array holding the bytes of every chunk back to back.
 */
export const mergeAudioChunks = (chunks: ArrayBuffer[]): Uint8Array => {
  let totalLength = 0;
  for (const chunk of chunks) {
    totalLength += chunk.byteLength;
  }

  const merged = new Uint8Array(totalLength);
  let writePos = 0;
  for (const chunk of chunks) {
    merged.set(new Uint8Array(chunk), writePos);
    writePos += chunk.byteLength;
  }
  return merged;
};
/**
 * Wrap audio bytes in a Blob and return an object URL for it.
 *
 * @param audioData - Audio bytes.
 * @param mimeType - MIME type given to the Blob.
 * @returns The URL produced by URL.createObjectURL.
 */
export const createAudioUrl = (
  audioData: Uint8Array,
  mimeType = "audio/mp3"
): string =>
  URL.createObjectURL(new Blob([audioData as BlobPart], { type: mimeType }));

7
web/src/utils/format.ts Normal file
View File

@@ -0,0 +1,7 @@
/**
 * Format a duration in seconds as zero-padded `mm:ss`.
 * Fractions of a second are dropped; minutes are not wrapped into hours
 * (3600 seconds gives "60:00").
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted time, or "00:00" when `seconds` is NaN, infinite or
 *   negative (a negative value would otherwise render as e.g. "-1:-5").
 */
export const formatTime = (seconds: number): string => {
  // Number.isFinite already rejects NaN, so no separate NaN check is needed
  if (!Number.isFinite(seconds) || seconds < 0) return "00:00";

  const totalSeconds = Math.floor(seconds);
  const minutes = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  const pad = (value: number): string => value.toString().padStart(2, "0");
  return `${pad(minutes)}:${pad(secs)}`;
};

View File

@@ -1,4 +1,6 @@
export * from "./audio";
export * from "./context";
export * from "./format";
export * from "./media";
export * from "./pcm";
export * from "./title";

View File

@@ -1,5 +1,6 @@
<script setup lang="ts">
import type { SelectGroupOption, SelectOption } from "naive-ui";
import type { Message } from "@/interfaces";
import { throttle } from "lodash-es";
import AIAvatar from "@/assets/ai_avatar.png";
import {
@@ -25,17 +26,18 @@ const scrollbarRef = ref<HTMLElement | null>(null);
const options = ref<Array<SelectGroupOption | SelectOption>>([]);
// NCollapse 组件的折叠状态
const collapseActive = ref<string[]>(
historyMessages.value.map((_, idx) => String(idx))
historyMessages.value.map((msg, idx) => String(msg.id ?? idx))
);
const getName = (idx: number) => String(idx);
const getName = (msg: Message, idx: number) => String(msg.id ?? idx);
// TODO: bugfix: 未能正确展开
watch(
historyMessages,
(newVal, oldVal) => {
// 取所有name
const newNames = newVal.map((_, idx) => getName(idx));
const oldNames = oldVal ? oldVal.map((_, idx) => getName(idx)) : [];
const newNames = newVal.map((msg, idx) => getName(msg, idx));
const oldNames = oldVal ? oldVal.map((msg, idx) => getName(msg, idx)) : [];
// 找出新增的name
const addedNames = newNames.filter((name) => !oldNames.includes(name));
// 保留原有已展开项
@@ -45,9 +47,10 @@ watch(
// 新增的默认展开
collapseActive.value = [...currentActive, ...addedNames];
},
{ deep: true }
{ immediate: true, deep: true }
);
// 处理折叠项的点击事件,切换折叠状态
const handleItemHeaderClick = (name: string) => {
if (collapseActive.value.includes(name)) {
collapseActive.value = collapseActive.value.filter((n) => n !== name);
@@ -177,9 +180,15 @@ onMounted(() => {
:expanded-names="collapseActive[idx]"
>
<NCollapseItem
:title="thinking && idx === historyMessages.length - 1 ? '思考中...' : '已深度思考'"
:name="getName(idx)"
@item-header-click="() => handleItemHeaderClick(getName(idx))"
:title="
thinking && idx === historyMessages.length - 1
? '思考中...'
: '已深度思考'
"
:name="getName(msg, idx)"
@item-header-click="
() => handleItemHeaderClick(getName(msg, idx))
"
>
<div
class="text-[#7A7A7A] mb-4 border-l-2 border-[#E5E5E5] ml-2 pl-2"
@@ -190,6 +199,9 @@ onMounted(() => {
</NCollapse>
<!-- 内容↓ 思维链↑ -->
<markdown :content="msg.content || ''" />
<div v-if="msg.role !== 'user'" class="mt-2">
<tts :text="msg.content || ''" :message-id="msg.id!" />
</div>
<NDivider />
</div>
</div>