From faa4ca20b13288f11bab3ba299bcf8838b8f6754 Mon Sep 17 00:00:00 2001 From: Marcus <1922576605@qq.com> Date: Tue, 1 Jul 2025 00:23:04 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E7=BB=84=E4=BB=B6=E5=B0=81?= =?UTF-8?q?=E8=A3=85=EF=BC=8C=E9=9F=B3=E9=A2=91=E4=BF=9D=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + backend/app/api/v1/endpoints/tts.py | 62 ++++++++++++++++++++++++++--- web/components.d.ts | 2 + web/src/components/MessageTools.vue | 26 ++++++++++++ web/src/views/ChatLLMView.vue | 16 +------- web/src/views/VoiceView.vue | 4 +- 6 files changed, 88 insertions(+), 24 deletions(-) create mode 100644 web/src/components/MessageTools.vue diff --git a/.gitignore b/.gitignore index 669d75e..cd887af 100644 --- a/.gitignore +++ b/.gitignore @@ -114,3 +114,5 @@ node_modules/ *.njsproj *.sln *.sw? + +*.mp3 \ No newline at end of file diff --git a/backend/app/api/v1/endpoints/tts.py b/backend/app/api/v1/endpoints/tts.py index 6caafeb..106a283 100644 --- a/backend/app/api/v1/endpoints/tts.py +++ b/backend/app/api/v1/endpoints/tts.py @@ -1,10 +1,12 @@ -# tts.py import uuid import websockets import time import fastrand import json import asyncio +import os +import aiofiles +from datetime import datetime from typing import Dict, Any, Optional as OptionalType from app.constants.tts import APP_ID, TOKEN, SPEAKER @@ -34,8 +36,26 @@ EVENT_TaskRequest = 200 EVENT_TTSSentenceEnd = 351 EVENT_TTSResponse = 352 +# 音频文件保存目录 +TEMP_AUDIO_DIR = "./temp_audio" + + +# 确保音频目录存在 +async def ensure_audio_dir(): + """异步创建音频目录""" + if not os.path.exists(TEMP_AUDIO_DIR): + os.makedirs(TEMP_AUDIO_DIR, exist_ok=True) + + +# 生成时间戳文件名 +def generate_audio_filename() -> str: + """生成基于时间戳的音频文件名""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3] # 精确到毫秒 + return f"{timestamp}.mp3" + + +# ... 保留所有原有的类定义和工具函数 ... 
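A minimal standalone sketch of how the helpers above fit together: the timestamp format truncates %f (microseconds) to milliseconds, e.g. 20250701_002304_123.mp3, and the aiofiles write mirrors the save_audio_file helper added further down in this patch. The save_bytes name and the asyncio.to_thread wrapper around the blocking os.makedirs call are illustrative choices, not part of the change itself.

import asyncio
import os
from datetime import datetime

import aiofiles

TEMP_AUDIO_DIR = "./temp_audio"


def generate_audio_filename() -> str:
    # %f is microseconds (6 digits); [:-3] trims it to milliseconds
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S_%f')[:-3]}.mp3"


async def save_bytes(audio: bytes) -> str:
    # os.makedirs is a blocking call; running it in a worker thread keeps the
    # event loop free (the patch calls it directly, which is fine for a local dir)
    await asyncio.to_thread(os.makedirs, TEMP_AUDIO_DIR, exist_ok=True)
    path = os.path.join(TEMP_AUDIO_DIR, generate_audio_filename())
    async with aiofiles.open(path, "wb") as f:
        await f.write(audio)
    return path


# usage: asyncio.run(save_bytes(b"...mp3 bytes..."))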
-# 所有类定义 class Header: def __init__(self, protocol_version=PROTOCOL_VERSION, @@ -199,6 +219,8 @@ class TTSState: self.session_id: OptionalType[str] = None self.task: OptionalType[asyncio.Task] = None # 用于追踪异步任务 self.is_processing = False + self.audio_data = bytearray() # 用于收集音频数据 + self.audio_filename = None # 保存的文件名 # 全局状态管理 @@ -305,6 +327,18 @@ async def create_tts_connection() -> websockets.WebSocketServerProtocol: return volc_ws +# 异步保存音频文件 +async def save_audio_file(audio_data: bytes, filename: str) -> str: + """异步保存音频文件""" + await ensure_audio_dir() + file_path = os.path.join(TEMP_AUDIO_DIR, filename) + + async with aiofiles.open(file_path, 'wb') as f: + await f.write(audio_data) + + return file_path + + # 处理单个TTS任务 async def process_tts_task(websocket, message_id: str, text: str): """处理单个TTS任务(独立协程)""" @@ -318,6 +352,8 @@ async def process_tts_task(websocket, message_id: str, text: str): raise Exception(f"找不到TTS状态: {message_id}") tts_state.is_processing = True + # 生成音频文件名 + tts_state.audio_filename = generate_audio_filename() # 创建独立的TTS连接 tts_state.volc_ws = await create_tts_connection() @@ -373,8 +409,12 @@ async def process_tts_task(websocket, message_id: str, text: str): elif res.optional.event == EVENT_TTSResponse: audio_count += 1 - print(f"发送音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}") - # 发送音频数据 + print(f"收到音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}") + + # 收集音频数据 + tts_state.audio_data.extend(res.payload) + + # 发送音频数据到前端 await websocket.send_json({ "id": audio_count, "type": "tts_audio_data", @@ -387,10 +427,20 @@ async def process_tts_task(websocket, message_id: str, text: str): except asyncio.TimeoutError: print(f"TTS响应超时 [{message_id}],强制结束") - # 发送完成消息 + # 异步保存音频文件 + if tts_state.audio_data: + file_path = await save_audio_file( + bytes(tts_state.audio_data), + tts_state.audio_filename + ) + print(f"音频文件已保存 [{message_id}]: {file_path}") + + # 发送完成消息,包含文件路径 await websocket.send_json({ "type": "tts_audio_complete", - "messageId": message_id + "messageId": message_id, + "audioFile": tts_state.audio_filename, + "audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None }) print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包") diff --git a/web/components.d.ts b/web/components.d.ts index 3400cc8..dea5305 100644 --- a/web/components.d.ts +++ b/web/components.d.ts @@ -9,6 +9,8 @@ declare module 'vue' { export interface GlobalComponents { Avatar: typeof import('./src/components/avatar.vue')['default'] Markdown: typeof import('./src/components/markdown.vue')['default'] + Message_tools: typeof import('./src/components/MessageTools.vue')['default'] + MessageTools: typeof import('./src/components/MessageTools.vue')['default'] NButton: typeof import('naive-ui')['NButton'] NCollapse: typeof import('naive-ui')['NCollapse'] NCollapseItem: typeof import('naive-ui')['NCollapseItem'] diff --git a/web/src/components/MessageTools.vue b/web/src/components/MessageTools.vue new file mode 100644 index 0000000..e8aaa1f --- /dev/null +++ b/web/src/components/MessageTools.vue @@ -0,0 +1,26 @@ + + + \ No newline at end of file diff --git a/web/src/views/ChatLLMView.vue b/web/src/views/ChatLLMView.vue index 177e3c6..a70260b 100644 --- a/web/src/views/ChatLLMView.vue +++ b/web/src/views/ChatLLMView.vue @@ -4,7 +4,6 @@ import type { Message } from "@/interfaces"; import { throttle } from "lodash-es"; import AIAvatar from "@/assets/ai_avatar.png"; import { - DocumentDuplicateIcon, ExclamationTriangleIcon, microphone, 
PaperAirplaneIcon, @@ -13,7 +12,6 @@ import { import UserAvatar from "@/assets/user_avatar.jpg"; import markdown from "@/components/markdown.vue"; import { useAsrStore, useChatStore, useLayoutStore } from "@/stores"; -import { copy } from "@/utils"; const chatStore = useChatStore(); const { historyMessages, completing, modelList, modelInfo, thinking } = @@ -206,19 +204,7 @@ onMounted(() => { -
[template markup stripped in extraction: the removed lines held the inline copy control — a tooltip labelled "复制内容" around a DocumentDuplicateIcon wired to the copy util, both imports of which are dropped above — now replaced by the new MessageTools component]
+ diff --git a/web/src/views/VoiceView.vue b/web/src/views/VoiceView.vue index 2a880e4..60efce8 100644 --- a/web/src/views/VoiceView.vue +++ b/web/src/views/VoiceView.vue @@ -191,9 +191,7 @@ onMounted(() => { -
- -
+ From ec6bd7db88e815982cd916d2718195305806a96e Mon Sep 17 00:00:00 2001 From: Marcus <1922576605@qq.com> Date: Tue, 1 Jul 2025 01:27:29 +0800 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E9=9F=B3?= =?UTF-8?q?=E8=89=B2=E5=88=87=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/api/v1/endpoints/speaker.py | 10 ++ backend/app/api/v1/endpoints/tts.py | 44 ++++----- .../api/v1/endpoints/voice_conversation.py | 4 +- .../app/api/v1/endpoints/websocket_service.py | 6 +- backend/app/constants/tts.py | 99 ++++++++++++++++++- backend/app/main.py | 3 +- backend/app/schemas/__init__.py | 1 + backend/app/schemas/chat.py | 17 ++++ web/src/interfaces/chat_service.ts | 96 +++++++++++++++++- web/src/services/chat_service.ts | 5 + web/src/stores/asr_store.ts | 2 + web/src/stores/chat_store.ts | 21 +++- web/src/stores/tts_store.ts | 11 ++- web/src/views/VoiceView.vue | 46 +++++---- 14 files changed, 308 insertions(+), 57 deletions(-) create mode 100644 backend/app/api/v1/endpoints/speaker.py diff --git a/backend/app/api/v1/endpoints/speaker.py b/backend/app/api/v1/endpoints/speaker.py new file mode 100644 index 0000000..e4d63e3 --- /dev/null +++ b/backend/app/api/v1/endpoints/speaker.py @@ -0,0 +1,10 @@ +from fastapi import APIRouter +from app.constants.tts import SPEAKER_DATA +from app.schemas import SpeakerResponse + +router = APIRouter() + + +@router.get("/list", response_model=SpeakerResponse) +async def get_model_vendors(): + return SpeakerResponse(data=SPEAKER_DATA) diff --git a/backend/app/api/v1/endpoints/tts.py b/backend/app/api/v1/endpoints/tts.py index 106a283..1d2ed88 100644 --- a/backend/app/api/v1/endpoints/tts.py +++ b/backend/app/api/v1/endpoints/tts.py @@ -340,55 +340,51 @@ async def save_audio_file(audio_data: bytes, filename: str) -> str: # 处理单个TTS任务 -async def process_tts_task(websocket, message_id: str, text: str): +async def process_tts_task(websocket, message_id: str, text: str, speaker: str = None): """处理单个TTS任务(独立协程)""" tts_state = None - try: - print(f"开始处理TTS任务 [{message_id}]: {text}") + # 使用传入的speaker,如果没有则使用默认的 + selected_speaker = speaker if speaker else SPEAKER + try: + print(f"开始处理TTS任务 [{message_id}]: {text}, 使用说话人: {selected_speaker}") # 获取TTS状态 tts_state = tts_manager.get_tts_state(websocket, message_id) if not tts_state: raise Exception(f"找不到TTS状态: {message_id}") - tts_state.is_processing = True # 生成音频文件名 tts_state.audio_filename = generate_audio_filename() - # 创建独立的TTS连接 tts_state.volc_ws = await create_tts_connection() - # 创建会话 tts_state.session_id = uuid.uuid4().__str__().replace('-', '') tts_manager.register_session(tts_state.session_id, message_id) - print(f"创建TTS会话 [{message_id}]: {tts_state.session_id}") header = Header(message_type=FULL_CLIENT_REQUEST, message_type_specific_flags=MsgTypeFlagWithEvent, serial_method=JSON).as_bytes() optional = Optional(event=EVENT_StartSession, sessionId=tts_state.session_id).as_bytes() - payload = get_payload_bytes(event=EVENT_StartSession, speaker=SPEAKER) + # 使用选择的speaker + payload = get_payload_bytes(event=EVENT_StartSession, speaker=selected_speaker) await send_event(tts_state.volc_ws, header, optional, payload) - raw_data = await tts_state.volc_ws.recv() res = parser_response(raw_data) if res.optional.event != EVENT_SessionStarted: raise Exception("TTS会话启动失败") print(f"TTS会话创建成功 [{message_id}]: {tts_state.session_id}") - # 发送文本到TTS服务 print(f"发送文本到TTS服务 [{message_id}]...") header = Header(message_type=FULL_CLIENT_REQUEST, 
message_type_specific_flags=MsgTypeFlagWithEvent, serial_method=JSON).as_bytes() optional = Optional(event=EVENT_TaskRequest, sessionId=tts_state.session_id).as_bytes() - payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=SPEAKER) + # 使用选择的speaker + payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=selected_speaker) await send_event(tts_state.volc_ws, header, optional, payload) - # 接收TTS响应并发送到前端 print(f"开始接收TTS响应 [{message_id}]...") audio_count = 0 - try: while True: raw_data = await asyncio.wait_for( @@ -396,17 +392,13 @@ async def process_tts_task(websocket, message_id: str, text: str): timeout=30 ) res = parser_response(raw_data) - print(f"收到TTS事件 [{message_id}]: {res.optional.event}") - if res.optional.event == EVENT_TTSSentenceEnd: print(f"句子结束事件 [{message_id}] - 直接完成") break - elif res.optional.event == EVENT_SessionFinished: print(f"收到会话结束事件 [{message_id}]") break - elif res.optional.event == EVENT_TTSResponse: audio_count += 1 print(f"收到音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}") @@ -423,10 +415,8 @@ async def process_tts_task(websocket, message_id: str, text: str): }) else: print(f"未知TTS事件 [{message_id}]: {res.optional.event}") - except asyncio.TimeoutError: print(f"TTS响应超时 [{message_id}],强制结束") - # 异步保存音频文件 if tts_state.audio_data: file_path = await save_audio_file( @@ -435,15 +425,15 @@ async def process_tts_task(websocket, message_id: str, text: str): ) print(f"音频文件已保存 [{message_id}]: {file_path}") - # 发送完成消息,包含文件路径 + # 发送完成消息,包含文件路径和使用的speaker await websocket.send_json({ "type": "tts_audio_complete", "messageId": message_id, "audioFile": tts_state.audio_filename, - "audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None + "audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None, + "speaker": selected_speaker }) - print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包") - + print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包,使用说话人: {selected_speaker}") except asyncio.CancelledError: print(f"TTS任务被取消 [{message_id}]") await websocket.send_json({ @@ -474,14 +464,14 @@ async def process_tts_task(websocket, message_id: str, text: str): # 启动TTS文本转换 -async def handle_tts_text(websocket, message_id: str, text: str): +async def handle_tts_text(websocket, message_id: str, text: str, speaker: str = None): """启动TTS文本转换""" # 创建新的TTS状态 + print(speaker) tts_state = tts_manager.add_tts_state(websocket, message_id) - - # 启动异步任务 + # 启动异步任务,传入speaker参数 tts_state.task = asyncio.create_task( - process_tts_task(websocket, message_id, text) + process_tts_task(websocket, message_id, text, speaker) ) diff --git a/backend/app/api/v1/endpoints/voice_conversation.py b/backend/app/api/v1/endpoints/voice_conversation.py index 3501091..89c4dae 100644 --- a/backend/app/api/v1/endpoints/voice_conversation.py +++ b/backend/app/api/v1/endpoints/voice_conversation.py @@ -8,7 +8,7 @@ from . 
import tts from app.constants.model_data import tip_message, base_url, headers -async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str): +async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str, speaker: str): try: print(f"开始处理语音对话 [{message_id}]: {asr_text}") @@ -92,7 +92,7 @@ async def process_voice_conversation(websocket: WebSocket, asr_text: str, messag # 启动TTS处理完整内容 print(f"启动完整TTS处理 [{message_id}]: {full_response}") - await tts.handle_tts_text(websocket, message_id, full_response) + await tts.handle_tts_text(websocket, message_id, full_response, speaker) except Exception as e: print(f"语音对话处理异常 [{message_id}]: {e}") diff --git a/backend/app/api/v1/endpoints/websocket_service.py b/backend/app/api/v1/endpoints/websocket_service.py index d6d32f5..1b2a5ff 100644 --- a/backend/app/api/v1/endpoints/websocket_service.py +++ b/backend/app/api/v1/endpoints/websocket_service.py @@ -64,7 +64,8 @@ async def websocket_online_count(websocket: WebSocket): # 从data中获取messageId,如果不存在则生成一个新的ID message_id = data.get("messageId", "voice_" + str(uuid.uuid4())) if data.get("voiceConversation"): - await process_voice_conversation(websocket, asr_text, message_id) + speaker = data.get("speaker") + await process_voice_conversation(websocket, asr_text, message_id, speaker) else: await websocket.send_json({"type": "asr_result", "result": asr_text}) temp_buffer = bytes() @@ -73,6 +74,7 @@ async def websocket_online_count(websocket: WebSocket): elif msg_type == "tts_text": message_id = data.get("messageId") text = data.get("text", "") + speaker = data.get("speaker") if not message_id: await websocket.send_json({ @@ -83,7 +85,7 @@ async def websocket_online_count(websocket: WebSocket): print(f"收到TTS文本请求 [{message_id}]: {text}") try: - await tts.handle_tts_text(websocket, message_id, text) + await tts.handle_tts_text(websocket, message_id, text, speaker) except Exception as e: print(f"TTS文本处理异常 [{message_id}]: {e}") await websocket.send_json({ diff --git a/backend/app/constants/tts.py b/backend/app/constants/tts.py index 8e8c3aa..6e2a74b 100644 --- a/backend/app/constants/tts.py +++ b/backend/app/constants/tts.py @@ -4,4 +4,101 @@ APP_ID = '2138450044' TOKEN = 'V04_QumeQZhJrQ_In1Z0VBQm7n0ttMNO' -SPEAKER = 'zh_male_beijingxiaoye_moon_bigtts' \ No newline at end of file +SPEAKER = 'zh_male_beijingxiaoye_moon_bigtts' + +SPEAKER_DATA = [ + { + "category": "趣味口音", + "speakers": [ + { + "speaker_id": "zh_male_jingqiangkanye_moon_bigtts", + "speaker_name": "京腔侃爷/Harmony", + "language": "中文-北京口音、英文", + "platforms": ["豆包", "Cici", "web demo"] + }, + { + "speaker_id": "zh_female_wanwanxiaohe_moon_bigtts", + "speaker_name": "湾湾小何", + "language": "中文-台湾口音", + "platforms": ["豆包", "Cici"] + }, + { + "speaker_id": "zh_female_wanqudashu_moon_bigtts", + "speaker_name": "湾区大叔", + "language": "中文-广东口音", + "platforms": ["豆包", "Cici"] + }, + { + "speaker_id": "zh_female_daimengchuanmei_moon_bigtts", + "speaker_name": "呆萌川妹", + "language": "中文-四川口音", + "platforms": ["豆包", "Cici"] + }, + { + "speaker_id": "zh_male_guozhoudege_moon_bigtts", + "speaker_name": "广州德哥", + "language": "中文-广东口音", + "platforms": ["豆包", "Cici"] + }, + { + "speaker_id": "zh_male_beijingxiaoye_moon_bigtts", + "speaker_name": "北京小爷", + "language": "中文-北京口音", + "platforms": ["豆包"] + }, + { + "speaker_id": "zh_male_haoyuxiaoge_moon_bigtts", + "speaker_name": "浩宇小哥", + "language": "中文-青岛口音", + "platforms": ["豆包"] + }, + { + "speaker_id": "zh_male_guangxiyuanzhou_moon_bigtts", + "speaker_name": "广西远舟", + 
"language": "中文-广西口音", + "platforms": ["豆包"] + }, + { + "speaker_id": "zh_female_meituojieer_moon_bigtts", + "speaker_name": "妹坨洁儿", + "language": "中文-长沙口音", + "platforms": ["豆包", "剪映"] + }, + { + "speaker_id": "zh_male_yuzhouzixuan_moon_bigtts", + "speaker_name": "豫州子轩", + "language": "中文-河南口音", + "platforms": ["豆包"] + } + ] + }, + { + "category": "角色扮演", + "speakers": [ + { + "speaker_id": "zh_male_naiqimengwa_mars_bigtts", + "speaker_name": "奶气萌娃", + "language": "中文", + "platforms": ["剪映", "豆包"] + }, + { + "speaker_id": "zh_female_popo_mars_bigtts", + "speaker_name": "婆婆", + "language": "中文", + "platforms": ["剪映C端", "抖音", "豆包"] + }, + { + "speaker_id": "zh_female_gaolengyujie_moon_bigtts", + "speaker_name": "高冷御姐", + "language": "中文", + "platforms": ["豆包", "Cici"] + }, + { + "speaker_id": "zh_male_aojiaobazong_moon_bigtts", + "speaker_name": "傲娇霸总", + "language": "中文", + "platforms": ["豆包"] + } + ] + } +] diff --git a/backend/app/main.py b/backend/app/main.py index c3ad30c..349d11f 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,5 +1,5 @@ from fastapi import FastAPI -from app.api.v1.endpoints import chat, model, websocket_service +from app.api.v1.endpoints import chat, model, websocket_service,speaker app = FastAPI() @@ -9,6 +9,7 @@ app.include_router(websocket_service.router, prefix="", tags=["websocket_service app.include_router(chat.router, prefix="/v1/chat", tags=["chat"]) # 获取模型列表服务 app.include_router(model.router, prefix="/v1/model", tags=["model_list"]) +app.include_router(speaker.router, prefix="/v1/speaker", tags=["speaker_list"]) if __name__ == "__main__": import uvicorn diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py index 2b58eba..94025bd 100644 --- a/backend/app/schemas/__init__.py +++ b/backend/app/schemas/__init__.py @@ -5,4 +5,5 @@ from .chat import ( ModelInfo, VendorModelList, VendorModelResponse, + SpeakerResponse ) diff --git a/backend/app/schemas/chat.py b/backend/app/schemas/chat.py index 70e4233..467eff6 100644 --- a/backend/app/schemas/chat.py +++ b/backend/app/schemas/chat.py @@ -33,3 +33,20 @@ class VendorModelList(BaseModel): class VendorModelResponse(BaseModel): data: List[VendorModelList] + + +# Speaker相关模型 +class Speaker(BaseModel): + speaker_id: str + speaker_name: str + language: str + platforms: List[str] + + +class CategorySpeakers(BaseModel): + category: str + speakers: List[Speaker] + + +class SpeakerResponse(BaseModel): + data: List[CategorySpeakers] diff --git a/web/src/interfaces/chat_service.ts b/web/src/interfaces/chat_service.ts index 2724c15..eaf9dae 100644 --- a/web/src/interfaces/chat_service.ts +++ b/web/src/interfaces/chat_service.ts @@ -12,7 +12,7 @@ export interface Message { role?: string; usage?: UsageInfo; id?: string; - type?: 'chat' | 'voice'; + type?: "chat" | "voice"; [property: string]: any; } @@ -32,3 +32,97 @@ export interface UsageInfo { completion_tokens: number; total_tokens: number; } + +/** + * Speaker 语音合成器基本信息 + */ +export interface Speaker { + /** speaker唯一标识ID */ + speaker_id: string; + /** speaker显示名称 */ + speaker_name: string; + /** 支持的语言/口音 */ + language: string; + /** 支持的平台列表 */ + platforms: string[]; +} + +/** + * Speaker分类信息 + */ +export interface CategorySpeakers { + /** 分类名称 */ + category: string; + /** 该分类下的speaker列表 */ + speakers: Speaker[]; +} + +/** + * Speaker分类枚举 + */ +export enum SpeakerCategory { + /** 趣味口音 */ + ACCENT = "趣味口音", + /** 角色扮演 */ + ROLE_PLAY = "角色扮演" +} + +/** + * 常用平台枚举 + */ +export enum SpeakerPlatform { + DOUYIN = "抖音", + DOUBAO = "豆包", + 
CICI = "Cici", + JIANYING = "剪映", + JIANYING_C = "剪映C端", + WEB_DEMO = "web demo", + STORY_AI = "StoryAi", + MAOXIANG = "猫箱" +} + +/** + * Speaker选择器组件Props + */ +export interface SpeakerSelectorProps { + /** 当前选中的speaker */ + selectedSpeaker?: Speaker; + /** speaker选择回调 */ + onSpeakerChange: (speaker: Speaker) => void; + /** 是否禁用 */ + disabled?: boolean; + /** 过滤特定分类 */ + filterCategories?: SpeakerCategory[]; + /** 过滤特定平台 */ + filterPlatforms?: SpeakerPlatform[]; +} + +/** + * 语音合成参数 + */ +export interface VoiceSynthesisParams { + /** 使用的speaker */ + speaker: Speaker; + /** 要合成的文本 */ + text: string; + /** 语速 (0.5-2.0) */ + speed?: number; + /** 音调 (0.5-2.0) */ + pitch?: number; + /** 音量 (0.0-1.0) */ + volume?: number; +} + +/** + * 语音合成响应 + */ +export interface VoiceSynthesisResponse { + /** 音频文件URL */ + audio_url: string; + /** 音频时长(秒) */ + duration: number; + /** 合成状态 */ + status: "success" | "error"; + /** 错误信息 */ + error_message?: string; +} diff --git a/web/src/services/chat_service.ts b/web/src/services/chat_service.ts index 4957670..1e7ecc5 100644 --- a/web/src/services/chat_service.ts +++ b/web/src/services/chat_service.ts @@ -137,4 +137,9 @@ export class ChatService { public static GetModelList(config?: AxiosRequestConfig) { return BaseClientService.get(`${this.basePath}/model/list`, config); } + + // 获取音色列表 + public static GetSpeakerList(config?: AxiosRequestConfig) { + return BaseClientService.get(`${this.basePath}/speaker/list`, config); + } } diff --git a/web/src/stores/asr_store.ts b/web/src/stores/asr_store.ts index 39a7fd4..e754b91 100644 --- a/web/src/stores/asr_store.ts +++ b/web/src/stores/asr_store.ts @@ -1,5 +1,6 @@ import { useWebSocketStore } from "@/services"; import { convertToPCM16 } from "@/utils"; +import { useChatStore } from "./chat_store"; export const useAsrStore = defineStore("asr", () => { // 是否正在录音 @@ -125,6 +126,7 @@ export const useAsrStore = defineStore("asr", () => { if (router.currentRoute.value.path === "/voice") { msg.messageId = messageId; msg.voiceConversation = true; + msg.speaker = useChatStore().speakerInfo?.speaker_id; } sendMessage(JSON.stringify(msg)); diff --git a/web/src/stores/chat_store.ts b/web/src/stores/chat_store.ts index 51dfb3e..7df4de4 100644 --- a/web/src/stores/chat_store.ts +++ b/web/src/stores/chat_store.ts @@ -1,7 +1,9 @@ import type { + CategorySpeakers, IChatWithLLMRequest, ModelInfo, ModelListInfo, + Speaker, UsageInfo } from "@/interfaces"; import { ChatService } from "@/services"; @@ -20,6 +22,10 @@ export const useChatStore = defineStore("chat", () => { const thinking = ref(false); // 模型列表 const modelList = ref([]); + // 音色列表 + const speakerList = ref([]); + // 当前音色信息 + const speakerInfo = ref(null); // 在线人数 const onlineCount = ref(0); @@ -151,6 +157,16 @@ export const useChatStore = defineStore("chat", () => { } }; + // 获取音色列表 + const getSpeakerList = async () => { + try { + const response = await ChatService.GetSpeakerList(); + speakerList.value = response.data.data; + } catch (error) { + console.error("获取音色·列表失败:", error); + } + }; + return { token, completing, @@ -162,6 +178,9 @@ export const useChatStore = defineStore("chat", () => { addMessageToHistory, clearHistoryMessages, getModelList, - onlineCount + onlineCount, + speakerList, + getSpeakerList, + speakerInfo }; }); diff --git a/web/src/stores/tts_store.ts b/web/src/stores/tts_store.ts index a8a22fb..ac220c0 100644 --- a/web/src/stores/tts_store.ts +++ b/web/src/stores/tts_store.ts @@ -1,5 +1,6 @@ import { useAudioWebSocket } from "@/services"; import { 
createAudioUrl, mergeAudioChunks } from "@/utils"; +import { useChatStore } from "./chat_store"; interface AudioState { isPlaying: boolean; @@ -12,6 +13,7 @@ interface AudioState { } export const useTtsStore = defineStore("tts", () => { + const chatStore = useChatStore(); // 多音频状态管理 - 以消息ID为key const audioStates = ref>(new Map()); @@ -65,7 +67,14 @@ export const useTtsStore = defineStore("tts", () => { hasActiveSession.value = true; // 发送文本到TTS服务 - sendMessage(JSON.stringify({ type: "tts_text", text, messageId })); + sendMessage( + JSON.stringify({ + type: "tts_text", + text, + messageId, + speaker: chatStore.speakerInfo?.speaker_id + }) + ); } catch (error) { handleError(`连接失败: ${error}`, messageId); } diff --git a/web/src/views/VoiceView.vue b/web/src/views/VoiceView.vue index 60efce8..126bf9b 100644 --- a/web/src/views/VoiceView.vue +++ b/web/src/views/VoiceView.vue @@ -9,7 +9,7 @@ import markdown from "@/components/markdown.vue"; import { useAsrStore, useChatStore, useLayoutStore } from "@/stores"; const chatStore = useChatStore(); -const { historyMessages, completing, modelList, modelInfo, thinking } = +const { historyMessages, completing, speakerList, speakerInfo, thinking } = storeToRefs(chatStore); const asrStore = useAsrStore(); const { isRecording } = storeToRefs(asrStore); @@ -58,39 +58,43 @@ const handleItemHeaderClick = (name: string) => { } }; -// 处理选中模型的 ID -const selectedModelId = computed({ - get: () => modelInfo.value?.model_id ?? null, +// 处理选中speaker的 ID +const selectedSpeakerId = computed({ + get: () => speakerInfo.value?.speaker_id ?? null, set: (id: string | null) => { - for (const vendor of modelList.value) { - const found = vendor.models.find((model) => model.model_id === id); + for (const category of speakerList.value) { + const found = category.speakers.find( + (speaker) => speaker.speaker_id === id + ); if (found) { - modelInfo.value = found; + speakerInfo.value = found; return; } } - modelInfo.value = null; + speakerInfo.value = null; } }); -// 监听模型列表变化,更新选项 +// 监听speaker列表变化,更新选项 watch( - () => modelList.value, + () => speakerList.value, (newVal) => { if (newVal) { - options.value = newVal.map((vendor) => ({ + options.value = newVal.map((category) => ({ type: "group", - label: vendor.vendor, - key: vendor.vendor, - children: vendor.models.map((model) => ({ - label: model.model_name, - value: model.model_id, - type: model.model_type + label: category.category, + key: category.category, + children: category.speakers.map((speaker) => ({ + label: speaker.speaker_name, + value: speaker.speaker_id, + language: speaker.language, + platforms: speaker.platforms })) })); - if (newVal.length > 0 && newVal[0].models.length > 0) { - modelInfo.value = newVal[0].models[0]; + // 默认选择第一个speaker + if (newVal.length > 0 && newVal[0].speakers.length > 0) { + speakerInfo.value = newVal[0].speakers[0]; } } }, @@ -115,7 +119,7 @@ watch(completing, (newVal) => { }); onMounted(() => { - chatStore.getModelList(); + chatStore.getSpeakerList(); }); @@ -207,7 +211,7 @@ onMounted(() => {
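Taken together, the two patches expose a speaker catalogue over HTTP and accept a speaker field on the existing tts_text WebSocket message. A hedged end-to-end sketch of a client exercising that flow follows; the base URL, the WebSocket path, and the httpx/websockets client libraries are assumptions for illustration (the actual route registration lives in websocket_service.py, which this diff only shows in part).

import asyncio
import json

import httpx
import websockets

API_BASE = "http://localhost:8000"   # assumed local dev address
WS_URL = "ws://localhost:8000/ws"    # hypothetical path; see websocket_service.py for the real route


async def demo() -> None:
    # 1. Fetch the speaker catalogue from the new /v1/speaker/list endpoint
    async with httpx.AsyncClient() as client:
        resp = await client.get(f"{API_BASE}/v1/speaker/list")
        resp.raise_for_status()
        categories = resp.json()["data"]
    speaker_id = categories[0]["speakers"][0]["speaker_id"]

    # 2. Request synthesis with that speaker over the existing WebSocket protocol
    async with websockets.connect(WS_URL) as ws:
        await ws.send(json.dumps({
            "type": "tts_text",
            "messageId": "demo-1",
            "text": "你好",
            "speaker": speaker_id,
        }))
        while True:
            msg = json.loads(await ws.recv())
            if msg.get("type") == "tts_audio_data":
                print("audio chunk", msg.get("id"))
            elif msg.get("type") == "tts_audio_complete":
                print("saved as", msg.get("audioFile"), "speaker:", msg.get("speaker"))
                break


asyncio.run(demo())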