Merge pull request 'feat/1.0.1' (#4) from feat/1.0.1 into main
Reviewed-on: #4
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -114,3 +114,5 @@ node_modules/
|
|||||||
*.njsproj
|
*.njsproj
|
||||||
*.sln
|
*.sln
|
||||||
*.sw?
|
*.sw?
|
||||||
|
|
||||||
|
*.mp3
|
||||||
10
backend/app/api/v1/endpoints/speaker.py
Normal file
10
backend/app/api/v1/endpoints/speaker.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from fastapi import APIRouter
|
||||||
|
from app.constants.tts import SPEAKER_DATA
|
||||||
|
from app.schemas import SpeakerResponse
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/list", response_model=SpeakerResponse)
async def get_model_vendors():
    """Return the static speaker catalogue wrapped in a ``SpeakerResponse``.

    NOTE(review): the function name looks copied from the model-list
    endpoint; this handler serves ``SPEAKER_DATA``, not model vendors.
    """
    speaker_payload = SpeakerResponse(data=SPEAKER_DATA)
    return speaker_payload
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
# tts.py
|
|
||||||
import uuid
|
import uuid
|
||||||
import websockets
|
import websockets
|
||||||
import time
|
import time
|
||||||
import fastrand
|
import fastrand
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
|
import aiofiles
|
||||||
|
from datetime import datetime
|
||||||
from typing import Dict, Any, Optional as OptionalType
|
from typing import Dict, Any, Optional as OptionalType
|
||||||
|
|
||||||
from app.constants.tts import APP_ID, TOKEN, SPEAKER
|
from app.constants.tts import APP_ID, TOKEN, SPEAKER
|
||||||
@@ -34,8 +36,26 @@ EVENT_TaskRequest = 200
|
|||||||
EVENT_TTSSentenceEnd = 351
|
EVENT_TTSSentenceEnd = 351
|
||||||
EVENT_TTSResponse = 352
|
EVENT_TTSResponse = 352
|
||||||
|
|
||||||
|
# Directory where synthesized audio files are written
TEMP_AUDIO_DIR = "./temp_audio"


# Make sure the audio output directory exists
async def ensure_audio_dir():
    """Create ``TEMP_AUDIO_DIR`` (and any missing parents) if needed.

    The call is idempotent: ``exist_ok=True`` makes ``os.makedirs`` a no-op
    when the directory is already present, so no separate existence check
    is required (a prior check would only add a check-then-create race).
    """
    os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Build a unique, timestamp-prefixed audio file name
def generate_audio_filename() -> str:
    """Return a unique ``.mp3`` file name prefixed with the current local time.

    The name has the form ``YYYYmmdd_HHMMSS_mmm_<8 hex chars>.mp3``. The
    random suffix keeps two TTS tasks that start within the same millisecond
    from producing the same name and overwriting each other's audio file.
    """
    # %f yields microseconds; dropping the last three digits leaves milliseconds
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
    unique_suffix = uuid.uuid4().hex[:8]
    return f"{timestamp}_{unique_suffix}.mp3"
|
||||||
|
|
||||||
|
|
||||||
|
# Protocol class definitions and helper functions (unchanged) follow
|
||||||
|
|
||||||
# 所有类定义
|
|
||||||
class Header:
|
class Header:
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
protocol_version=PROTOCOL_VERSION,
|
protocol_version=PROTOCOL_VERSION,
|
||||||
@@ -199,6 +219,8 @@ class TTSState:
|
|||||||
self.session_id: OptionalType[str] = None
|
self.session_id: OptionalType[str] = None
|
||||||
self.task: OptionalType[asyncio.Task] = None # 用于追踪异步任务
|
self.task: OptionalType[asyncio.Task] = None # 用于追踪异步任务
|
||||||
self.is_processing = False
|
self.is_processing = False
|
||||||
|
self.audio_data = bytearray() # 用于收集音频数据
|
||||||
|
self.audio_filename = None # 保存的文件名
|
||||||
|
|
||||||
|
|
||||||
# 全局状态管理
|
# 全局状态管理
|
||||||
@@ -305,54 +327,64 @@ async def create_tts_connection() -> websockets.WebSocketServerProtocol:
|
|||||||
return volc_ws
|
return volc_ws
|
||||||
|
|
||||||
|
|
||||||
|
# Persist collected audio bytes to disk without blocking the event loop
async def save_audio_file(audio_data: bytes, filename: str) -> str:
    """Write ``audio_data`` to ``TEMP_AUDIO_DIR/filename`` and return that path.

    The output directory is created first via ``ensure_audio_dir``; the file
    is opened in binary write mode through ``aiofiles``.
    """
    await ensure_audio_dir()

    target_path = os.path.join(TEMP_AUDIO_DIR, filename)
    async with aiofiles.open(target_path, 'wb') as audio_file:
        await audio_file.write(audio_data)

    return target_path
|
||||||
|
|
||||||
|
|
||||||
# 处理单个TTS任务
|
# 处理单个TTS任务
|
||||||
async def process_tts_task(websocket, message_id: str, text: str):
|
async def process_tts_task(websocket, message_id: str, text: str, speaker: str = None):
|
||||||
"""处理单个TTS任务(独立协程)"""
|
"""处理单个TTS任务(独立协程)"""
|
||||||
tts_state = None
|
tts_state = None
|
||||||
try:
|
# 使用传入的speaker,如果没有则使用默认的
|
||||||
print(f"开始处理TTS任务 [{message_id}]: {text}")
|
selected_speaker = speaker if speaker else SPEAKER
|
||||||
|
|
||||||
|
try:
|
||||||
|
print(f"开始处理TTS任务 [{message_id}]: {text}, 使用说话人: {selected_speaker}")
|
||||||
# 获取TTS状态
|
# 获取TTS状态
|
||||||
tts_state = tts_manager.get_tts_state(websocket, message_id)
|
tts_state = tts_manager.get_tts_state(websocket, message_id)
|
||||||
if not tts_state:
|
if not tts_state:
|
||||||
raise Exception(f"找不到TTS状态: {message_id}")
|
raise Exception(f"找不到TTS状态: {message_id}")
|
||||||
|
|
||||||
tts_state.is_processing = True
|
tts_state.is_processing = True
|
||||||
|
# 生成音频文件名
|
||||||
|
tts_state.audio_filename = generate_audio_filename()
|
||||||
# 创建独立的TTS连接
|
# 创建独立的TTS连接
|
||||||
tts_state.volc_ws = await create_tts_connection()
|
tts_state.volc_ws = await create_tts_connection()
|
||||||
|
|
||||||
# 创建会话
|
# 创建会话
|
||||||
tts_state.session_id = uuid.uuid4().__str__().replace('-', '')
|
tts_state.session_id = uuid.uuid4().__str__().replace('-', '')
|
||||||
tts_manager.register_session(tts_state.session_id, message_id)
|
tts_manager.register_session(tts_state.session_id, message_id)
|
||||||
|
|
||||||
print(f"创建TTS会话 [{message_id}]: {tts_state.session_id}")
|
print(f"创建TTS会话 [{message_id}]: {tts_state.session_id}")
|
||||||
header = Header(message_type=FULL_CLIENT_REQUEST,
|
header = Header(message_type=FULL_CLIENT_REQUEST,
|
||||||
message_type_specific_flags=MsgTypeFlagWithEvent,
|
message_type_specific_flags=MsgTypeFlagWithEvent,
|
||||||
serial_method=JSON).as_bytes()
|
serial_method=JSON).as_bytes()
|
||||||
optional = Optional(event=EVENT_StartSession, sessionId=tts_state.session_id).as_bytes()
|
optional = Optional(event=EVENT_StartSession, sessionId=tts_state.session_id).as_bytes()
|
||||||
payload = get_payload_bytes(event=EVENT_StartSession, speaker=SPEAKER)
|
# 使用选择的speaker
|
||||||
|
payload = get_payload_bytes(event=EVENT_StartSession, speaker=selected_speaker)
|
||||||
await send_event(tts_state.volc_ws, header, optional, payload)
|
await send_event(tts_state.volc_ws, header, optional, payload)
|
||||||
|
|
||||||
raw_data = await tts_state.volc_ws.recv()
|
raw_data = await tts_state.volc_ws.recv()
|
||||||
res = parser_response(raw_data)
|
res = parser_response(raw_data)
|
||||||
if res.optional.event != EVENT_SessionStarted:
|
if res.optional.event != EVENT_SessionStarted:
|
||||||
raise Exception("TTS会话启动失败")
|
raise Exception("TTS会话启动失败")
|
||||||
print(f"TTS会话创建成功 [{message_id}]: {tts_state.session_id}")
|
print(f"TTS会话创建成功 [{message_id}]: {tts_state.session_id}")
|
||||||
|
|
||||||
# 发送文本到TTS服务
|
# 发送文本到TTS服务
|
||||||
print(f"发送文本到TTS服务 [{message_id}]...")
|
print(f"发送文本到TTS服务 [{message_id}]...")
|
||||||
header = Header(message_type=FULL_CLIENT_REQUEST,
|
header = Header(message_type=FULL_CLIENT_REQUEST,
|
||||||
message_type_specific_flags=MsgTypeFlagWithEvent,
|
message_type_specific_flags=MsgTypeFlagWithEvent,
|
||||||
serial_method=JSON).as_bytes()
|
serial_method=JSON).as_bytes()
|
||||||
optional = Optional(event=EVENT_TaskRequest, sessionId=tts_state.session_id).as_bytes()
|
optional = Optional(event=EVENT_TaskRequest, sessionId=tts_state.session_id).as_bytes()
|
||||||
payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=SPEAKER)
|
# 使用选择的speaker
|
||||||
|
payload = get_payload_bytes(event=EVENT_TaskRequest, text=text, speaker=selected_speaker)
|
||||||
await send_event(tts_state.volc_ws, header, optional, payload)
|
await send_event(tts_state.volc_ws, header, optional, payload)
|
||||||
|
|
||||||
# 接收TTS响应并发送到前端
|
# 接收TTS响应并发送到前端
|
||||||
print(f"开始接收TTS响应 [{message_id}]...")
|
print(f"开始接收TTS响应 [{message_id}]...")
|
||||||
audio_count = 0
|
audio_count = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
raw_data = await asyncio.wait_for(
|
raw_data = await asyncio.wait_for(
|
||||||
@@ -360,21 +392,21 @@ async def process_tts_task(websocket, message_id: str, text: str):
|
|||||||
timeout=30
|
timeout=30
|
||||||
)
|
)
|
||||||
res = parser_response(raw_data)
|
res = parser_response(raw_data)
|
||||||
|
|
||||||
print(f"收到TTS事件 [{message_id}]: {res.optional.event}")
|
print(f"收到TTS事件 [{message_id}]: {res.optional.event}")
|
||||||
|
|
||||||
if res.optional.event == EVENT_TTSSentenceEnd:
|
if res.optional.event == EVENT_TTSSentenceEnd:
|
||||||
print(f"句子结束事件 [{message_id}] - 直接完成")
|
print(f"句子结束事件 [{message_id}] - 直接完成")
|
||||||
break
|
break
|
||||||
|
|
||||||
elif res.optional.event == EVENT_SessionFinished:
|
elif res.optional.event == EVENT_SessionFinished:
|
||||||
print(f"收到会话结束事件 [{message_id}]")
|
print(f"收到会话结束事件 [{message_id}]")
|
||||||
break
|
break
|
||||||
|
|
||||||
elif res.optional.event == EVENT_TTSResponse:
|
elif res.optional.event == EVENT_TTSResponse:
|
||||||
audio_count += 1
|
audio_count += 1
|
||||||
print(f"发送音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}")
|
print(f"收到音频数据 [{message_id}] #{audio_count},大小: {len(res.payload)}")
|
||||||
# 发送音频数据
|
|
||||||
|
# 收集音频数据
|
||||||
|
tts_state.audio_data.extend(res.payload)
|
||||||
|
|
||||||
|
# 发送音频数据到前端
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
"id": audio_count,
|
"id": audio_count,
|
||||||
"type": "tts_audio_data",
|
"type": "tts_audio_data",
|
||||||
@@ -383,17 +415,25 @@ async def process_tts_task(websocket, message_id: str, text: str):
|
|||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
print(f"未知TTS事件 [{message_id}]: {res.optional.event}")
|
print(f"未知TTS事件 [{message_id}]: {res.optional.event}")
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
print(f"TTS响应超时 [{message_id}],强制结束")
|
print(f"TTS响应超时 [{message_id}],强制结束")
|
||||||
|
# 异步保存音频文件
|
||||||
|
if tts_state.audio_data:
|
||||||
|
file_path = await save_audio_file(
|
||||||
|
bytes(tts_state.audio_data),
|
||||||
|
tts_state.audio_filename
|
||||||
|
)
|
||||||
|
print(f"音频文件已保存 [{message_id}]: {file_path}")
|
||||||
|
|
||||||
# 发送完成消息
|
# 发送完成消息,包含文件路径和使用的speaker
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
"type": "tts_audio_complete",
|
"type": "tts_audio_complete",
|
||||||
"messageId": message_id
|
"messageId": message_id,
|
||||||
|
"audioFile": tts_state.audio_filename,
|
||||||
|
"audioPath": os.path.join(TEMP_AUDIO_DIR, tts_state.audio_filename) if tts_state.audio_data else None,
|
||||||
|
"speaker": selected_speaker
|
||||||
})
|
})
|
||||||
print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包")
|
print(f"TTS处理完成 [{message_id}],共发送 {audio_count} 个音频包,使用说话人: {selected_speaker}")
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
print(f"TTS任务被取消 [{message_id}]")
|
print(f"TTS任务被取消 [{message_id}]")
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
@@ -424,14 +464,14 @@ async def process_tts_task(websocket, message_id: str, text: str):
|
|||||||
|
|
||||||
|
|
||||||
# 启动TTS文本转换
|
# 启动TTS文本转换
|
||||||
async def handle_tts_text(websocket, message_id: str, text: str):
|
async def handle_tts_text(websocket, message_id: str, text: str, speaker: str = None):
|
||||||
"""启动TTS文本转换"""
|
"""启动TTS文本转换"""
|
||||||
# 创建新的TTS状态
|
# 创建新的TTS状态
|
||||||
|
print(speaker)
|
||||||
tts_state = tts_manager.add_tts_state(websocket, message_id)
|
tts_state = tts_manager.add_tts_state(websocket, message_id)
|
||||||
|
# 启动异步任务,传入speaker参数
|
||||||
# 启动异步任务
|
|
||||||
tts_state.task = asyncio.create_task(
|
tts_state.task = asyncio.create_task(
|
||||||
process_tts_task(websocket, message_id, text)
|
process_tts_task(websocket, message_id, text, speaker)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from . import tts
|
|||||||
from app.constants.model_data import tip_message, base_url, headers
|
from app.constants.model_data import tip_message, base_url, headers
|
||||||
|
|
||||||
|
|
||||||
async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str):
|
async def process_voice_conversation(websocket: WebSocket, asr_text: str, message_id: str, speaker: str):
|
||||||
try:
|
try:
|
||||||
print(f"开始处理语音对话 [{message_id}]: {asr_text}")
|
print(f"开始处理语音对话 [{message_id}]: {asr_text}")
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ async def process_voice_conversation(websocket: WebSocket, asr_text: str, messag
|
|||||||
|
|
||||||
# 启动TTS处理完整内容
|
# 启动TTS处理完整内容
|
||||||
print(f"启动完整TTS处理 [{message_id}]: {full_response}")
|
print(f"启动完整TTS处理 [{message_id}]: {full_response}")
|
||||||
await tts.handle_tts_text(websocket, message_id, full_response)
|
await tts.handle_tts_text(websocket, message_id, full_response, speaker)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"语音对话处理异常 [{message_id}]: {e}")
|
print(f"语音对话处理异常 [{message_id}]: {e}")
|
||||||
|
|||||||
@@ -64,7 +64,8 @@ async def websocket_online_count(websocket: WebSocket):
|
|||||||
# 从data中获取messageId,如果不存在则生成一个新的ID
|
# 从data中获取messageId,如果不存在则生成一个新的ID
|
||||||
message_id = data.get("messageId", "voice_" + str(uuid.uuid4()))
|
message_id = data.get("messageId", "voice_" + str(uuid.uuid4()))
|
||||||
if data.get("voiceConversation"):
|
if data.get("voiceConversation"):
|
||||||
await process_voice_conversation(websocket, asr_text, message_id)
|
speaker = data.get("speaker")
|
||||||
|
await process_voice_conversation(websocket, asr_text, message_id, speaker)
|
||||||
else:
|
else:
|
||||||
await websocket.send_json({"type": "asr_result", "result": asr_text})
|
await websocket.send_json({"type": "asr_result", "result": asr_text})
|
||||||
temp_buffer = bytes()
|
temp_buffer = bytes()
|
||||||
@@ -73,6 +74,7 @@ async def websocket_online_count(websocket: WebSocket):
|
|||||||
elif msg_type == "tts_text":
|
elif msg_type == "tts_text":
|
||||||
message_id = data.get("messageId")
|
message_id = data.get("messageId")
|
||||||
text = data.get("text", "")
|
text = data.get("text", "")
|
||||||
|
speaker = data.get("speaker")
|
||||||
|
|
||||||
if not message_id:
|
if not message_id:
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
@@ -83,7 +85,7 @@ async def websocket_online_count(websocket: WebSocket):
|
|||||||
|
|
||||||
print(f"收到TTS文本请求 [{message_id}]: {text}")
|
print(f"收到TTS文本请求 [{message_id}]: {text}")
|
||||||
try:
|
try:
|
||||||
await tts.handle_tts_text(websocket, message_id, text)
|
await tts.handle_tts_text(websocket, message_id, text, speaker)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"TTS文本处理异常 [{message_id}]: {e}")
|
print(f"TTS文本处理异常 [{message_id}]: {e}")
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
|
|||||||
@@ -5,3 +5,100 @@
|
|||||||
APP_ID = '2138450044'
|
APP_ID = '2138450044'
|
||||||
TOKEN = 'V04_QumeQZhJrQ_In1Z0VBQm7n0ttMNO'
|
TOKEN = 'V04_QumeQZhJrQ_In1Z0VBQm7n0ttMNO'
|
||||||
SPEAKER = 'zh_male_beijingxiaoye_moon_bigtts'
|
SPEAKER = 'zh_male_beijingxiaoye_moon_bigtts'
|
||||||
|
|
||||||
|
SPEAKER_DATA = [
|
||||||
|
{
|
||||||
|
"category": "趣味口音",
|
||||||
|
"speakers": [
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_jingqiangkanye_moon_bigtts",
|
||||||
|
"speaker_name": "京腔侃爷/Harmony",
|
||||||
|
"language": "中文-北京口音、英文",
|
||||||
|
"platforms": ["豆包", "Cici", "web demo"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_wanwanxiaohe_moon_bigtts",
|
||||||
|
"speaker_name": "湾湾小何",
|
||||||
|
"language": "中文-台湾口音",
|
||||||
|
"platforms": ["豆包", "Cici"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_wanqudashu_moon_bigtts",
|
||||||
|
"speaker_name": "湾区大叔",
|
||||||
|
"language": "中文-广东口音",
|
||||||
|
"platforms": ["豆包", "Cici"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_daimengchuanmei_moon_bigtts",
|
||||||
|
"speaker_name": "呆萌川妹",
|
||||||
|
"language": "中文-四川口音",
|
||||||
|
"platforms": ["豆包", "Cici"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_guozhoudege_moon_bigtts",
|
||||||
|
"speaker_name": "广州德哥",
|
||||||
|
"language": "中文-广东口音",
|
||||||
|
"platforms": ["豆包", "Cici"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_beijingxiaoye_moon_bigtts",
|
||||||
|
"speaker_name": "北京小爷",
|
||||||
|
"language": "中文-北京口音",
|
||||||
|
"platforms": ["豆包"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_haoyuxiaoge_moon_bigtts",
|
||||||
|
"speaker_name": "浩宇小哥",
|
||||||
|
"language": "中文-青岛口音",
|
||||||
|
"platforms": ["豆包"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_guangxiyuanzhou_moon_bigtts",
|
||||||
|
"speaker_name": "广西远舟",
|
||||||
|
"language": "中文-广西口音",
|
||||||
|
"platforms": ["豆包"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_meituojieer_moon_bigtts",
|
||||||
|
"speaker_name": "妹坨洁儿",
|
||||||
|
"language": "中文-长沙口音",
|
||||||
|
"platforms": ["豆包", "剪映"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_yuzhouzixuan_moon_bigtts",
|
||||||
|
"speaker_name": "豫州子轩",
|
||||||
|
"language": "中文-河南口音",
|
||||||
|
"platforms": ["豆包"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"category": "角色扮演",
|
||||||
|
"speakers": [
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_naiqimengwa_mars_bigtts",
|
||||||
|
"speaker_name": "奶气萌娃",
|
||||||
|
"language": "中文",
|
||||||
|
"platforms": ["剪映", "豆包"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_popo_mars_bigtts",
|
||||||
|
"speaker_name": "婆婆",
|
||||||
|
"language": "中文",
|
||||||
|
"platforms": ["剪映C端", "抖音", "豆包"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_female_gaolengyujie_moon_bigtts",
|
||||||
|
"speaker_name": "高冷御姐",
|
||||||
|
"language": "中文",
|
||||||
|
"platforms": ["豆包", "Cici"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "zh_male_aojiaobazong_moon_bigtts",
|
||||||
|
"speaker_name": "傲娇霸总",
|
||||||
|
"language": "中文",
|
||||||
|
"platforms": ["豆包"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from app.api.v1.endpoints import chat, model, websocket_service
|
from app.api.v1.endpoints import chat, model, websocket_service,speaker
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
@@ -9,6 +9,7 @@ app.include_router(websocket_service.router, prefix="", tags=["websocket_service
|
|||||||
app.include_router(chat.router, prefix="/v1/chat", tags=["chat"])
|
app.include_router(chat.router, prefix="/v1/chat", tags=["chat"])
|
||||||
# 获取模型列表服务
|
# 获取模型列表服务
|
||||||
app.include_router(model.router, prefix="/v1/model", tags=["model_list"])
|
app.include_router(model.router, prefix="/v1/model", tags=["model_list"])
|
||||||
|
app.include_router(speaker.router, prefix="/v1/speaker", tags=["speaker_list"])
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|||||||
@@ -5,4 +5,5 @@ from .chat import (
|
|||||||
ModelInfo,
|
ModelInfo,
|
||||||
VendorModelList,
|
VendorModelList,
|
||||||
VendorModelResponse,
|
VendorModelResponse,
|
||||||
|
SpeakerResponse
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -33,3 +33,20 @@ class VendorModelList(BaseModel):
|
|||||||
|
|
||||||
class VendorModelResponse(BaseModel):
|
class VendorModelResponse(BaseModel):
|
||||||
data: List[VendorModelList]
|
data: List[VendorModelList]
|
||||||
|
|
||||||
|
|
||||||
|
# Speaker-related schemas
class Speaker(BaseModel):
    """A single TTS speaker (voice) entry as exposed by the speaker list API."""

    speaker_id: str  # identifier passed to the TTS service
    speaker_name: str  # display name
    language: str  # language / accent description
    platforms: List[str]  # platform names listed for this speaker
|
||||||
|
|
||||||
|
|
||||||
|
class CategorySpeakers(BaseModel):
    """A named category together with the speakers that belong to it."""

    category: str  # category name
    speakers: List[Speaker]  # speakers grouped under this category
|
||||||
|
|
||||||
|
|
||||||
|
class SpeakerResponse(BaseModel):
    """Response envelope for the speaker list endpoint."""

    data: List[CategorySpeakers]  # speaker categories, each with its speakers
|
||||||
|
|||||||
2
web/components.d.ts
vendored
2
web/components.d.ts
vendored
@@ -9,6 +9,8 @@ declare module 'vue' {
|
|||||||
export interface GlobalComponents {
|
export interface GlobalComponents {
|
||||||
Avatar: typeof import('./src/components/avatar.vue')['default']
|
Avatar: typeof import('./src/components/avatar.vue')['default']
|
||||||
Markdown: typeof import('./src/components/markdown.vue')['default']
|
Markdown: typeof import('./src/components/markdown.vue')['default']
|
||||||
|
Message_tools: typeof import('./src/components/MessageTools.vue')['default']
|
||||||
|
MessageTools: typeof import('./src/components/MessageTools.vue')['default']
|
||||||
NButton: typeof import('naive-ui')['NButton']
|
NButton: typeof import('naive-ui')['NButton']
|
||||||
NCollapse: typeof import('naive-ui')['NCollapse']
|
NCollapse: typeof import('naive-ui')['NCollapse']
|
||||||
NCollapseItem: typeof import('naive-ui')['NCollapseItem']
|
NCollapseItem: typeof import('naive-ui')['NCollapseItem']
|
||||||
|
|||||||
26
web/src/components/MessageTools.vue
Normal file
26
web/src/components/MessageTools.vue
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
<script setup lang="ts">
import type { Message } from "@/interfaces";

import { DocumentDuplicateIcon } from "@/assets/Icons";
import { copy } from "@/utils";

// Per-message toolbar: TTS playback for non-user messages plus a copy button.
// Props are declared without destructuring so `msg` stays reactive in the
// template on every Vue 3 version (destructured props are only reactive
// from Vue 3.5 onwards).
defineProps<{
  msg: Message;
}>();
</script>

<template>
  <div class="flex items-center gap-2 justify-end mt-2">
    <div v-if="msg.role !== 'user'">
      <tts :text="msg.content || ''" :message-id="msg.id!" />
    </div>
    <NPopover trigger="hover">
      <template #trigger>
        <NButton quaternary circle @click="copy(msg.content || '')">
          <DocumentDuplicateIcon class="!w-4 !h-4" />
        </NButton>
      </template>
      <span>复制内容</span>
    </NPopover>
  </div>
</template>
|
||||||
@@ -12,7 +12,7 @@ export interface Message {
|
|||||||
role?: string;
|
role?: string;
|
||||||
usage?: UsageInfo;
|
usage?: UsageInfo;
|
||||||
id?: string;
|
id?: string;
|
||||||
type?: 'chat' | 'voice';
|
type?: "chat" | "voice";
|
||||||
[property: string]: any;
|
[property: string]: any;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,3 +32,97 @@ export interface UsageInfo {
|
|||||||
completion_tokens: number;
|
completion_tokens: number;
|
||||||
total_tokens: number;
|
total_tokens: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Basic information about a TTS speaker (voice).
 */
export interface Speaker {
  /** Unique speaker identifier */
  speaker_id: string;
  /** Display name of the speaker */
  speaker_name: string;
  /** Supported language / accent */
  language: string;
  /** List of supported platforms */
  platforms: string[];
}
|
||||||
|
|
||||||
|
/**
 * A speaker category and the speakers it contains.
 */
export interface CategorySpeakers {
  /** Category name */
  category: string;
  /** Speakers that belong to this category */
  speakers: Speaker[];
}
|
||||||
|
|
||||||
|
/**
 * Known speaker categories.
 */
export enum SpeakerCategory {
  /** Fun accents */
  ACCENT = "趣味口音",
  /** Role play */
  ROLE_PLAY = "角色扮演"
}
|
||||||
|
|
||||||
|
/**
 * Commonly used platform names.
 */
export enum SpeakerPlatform {
  DOUYIN = "抖音",
  DOUBAO = "豆包",
  CICI = "Cici",
  JIANYING = "剪映",
  JIANYING_C = "剪映C端",
  WEB_DEMO = "web demo",
  STORY_AI = "StoryAi",
  MAOXIANG = "猫箱"
}
|
||||||
|
|
||||||
|
/**
 * Props of the speaker selector component.
 */
export interface SpeakerSelectorProps {
  /** Currently selected speaker */
  selectedSpeaker?: Speaker;
  /** Callback invoked when a speaker is chosen */
  onSpeakerChange: (speaker: Speaker) => void;
  /** Whether the selector is disabled */
  disabled?: boolean;
  /** Filter by specific categories */
  filterCategories?: SpeakerCategory[];
  /** Filter by specific platforms */
  filterPlatforms?: SpeakerPlatform[];
}
|
||||||
|
|
||||||
|
/**
 * Parameters of a voice synthesis request.
 */
export interface VoiceSynthesisParams {
  /** Speaker to synthesize with */
  speaker: Speaker;
  /** Text to synthesize */
  text: string;
  /** Speech rate (0.5-2.0) */
  speed?: number;
  /** Pitch (0.5-2.0) */
  pitch?: number;
  /** Volume (0.0-1.0) */
  volume?: number;
}
|
||||||
|
|
||||||
|
/**
 * Result of a voice synthesis request.
 */
export interface VoiceSynthesisResponse {
  /** URL of the audio file */
  audio_url: string;
  /** Audio duration in seconds */
  duration: number;
  /** Synthesis status */
  status: "success" | "error";
  /** Error message, if any */
  error_message?: string;
}
|
||||||
|
|||||||
@@ -137,4 +137,9 @@ export class ChatService {
|
|||||||
public static GetModelList(config?: AxiosRequestConfig<any>) {
|
public static GetModelList(config?: AxiosRequestConfig<any>) {
|
||||||
return BaseClientService.get(`${this.basePath}/model/list`, config);
|
return BaseClientService.get(`${this.basePath}/model/list`, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Fetch the speaker (voice) list from `${basePath}/speaker/list`. */
public static GetSpeakerList(config?: AxiosRequestConfig<any>) {
  const speakerListUrl = `${this.basePath}/speaker/list`;
  return BaseClientService.get(speakerListUrl, config);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { useWebSocketStore } from "@/services";
|
import { useWebSocketStore } from "@/services";
|
||||||
import { convertToPCM16 } from "@/utils";
|
import { convertToPCM16 } from "@/utils";
|
||||||
|
import { useChatStore } from "./chat_store";
|
||||||
|
|
||||||
export const useAsrStore = defineStore("asr", () => {
|
export const useAsrStore = defineStore("asr", () => {
|
||||||
// 是否正在录音
|
// 是否正在录音
|
||||||
@@ -125,6 +126,7 @@ export const useAsrStore = defineStore("asr", () => {
|
|||||||
if (router.currentRoute.value.path === "/voice") {
|
if (router.currentRoute.value.path === "/voice") {
|
||||||
msg.messageId = messageId;
|
msg.messageId = messageId;
|
||||||
msg.voiceConversation = true;
|
msg.voiceConversation = true;
|
||||||
|
msg.speaker = useChatStore().speakerInfo?.speaker_id;
|
||||||
}
|
}
|
||||||
sendMessage(JSON.stringify(msg));
|
sendMessage(JSON.stringify(msg));
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
import type {
|
import type {
|
||||||
|
CategorySpeakers,
|
||||||
IChatWithLLMRequest,
|
IChatWithLLMRequest,
|
||||||
ModelInfo,
|
ModelInfo,
|
||||||
ModelListInfo,
|
ModelListInfo,
|
||||||
|
Speaker,
|
||||||
UsageInfo
|
UsageInfo
|
||||||
} from "@/interfaces";
|
} from "@/interfaces";
|
||||||
import { ChatService } from "@/services";
|
import { ChatService } from "@/services";
|
||||||
@@ -20,6 +22,10 @@ export const useChatStore = defineStore("chat", () => {
|
|||||||
const thinking = ref<boolean>(false);
|
const thinking = ref<boolean>(false);
|
||||||
// 模型列表
|
// 模型列表
|
||||||
const modelList = ref<ModelListInfo[]>([]);
|
const modelList = ref<ModelListInfo[]>([]);
|
||||||
|
// 音色列表
|
||||||
|
const speakerList = ref<CategorySpeakers[]>([]);
|
||||||
|
// 当前音色信息
|
||||||
|
const speakerInfo = ref<Speaker | null>(null);
|
||||||
// 在线人数
|
// 在线人数
|
||||||
const onlineCount = ref<number>(0);
|
const onlineCount = ref<number>(0);
|
||||||
|
|
||||||
@@ -151,6 +157,16 @@ export const useChatStore = defineStore("chat", () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Fetch the speaker (voice) list from the backend and store it in `speakerList`.
// Failures are logged and leave the current list unchanged.
const getSpeakerList = async () => {
  try {
    const response = await ChatService.GetSpeakerList();
    // The HTTP body wraps the categories in a `data` field
    speakerList.value = response.data.data;
  } catch (error) {
    // Message fixed: the original text contained a stray "·" character
    console.error("获取音色列表失败:", error);
  }
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
token,
|
token,
|
||||||
completing,
|
completing,
|
||||||
@@ -162,6 +178,9 @@ export const useChatStore = defineStore("chat", () => {
|
|||||||
addMessageToHistory,
|
addMessageToHistory,
|
||||||
clearHistoryMessages,
|
clearHistoryMessages,
|
||||||
getModelList,
|
getModelList,
|
||||||
onlineCount
|
onlineCount,
|
||||||
|
speakerList,
|
||||||
|
getSpeakerList,
|
||||||
|
speakerInfo
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { useAudioWebSocket } from "@/services";
|
import { useAudioWebSocket } from "@/services";
|
||||||
import { createAudioUrl, mergeAudioChunks } from "@/utils";
|
import { createAudioUrl, mergeAudioChunks } from "@/utils";
|
||||||
|
import { useChatStore } from "./chat_store";
|
||||||
|
|
||||||
interface AudioState {
|
interface AudioState {
|
||||||
isPlaying: boolean;
|
isPlaying: boolean;
|
||||||
@@ -12,6 +13,7 @@ interface AudioState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const useTtsStore = defineStore("tts", () => {
|
export const useTtsStore = defineStore("tts", () => {
|
||||||
|
const chatStore = useChatStore();
|
||||||
// 多音频状态管理 - 以消息ID为key
|
// 多音频状态管理 - 以消息ID为key
|
||||||
const audioStates = ref<Map<string, AudioState>>(new Map());
|
const audioStates = ref<Map<string, AudioState>>(new Map());
|
||||||
|
|
||||||
@@ -65,7 +67,14 @@ export const useTtsStore = defineStore("tts", () => {
|
|||||||
hasActiveSession.value = true;
|
hasActiveSession.value = true;
|
||||||
|
|
||||||
// 发送文本到TTS服务
|
// 发送文本到TTS服务
|
||||||
sendMessage(JSON.stringify({ type: "tts_text", text, messageId }));
|
sendMessage(
|
||||||
|
JSON.stringify({
|
||||||
|
type: "tts_text",
|
||||||
|
text,
|
||||||
|
messageId,
|
||||||
|
speaker: chatStore.speakerInfo?.speaker_id
|
||||||
|
})
|
||||||
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
handleError(`连接失败: ${error}`, messageId);
|
handleError(`连接失败: ${error}`, messageId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import type { Message } from "@/interfaces";
|
|||||||
import { throttle } from "lodash-es";
|
import { throttle } from "lodash-es";
|
||||||
import AIAvatar from "@/assets/ai_avatar.png";
|
import AIAvatar from "@/assets/ai_avatar.png";
|
||||||
import {
|
import {
|
||||||
DocumentDuplicateIcon,
|
|
||||||
ExclamationTriangleIcon,
|
ExclamationTriangleIcon,
|
||||||
microphone,
|
microphone,
|
||||||
PaperAirplaneIcon,
|
PaperAirplaneIcon,
|
||||||
@@ -13,7 +12,6 @@ import {
|
|||||||
import UserAvatar from "@/assets/user_avatar.jpg";
|
import UserAvatar from "@/assets/user_avatar.jpg";
|
||||||
import markdown from "@/components/markdown.vue";
|
import markdown from "@/components/markdown.vue";
|
||||||
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
|
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
|
||||||
import { copy } from "@/utils";
|
|
||||||
|
|
||||||
const chatStore = useChatStore();
|
const chatStore = useChatStore();
|
||||||
const { historyMessages, completing, modelList, modelInfo, thinking } =
|
const { historyMessages, completing, modelList, modelInfo, thinking } =
|
||||||
@@ -206,19 +204,7 @@ onMounted(() => {
|
|||||||
</NCollapse>
|
</NCollapse>
|
||||||
<!-- 内容↓ 思维链↑ -->
|
<!-- 内容↓ 思维链↑ -->
|
||||||
<markdown :content="msg.content || ''" />
|
<markdown :content="msg.content || ''" />
|
||||||
<div class="flex items-center gap-2 justify-end mt-2">
|
<MessageTools :msg="msg" />
|
||||||
<div v-if="msg.role !== 'user'">
|
|
||||||
<tts :text="msg.content || ''" :message-id="msg.id!" />
|
|
||||||
</div>
|
|
||||||
<NPopover trigger="hover">
|
|
||||||
<template #trigger>
|
|
||||||
<NButton quaternary circle @click="copy(msg.content || '')">
|
|
||||||
<DocumentDuplicateIcon class="!w-4 !h-4" />
|
|
||||||
</NButton>
|
|
||||||
</template>
|
|
||||||
<span>复制内容</span>
|
|
||||||
</NPopover>
|
|
||||||
</div>
|
|
||||||
<NDivider />
|
<NDivider />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import markdown from "@/components/markdown.vue";
|
|||||||
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
|
import { useAsrStore, useChatStore, useLayoutStore } from "@/stores";
|
||||||
|
|
||||||
const chatStore = useChatStore();
|
const chatStore = useChatStore();
|
||||||
const { historyMessages, completing, modelList, modelInfo, thinking } =
|
const { historyMessages, completing, speakerList, speakerInfo, thinking } =
|
||||||
storeToRefs(chatStore);
|
storeToRefs(chatStore);
|
||||||
const asrStore = useAsrStore();
|
const asrStore = useAsrStore();
|
||||||
const { isRecording } = storeToRefs(asrStore);
|
const { isRecording } = storeToRefs(asrStore);
|
||||||
@@ -58,39 +58,43 @@ const handleItemHeaderClick = (name: string) => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 处理选中模型的 ID
|
// 处理选中speaker的 ID
|
||||||
const selectedModelId = computed({
|
const selectedSpeakerId = computed({
|
||||||
get: () => modelInfo.value?.model_id ?? null,
|
get: () => speakerInfo.value?.speaker_id ?? null,
|
||||||
set: (id: string | null) => {
|
set: (id: string | null) => {
|
||||||
for (const vendor of modelList.value) {
|
for (const category of speakerList.value) {
|
||||||
const found = vendor.models.find((model) => model.model_id === id);
|
const found = category.speakers.find(
|
||||||
|
(speaker) => speaker.speaker_id === id
|
||||||
|
);
|
||||||
if (found) {
|
if (found) {
|
||||||
modelInfo.value = found;
|
speakerInfo.value = found;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
modelInfo.value = null;
|
speakerInfo.value = null;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// 监听模型列表变化,更新选项
|
// 监听speaker列表变化,更新选项
|
||||||
watch(
|
watch(
|
||||||
() => modelList.value,
|
() => speakerList.value,
|
||||||
(newVal) => {
|
(newVal) => {
|
||||||
if (newVal) {
|
if (newVal) {
|
||||||
options.value = newVal.map((vendor) => ({
|
options.value = newVal.map((category) => ({
|
||||||
type: "group",
|
type: "group",
|
||||||
label: vendor.vendor,
|
label: category.category,
|
||||||
key: vendor.vendor,
|
key: category.category,
|
||||||
children: vendor.models.map((model) => ({
|
children: category.speakers.map((speaker) => ({
|
||||||
label: model.model_name,
|
label: speaker.speaker_name,
|
||||||
value: model.model_id,
|
value: speaker.speaker_id,
|
||||||
type: model.model_type
|
language: speaker.language,
|
||||||
|
platforms: speaker.platforms
|
||||||
}))
|
}))
|
||||||
}));
|
}));
|
||||||
|
|
||||||
if (newVal.length > 0 && newVal[0].models.length > 0) {
|
// 默认选择第一个speaker
|
||||||
modelInfo.value = newVal[0].models[0];
|
if (newVal.length > 0 && newVal[0].speakers.length > 0) {
|
||||||
|
speakerInfo.value = newVal[0].speakers[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -115,7 +119,7 @@ watch(completing, (newVal) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
chatStore.getModelList();
|
chatStore.getSpeakerList();
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -191,9 +195,7 @@ onMounted(() => {
|
|||||||
</NCollapse>
|
</NCollapse>
|
||||||
<!-- 内容↓ 思维链↑ -->
|
<!-- 内容↓ 思维链↑ -->
|
||||||
<markdown :content="msg.content || ''" />
|
<markdown :content="msg.content || ''" />
|
||||||
<div v-if="msg.role !== 'user'" class="mt-2">
|
<MessageTools :msg="msg" />
|
||||||
<tts :text="msg.content || ''" :message-id="msg.id!" />
|
|
||||||
</div>
|
|
||||||
<NDivider />
|
<NDivider />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -209,7 +211,7 @@ onMounted(() => {
|
|||||||
<div class="flex justify-between items-center gap-2">
|
<div class="flex justify-between items-center gap-2">
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<NSelect
|
<NSelect
|
||||||
v-model:value="selectedModelId"
|
v-model:value="selectedSpeakerId"
|
||||||
label-field="label"
|
label-field="label"
|
||||||
value-field="value"
|
value-field="value"
|
||||||
children-field="children"
|
children-field="children"
|
||||||
|
|||||||
Reference in New Issue
Block a user