1. 完善了服务端web部分,使用Vue编写(见serve_view)

2. 增加了对嵌入式设备完全自定义控制的功能
This commit is contained in:
Misaki
2026-04-25 08:48:50 +08:00
parent 8ffc609388
commit 39c54a4452
51 changed files with 7070 additions and 95 deletions
+320 -39
View File
@@ -25,6 +25,7 @@
import asyncio
from typing import Optional, List, Dict, Any
from loguru import logger
import json
from src.modules.websocket_base_module.dto.third_dtos import (
AudioDataDTO, AudioDataTransferObject,
ScreenShotDataDTO, ScreenShotDataTransferObject
@@ -43,9 +44,15 @@ from src.server_core.llm_core.llm_core import (
YosugaLLMCore, ModelProvider,
LLMCoreAnalysisBase,
YosugaAudioResponseData, YosugaUITARSResponseData,
YosugaUITARSRequestData
YosugaUITARSRequestData, YosugaEmbeddedResponseData
)
from src.server_core.yosuga_embedded_server import (
YosugaServer, ServerConfig
)
from src.server_core.yosuga_embedded_server.device_dto import DeviceDataDTO
from src.server_core.llm_core.llm_core_prompt_manager import YosugaEmbedded
from src.modules.websocket_base_module.dto.dto_templates.auto_agent_data_dto import AutoAgentDataTransferObject
from src.config.config import cfg
@@ -70,6 +77,66 @@ class YosugaServerCore:
llm_core: YosugaLLMCore = None # LLM core instance
embedded_server: YosugaServer # embedded-device management framework
device_dto: DeviceDataDTO # device data dispatcher
# @classmethod
# async def get_instance(cls) -> "YosugaServerCore":
# """异步单例工厂"""
# if cls._instance is None:
# async with cls._lock:
# if cls._instance is None:
# logger.info("Initializing YosugaServerCore...")
# # 创建实例
# instance = cls.__new__(cls)
#
# # 按依赖顺序初始化数据分发器
# instance.ws_server = await get_ws_server()
# instance.json_dto = await get_json_dto_instance(instance.ws_server)
# instance.audio_dto = AudioDataDTO(instance.json_dto) # 音频分发器
# instance.audio_dto.register_audio_callback(instance._handle_audio_data) # 注册音频处理函数
# instance.screenshot_dto = ScreenShotDataDTO(instance.json_dto) # 截图分发器
# instance.screenshot_dto.register_screenshot_callback(instance._handle_screenshot_data) # 注册截图处理函数
#
# instance.asr_client = create_asr_client(use_async=True, base_url=cfg.asr.url)
# instance.tts_client = GPTSoVITSClient(host=cfg.tts.host, port=cfg.tts.port, debug=True)
# # 切换GPT_SoVITS模型
# await instance.tts_client.set_gpt_weights(cfg.tts.gpt_model_name)
# await instance.tts_client.set_sovits_weights(cfg.tts.sovits_model_name)
#
# instance.auto_agent_client = UITarsClient(UITarsClientConfig(
# deployment_type=cfg.auto_agent.deployment_type,
# base_url=cfg.auto_agent.base_url,
# model_name=cfg.auto_agent.model_name,
# temperature=cfg.auto_agent.temperature,
# max_tokens=cfg.auto_agent.max_tokens
# ))
#
# instance.llm_core = YosugaLLMCore(
# model_config=ModelConfig( # TODO 同上
# provider=ModelProvider.OPENAI,
# model_name=cfg.ai.model_name,
# base_url=cfg.ai.base_url,
# api_key=cfg.ai.api_key,
# temperature=cfg.ai.temperature,
# max_tokens=cfg.ai.max_tokens
# ),
# core_config=LLMCoreConfig( # TODO 同上
# max_context_tokens=cfg.llm_core.max_context_tokens,
# enable_history=cfg.llm_core.enable_history,
# role_setting=cfg.llm_core.role_character,
# language=cfg.llm_core.language, # 回复使用语言
# auto_dispatch=True,
# dispatch_async=True # 启用异步分发
# )
# )
# instance.register_llm_core_analysis() # 注册解析器
# instance.register_llm_core_action() # 注册分发器
# instance.llm_core.register_overflow_handler(instance._handle_overflow_logger) # 注册上下文溢出处理回调
#
# cls._instance = instance
# logger.success("YosugaServerCore initialized")
# return cls._instance
@classmethod
async def get_instance(cls) -> "YosugaServerCore":
"""异步单例工厂"""
@@ -77,57 +144,141 @@ class YosugaServerCore:
async with cls._lock:
if cls._instance is None:
logger.info("Initializing YosugaServerCore...")
# 强制初始化配置
from src.config.config import _ensure_initialized
from dataclasses import asdict, is_dataclass
real_cfg = _ensure_initialized()
# 辅助函数:递归转换为 dict
def to_dict(obj):
if isinstance(obj, dict):
return obj
if is_dataclass(obj) and not isinstance(obj, type):
return asdict(obj)
return {}
# 提取各个配置段并转换为 dict(关键修复)
cfg_dict = {
'ai': to_dict(getattr(real_cfg, 'ai', {})),
'tts': to_dict(getattr(real_cfg, 'tts', {})),
'asr': to_dict(getattr(real_cfg, 'asr', {})),
'auto_agent': to_dict(getattr(real_cfg, 'auto_agent', {})),
'llm_core': to_dict(getattr(real_cfg, 'llm_core', {})),
}
logger.debug(f"配置提取完成: ai={type(cfg_dict['ai'])}, tts={type(cfg_dict['tts'])}")
# 创建实例
instance = cls.__new__(cls)
# 按依赖顺序初始化数据分发器
instance.ws_server = await get_ws_server()
instance.json_dto = await get_json_dto_instance(instance.ws_server)
instance.audio_dto = AudioDataDTO(instance.json_dto) # 音频分发器
instance.audio_dto.register_audio_callback(instance._handle_audio_data) # 注册音频处理函数
instance.screenshot_dto = ScreenShotDataDTO(instance.json_dto) # 截图分发器
instance.screenshot_dto.register_screenshot_callback(instance._handle_screenshot_data) # 注册截图处理函数
instance.audio_dto = AudioDataDTO(instance.json_dto)
instance.audio_dto.register_audio_callback(instance._handle_audio_data)
instance.screenshot_dto = ScreenShotDataDTO(instance.json_dto)
instance.screenshot_dto.register_screenshot_callback(instance._handle_screenshot_data)
instance.asr_client = create_asr_client(use_async=True, base_url=cfg.asr.url)
instance.tts_client = GPTSoVITSClient(host=cfg.tts.host, port=cfg.tts.port, debug=True)
# 切换GPT_SoVITS模型
await instance.tts_client.set_gpt_weights(cfg.tts.gpt_model_name)
await instance.tts_client.set_sovits_weights(cfg.tts.sovits_model_name)
# ASR 客户端
asr_cfg = cfg_dict.get('asr', {})
instance.asr_client = create_asr_client(
use_async=True,
base_url=asr_cfg.get('url', 'http://localhost:20260/')
)
# TTS 客户端
tts_cfg = cfg_dict.get('tts', {})
instance.tts_client = GPTSoVITSClient(
host=tts_cfg.get('host', 'localhost'),
port=tts_cfg.get('port', 20261),
debug=True
)
# 切换 GPT_SoVITS 模型
# await instance.tts_client.set_gpt_weights(
# tts_cfg.get('gpt_model_name', 'GPT_weights_v2Pro/Yosuga_Airi-e32.ckpt')
# )
# await instance.tts_client.set_sovits_weights(
# tts_cfg.get('sovits_model_name', 'SoVITS_weights_v2Pro/Yosuga_Airi_e16_s864.pth')
# )
# Auto Agent 客户端
auto_cfg = cfg_dict.get('auto_agent', {})
instance.auto_agent_client = UITarsClient(UITarsClientConfig(
deployment_type=cfg.auto_agent.deployment_type,
base_url=cfg.auto_agent.base_url,
model_name=cfg.auto_agent.model_name,
temperature=cfg.auto_agent.temperature,
max_tokens=cfg.auto_agent.max_tokens
deployment_type=auto_cfg.get('deployment_type', 'lmstudio'),
base_url=auto_cfg.get('base_url', 'http://localhost:1234/v1'),
model_name=auto_cfg.get('model_name', 'ui-tars-1.5-7b@q4_k_m'),
temperature=auto_cfg.get('temperature', 0.1),
max_tokens=auto_cfg.get('max_tokens', 16384)
))
# LLM Core
ai_cfg = cfg_dict.get('ai', {})
llm_cfg = cfg_dict.get('llm_core', {})
instance.llm_core = YosugaLLMCore(
model_config=ModelConfig( # TODO 同上
model_config=ModelConfig(
provider=ModelProvider.OPENAI,
model_name=cfg.ai.model_name,
base_url=cfg.ai.base_url,
api_key=cfg.ai.api_key,
temperature=cfg.ai.temperature,
max_tokens=cfg.ai.max_tokens
model_name=ai_cfg.get('model_name', 'qwen/qwen3-4b-2507'),
base_url=ai_cfg.get('base_url', 'http://localhost:1234/v1'),
api_key=ai_cfg.get('api_key'),
temperature=ai_cfg.get('temperature', 0.4),
max_tokens=ai_cfg.get('max_tokens', 8192)
),
core_config=LLMCoreConfig( # TODO 同上
max_context_tokens=cfg.llm_core.max_context_tokens,
enable_history=cfg.llm_core.enable_history,
role_setting=cfg.llm_core.role_character,
language=cfg.llm_core.language, # 回复使用语言
core_config=LLMCoreConfig(
max_context_tokens=llm_cfg.get('max_context_tokens', 2048),
enable_history=llm_cfg.get('enable_history', True),
role_setting=llm_cfg.get('role_character',
'你是由Misakiotoha开发的助手稲葉愛理ちゃん,可以和用户一起玩游戏,聊天,做各种事情,性格抽象,没事爱整整活。'),
language=llm_cfg.get('language', '中文'),
auto_dispatch=True,
dispatch_async=True # 启用异步分发
dispatch_async=True
)
)
instance.register_llm_core_analysis() # 注册解析器
instance.register_llm_core_action() # 注册分发器
instance.llm_core.register_overflow_handler(instance._handle_overflow_logger) # 注册上下文溢出处理回调
# 注册 YosugaEmbedded 提示词模块
instance.llm_core.register_prompt_module(YosugaEmbedded())
logger.info("[Core] 嵌入式设备提示词模块已注册")
# 初始化嵌入式设备管理框架
instance.embedded_server = YosugaServer(
config=ServerConfig(
device_conflict_strategy="rename",
max_concurrent_calls=10,
device_timeout=30.0,
)
)
instance.device_dto = DeviceDataDTO(
instance.json_dto, instance.embedded_server
)
# 当 YosugaServer 需要发送 RPC 到设备时,通过 WebSocket 发出 device_command
instance.embedded_server.on_device_message = (
instance._on_device_message
)
# 当设备能力变更时,更新 LLM 系统提示词中的状态表
instance.embedded_server.on_capabilities_changed = (
instance._on_capabilities_changed
)
logger.success("[Core] 嵌入式设备管理框架已初始化")
# 注册设备 RPC 响应回调(设备结果回来后喂回 LLM)
instance.device_dto.register_device_callback(
instance._on_device_rpc_response
)
instance._pending_rpc: Optional[dict] = None
instance.register_llm_core_analysis()
instance.register_llm_core_action()
instance.llm_core.register_overflow_handler(instance._handle_overflow_logger)
cls._instance = instance
logger.success("YosugaServerCore initialized")
return cls._instance
def register_llm_core_action(self):
"""
注册llm_core的分发器
@@ -137,6 +288,7 @@ class YosugaServerCore:
self.llm_core.register_action_handler("audio_text", self._handle_audio_response, is_async=True)
self.llm_core.register_action_handler("auto_agent", self._handle_auto_agent, is_async=True)
self.llm_core.register_action_handler("call_auto_agent", self._handle_call_auto_agent, is_async=True)
self.llm_core.register_action_handler("embedded_control", self._handle_embedded_control, is_async=True)
self.llm_core.set_fallback_handler(self._handle_fallback)
def register_llm_core_analysis(self):
@@ -148,6 +300,7 @@ class YosugaServerCore:
self.llm_core.register_analysis_model(YosugaAudioResponseData)
self.llm_core.register_analysis_model(YosugaUITARSResponseData)
self.llm_core.register_analysis_model(YosugaUITARSRequestData)
self.llm_core.register_analysis_model(YosugaEmbeddedResponseData)
def _handle_overflow_logger(self, history: List[Any], metadata: Dict[str, Any]):
"""上下文溢出记录,仅打印日志"""
@@ -211,14 +364,23 @@ class YosugaServerCore:
try:
# 使用最快模式流式输出
chunk_count = 0
# async for chunk in await self.tts_client.tts(
# text=data.response_text,
# ref_audio_path="uploaded_audio/test_voice.wav", # TODO 需要替换成config或者后续设计情感系统
# text_lang="ja",
# prompt_lang="ja",
# prompt_text="もう!こんなところで何やってるんだよ!", # 参考语音的真实文本
# streaming_mode=StreamingMode.FASTEST, # 模式3:快速流式
# media_type="wav"
# ):
async for chunk in await self.tts_client.tts(
text=data.response_text,
ref_audio_path="uploaded_audio/test_voice.wav", # TODO 需要替换成config或者后续设计情感系统
text_lang="ja",
prompt_lang="ja",
prompt_text="もう!こんなところで何やってるんだよ!", # 参考语音的真实文本
streaming_mode=StreamingMode.FASTEST, # 模式3:快速流式
media_type="wav"
text=data.response_text,
ref_audio_path="uploaded_audio/kq.wav", # TODO 需要替换成config或者后续设计情感系统
text_lang="zh",
prompt_lang="zh",
prompt_text="电闪雷鸣虽然有点吓人,但璃月港的防雷防火工事是一流的,不用担心。", # 参考语音的真实文本
streaming_mode=StreamingMode.FASTEST, # 模式3:快速流式
media_type="wav"
):
chunk_count += 1
# print(f"🎵 收到音频块 #{chunk_count}: {len(chunk.audio_data)} bytes")
@@ -245,7 +407,7 @@ class YosugaServerCore:
text=data.response_text
)
)
print(f"流式TTS完成!共{chunk_count}个音频块")
print(f"流式TTS完成!共{chunk_count}个音频块")
# 构造音频尾包发送给客户端(虚假的音频数据)
await self.audio_dto.send_audio_data(
AudioDataTransferObject(
@@ -258,7 +420,7 @@ class YosugaServerCore:
)
)
except Exception as e:
print(f"流式错误: {e}")
print(f"流式错误: {e}")
return {"status": "success", "executed": data.response_text}
return None
@@ -284,6 +446,125 @@ class YosugaServerCore:
await self.screenshot_dto.send_screenshot_data(ScreenShotDataTransferObject(LLMResponse=data.llm_translation))
return {"status": "success", "executed": data.type}
async def _handle_embedded_control(self, data: YosugaEmbeddedResponseData):
    """
    Async llm_core action handler for embedded-device control.

    Serializes the LLM's call list to JSON and passes it to the YosugaServer
    framework, records the first returned call as the pending RPC (so the
    device's asynchronous reply can later be matched and fed back to the
    LLM), and, when the LLM also produced text for the user, streams that
    text through TTS to the client.

    Args:
        data: Parsed LLM response; ``calls`` and ``response_text`` are read.

    Returns:
        ``{"status": "success", "calls": <number of calls>}``. TTS failures
        are logged and do not change the return value.
    """
    logger.info(f"Handling embedded control: {len(data.calls)} calls")
    serialized_calls = json.dumps(data.calls)
    results = self.embedded_server.process_ai_response(serialized_calls)
    logger.info(f"Embedded control results: {results}")

    # Only the first dispatched call is tracked; its device reply is what
    # gets matched against this record later on.
    if results and len(results) > 0:
        head = results[0]
        self._pending_rpc = dict(
            device_id=head.get("device_id"),
            method=head.get("method"),
            call_id=head.get("id"),
            original_response_text=data.response_text or "",
        )

    # Speak the accompanying user-facing text, if the LLM produced any.
    spoken_text = data.response_text
    if spoken_text:
        try:
            sent = 0
            tts_stream = await self.tts_client.tts(
                text=spoken_text,
                # TODO: move the reference audio into config, or replace it
                # with a future emotion system.
                ref_audio_path="uploaded_audio/kq.wav",
                text_lang="zh",
                prompt_lang="zh",
                # Transcript of the reference audio.
                prompt_text="电闪雷鸣虽然有点吓人,但璃月港的防雷防火工事是一流的,不用担心。",
                streaming_mode=StreamingMode.FASTEST,
                media_type="wav"
            )
            async for chunk in tts_stream:
                sent += 1
                # The first chunk opens the stream; later chunks continue it.
                await self.audio_dto.send_audio_data(
                    AudioDataTransferObject(
                        data=chunk.audio_data,
                        isStream=True, isStart=(sent == 1),
                        sequence=sent, isEnd=False,
                        text=spoken_text
                    )
                )
            # End-of-stream packet carrying a one-byte placeholder payload.
            await self.audio_dto.send_audio_data(
                AudioDataTransferObject(
                    data=b"0",
                    isStream=True, isStart=False,
                    sequence=sent + 1, isEnd=True,
                    text=spoken_text
                )
            )
        except Exception as e:
            logger.error(f"Embedded control TTS error: {e}")

    return {"status": "success", "calls": len(data.calls)}
def _on_device_rpc_response(self, device_id: str, payload: dict):
    """DeviceDataDTO callback: a device's RPC response arrived; feed it back to the LLM.

    The response is accepted only when an RPC is pending for ``device_id``
    and the payload's ``"id"`` is either absent or equal to the pending
    call id. On a match the pending record is cleared and
    ``_continue_with_device_result`` is scheduled as a background task.

    Must be called while an event loop is running, because it uses
    ``asyncio.create_task``.

    Args:
        device_id: ID of the device that sent the response.
        payload: Response payload; only its ``"id"`` entry is inspected here.
    """
    pending = self._pending_rpc
    if not pending or pending.get("device_id") != device_id:
        return

    call_id = payload.get("id")
    if call_id is not None and call_id != pending.get("call_id"):
        return

    # Clear before scheduling so a duplicate response is not processed twice.
    self._pending_rpc = None
    task = asyncio.create_task(
        self._continue_with_device_result(device_id, payload, pending)
    )

    # The event loop only keeps weak references to tasks; hold a strong
    # reference until completion so the follow-up cannot be garbage-collected
    # mid-execution. The set is created lazily because instances are built
    # without running __init__.
    background_tasks = getattr(self, "_background_tasks", None)
    if background_tasks is None:
        background_tasks = self._background_tasks = set()
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)
async def _continue_with_device_result(self, device_id: str, payload: dict, pending: dict):
    """Feed a device's RPC result back to the LLM so it can compose the final reply.

    Builds a follow-up prompt from the device id, the invoked method, the
    device's result and the text the LLM gave earlier, then sends it through
    ``llm_core.interact``. What happens to the LLM's answer afterwards
    (e.g. TTS playback) is presumably handled by the LLM core's registered
    action handlers and is not visible here.

    Args:
        device_id: ID of the device that answered.
        payload: Device response; its ``"result"`` entry is used when
            present, otherwise the whole payload is serialized.
        pending: Pending-RPC record saved when the call was dispatched;
            ``"method"`` and ``"original_response_text"`` are read.

    Errors raised by the LLM interaction are logged, not propagated.
    """
    # The pending record always contains these keys, possibly with a None
    # value, so dict.get's default would never apply; fall back with `or`.
    method = pending.get("method") or "unknown"
    original_text = pending.get("original_response_text") or ""
    result_str = json.dumps(payload.get("result", payload), ensure_ascii=False)

    followup_input = (
        f"你之前请求设备 {device_id} 执行了 {method} 操作,"
        f"现在设备返回了结果:{result_str}\n"
        f"你之前的回复是:'{original_text}'\n"
        f"请基于设备返回的实际结果,用自然语言重新组织回复,告诉用户结果。"
    )

    try:
        llm_result = await self.llm_core.interact(user_input={"text": followup_input})
        logger.info(f"[Core] 设备结果回送 LLM 完成: {llm_result}")
    except Exception as e:
        logger.error(f"[Core] 设备结果回送 LLM 失败: {e}")
def _on_device_message(self, device_id: str, rpc_call: str) -> Optional[str]:
    """YosugaServer device-message callback: send an RPC to a device over WebSocket.

    Schedules ``device_dto.send_device_command`` as a background task and
    returns immediately. Must be called while an event loop is running,
    because it uses ``asyncio.create_task``.

    Args:
        device_id: Target device ID.
        rpc_call: The RPC call to deliver, as a string.

    Returns:
        Always ``None``; the send happens in the background.
    """
    logger.info(f"[Core] 发送设备命令到 {device_id}")
    send_task = asyncio.create_task(
        self.device_dto.send_device_command(device_id, rpc_call)
    )

    # The event loop only keeps weak references to tasks; hold a strong
    # reference until completion so the send cannot be garbage-collected
    # mid-execution. The set is created lazily because instances are built
    # without running __init__.
    background_tasks = getattr(self, "_background_tasks", None)
    if background_tasks is None:
        background_tasks = self._background_tasks = set()
    background_tasks.add(send_task)
    send_task.add_done_callback(background_tasks.discard)
    return None
def _on_capabilities_changed(self, capabilities: dict):
    """Device-capability change callback: refresh the state table in the LLM system prompt.

    Renders the ``"devices"`` and ``"functions"`` entries of ``capabilities``
    as indented JSON under their section headings, stores the result in
    ``llm_core.core_config.system_state_table``, and logs the reported
    device and function counts.

    Args:
        capabilities: Capability snapshot; ``"devices"``, ``"functions"``,
            ``"device_count"`` and ``"function_count"`` are read, each with
            a default when missing.
    """
    sections = (
        ("【当前在线设备】", capabilities.get("devices", {})),
        ("【设备可用函数】", capabilities.get("functions", [])),
    )
    # Each section is "<heading>\n<json>"; sections are separated by a blank line.
    state_table = "\n\n".join(
        f"{heading}\n{json.dumps(content, ensure_ascii=False, indent=2)}"
        for heading, content in sections
    )
    self.llm_core.core_config.system_state_table = state_table

    device_total = capabilities.get('device_count', 0)
    function_total = capabilities.get('function_count', 0)
    logger.info(f"[Core] 系统状态表已更新 | 设备: {device_total} 台 | 函数: {function_total}")
def _handle_fallback(self, data: LLMCoreAnalysisBase):
"""
llm_core同步处理器:回退处理器