first pull

2026-02-03 01:20:00 +08:00
commit 0753da86a8
79 changed files with 7134 additions and 0 deletions
@@ -0,0 +1,173 @@
+import asyncio
+from src.modules.tts_module.tts_core.gpt_sovits.gpt_sovits_client import GPTSoVITSClient, StreamingMode
+from src.modules.tts_module.tts_core.async_audio_player import AsyncAudioPlayer
+import sounddevice as sd
+
+# Long Japanese passage used as the input of the streaming TTS test. Adjacent
+# string literals are joined at compile time, so the value has no line breaks.
+test_text = (
+    "春の午後、公園のベンチに座って本を読んでいると、小さな子供が凧あげをしているのが目に入った。風に乗って凧が高く上がるたびに、彼の顔には真っすぐな笑顔が広がる。母親がそばで見守りながら、時折声をかけている。"
+    "空は雲一つない青さで、桜の花びらが風に舞っている。遠くで犬の鳴き声が聞こえ、芝生の上では老夫婦がお茶を楽しんでいた。すべてがゆっくりと流れる時間の中で、自分の心も不思議と落ち着いてくる。"
+    "ふと、子供の凧が木の枝に引っかかってしまった。少し焦る様子だったが、母親が助けてくれて、すぐにまた空に舞い上がった。失敗しても、誰かが助けてくれる。そんな当たり前のことに、今日は特別な温かさを感じた。"
+    "日が傾き始める頃、私は本を閉じて家路についた。明日もきっと、誰かの笑顔があるだろう。"
+)
+
+async def test_tts():
+    """Run one non-streaming synthesis request and save the resulting audio.
+
+    Any exception raised by the request or by saving is caught and printed
+    instead of being propagated.
+    """
+    request = dict(
+        text="あのさ、いやまあ、なんていうか...要するに、そういうことじゃなくて、ほら、前に言ってたやつ、あれなんだけど、とにかく、後ででもいいから、ちょっと相談に乗ってくれない？",
+        ref_audio_path="uploaded_audio/test_voice.wav",  # path on the server
+        text_lang="ja",
+        prompt_lang="ja",
+        media_type="wav",
+        prompt_text="もう!こんなところで何やってるんだよ!",
+    )
+
+    # The context-manager form is the recommended way to create the client
+    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
+        try:
+            # Basic TTS call
+            audio = await client.tts(**request)
+
+            # Save the audio
+            audio.save("outputs/output.wav")
+            print(f"✅ TTS成功！音频大小: {len(audio.audio_data)} bytes")
+        except Exception as e:
+            print(f"❌ 错误: {e}")
+
+
+async def test_model_change():
+    """Load the Yosuga_Airi GPT weights, then the SoVITS weights, via the client."""
+    gpt_weights = "GPT_weights_v2Pro/Yosuga_Airi-e32.ckpt"
+    sovits_weights = "SoVITS_weights_v2Pro/Yosuga_Airi_e16_s864.pth"
+
+    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
+        # Switch the GPT model first ...
+        print("🔄 切换GPT模型...")
+        await client.set_gpt_weights(gpt_weights)
+
+        # ... then the SoVITS model
+        print("🔄 切换SoVITS模型...")
+        await client.set_sovits_weights(sovits_weights)
+
+
+async def stream_tts():
+    """Stream one utterance in FASTEST mode and print the size of every chunk.
+
+    Exceptions raised while requesting or consuming the stream are caught and
+    printed instead of being propagated.
+    """
+    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
+        received = 0
+        try:
+            # Awaiting tts() yields the stream that is iterated below
+            stream = await client.tts(
+                text="要するに、そういうことじゃなくて、ほら、前に言ってたやつ、あれなんだけど、とにかく、後ででもいいから、ちょっと相談に乗ってくれない？",
+                ref_audio_path="uploaded_audio/test_voice.wav",
+                text_lang="ja",
+                prompt_lang="ja",
+                prompt_text="もう!こんなところで何やってるんだよ!",
+                streaming_mode=StreamingMode.FASTEST,  # mode 3: fast streaming
+                media_type="wav",
+            )
+
+            async for chunk in stream:
+                received += 1
+                print(f"🎵 收到音频块 #{received}: {len(chunk.audio_data)} bytes")
+
+                # Hook for real-time playback (left disabled):
+                # await play_audio_chunk(chunk.audio_data)
+
+            print(f"✅ 流式TTS完成！共{received}个音频块")
+        except Exception as e:
+            print(f"❌ 流式错误: {e}")
+
+
+async def stream_tts_and_play(
+        text: str,
+        ref_audio_path: str,
+        text_lang: str = "zh",
+        prompt_lang: str = "zh",
+        streaming_mode: StreamingMode = StreamingMode.FASTEST,
+        prompt_text: str = "もう!こんなところで何やってるんだよ!",
+):
+    """
+    Synthesize ``text`` as a stream and queue every chunk for playback as soon
+    as it arrives.
+
+    Args:
+        text: Text to synthesize.
+        ref_audio_path: Path of the reference audio.
+        text_lang: Language of ``text``.
+        prompt_lang: Language of the prompt.
+        streaming_mode: Streaming mode forwarded to the client.
+        prompt_text: Prompt text forwarded to the client. The default is the
+            line that used to be hard-coded here, so existing callers behave
+            exactly as before.
+            NOTE(review): that default is Japanese while ``prompt_lang``
+            defaults to "zh" -- callers relying on both defaults should
+            confirm the combination is intended.
+
+    Raises:
+        Exception: Any error raised while synthesizing or playing is printed
+            and then re-raised.
+    """
+    # Player first (buffer_size=5: the author's trade-off between latency and
+    # stability), then the TTS client; they are released in reverse order.
+    async with AsyncAudioPlayer(buffer_size=5) as player, \
+            GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
+        try:
+            print(f"🎤 开始流式合成: {text[:30]}...")
+            print(f"🎯 流式模式: {streaming_mode.name}")
+
+            # Awaiting tts() yields the audio stream (an async iterable)
+            audio_stream = await client.tts(
+                text=text,
+                ref_audio_path=ref_audio_path,
+                text_lang=text_lang,
+                prompt_lang=prompt_lang,
+                prompt_text=prompt_text,
+                streaming_mode=streaming_mode,
+                media_type="wav",
+                sample_steps=32,
+                top_k=5,
+                temperature=1.0,
+            )
+
+            # Read chunks as they arrive and hand each one to the player
+            chunk_idx = 0
+            async for audio_chunk in audio_stream:
+                chunk_idx += 1
+                print(f"📥 收到音频块 #{chunk_idx}: {len(audio_chunk.audio_data):6d} bytes")
+
+                # Queue the chunk for playback right away
+                await player.add_chunk(audio_chunk.audio_data)
+
+            print(f"✅ 合成完成! 共接收 {chunk_idx} 个音频块")
+
+            # Wait until every queued chunk has been played
+            await player.audio_queue.join()
+            print("🎵 播放完成!")
+
+        except Exception as e:
+            print(f"❌ 错误: {e}")
+            raise
+
+
+async def test_japanese():
+    """Stream the long Japanese sample text through TTS and play it back."""
+    banner = "=" * 50
+    print(banner)
+    print("🗾 日语流式TTS测试")
+    print(banner)
+
+    await stream_tts_and_play(
+        test_text,
+        "uploaded_audio/test_voice.wav",
+        text_lang="ja",
+        prompt_lang="ja",
+        streaming_mode=StreamingMode.FASTEST,  # mode 3: fastest
+    )
+
+async def batch_test():
+    """Batch example: synthesize three Chinese sentences and save each result."""
+    sentences = [
+        "你好，世界！",
+        "这是一个批量测试。",
+        "异步批量处理非常高效。",
+    ]
+
+    async with GPTSoVITSClient() as client:
+        results = await client.batch_tts(
+            texts=sentences,
+            ref_audio_path="archive_jingyuan_1.wav",
+            text_lang="zh",
+        )
+
+        total = len(results)
+        for index, audio in enumerate(results):
+            # Files are numbered from 0, progress is reported from 1
+            audio.save(f"output/batch_{index}.wav")
+            print(f"✅ 批量任务 {index + 1}/{total} 完成")
+
+
+
+
+
+if __name__ == "__main__":
+    # List the available audio devices before selecting one
+    print("🔍 检查音频设备...")
+    available_devices = sd.query_devices()
+    print(available_devices)
+
+    # (input, output) pair: input left unset, output routed to "pulse" (PulseAudio)
+    sd.default.device = (None, "pulse")
+
+    asyncio.run(test_japanese())
@@ -0,0 +1,24 @@
+import asyncio
+import json
+from websockets.asyncio.client import connect
+
+async def test_all_types():
+    """Send one JSON, one plain-text and one binary message, printing each reply."""
+    cases = [
+        ("=== 测试JSON消息 ===", json.dumps({
+            "type": "chat",
+            "content": "你好服务器！"
+        })),
+        ("\n=== 测试文本消息 ===", "这是纯文本消息"),
+        ("\n=== 测试二进制消息 ===", b"\x00\x01\x02\x03\x04"),
+    ]
+
+    async with connect("ws://localhost:8765") as ws:
+        for heading, payload in cases:
+            print(heading)
+            await ws.send(payload)
+            reply = await ws.recv()
+            print(f"收到: {reply}")
+
+# Script entry point: run the three-message round trip against the server
+if __name__ == "__main__":
+    asyncio.run(test_all_types())
@@ -0,0 +1,174 @@
+"""
+极简 WebSocket 测试服务器 - 修复版本
+"""
+import asyncio
+import json
+import logging
+from datetime import datetime
+from typing import Set
+
+import websockets
+
+# 配置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(message)s'
+)
+
+class SimpleWebSocketServer:
+    """Minimal WebSocket test server that answers every message with JSON.
+
+    JSON objects are dispatched on their "type" field ("ping", "login",
+    "chat"; any other type is echoed back). Text that is not a JSON object is
+    answered with a "text_echo" reply, and a binary frame that is not a JSON
+    object with a "binary_echo" reply.
+    """
+
+    def __init__(self, host="localhost", port=8765):
+        """Store the listen address; nothing is opened until start()."""
+        self.host = host
+        self.port = port
+        # Connections that are currently open
+        self.clients: Set = set()
+
+    @staticmethod
+    def _timestamp_ms():
+        """Return the current time as integer milliseconds since the epoch."""
+        return int(datetime.now().timestamp() * 1000)
+
+    async def handle_connection(self, websocket, path=None):
+        """Greet a client, then process its messages until it disconnects.
+
+        Args:
+            websocket: Connection object handed over by the websockets library.
+            path: Unused. It defaults to None so the handler works both when
+                the library calls it with (connection, path) and when it
+                calls it with the connection only.
+        """
+        client_id = f"{websocket.remote_address[0]}:{websocket.remote_address[1]}"
+        self.clients.add(websocket)
+        logging.info(f"✅ 客户端连接: {client_id} (当前连接数: {len(self.clients)})")
+
+        try:
+            # Send the welcome message
+            welcome = {
+                "type": "connect",
+                "data": {
+                    "message": "WebSocket 服务器连接成功",
+                    "client_id": client_id,
+                    "server_time": datetime.now().isoformat(),
+                    "status": "connected"
+                },
+                "timestamp": self._timestamp_ms()
+            }
+            await websocket.send(json.dumps(welcome))
+
+            async for message in websocket:
+                await self.handle_message(websocket, client_id, message)
+
+        except websockets.exceptions.ConnectionClosed:
+            logging.info(f"❌ 客户端断开: {client_id}")
+        finally:
+            # Always forget the connection, whatever ended the loop
+            self.clients.discard(websocket)
+            logging.info(f"📊 剩余连接: {len(self.clients)}")
+
+    def _build_json_reply(self, data):
+        """Build the reply for a message that parsed to a JSON object."""
+        msg_type = data.get("type", "unknown")
+        payload = data.get("data", {})
+        # login/chat read fields from the payload; tolerate a payload that is
+        # not an object instead of failing on .get()
+        fields = payload if isinstance(payload, dict) else {}
+
+        if msg_type == "ping":
+            # Heartbeat reply
+            reply_type = "pong"
+            body = {
+                "server_time": datetime.now().isoformat(),
+                "latency": "0ms"
+            }
+        elif msg_type == "login":
+            username = fields.get("username", "anonymous")
+            reply_type = "login_success"
+            body = {
+                # str() keeps unhashable usernames (lists, objects) from
+                # raising. hash() of a str is salted per process, so the id
+                # is only stable within one server run.
+                "user_id": f"user_{abs(hash(str(username))) % 10000}",
+                "username": username,
+                "status": "authenticated"
+            }
+        elif msg_type == "chat":
+            msg_content = fields.get("message", "")
+            reply_type = "chat_response"
+            body = {
+                "message": f"服务器收到: {msg_content}",
+                "sender": "server",
+                "received_at": datetime.now().isoformat()
+            }
+        else:
+            # Default: echo the payload back unchanged
+            reply_type = "echo"
+            body = {
+                "original": payload,
+                "original_type": msg_type,
+                "server_processed_at": datetime.now().isoformat()
+            }
+
+        return {
+            "type": reply_type,
+            "data": body,
+            "timestamp": self._timestamp_ms()
+        }
+
+    def _build_raw_echo(self, message):
+        """Build the reply for a message that is not a JSON object."""
+        if isinstance(message, (bytes, bytearray)):
+            # bytes cannot be serialized by json.dumps, so report size and hex
+            return {
+                "type": "binary_echo",
+                "data": {
+                    "size": len(message),
+                    "hex": bytes(message).hex(),
+                    "note": "这是二进制消息"
+                },
+                "timestamp": self._timestamp_ms()
+            }
+        return {
+            "type": "text_echo",
+            "data": {
+                "original": message,
+                "note": "这是文本消息"
+            },
+            "timestamp": self._timestamp_ms()
+        }
+
+    async def handle_message(self, websocket, client_id, message):
+        """Reply to one incoming message.
+
+        Args:
+            websocket: Connection the reply is sent on.
+            client_id: "host:port" label used for logging.
+            message: The received frame, text (str) or binary (bytes).
+        """
+        logging.info(f"📨 收到消息 from {client_id}: {message}")
+
+        try:
+            data = json.loads(message)
+        except ValueError:
+            # Covers JSONDecodeError (not JSON) and UnicodeDecodeError
+            # (binary frame that cannot be decoded); both subclass ValueError
+            data = None
+
+        # Only JSON objects carry a "type"; numbers, strings, lists and null
+        # are treated like plain content
+        if isinstance(data, dict):
+            response = self._build_json_reply(data)
+        else:
+            response = self._build_raw_echo(message)
+
+        await websocket.send(json.dumps(response))
+
+    async def start(self):
+        """Start listening and serve until the task is cancelled."""
+        logging.info(f"🚀 启动 WebSocket 服务器: ws://{self.host}:{self.port}")
+
+        # The bound method is passed directly; its optional ``path`` argument
+        # makes it callable with or without the request path
+        server = await websockets.serve(
+            self.handle_connection,
+            self.host,
+            self.port,
+            ping_interval=None,
+            ping_timeout=None,
+            close_timeout=None,
+            max_size=10 * 1024 * 1024
+        )
+
+        logging.info("📌 服务器已启动，等待连接...")
+        logging.info("🛑 按 Ctrl+C 停止服务器")
+
+        # Keep the server running
+        try:
+            await asyncio.Future()  # never completes; ends only on cancellation
+        finally:
+            server.close()
+            await server.wait_closed()
+            logging.info("👋 服务器已关闭")
+
+def main():
+    """Parse --host/--port and run the test server until it is interrupted."""
+    import argparse
+
+    cli = argparse.ArgumentParser(description='极简 WebSocket 测试服务器')
+    cli.add_argument('--host', default='localhost', help='监听地址')
+    cli.add_argument('--port', type=int, default=8088, help='监听端口')
+    options = cli.parse_args()
+
+    ws_server = SimpleWebSocketServer(options.host, options.port)
+
+    # Ctrl+C ends asyncio.run() with KeyboardInterrupt; log it and exit
+    try:
+        asyncio.run(ws_server.start())
+    except KeyboardInterrupt:
+        logging.info("👋 服务器被用户中断")
+
+# Script entry point: start the test server from the command line
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,33 @@
+# requestTest.py
+import requests
+from pathlib import Path
+
+# Transcription endpoint and the sample recording that is uploaded to it
+url = "http://192.168.1.8:20260/transcribe"
+audio_path = Path("test_files/z105300938.wav")
+
+# (connect, read) timeouts in seconds. Without a timeout requests waits
+# forever when the server is unreachable or never answers.
+REQUEST_TIMEOUT = (5, 120)
+
+with open(audio_path, "rb") as f:
+    # Give the file name and the MIME type explicitly
+    files = {
+        "file": (
+            audio_path.name,  # file name
+            f,  # file object
+            "audio/wav"  # MIME type
+        )
+    }
+
+    response = requests.post(url, files=files, timeout=REQUEST_TIMEOUT)
+
+# Print the response details
+print(f"状态码: {response.status_code}")
+print(f"响应头: {response.headers.get('content-type')}")
+
+# Print the recognition result on success, the raw body otherwise
+if response.status_code == 200:
+    result = response.json()
+    print(f"识别结果: {result['data']['text']}")
+    print(f"语言: {result['data']['language']}")
+    print(f"置信度: {result['data']['confidence']}")
+    print(f"处理时间: {result['data']['processing_time']}s")
+else:
+    print(f"错误响应: {response.text}")
@@ -0,0 +1,14 @@
+# 一个小Test, 展示设计的dtos模块与tts和asr的集成
+from src.modules.websocket_base_module.dto.third_dtos import AudioDataDTO
+from src.modules.tts_module.tts_core.async_audio_player import AsyncAudioPlayer
+from src.modules.tts_module.tts_core.gpt_sovits.gpt_sovits_client import GPTSoVITSClient, StreamingMode
+from src.modules.asr_module.client.asr_client import create_asr_client
+
+
+# with create_asr_client(base_url="http://192.168.1.5:20260") as client:
+#     # 转录文件
+#     result = client.transcribe_file("test_files/test.wav")
+#     print(f"识别结果: {result.data.text}")
+#     print(f"置信度: {result.data.confidence:.2f}")
+#     print(f"耗时: {result.data.processing_time:.3f}s")
+
@@ -0,0 +1,30 @@
+from src.modules.websocket_base_module.dto.second_dtos import get_json_dto_instance
+from src.modules.websocket_base_module.dto.third_dtos import AudioDataDTO
+from src.modules.websocket_base_module.websocket_core.core_ws_server import get_ws_server
+import asyncio
+from loguru import logger
+async def main():
+    """Register the DTO receivers and run the WebSocket server on localhost:8765."""
+    host, port = "localhost", 8765
+
+    # WebSocket server singleton and the second-level JSON dispatcher singleton
+    server = await get_ws_server()
+    dispatcher = await get_json_dto_instance(server)
+
+    # Creating the DTO instance registers its receive function automatically
+    audio_dto = AudioDataDTO(dispatcher)
+
+    logger.info("所有DTO接收器已注册，等待客户端连接...")
+
+    # Run the server (blocks until it stops)
+    try:
+        await server.run(host, port)
+    except asyncio.CancelledError:
+        logger.info("服务器任务已取消，正在优雅退出...")
+    finally:
+        logger.info("服务器已停止")
+
+
+if __name__ == "__main__":
+    # Ctrl+C surfaces here as KeyboardInterrupt; print a notice instead of a traceback
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\n✓ 服务器已手动终止（按 Ctrl+C）")
@@ -0,0 +1,88 @@
+from src.modules.text_ai_module.text_ai_core.general_text_ai_req import UnifiedLLM, ModelConfig, ModelProvider, create_llm_client
+from src.config.config import get_settings
+from src.config.convert_env import EnvConverter
+from src.config.file_config import DirectoryInitializer
+
+# Module-level setup; both statements run as soon as this file is executed or imported.
+EnvConverter().convert(backup_existing=True)    # on first launch, generate the env file from the env template
+DirectoryInitializer(get_settings())            # initialize the required directories (created if missing)
+
+def test1():
+    """Send one blocking chat request through UnifiedLLM and print the reply."""
+    # Model configuration wrapped straight into the client
+    llm = UnifiedLLM(
+        ModelConfig(
+            provider=ModelProvider.OPENAI,
+            model_name=get_settings().ai_model_name,
+            base_url=get_settings().ai_api_base_url,
+            api_key=get_settings().ai_api_key,  # API key taken from the environment settings
+            temperature=0.7,
+            max_tokens=2048,
+        )
+    )
+
+    conversation = [
+        {"role": "system", "content": "你是一个DeepSeek助手"},
+        {"role": "user", "content": "请介绍一下DeepSeek模型的特点"},
+    ]
+
+    reply = llm.chat(conversation)
+    print(reply.content)
+
+def base_test2():
+    """Stream a chat completion and print every chunk as it arrives."""
+    # Client built with the shortcut factory
+    client = create_llm_client(
+        provider="openai",  # DeepSeek is reached through the OpenAI-compatible interface
+        model_name=get_settings().ai_model_name,
+        api_key=get_settings().ai_api_key,
+        base_url=get_settings().ai_api_base_url,
+    )
+
+    prompt = [{"role": "user", "content": "用Python写一个快速排序算法"}]
+
+    print("正在生成响应...")
+    for piece in client.stream_chat(prompt):
+        # No newline and an immediate flush, so the text appears incrementally
+        print(piece.content, end="", flush=True)
+
+
+def test_lm_studio():
+    """Query a local LM Studio model, first non-streaming, then streaming."""
+    print("=== 测试本地 LM Studio ===")
+
+    llm = UnifiedLLM(
+        ModelConfig(
+            provider=ModelProvider.LM_STUDIO,
+            model_name="qwen/qwen3-4b-2507",
+            base_url="http://192.168.1.8:1234/v1",
+            api_key="",  # LM Studio needs no API key, so it is left empty
+            temperature=0.7,
+            max_tokens=1024,
+            streaming=False,  # streaming is off in the config; stream_chat() is called explicitly below
+        )
+    )
+
+    conversation = [
+        {"role": "system", "content": "你是一个有用的助手"},
+        {"role": "user", "content": "用中文介绍一下自己"},
+    ]
+
+    print("非流式响应:")
+    answer = llm.chat(conversation, streaming=False)
+    print(answer.content)
+
+    print("\n流式响应:")
+    for piece in llm.stream_chat(conversation):
+        print(piece.content, end="", flush=True)
+
+# Script entry point: only the LM Studio test is run by default
+if __name__ == "__main__":
+    test_lm_studio()
@@ -0,0 +1,66 @@
+import asyncio
+import base64
+from pathlib import Path
+from src.modules.device_control_module.device_control_core.ui_tars_.ui_tars_client import UITarsClient, UITarsClientConfig
+
+
+async def test_ui_tars_stream():
+    """Send one screenshot plus an instruction to UI-TARS and print the result.
+
+    The request currently goes through the non-streaming ``call_async``; the
+    streaming loop is kept below as a comment, so the reported chunk count
+    stays at 0.
+    """
+    import time
+
+    # Create the client
+    config = UITarsClientConfig(
+        deployment_type="lmstudio",
+        base_url="http://192.168.1.8:1234/v1",
+        model_name="ui-tars-1.5-7b@q4_k_m",
+        temperature=0.1
+    )
+    client = UITarsClient(config)
+
+    # Base64-encode the screenshot so it can be passed as a string
+    image_base64 = base64.b64encode(Path("test_files/Screenshot_test.png").read_bytes()).decode()
+    print(f"✅ 图片编码完成，长度: {len(image_base64)} 字符\n")
+
+    print("🤖 开始流式调用 UI-TARS...\n")
+    print("思考过程:\n")
+
+    chunk_count = 0
+    # perf_counter() is monotonic, so the measured duration cannot be skewed
+    # by wall-clock adjustments the way time.time() can
+    start_time = time.perf_counter()
+
+    full_response = await client.call_async("打开AK加速器", image_base64)
+    # Streaming variant (disabled); it takes the same base64 string:
+    # for chunk in client.stream_async("我的桌面系统是KDE, 帮我打开设置", image_base64):
+    #     chunk_count += 1
+    #     content = chunk.content
+    #
+    #     # print every chunk as it arrives
+    #     print(content, end="", flush=True)
+    #
+    #     # accumulate the complete text
+    #     full_response += content
+
+    elapsed = time.perf_counter() - start_time
+    print(f"\n\n耗时: {elapsed:.2f} 秒")
+    print(f"\n\n{'=' * 50}")
+    print(f"✅ 流式调用完成！共接收 {chunk_count} 个 chunk")
+    print(f"完整响应长度: {len(full_response)} 字符")
+
+    print("响应内容:\n")
+    print(full_response)
+
+import pyautogui
+def auto_click(x: int, y: int, duration: float = 1.5):
+    """Move the mouse pointer to (x, y) and click there.
+
+    Args:
+        x: Target x coordinate passed to ``pyautogui.moveTo``.
+        y: Target y coordinate passed to ``pyautogui.moveTo``.
+        duration: Value passed as ``duration`` to ``pyautogui.moveTo``; the
+            default 1.5 is the value that used to be hard-coded.
+    """
+    pyautogui.moveTo(x, y, duration=duration)
+    pyautogui.click()
+
+def auto_drag(x1: int, y1: int, x2: int, y2: int, duration: float = 1.5):
+    """Move the mouse pointer to (x1, y1), then drag to (x2, y2).
+
+    Args:
+        x1: x coordinate where the drag starts.
+        y1: y coordinate where the drag starts.
+        x2: x coordinate where the drag ends.
+        y2: y coordinate where the drag ends.
+        duration: Value passed as ``duration`` to both the move and the drag;
+            the default 1.5 is the value that used to be hard-coded.
+    """
+    pyautogui.moveTo(x1, y1, duration=duration)
+    pyautogui.dragTo(x2, y2, duration=duration)
+
+# Run the async UI-TARS test, then click at a fixed screen position
+if __name__ == "__main__":
+    asyncio.run(test_ui_tars_stream())
+    auto_click(173,48)
+    # auto_drag(56,39, 170,39)