first pull

This commit is contained in:
2026-02-03 01:20:00 +08:00
commit 0753da86a8
79 changed files with 7134 additions and 0 deletions
+173
View File
@@ -0,0 +1,173 @@
import asyncio
from src.modules.tts_module.tts_core.gpt_sovits.gpt_sovits_client import GPTSoVITSClient, StreamingMode
from src.modules.tts_module.tts_core.async_audio_player import AsyncAudioPlayer
import sounddevice as sd
# Long Japanese passage passed as the input text by test_japanese() below.
# Each trailing backslash continues the same string literal onto the next line,
# so the four source lines form one single-line string.
test_text = "春の午後、公園のベンチに座って本を読んでいると、小さな子供が凧あげをしているのが目に入った。風に乗って凧が高く上がるたびに、彼の顔には真っすぐな笑顔が広がる。母親がそばで見守りながら、時折声をかけている。\
空は雲一つない青さで、桜の花びらが風に舞っている。遠くで犬の鳴き声が聞こえ、芝生の上では老夫婦がお茶を楽しんでいた。すべてがゆっくりと流れる時間の中で、自分の心も不思議と落ち着いてくる。\
ふと、子供の凧が木の枝に引っかかってしまった。少し焦る様子だったが、母親が助けてくれて、すぐにまた空に舞い上がった。失敗しても、誰かが助けてくれる。そんな当たり前のことに、今日は特別な温かさを感じた。\
日が傾き始める頃、私は本を閉じて家路についた。明日もきっと、誰かの笑顔があるだろう。"
async def test_tts():
    """Run one non-streaming TTS request and save it to ``outputs/output.wav``.

    Any exception raised by the request or by saving is printed and swallowed,
    so this coroutine never propagates an error.
    """
    request = dict(
        text="あのさ、いやまあ、なんていうか...要するに、そういうことじゃなくて、ほら、前に言ってたやつ、あれなんだけど、とにかく、後ででもいいから、ちょっと相談に乗ってくれない?",
        ref_audio_path="uploaded_audio/test_voice.wav",  # path on the server side
        text_lang="ja",
        prompt_lang="ja",
        media_type="wav",
        prompt_text="もう!こんなところで何やってるんだよ!",
    )
    # The client is used as an async context manager.
    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
        try:
            audio = await client.tts(**request)
            # Persist the synthesized audio, then report its size.
            audio.save("outputs/output.wav")
            print(f"✅ TTS成功!音频大小: {len(audio.audio_data)} bytes")
        except Exception as e:
            print(f"❌ 错误: {e}")
async def test_model_change():
    """Ask the server to load one GPT checkpoint and then one SoVITS checkpoint."""
    gpt_checkpoint = "GPT_weights_v2Pro/Yosuga_Airi-e32.ckpt"
    sovits_checkpoint = "SoVITS_weights_v2Pro/Yosuga_Airi_e16_s864.pth"

    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
        # GPT weights first, SoVITS weights second (same order as before).
        print("🔄 切换GPT模型...")
        await client.set_gpt_weights(gpt_checkpoint)

        print("🔄 切换SoVITS模型...")
        await client.set_sovits_weights(sovits_checkpoint)
async def stream_tts():
    """Request streamed TTS audio and print the size of every chunk received.

    Chunks are only counted and reported, not played. Any exception is
    printed and swallowed.
    """
    async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
        try:
            received = 0
            # Awaiting tts() with a streaming mode yields an async iterable of chunks.
            audio_stream = await client.tts(
                text="要するに、そういうことじゃなくて、ほら、前に言ってたやつ、あれなんだけど、とにかく、後ででもいいから、ちょっと相談に乗ってくれない?",
                ref_audio_path="uploaded_audio/test_voice.wav",
                text_lang="ja",
                prompt_lang="ja",
                prompt_text="もう!こんなところで何やってるんだよ!",
                streaming_mode=StreamingMode.FASTEST,
                media_type="wav",
            )
            async for chunk in audio_stream:
                received += 1
                print(f"🎵 收到音频块 #{received}: {len(chunk.audio_data)} bytes")
                # Real-time playback of chunk.audio_data could be hooked in here.
            print(f"✅ 流式TTS完成!共{received}个音频块")
        except Exception as e:
            print(f"❌ 流式错误: {e}")
async def stream_tts_and_play(
    text: str,
    ref_audio_path: str,
    text_lang: str = "zh",
    prompt_lang: str = "zh",
    streaming_mode: StreamingMode = StreamingMode.FASTEST,
    prompt_text: str = "もう!こんなところで何やってるんだよ!",
):
    """Stream TTS audio from the server and feed every chunk to the audio player.

    Args:
        text: Text to synthesize.
        ref_audio_path: Reference audio path passed to the TTS request.
        text_lang: Language code of ``text``.
        prompt_lang: Language code of the prompt.
        streaming_mode: Streaming mode forwarded to ``client.tts``.
        prompt_text: Prompt text forwarded to ``client.tts``. The default is
            the Japanese sentence that used to be hard-coded here, so callers
            relying on the default ``prompt_lang="zh"`` should pass a matching
            prompt explicitly.

    Raises:
        Exception: Any error from synthesis or playback is printed and then
            re-raised unchanged.
    """
    # buffer_size=5 is the original author's latency/stability trade-off.
    async with AsyncAudioPlayer(buffer_size=5) as player:
        async with GPTSoVITSClient(debug=True, port=20261, host="192.168.1.8") as client:
            try:
                print(f"🎤 开始流式合成: {text[:30]}...")
                print(f"🎯 流式模式: {streaming_mode.name}")

                # Awaiting tts() returns the object that is iterated with
                # ``async for`` below.
                audio_stream = await client.tts(
                    text=text,
                    ref_audio_path=ref_audio_path,
                    text_lang=text_lang,
                    prompt_lang=prompt_lang,
                    prompt_text=prompt_text,
                    streaming_mode=streaming_mode,
                    media_type="wav",
                    sample_steps=32,
                    top_k=5,
                    temperature=1.0,
                )

                # Hand every chunk to the player as soon as it arrives.
                chunk_idx = 0
                async for audio_chunk in audio_stream:
                    chunk_idx += 1
                    print(f"📥 收到音频块 #{chunk_idx}: {len(audio_chunk.audio_data):6d} bytes")
                    await player.add_chunk(audio_chunk.audio_data)

                print(f"✅ 合成完成! 共接收 {chunk_idx} 个音频块")

                # Block until the player's queue is drained, i.e. playback finished.
                await player.audio_queue.join()
                print("🎵 播放完成!")
            except Exception as e:
                print(f"❌ 错误: {e}")
                raise
async def test_japanese():
    """Stream the long Japanese sample text through TTS and play it back."""
    banner = "=" * 50
    print(banner)
    print("🗾 日语流式TTS测试")
    print(banner)

    playback_args = dict(
        text=test_text,
        ref_audio_path="uploaded_audio/test_voice.wav",
        text_lang="ja",
        prompt_lang="ja",
        streaming_mode=StreamingMode.FASTEST,
    )
    await stream_tts_and_play(**playback_args)
async def batch_test():
    """Synthesize three Chinese sentences in one batch and save each result.

    Result ``i`` is written to ``output/batch_{i}.wav``.
    """
    async with GPTSoVITSClient() as client:
        sentences = [
            "你好,世界!",
            "这是一个批量测试。",
            "异步批量处理非常高效。",
        ]
        results = await client.batch_tts(
            texts=sentences,
            ref_audio_path="archive_jingyuan_1.wav",
            text_lang="zh",
        )

        for i, audio in enumerate(results):
            target = f"output/batch_{i}.wav"
            audio.save(target)
            print(f"✅ 批量任务 {i + 1}/{len(results)} 完成")
if __name__ == "__main__":
    # Show the available audio devices before anything is played.
    print("🔍 检查音频设备...")
    available_devices = sd.query_devices()
    print(available_devices)

    # Use PulseAudio: the second element of the device pair is set to "pulse".
    sd.default.device = (None, "pulse")

    asyncio.run(test_japanese())
+24
View File
@@ -0,0 +1,24 @@
import asyncio
import json
from websockets.asyncio.client import connect
async def test_all_types():
    """Send a JSON, a plain-text and a binary frame, printing one reply after each."""
    # (heading, payload) pairs, sent in this order.
    cases = [
        (
            "=== 测试JSON消息 ===",
            json.dumps({
                "type": "chat",
                "content": "你好服务器!"
            }),
        ),
        ("\n=== 测试文本消息 ===", "这是纯文本消息"),
        ("\n=== 测试二进制消息 ===", b"\x00\x01\x02\x03\x04"),
    ]

    async with connect("ws://localhost:8765") as ws:
        for heading, payload in cases:
            print(heading)
            await ws.send(payload)
            # Exactly one frame is read back per frame sent.
            print(f"收到: {await ws.recv()}")
if __name__ == "__main__":
    # Script entry point: run the three-frame client check once.
    asyncio.run(test_all_types())
+174
View File
@@ -0,0 +1,174 @@
"""
极简 WebSocket 测试服务器 - 修复版本
"""
import asyncio
import json
import logging
from datetime import datetime
from typing import Set
import websockets
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(message)s'
)
class SimpleWebSocketServer:
    """Minimal WebSocket test server that answers every frame with a JSON reply.

    JSON objects are dispatched on their ``"type"`` field (``ping``, ``login``,
    ``chat``; anything else is echoed). Text that is not JSON gets a
    ``text_echo`` reply and binary frames that are not JSON get a
    ``binary_echo`` reply, so a malformed frame no longer kills the connection.
    """

    def __init__(self, host="localhost", port=8765):
        """Remember the listen address; nothing is opened until ``start()``."""
        self.host = host
        self.port = port
        # Connections that are currently open.
        self.clients: Set = set()

    @staticmethod
    def _now_ms():
        """Return the current time as integer milliseconds since the epoch."""
        return int(datetime.now().timestamp() * 1000)

    async def handle_connection(self, websocket, path=None):
        """Serve one client: greet it, then answer frames until it disconnects.

        Args:
            websocket: The connection object handed over by ``websockets``.
            path: Unused. Kept optional so the handler works both with
                websockets versions that call ``handler(websocket, path)`` and
                with versions that call ``handler(websocket)``.
        """
        client_id = f"{websocket.remote_address[0]}:{websocket.remote_address[1]}"
        self.clients.add(websocket)
        logging.info(f"✅ 客户端连接: {client_id} (当前连接数: {len(self.clients)})")

        try:
            # Greeting sent once, before any client frame is read.
            welcome = {
                "type": "connect",
                "data": {
                    "message": "WebSocket 服务器连接成功",
                    "client_id": client_id,
                    "server_time": datetime.now().isoformat(),
                    "status": "connected"
                },
                "timestamp": self._now_ms()
            }
            await websocket.send(json.dumps(welcome))

            async for message in websocket:
                await self.handle_message(websocket, client_id, message)
        except websockets.exceptions.ConnectionClosed:
            logging.info(f"❌ 客户端断开: {client_id}")
        finally:
            # Always drop the connection from the registry, however we got here.
            self.clients.discard(websocket)
            logging.info(f"📊 剩余连接: {len(self.clients)}")

    def _build_json_reply(self, data):
        """Build the reply dict for a successfully parsed JSON value."""
        if not isinstance(data, dict):
            # Valid JSON that is not an object (list, number, string, ...):
            # there is no "type" to dispatch on, so echo the value back.
            return {
                "type": "echo",
                "data": {
                    "original": data,
                    "original_type": "unknown",
                    "server_processed_at": datetime.now().isoformat()
                },
                "timestamp": self._now_ms()
            }

        msg_type = data.get("type", "unknown")
        payload = data.get("data", {})
        # Field lookups below need a mapping; a non-dict payload has no fields.
        fields = payload if isinstance(payload, dict) else {}

        if msg_type == "ping":
            # Heartbeat reply.
            return {
                "type": "pong",
                "data": {
                    "server_time": datetime.now().isoformat(),
                    "latency": "0ms"
                },
                "timestamp": self._now_ms()
            }

        if msg_type == "login":
            username = fields.get("username", "anonymous")
            try:
                digest = hash(username)
            except TypeError:
                # Lists/dicts are unhashable; fall back to their repr.
                digest = hash(repr(username))
            return {
                "type": "login_success",
                "data": {
                    "user_id": f"user_{abs(digest) % 10000}",
                    "username": username,
                    "status": "authenticated"
                },
                "timestamp": self._now_ms()
            }

        if msg_type == "chat":
            msg_content = fields.get("message", "")
            return {
                "type": "chat_response",
                "data": {
                    "message": f"服务器收到: {msg_content}",
                    "sender": "server",
                    "received_at": datetime.now().isoformat()
                },
                "timestamp": self._now_ms()
            }

        # Unknown type: echo the payload exactly as received.
        return {
            "type": "echo",
            "data": {
                "original": payload,
                "original_type": msg_type,
                "server_processed_at": datetime.now().isoformat()
            },
            "timestamp": self._now_ms()
        }

    def _build_raw_reply(self, message):
        """Build the reply dict for a frame that could not be parsed as JSON."""
        if isinstance(message, (bytes, bytearray)):
            raw = bytes(message)
            # bytes are not JSON-serialisable, so report them hex-encoded.
            return {
                "type": "binary_echo",
                "data": {
                    "size": len(raw),
                    "hex": raw.hex(),
                    "note": "这是二进制消息"
                },
                "timestamp": self._now_ms()
            }
        return {
            "type": "text_echo",
            "data": {
                "original": message,
                "note": "这是文本消息"
            },
            "timestamp": self._now_ms()
        }

    async def handle_message(self, websocket, client_id, message):
        """Answer one incoming frame with exactly one JSON reply.

        Args:
            websocket: Connection to reply on.
            client_id: ``"host:port"`` label used for logging.
            message: The received frame, ``str`` for text or ``bytes`` for binary.
        """
        logging.info(f"📨 收到消息 from {client_id}: {message}")

        try:
            data = json.loads(message)
        except ValueError:
            # ValueError covers json.JSONDecodeError (not JSON) as well as the
            # UnicodeDecodeError json.loads raises for undecodable bytes.
            response = self._build_raw_reply(message)
        else:
            response = self._build_json_reply(data)

        await websocket.send(json.dumps(response))

    async def start(self):
        """Start listening and run until the task is cancelled."""
        logging.info(f"🚀 启动 WebSocket 服务器: ws://{self.host}:{self.port}")

        # handle_connection accepts both handler call styles (with or without
        # ``path``), so it is registered directly without a wrapper.
        server = await websockets.serve(
            self.handle_connection,
            self.host,
            self.port,
            ping_interval=None,
            ping_timeout=None,
            close_timeout=None,
            max_size=10 * 1024 * 1024
        )

        logging.info("📌 服务器已启动,等待连接...")
        logging.info("🛑 按 Ctrl+C 停止服务器")

        try:
            # A future that is never resolved keeps the coroutine alive.
            await asyncio.Future()
        finally:
            server.close()
            await server.wait_closed()
            logging.info("👋 服务器已关闭")
def main():
    """Parse ``--host``/``--port`` and run the server until interrupted."""
    import argparse

    cli = argparse.ArgumentParser(description='极简 WebSocket 测试服务器')
    cli.add_argument('--host', default='localhost', help='监听地址')
    # NOTE(review): the CLI default port is 8088 while SimpleWebSocketServer's
    # own default is 8765 - confirm which one clients are expected to use.
    cli.add_argument('--port', type=int, default=8088, help='监听端口')
    options = cli.parse_args()

    ws_server = SimpleWebSocketServer(options.host, options.port)

    try:
        asyncio.run(ws_server.start())
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server.
        logging.info("👋 服务器被用户中断")
if __name__ == "__main__":
    # Script entry point: hand over to the CLI wrapper.
    main()
+33
View File
@@ -0,0 +1,33 @@
# requestTest.py
import requests
from pathlib import Path
# 指定正确的 MIME 类型
url = "http://192.168.1.8:20260/transcribe"
audio_path = Path("test_files/z105300938.wav")

# Upload the file as multipart form data; the handle must stay open while
# the request is being sent.
with open(audio_path, "rb") as f:
    # (filename, file object, MIME type) - the MIME type is given explicitly.
    files = {"file": (audio_path.name, f, "audio/wav")}
    response = requests.post(url, files=files)

# Response details.
print(f"状态码: {response.status_code}")
print(f"响应头: {response.headers.get('content-type')}")

# Only a 200 reply is parsed as JSON; anything else is dumped as raw text.
if response.status_code != 200:
    print(f"错误响应: {response.text}")
else:
    result = response.json()
    print(f"识别结果: {result['data']['text']}")
    print(f"语言: {result['data']['language']}")
    print(f"置信度: {result['data']['confidence']}")
    print(f"处理时间: {result['data']['processing_time']}s")
+14
View File
@@ -0,0 +1,14 @@
# 一个小Test, 展示设计的dtos模块与tts和asr的集成
from src.modules.websocket_base_module.dto.third_dtos import AudioDataDTO
from src.modules.tts_module.tts_core.async_audio_player import AsyncAudioPlayer
from src.modules.tts_module.tts_core.gpt_sovits.gpt_sovits_client import GPTSoVITSClient, StreamingMode
from src.modules.asr_module.client.asr_client import create_asr_client
# with create_asr_client(base_url="http://192.168.1.5:20260") as client:
# # 转录文件
# result = client.transcribe_file("test_files/test.wav")
# print(f"识别结果: {result.data.text}")
# print(f"置信度: {result.data.confidence:.2f}")
# print(f"耗时: {result.data.processing_time:.3f}s")
+30
View File
@@ -0,0 +1,30 @@
from src.modules.websocket_base_module.dto.second_dtos import get_json_dto_instance
from src.modules.websocket_base_module.dto.third_dtos import AudioDataDTO
from src.modules.websocket_base_module.websocket_core.core_ws_server import get_ws_server
import asyncio
from loguru import logger
async def main():
    """Register the DTO receivers and serve WebSocket clients on localhost:8765."""
    # Shared WebSocket server instance.
    ws_server = await get_ws_server()

    # Second-level JSON dispatcher bound to that server.
    json_dispatcher = await get_json_dto_instance(ws_server)

    # Constructing the DTO is what registers its receiver; the instance itself
    # is not used further but is kept referenced for the server's lifetime.
    audio_dto = AudioDataDTO(json_dispatcher)

    logger.info("所有DTO接收器已注册,等待客户端连接...")

    # run() blocks until the server stops or the task is cancelled.
    try:
        await ws_server.run("localhost", 8765)
    except asyncio.CancelledError:
        logger.info("服务器任务已取消,正在优雅退出...")
    finally:
        logger.info("服务器已停止")
if __name__ == "__main__":
    # Ctrl+C surfaces here as KeyboardInterrupt; report it instead of a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n✓ 服务器已手动终止(按 Ctrl+C)")
Binary file not shown.
Binary file not shown.
Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+88
View File
@@ -0,0 +1,88 @@
from src.modules.text_ai_module.text_ai_core.general_text_ai_req import UnifiedLLM, ModelConfig, ModelProvider, create_llm_client
from src.config.config import get_settings
from src.config.convert_env import EnvConverter
from src.config.file_config import DirectoryInitializer
# Import-time bootstrap; both statements run as soon as this module is loaded.
EnvConverter().convert(backup_existing=True) # On first start, generate the env file from the env template
DirectoryInitializer(get_settings()) # Create the required directories if they do not exist yet
def test1():
    """Send one blocking chat request and print the reply text.

    Model name, base URL and API key are all read from the project settings.
    """
    llm = UnifiedLLM(
        ModelConfig(
            provider=ModelProvider.OPENAI,
            model_name=get_settings().ai_model_name,
            base_url=get_settings().ai_api_base_url,
            api_key=get_settings().ai_api_key,  # taken from the environment-backed settings
            temperature=0.7,
            max_tokens=2048,
        )
    )

    conversation = [
        {"role": "system", "content": "你是一个DeepSeek助手"},
        {"role": "user", "content": "请介绍一下DeepSeek模型的特点"},
    ]
    reply = llm.chat(conversation)
    print(reply.content)
def base_test2():
    """Stream a chat completion and print the chunks as they arrive."""
    # Shortcut factory; the "openai" provider is used because the endpoint is
    # OpenAI-compatible.
    client = create_llm_client(
        provider="openai",
        model_name=get_settings().ai_model_name,
        api_key=get_settings().ai_api_key,
        base_url=get_settings().ai_api_base_url,
    )

    prompt = [{"role": "user", "content": "用Python写一个快速排序算法"}]

    print("正在生成响应...")
    for piece in client.stream_chat(prompt):
        # No newline between chunks, flushed so output appears immediately.
        print(piece.content, end="", flush=True)
def test_lm_studio():
    """Exercise a local LM Studio model, first non-streaming and then streaming."""
    print("=== 测试本地 LM Studio ===")

    llm = UnifiedLLM(
        ModelConfig(
            provider=ModelProvider.LM_STUDIO,
            model_name="qwen/qwen3-4b-2507",
            base_url="http://192.168.1.8:1234/v1",
            api_key="",  # left empty: LM Studio needs no API key
            temperature=0.7,
            max_tokens=1024,
            streaming=False,  # streaming is off in the config; stream_chat is called explicitly below
        )
    )

    messages = [
        {"role": "system", "content": "你是一个有用的助手"},
        {"role": "user", "content": "用中文介绍一下自己"},
    ]

    # Pass 1: one complete response.
    print("非流式响应:")
    reply = llm.chat(messages, streaming=False)
    print(reply.content)

    # Pass 2: the same conversation, printed chunk by chunk.
    print("\n流式响应:")
    for piece in llm.stream_chat(messages):
        print(piece.content, end="", flush=True)
if __name__ == "__main__":
    # Only the local LM Studio check runs by default.
    test_lm_studio()
+66
View File
@@ -0,0 +1,66 @@
import asyncio
import base64
from pathlib import Path
from src.modules.device_control_module.device_control_core.ui_tars_.ui_tars_client import UITarsClient, UITarsClientConfig
async def test_ui_tars_stream():
    """Send one screenshot plus an instruction to UI-TARS and print the reply.

    Despite the name and the printed wording, the request currently goes
    through the non-streaming ``client.call_async``; the streaming variant
    (``client.stream_async``) is disabled, so the reported chunk count is
    always 0.
    """
    import time

    config = UITarsClientConfig(
        deployment_type="lmstudio",
        base_url="http://192.168.1.8:1234/v1",
        model_name="ui-tars-1.5-7b@q4_k_m",
        temperature=0.1
    )
    client = UITarsClient(config)

    # The screenshot is sent as a base64 string.
    image_base64 = base64.b64encode(Path("test_files/Screenshot_test.png").read_bytes()).decode()
    print(f"✅ 图片编码完成,长度: {len(image_base64)} 字符\n")

    print("🤖 开始流式调用 UI-TARS...\n")
    print("思考过程:\n")

    # Stays 0 while the streaming path is disabled; still printed below.
    chunk_count = 0

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    full_response = await client.call_async("打开AK加速器", image_base64)
    end_time = time.perf_counter()

    print(f"\n\n耗时: {end_time - start_time:.2f}")
    print(f"\n\n{'=' * 50}")
    print(f"✅ 流式调用完成!共接收 {chunk_count} 个 chunk")
    print(f"完整响应长度: {len(full_response)} 字符")
    print("响应内容:\n")
    print(full_response)
import pyautogui
def auto_click(x: int, y: int) -> None:
    """Glide the mouse pointer to ``(x, y)`` over 1.5 seconds, then click there."""
    pyautogui.moveTo(x, y, duration=1.5)
    pyautogui.click()
def auto_drag(x1: int, y1: int, x2: int, y2: int) -> None:
    """Move the pointer to ``(x1, y1)``, then drag from there to ``(x2, y2)``.

    Both the move and the drag take 1.5 seconds each.
    """
    pyautogui.moveTo(x1, y1, duration=1.5)
    pyautogui.dragTo(x2, y2, duration=1.5)
# Run the async UI-TARS call first, then replay a fixed click.
if __name__ == "__main__":
    asyncio.run(test_ui_tars_stream())
    # Hard-coded screen coordinates for the follow-up click.
    auto_click(173, 48)
    # Alternative action kept for reference: auto_drag(56, 39, 170, 39)