1. 完成了语音识别的C++业务层封装,测试通过
2. 试着测试了一下LVGL_GIF渲染+音乐播放+语音识别的组合简单优化后,
发现lvgl渲染略显卡顿,语音识别有缓冲区空警告,不过无伤大雅,还需要进一步深度优化。
This commit is contained in:
@@ -4,3 +4,4 @@ log
|
|||||||
# 组件库,配置好idf环境后,运行idf.py reconfigure就会自动生成出来
|
# 组件库,配置好idf环境后,运行idf.py reconfigure就会自动生成出来
|
||||||
managed_components
|
managed_components
|
||||||
.idea
|
.idea
|
||||||
|
.venv
|
||||||
|
|||||||
@@ -113,8 +113,98 @@ void testPetSystem() {
|
|||||||
std::cout << SDFileManager::getInstance()->catCommand("/sdcard/pet_data/my_pet.json") << std::endl;
|
std::cout << SDFileManager::getInstance()->catCommand("/sdcard/pet_data/my_pet.json") << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#include "SpeechRecognizer.h"
|
||||||
|
#include <nvs.h>
|
||||||
|
#include <nvs_flash.h>
|
||||||
|
// 命令回调函数
|
||||||
|
/**
 * @brief Example command callback: logs the recognized command and dispatches on its ID.
 *
 * @param command_id  ID of the recognized command.
 * @param phrase      Text of the recognized phrase.
 * @param probability Probability value reported together with the command.
 */
void commandCallback(int command_id, const std::string& phrase, float probability) {
    static const char* const kTag = "Example";

    ESP_LOGI(kTag, "Received command: ID=%d, Phrase='%s', Probability=%.2f",
             command_id, phrase.c_str(), probability);

    // Only IDs 0..2 have a dedicated branch; every other ID is reported as unknown.
    if (command_id == 0) {
        ESP_LOGI(kTag, "执行命令0");
        // Action for command 0 goes here.
    } else if (command_id == 1) {
        ESP_LOGI(kTag, "执行命令1");
        // Action for command 1 goes here.
    } else if (command_id == 2) {
        ESP_LOGI(kTag, "执行命令2");
        // Action for command 2 goes here.
    } else {
        ESP_LOGI(kTag, "未知的命令ID: %d", command_id);
    }
}
|
||||||
|
|
||||||
|
// 状态回调函数
|
||||||
|
/**
 * @brief Example state callback: logs each state name it is handed.
 *
 * @param state Name of the state being reported.
 */
void stateCallback(const std::string& state) {
    const char* const state_text = state.c_str();
    ESP_LOGI("Example", "状态改变到: %s", state_text);
}
|
||||||
|
#include "SDFileManager.h"
|
||||||
|
void testMIC() {
|
||||||
|
// 初始化NVS
|
||||||
|
esp_err_t ret = nvs_flash_init();
|
||||||
|
if (ret == ESP_ERR_NVS_NO_FREE_PAGES || ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
|
||||||
|
ESP_ERROR_CHECK(nvs_flash_erase());
|
||||||
|
ret = nvs_flash_init();
|
||||||
|
}
|
||||||
|
ESP_ERROR_CHECK(ret);
|
||||||
|
|
||||||
|
// 初始化SD卡管理器
|
||||||
|
SDFileManager::getInstance()->tryInitSDCard();
|
||||||
|
|
||||||
|
// 获取SpeechRecognizer实例
|
||||||
|
SpeechRecognizer* recognizer = SpeechRecognizer::getInstance();
|
||||||
|
|
||||||
|
// 配置识别器
|
||||||
|
SpeechRecognizerConfig config;
|
||||||
|
config.enable_vad = true;
|
||||||
|
config.vad_mode = VAD_MODE_3; // 更高的VAD灵敏度
|
||||||
|
config.model_path = "/sdcard/srmodels";
|
||||||
|
|
||||||
|
// 初始化
|
||||||
|
if (!recognizer->init(config)) {
|
||||||
|
ESP_LOGE("main", "Failed to initialize speech recognizer");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 添加自定义命令
|
||||||
|
std::vector<std::pair<int, std::string>> commands = {
|
||||||
|
{0, "kai deng"}, // 开灯
|
||||||
|
{1, "guan deng"}, // 关灯
|
||||||
|
{2, "ti gao liang du"}, // 提高亮度
|
||||||
|
{3, "jiang di liang du"}, // 降低亮度
|
||||||
|
{4, "bo fang yin yue"}, // 播放音乐
|
||||||
|
{5, "ting zhi bo fang"} // 停止播放
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!recognizer->addCommands(commands)) {
|
||||||
|
ESP_LOGE("main", "Failed to add some commands");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 注册回调函数
|
||||||
|
recognizer->registerCommandCallback(commandCallback);
|
||||||
|
recognizer->registerStateCallback(stateCallback);
|
||||||
|
|
||||||
|
// 开始识别
|
||||||
|
if (!recognizer->start()) {
|
||||||
|
ESP_LOGE("main", "Failed to start speech recognition");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ESP_LOGI("main", "Speech recognition system started successfully");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void Cpp_Hand() {
|
void Cpp_Hand() {
|
||||||
testPetSystem();
|
testMIC();
|
||||||
|
// testPetSystem();
|
||||||
|
|
||||||
OTAClass oc;
|
OTAClass oc;
|
||||||
oc.Init();
|
oc.Init();
|
||||||
|
|||||||
@@ -170,7 +170,7 @@ void OTAClass::Init() {
|
|||||||
AudioOutput::getInstance()->setVolume(5);
|
AudioOutput::getInstance()->setVolume(5);
|
||||||
|
|
||||||
// 同步播放
|
// 同步播放
|
||||||
AudioOutput::getInstance()->playSync("/sdcard/music", "Old_Memory.mp3");
|
AudioOutput::getInstance()->playSync("/sdcard/music", "kokoronashi.mp3");
|
||||||
|
|
||||||
|
|
||||||
// // 配置Wifi连接线程参数
|
// // 配置Wifi连接线程参数
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
#include "PetDao.h"
|
#include "PetDao.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
using namespace PetEnumConverter;
|
using namespace PetEnumConverter;
|
||||||
|
|
||||||
// PetEnumConverter 实现
|
// PetEnumConverter 实现
|
||||||
|
|||||||
@@ -6,7 +6,6 @@
|
|||||||
#include "PetBaseClass.h"
|
#include "PetBaseClass.h"
|
||||||
#include "SDFileManager.h"
|
#include "SDFileManager.h"
|
||||||
#include "cJSON.h"
|
#include "cJSON.h"
|
||||||
#include <unordered_map>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
// 辅助函数:枚举类型与字符串的转换
|
// 辅助函数:枚举类型与字符串的转换
|
||||||
@@ -62,4 +61,48 @@ private:
|
|||||||
|
|
||||||
// 宠物数据存储目录
|
// 宠物数据存储目录
|
||||||
static constexpr const char* PET_DATA_DIR = "/sdcard/pet_data";
|
static constexpr const char* PET_DATA_DIR = "/sdcard/pet_data";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 宠物数据结构(JSON)
|
||||||
|
{
|
||||||
|
"name": "芝士雪豹",
|
||||||
|
"hp": 85,
|
||||||
|
"density": 120,
|
||||||
|
"identity": "我是顶真,是妈妈省的",
|
||||||
|
"stage_strategy": {
|
||||||
|
"current_stage": "PET_STAGE_ADULT",
|
||||||
|
"stage_model_map": {
|
||||||
|
"PET_STAGE_YOUNG": "/models/snow_leopard_young.gif",
|
||||||
|
"PET_STAGE_ADULT": "/models/snow_leopard_adult.gif",
|
||||||
|
"PET_STAGE_OLD": "/models/snow_leopard_old.gif"
|
||||||
|
},
|
||||||
|
"stage_audio_map": {
|
||||||
|
"PET_STAGE_YOUNG": "/audio/snow_leopard_young.mp3",
|
||||||
|
"PET_STAGE_ADULT": "/audio/snow_leopard_adult.mp3",
|
||||||
|
"PET_STAGE_OLD": "/audio/snow_leopard_old.mp3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"action_strategy": {
|
||||||
|
"current_action": "PET_ACTION_SLEEP",
|
||||||
|
"action_model_map": {
|
||||||
|
"PET_ACTION_SLEEP": "/models/actions/sleep.gif",
|
||||||
|
"PET_ACTION_EAT": "/models/actions/eat.gif",
|
||||||
|
"PET_ACTION_HAPPY": "/models/actions/happy.gif",
|
||||||
|
"PET_ACTION_ANGRY": "/models/actions/angry.gif",
|
||||||
|
"PET_ACTION_SAD": "/models/actions/sad.gif",
|
||||||
|
"PET_ACTION_EVOLVE": "/models/actions/evolve.gif",
|
||||||
|
"PET_ACTION_TOUCH": "/models/actions/touch.gif"
|
||||||
|
},
|
||||||
|
"action_audio_map": {
|
||||||
|
"PET_ACTION_SLEEP": "/audio/actions/sleep.mp3",
|
||||||
|
"PET_ACTION_EAT": "/audio/actions/eat.mp3",
|
||||||
|
"PET_ACTION_HAPPY": "/audio/actions/happy.mp3",
|
||||||
|
"PET_ACTION_ANGRY": "/audio/actions/angry.mp3",
|
||||||
|
"PET_ACTION_SAD": "/audio/actions/sad.mp3",
|
||||||
|
"PET_ACTION_EVOLVE": "/audio/actions/evolve.mp3",
|
||||||
|
"PET_ACTION_TOUCH": "/audio/actions/touch.mp3"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
@@ -47,9 +47,9 @@ LVGLRender::LVGLRender() {
|
|||||||
ESP_LOGI("LVGL_Render", "LVGL_Render构造函数...创建LVGL心跳...");
|
ESP_LOGI("LVGL_Render", "LVGL_Render构造函数...创建LVGL心跳...");
|
||||||
|
|
||||||
ThreadConfig trickConfig;
|
ThreadConfig trickConfig;
|
||||||
trickConfig.core_id = 1; // 渲染分配给核1
|
trickConfig.core_id = 1; // 渲染分配给核0
|
||||||
trickConfig.name = "LVGL_Render_Heartbeat";
|
trickConfig.name = "LVGL_Render";
|
||||||
trickConfig.priority = 5;
|
trickConfig.priority = 5; //
|
||||||
trickConfig.stack_size = 4096; // 给LVGL一个较大的堆栈,避免栈溢出
|
trickConfig.stack_size = 4096; // 给LVGL一个较大的堆栈,避免栈溢出
|
||||||
|
|
||||||
std::thread tick_thread = ThreadManager::createMemberThread(trickConfig, this, &LVGLRender::LVGL_Update);
|
std::thread tick_thread = ThreadManager::createMemberThread(trickConfig, this, &LVGLRender::LVGL_Update);
|
||||||
|
|||||||
@@ -0,0 +1,465 @@
|
|||||||
|
//
|
||||||
|
// Created by misaki on 2025/9/15.
|
||||||
|
//
|
||||||
|
#include "SpeechRecognizer.h"
|
||||||
|
#include "esp_afe_sr_models.h"
|
||||||
|
#include "esp_mn_models.h"
|
||||||
|
#include "esp_wn_iface.h"
|
||||||
|
#include "esp_mn_speech_commands.h"
|
||||||
|
#include "model_path.h"
|
||||||
|
#include "driver/gpio.h"
|
||||||
|
#include "soc/soc_caps.h"
|
||||||
|
#include "esp_err.h"
|
||||||
|
#include "nvs_flash.h"
|
||||||
|
#include "freertos/FreeRTOS.h"
|
||||||
|
#include "freertos/task.h"
|
||||||
|
#include <atomic>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
// 初始化静态成员变量
|
||||||
|
SpeechRecognizer* SpeechRecognizer::instance = nullptr;
|
||||||
|
std::mutex SpeechRecognizer::instanceMutex;
|
||||||
|
|
||||||
|
/**
 * @brief Returns the process-wide SpeechRecognizer, creating it on first use.
 *
 * Creation and lookup both happen while holding instanceMutex.
 */
SpeechRecognizer* SpeechRecognizer::getInstance() {
    const std::lock_guard<std::mutex> guard(instanceMutex);
    if (instance != nullptr) {
        return instance;
    }
    instance = new SpeechRecognizer();
    return instance;
}
|
||||||
|
|
||||||
|
SpeechRecognizer::SpeechRecognizer()
|
||||||
|
: initialized(false),
|
||||||
|
running(false),
|
||||||
|
enabled(true),
|
||||||
|
rx_handle(nullptr),
|
||||||
|
afe_handle(nullptr),
|
||||||
|
afe_data(nullptr),
|
||||||
|
models(nullptr),
|
||||||
|
multinet(nullptr),
|
||||||
|
model_data(nullptr),
|
||||||
|
tasksRunning(false) {
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Destructor: delegates all teardown to deinit().
SpeechRecognizer::~SpeechRecognizer() {
    this->deinit();
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::init(const SpeechRecognizerConfig& config) {
|
||||||
|
if (initialized) {
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Already initialized");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
this->config = config;
|
||||||
|
// 初始化I2S
|
||||||
|
if (!initI2S()) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "I2S initialization failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 初始化ESP-SR
|
||||||
|
if (!initESP_SR()) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "ESP-SR initialization failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
initialized = true;
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Initialization completed successfully");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::deinit() {
|
||||||
|
if (!initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
stop();
|
||||||
|
// 释放ESP-SR资源
|
||||||
|
if (model_data && multinet) {
|
||||||
|
multinet->destroy(model_data);
|
||||||
|
model_data = nullptr;
|
||||||
|
}
|
||||||
|
if (afe_data && afe_handle) {
|
||||||
|
afe_handle->destroy(afe_data);
|
||||||
|
afe_data = nullptr;
|
||||||
|
}
|
||||||
|
if (models) {
|
||||||
|
// 注意:esp_srmodel_init分配的资源可能需要特殊清理
|
||||||
|
// 根据ESP-SR文档进行适当清理
|
||||||
|
}
|
||||||
|
// 释放I2S资源
|
||||||
|
if (rx_handle) {
|
||||||
|
i2s_channel_disable(rx_handle);
|
||||||
|
i2s_del_channel(rx_handle);
|
||||||
|
rx_handle = nullptr;
|
||||||
|
}
|
||||||
|
initialized = false;
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Deinitialized");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::initI2S() {
|
||||||
|
esp_err_t ret = ESP_OK;
|
||||||
|
i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_1, I2S_ROLE_MASTER);
|
||||||
|
ret = i2s_new_channel(&chan_cfg, nullptr, &rx_handle);
|
||||||
|
if (ret != ESP_OK) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to create I2S channel: %s", esp_err_to_name(ret));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
i2s_std_config_t std_cfg = {
|
||||||
|
.clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(16000),
|
||||||
|
.slot_cfg = I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_32BIT, I2S_SLOT_MODE_MONO),
|
||||||
|
.gpio_cfg = {
|
||||||
|
.mclk = GPIO_NUM_NC,
|
||||||
|
.bclk = config.bclk_pin,
|
||||||
|
.ws = config.ws_pin,
|
||||||
|
.dout = GPIO_NUM_NC,
|
||||||
|
.din = config.din_pin,
|
||||||
|
.invert_flags = {
|
||||||
|
.mclk_inv = false,
|
||||||
|
.bclk_inv = false,
|
||||||
|
.ws_inv = false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
std_cfg.slot_cfg.slot_mask = I2S_STD_SLOT_RIGHT;
|
||||||
|
ret = i2s_channel_init_std_mode(rx_handle, &std_cfg);
|
||||||
|
if (ret != ESP_OK) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to init I2S standard mode: %s", esp_err_to_name(ret));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ESP_LOGI("SpeechRecognizer", "I2S initialized successfully");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::initESP_SR() {
|
||||||
|
// 获取AFE句柄
|
||||||
|
afe_handle = &ESP_AFE_SR_HANDLE;
|
||||||
|
if (!afe_handle) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to get AFE handle");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 初始化模型
|
||||||
|
models = esp_srmodel_init(config.model_path.c_str());
|
||||||
|
if (!models) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to initialize models from path: %s", config.model_path.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 配置AFE
|
||||||
|
afe_config_t afe_config = {
|
||||||
|
.aec_init = config.enable_aec,
|
||||||
|
.se_init = config.enable_se,
|
||||||
|
.vad_init = config.enable_vad,
|
||||||
|
.wakenet_init = false, // 禁用唤醒词
|
||||||
|
.voice_communication_init = false,
|
||||||
|
.voice_communication_agc_init = false,
|
||||||
|
.voice_communication_agc_gain = 15,
|
||||||
|
.vad_mode = config.vad_mode,
|
||||||
|
.wakenet_model_name = nullptr,
|
||||||
|
.wakenet_model_name_2 = nullptr,
|
||||||
|
.wakenet_mode = DET_MODE_2CH_90,
|
||||||
|
.afe_mode = SR_MODE_LOW_COST,
|
||||||
|
.afe_perferred_core = 0,
|
||||||
|
.afe_perferred_priority = 5,
|
||||||
|
.afe_ringbuf_size = 10,
|
||||||
|
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||||
|
.afe_linear_gain = 1.0,
|
||||||
|
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||||
|
.pcm_config = {
|
||||||
|
.total_ch_num = 2,
|
||||||
|
.mic_num = 1,
|
||||||
|
.ref_num = 1,
|
||||||
|
.sample_rate = 16000,
|
||||||
|
},
|
||||||
|
.debug_init = false,
|
||||||
|
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, nullptr}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, nullptr}},
|
||||||
|
};
|
||||||
|
afe_data = afe_handle->create_from_config(&afe_config);
|
||||||
|
if (!afe_data) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to create AFE data from config");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 加载MultiNet模型(采用esp-sr提供的宏来处理不同语种的模型的处理问题)
|
||||||
|
#if defined(CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8) || defined(CONFIG_SR_MN_CN_MULTINET6_QUANT) || defined(CONFIG_SR_MN_CN_MULTINET6_AC_QUANT)
|
||||||
|
char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_CHINESE);
|
||||||
|
#else
|
||||||
|
char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
|
||||||
|
#endif
|
||||||
|
if (!mn_name) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "No MultiNet model found");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
multinet = esp_mn_handle_from_name(mn_name);
|
||||||
|
if (!multinet) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to get MultiNet handle");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
model_data = multinet->create(mn_name, config.detection_timeout);
|
||||||
|
if (!model_data) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to create MultiNet model data");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ESP_LOGI("SpeechRecognizer", "ESP-SR initialized successfully with model: %s", mn_name);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::start() {
|
||||||
|
if (!initialized) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Not initialized");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (running) {
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Already running");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// 启用I2S通道
|
||||||
|
esp_err_t ret = i2s_channel_enable(rx_handle);
|
||||||
|
if (ret != ESP_OK) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to enable I2S channel: %s", esp_err_to_name(ret));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 启动任务
|
||||||
|
tasksRunning = true;
|
||||||
|
// 使用ThreadManager创建任务
|
||||||
|
feedThread = ThreadManager::createMemberThread(
|
||||||
|
config.feed_thread_config, this, &SpeechRecognizer::feedTask);
|
||||||
|
|
||||||
|
detectThread = ThreadManager::createMemberThread(
|
||||||
|
config.detect_thread_config, this, &SpeechRecognizer::detectTask);
|
||||||
|
running = true;
|
||||||
|
updateState("started");
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Speech recognition started");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::stop() {
|
||||||
|
if (!running) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tasksRunning = false;
|
||||||
|
// 等待任务结束
|
||||||
|
if (feedThread.joinable()) {
|
||||||
|
feedThread.join();
|
||||||
|
}
|
||||||
|
if (detectThread.joinable()) {
|
||||||
|
detectThread.join();
|
||||||
|
}
|
||||||
|
// 禁用I2S通道
|
||||||
|
if (rx_handle) {
|
||||||
|
i2s_channel_disable(rx_handle);
|
||||||
|
}
|
||||||
|
running = false;
|
||||||
|
updateState("stopped");
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Speech recognition stopped");
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::feedTask() {
|
||||||
|
ThreadManager::printThreadInfo("Feed task started");
|
||||||
|
int audio_chunksize = afe_handle->get_feed_chunksize(afe_data);
|
||||||
|
int nch = afe_handle->get_channel_num(afe_data);
|
||||||
|
size_t samp_len = audio_chunksize;
|
||||||
|
size_t samp_len_bytes = samp_len * sizeof(int32_t); // 单声道32位
|
||||||
|
auto *i2s_buff = static_cast<int32_t *>(malloc(samp_len_bytes));
|
||||||
|
if (!i2s_buff) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to allocate memory for I2S buffer");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
size_t bytes_read;
|
||||||
|
while (tasksRunning) {
|
||||||
|
esp_err_t ret = i2s_channel_read(rx_handle, i2s_buff, samp_len_bytes, &bytes_read, portMAX_DELAY);
|
||||||
|
if (ret != ESP_OK) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "I2S read error: %s", esp_err_to_name(ret));
|
||||||
|
vTaskDelay(pdMS_TO_TICKS(10));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// 处理音频数据(32位转16位)
|
||||||
|
for (int i = 0; i < samp_len; ++i) {
|
||||||
|
i2s_buff[i] = i2s_buff[i] >> 14; // 32:8是有效位,转换为16位音频数据
|
||||||
|
}
|
||||||
|
// 喂数据给AFE
|
||||||
|
afe_handle->feed(afe_data, reinterpret_cast<int16_t *>(i2s_buff));
|
||||||
|
}
|
||||||
|
free(i2s_buff);
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Feed task exited");
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::detectTask() {
|
||||||
|
ThreadManager::printThreadInfo("Detect task started");
|
||||||
|
int afe_chunksize = afe_handle->get_fetch_chunksize(afe_data);
|
||||||
|
int mu_chunksize = multinet->get_samp_chunksize(model_data);
|
||||||
|
if (mu_chunksize != afe_chunksize) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Chunk size mismatch: AFE=%d, MultiNet=%d", afe_chunksize, mu_chunksize);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
updateState("ready");
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Ready for speech recognition");
|
||||||
|
while (tasksRunning) {
|
||||||
|
afe_fetch_result_t* res = afe_handle->fetch(afe_data);
|
||||||
|
if (!res || res->ret_value == ESP_FAIL) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "AFE fetch error");
|
||||||
|
vTaskDelay(pdMS_TO_TICKS(10));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!enabled) {
|
||||||
|
vTaskDelay(pdMS_TO_TICKS(100));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// 使用MultiNet进行语音检测
|
||||||
|
esp_mn_state_t mn_state = multinet->detect(model_data, res->data);
|
||||||
|
if (mn_state == ESP_MN_STATE_DETECTING) {
|
||||||
|
// 检测中,不做处理
|
||||||
|
continue;
|
||||||
|
} else if (mn_state == ESP_MN_STATE_DETECTED) {
|
||||||
|
// 检测到语音命令
|
||||||
|
esp_mn_results_t *mn_result = multinet->get_results(model_data);
|
||||||
|
if (mn_result && mn_result->num > 0) {
|
||||||
|
SpeechRecognitionResult result;
|
||||||
|
result.command_id = mn_result->command_id[0];
|
||||||
|
result.phrase = mn_result->string;
|
||||||
|
result.probability = mn_result->prob[0];
|
||||||
|
result.phrase_id = mn_result->phrase_id[0];
|
||||||
|
|
||||||
|
handleRecognitionResult(result);
|
||||||
|
}
|
||||||
|
} else if (mn_state == ESP_MN_STATE_TIMEOUT) {
|
||||||
|
// 识别超时
|
||||||
|
updateState("timeout");
|
||||||
|
esp_mn_results_t *mn_result = multinet->get_results(model_data);
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Detection timeout: %s",
|
||||||
|
mn_result && mn_result->string ? mn_result->string : "");
|
||||||
|
// 重置检测状态
|
||||||
|
multinet->clean(model_data);
|
||||||
|
updateState("ready");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Detect task exited");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::addCommand(int command_id, const std::string& phrase) {
|
||||||
|
// 加强检查,确保所有相关指针都有效
|
||||||
|
if (!multinet) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "MultiNet handle is null");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!model_data) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Model data is null");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
esp_err_t ret = esp_mn_commands_add(command_id, phrase.c_str());
|
||||||
|
if (ret != ESP_OK) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "Failed to add command: %s", esp_err_to_name(ret));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// 更新命令列表
|
||||||
|
esp_mn_error_t *ret_mn = esp_mn_commands_update();
|
||||||
|
if (ret_mn) {
|
||||||
|
if (ret_mn->num >= 1) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "无法更新的指令数量: %d", ret_mn->num);
|
||||||
|
for (int i = 0; i < ret_mn->num; i++) {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "无法更新的指令名称: %s,对应的id=%d", ret_mn->phrases[i]->string, ret_mn->phrases[i]->command_id);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ESP_LOGE("SpeechRecognizer", "无法更新的指令短语数组为空");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 打印缓存的指令
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Added command: ID=%d, Phrase=%s", command_id, phrase.c_str());
|
||||||
|
ESP_LOGI("SpeechRecognizer", "下面是当前已经缓存的指令:");
|
||||||
|
esp_mn_commands_print();
|
||||||
|
ESP_LOGI("SpeechRecognizer", "下面是当前已经应用的指令:");
|
||||||
|
esp_mn_active_commands_print();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::addCommands(const std::vector<std::pair<int, std::string>>& commands) {
|
||||||
|
bool success = true;
|
||||||
|
for (const auto& cmd : commands) {
|
||||||
|
if (!addCommand(cmd.first, cmd.second)) {
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ESP_LOGI("SpeechRecognizer", "下面是当前已经缓存的指令:");
|
||||||
|
esp_mn_commands_print();
|
||||||
|
ESP_LOGI("SpeechRecognizer", "下面是当前已经应用的指令:");
|
||||||
|
esp_mn_active_commands_print();
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::clearCommands() {
|
||||||
|
if (multinet && model_data) {
|
||||||
|
esp_mn_commands_clear();
|
||||||
|
ESP_LOGI("SpeechRecognizer", "All commands cleared");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Installs the callback invoked for each recognized command, replacing any previous one.
 *
 * @param callback Callable to store; an empty callable disables the notification.
 */
void SpeechRecognizer::registerCommandCallback(SpeechCommandCallback callback) {
    // Exchange with the by-value parameter; the previous callback dies with it on return.
    commandCallback.swap(callback);
}
|
||||||
|
|
||||||
|
/**
 * @brief Installs the callback invoked on every state update, replacing any previous one.
 *
 * @param callback Callable to store; an empty callable disables the notification.
 */
void SpeechRecognizer::registerStateCallback(SpeechStateCallback callback) {
    // Exchange with the by-value parameter; the previous callback dies with it on return.
    stateCallback.swap(callback);
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::handleRecognitionResult(const SpeechRecognitionResult& result) {
|
||||||
|
// 保存到历史记录
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(historyMutex);
|
||||||
|
lastResult = result;
|
||||||
|
history.push_back(result);
|
||||||
|
|
||||||
|
// 限制历史记录大小
|
||||||
|
if (history.size() > 100) {
|
||||||
|
history.erase(history.begin());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 记录日志
|
||||||
|
ESP_LOGI("SpeechRecognizer", "Command detected: ID=%d, Phrase='%s', Probability=%.2f",
|
||||||
|
result.command_id, result.phrase.c_str(), result.probability);
|
||||||
|
updateState("command_detected");
|
||||||
|
// 调用回调函数
|
||||||
|
if (commandCallback) {
|
||||||
|
commandCallback(result.command_id, result.phrase, result.probability);
|
||||||
|
}
|
||||||
|
// 重置检测状态,准备下一次识别
|
||||||
|
if (multinet && model_data) {
|
||||||
|
multinet->clean(model_data);
|
||||||
|
}
|
||||||
|
updateState("ready");
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpeechRecognizer::updateState(const std::string& state) {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(stateMutex);
|
||||||
|
currentState = state;
|
||||||
|
}
|
||||||
|
ESP_LOGI("SpeechRecognizer", "State changed: %s", state.c_str());
|
||||||
|
if (stateCallback) {
|
||||||
|
stateCallback(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a copy of the most recently stored recognition result, taken under historyMutex.
SpeechRecognitionResult SpeechRecognizer::getLastResult() const {
    const std::lock_guard<std::mutex> guard(historyMutex);
    SpeechRecognitionResult snapshot = lastResult;
    return snapshot;
}
|
||||||
|
|
||||||
|
std::vector<SpeechRecognitionResult> SpeechRecognizer::getHistory() const {
|
||||||
|
std::lock_guard<std::mutex> lock(historyMutex);
|
||||||
|
return history;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Turns command detection on or off and publishes the matching state.
 *
 * @param enabled true to enable recognition, false to disable it.
 */
void SpeechRecognizer::setEnabled(bool enabled) {
    const char* const label = enabled ? "enabled" : "disabled";

    this->enabled = enabled;
    updateState(label);
    ESP_LOGI("SpeechRecognizer", "Speech recognition %s", label);
}
|
||||||
|
|
||||||
|
std::string SpeechRecognizer::getCurrentState() const {
|
||||||
|
std::lock_guard<std::mutex> lock(stateMutex);
|
||||||
|
return currentState;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SpeechRecognizer::isRunning() const {
|
||||||
|
return running;
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
//
|
||||||
|
// Created by misaki on 2025/9/15.
|
||||||
|
//
|
||||||
|
#pragma once
|
||||||
|
#include <functional>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <mutex>
|
||||||
|
#include <atomic>
|
||||||
|
#include "esp_afe_sr_iface.h"
|
||||||
|
#include "model_path.h"
|
||||||
|
#include "esp_mn_iface.h"
|
||||||
|
#include "driver/i2s_std.h"
|
||||||
|
#include "esp_log.h"
|
||||||
|
#include "ThreadManager.h"
|
||||||
|
|
||||||
|
// 前向声明
|
||||||
|
struct model_iface_data_t;
|
||||||
|
|
||||||
|
// 语音命令回调函数类型
|
||||||
|
using SpeechCommandCallback = std::function<void(int command_id, const std::string& phrase, float probability)>;
|
||||||
|
|
||||||
|
// 识别状态回调函数类型
|
||||||
|
using SpeechStateCallback = std::function<void(const std::string& state)>;
|
||||||
|
|
||||||
|
// 识别结果结构体
|
||||||
|
/**
 * @brief One recognition result as delivered by the detect task.
 *
 * Every member has a default, so a default-constructed result (for example the
 * recognizer's "last result" before anything was recognized) has well-defined contents
 * instead of indeterminate values. -1 marks "no command / no phrase".
 */
struct SpeechRecognitionResult {
    int command_id = -1;       ///< ID of the recognized command; -1 when unset.
    std::string phrase;        ///< Recognized phrase text; empty when unset.
    float probability = 0.0f;  ///< Probability reported for the command.
    int phrase_id = -1;        ///< ID of the matched phrase; -1 when unset.
};
|
||||||
|
|
||||||
|
// 配置结构体
|
||||||
|
struct SpeechRecognizerConfig {
|
||||||
|
// I2S配置
|
||||||
|
gpio_num_t bclk_pin = GPIO_NUM_15;
|
||||||
|
gpio_num_t ws_pin = GPIO_NUM_2;
|
||||||
|
gpio_num_t din_pin = GPIO_NUM_39;
|
||||||
|
|
||||||
|
// 音频处理配置
|
||||||
|
bool enable_aec = false; // 回声消除
|
||||||
|
bool enable_se = false; // 降噪
|
||||||
|
bool enable_vad = false; // 语音活动检测
|
||||||
|
vad_mode_t vad_mode = VAD_MODE_0; // VAD灵敏度
|
||||||
|
// 模型路径
|
||||||
|
std::string model_path = "/sdcard/srmodels";
|
||||||
|
// 线程配置
|
||||||
|
ThreadConfig feed_thread_config = {"SR_Feed", 0, 4096, 3, false};
|
||||||
|
ThreadConfig detect_thread_config = {"SR_Detect", 1, 6 * 1024, 5, false};
|
||||||
|
// 识别超时时间(ms)
|
||||||
|
int detection_timeout = 6000;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SpeechRecognizer {
|
||||||
|
public:
|
||||||
|
// 获取单例实例
|
||||||
|
static SpeechRecognizer* getInstance();
|
||||||
|
|
||||||
|
// 删除拷贝构造函数和赋值运算符
|
||||||
|
SpeechRecognizer(const SpeechRecognizer&) = delete;
|
||||||
|
SpeechRecognizer& operator=(const SpeechRecognizer&) = delete;
|
||||||
|
|
||||||
|
// 初始化语音识别系统
|
||||||
|
bool init(const SpeechRecognizerConfig& config = SpeechRecognizerConfig());
|
||||||
|
|
||||||
|
// 反初始化
|
||||||
|
void deinit();
|
||||||
|
|
||||||
|
// 添加自定义语音命令
|
||||||
|
bool addCommand(int command_id, const std::string& phrase);
|
||||||
|
|
||||||
|
// 批量添加语音命令
|
||||||
|
bool addCommands(const std::vector<std::pair<int, std::string>>& commands);
|
||||||
|
|
||||||
|
// 清除所有语音命令
|
||||||
|
void clearCommands();
|
||||||
|
|
||||||
|
// 开始语音识别
|
||||||
|
bool start();
|
||||||
|
|
||||||
|
// 停止语音识别
|
||||||
|
void stop();
|
||||||
|
|
||||||
|
// 是否正在运行
|
||||||
|
bool isRunning() const;
|
||||||
|
|
||||||
|
// 注册命令回调函数
|
||||||
|
void registerCommandCallback(SpeechCommandCallback callback);
|
||||||
|
|
||||||
|
// 注册状态回调函数
|
||||||
|
void registerStateCallback(SpeechStateCallback callback);
|
||||||
|
|
||||||
|
// 获取最后一次识别结果
|
||||||
|
SpeechRecognitionResult getLastResult() const;
|
||||||
|
|
||||||
|
// 获取识别历史
|
||||||
|
std::vector<SpeechRecognitionResult> getHistory() const;
|
||||||
|
|
||||||
|
// 设置VAD灵敏度
|
||||||
|
// void setVadSensitivity(vad_mode_t mode);
|
||||||
|
|
||||||
|
// 启用/禁用语音识别
|
||||||
|
void setEnabled(bool enabled);
|
||||||
|
|
||||||
|
// 获取当前识别状态
|
||||||
|
std::string getCurrentState() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
SpeechRecognizer();
|
||||||
|
~SpeechRecognizer();
|
||||||
|
|
||||||
|
// I2S初始化
|
||||||
|
bool initI2S();
|
||||||
|
|
||||||
|
// ESP-SR初始化
|
||||||
|
bool initESP_SR();
|
||||||
|
|
||||||
|
// 喂数据任务(音频采集)
|
||||||
|
void feedTask();
|
||||||
|
|
||||||
|
// 检测任务(语音识别)
|
||||||
|
void detectTask();
|
||||||
|
|
||||||
|
// 处理识别结果
|
||||||
|
void handleRecognitionResult(const SpeechRecognitionResult& result);
|
||||||
|
|
||||||
|
// 更新识别状态
|
||||||
|
void updateState(const std::string& state);
|
||||||
|
|
||||||
|
// 静态成员函数用于C接口兼容
|
||||||
|
static void feedTaskWrapper(void* arg);
|
||||||
|
static void detectTaskWrapper(void* arg);
|
||||||
|
|
||||||
|
private:
|
||||||
|
static SpeechRecognizer* instance;
|
||||||
|
static std::mutex instanceMutex;
|
||||||
|
|
||||||
|
SpeechRecognizerConfig config;
|
||||||
|
bool initialized;
|
||||||
|
bool running;
|
||||||
|
bool enabled;
|
||||||
|
|
||||||
|
// I2S相关
|
||||||
|
i2s_chan_handle_t rx_handle;
|
||||||
|
|
||||||
|
// ESP-SR相关
|
||||||
|
const esp_afe_sr_iface_t* afe_handle;
|
||||||
|
esp_afe_sr_data_t* afe_data;
|
||||||
|
srmodel_list_t* models;
|
||||||
|
esp_mn_iface_t* multinet;
|
||||||
|
model_iface_data_t* model_data;
|
||||||
|
|
||||||
|
// 命令回调
|
||||||
|
SpeechCommandCallback commandCallback;
|
||||||
|
SpeechStateCallback stateCallback;
|
||||||
|
|
||||||
|
// 识别结果
|
||||||
|
SpeechRecognitionResult lastResult;
|
||||||
|
std::vector<SpeechRecognitionResult> history;
|
||||||
|
mutable std::mutex historyMutex;
|
||||||
|
|
||||||
|
// 任务句柄
|
||||||
|
std::thread feedThread;
|
||||||
|
std::thread detectThread;
|
||||||
|
std::atomic<bool> tasksRunning;
|
||||||
|
|
||||||
|
// 当前状态
|
||||||
|
std::string currentState;
|
||||||
|
mutable std::mutex stateMutex;
|
||||||
|
};
|
||||||
Binary file not shown.
@@ -0,0 +1,146 @@
|
|||||||
|
# pip3 install g2p_en
|
||||||
|
from g2p_en import G2p
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# python3 gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;\
|
||||||
|
# Start fan;Stop fan;Volume down,Turn down;Mute sound;Next song;Pause playback"
|
||||||
|
# enum {
|
||||||
|
# SR_CMD_TURN_ON_THE_LIGHT,
|
||||||
|
# SR_CMD_TURN_OFF_THE_LIGHT,
|
||||||
|
# SR_CMD_START_FAN,
|
||||||
|
# SR_CMD_STOP_FAN,
|
||||||
|
# SR_CMD_VOLUME_DOWN,
|
||||||
|
# SR_CMD_MUTE_SOUND,
|
||||||
|
# SR_CMD_NEXT_SONG,
|
||||||
|
# SR_CMD_PAUSE_PLAYBACK,
|
||||||
|
# };
|
||||||
|
# static const sr_cmd_t sr_commands[] = {
|
||||||
|
# { 0, "Turn on the light", "TkN nN jc LiT"},
|
||||||
|
# { 0, "Switch on the light", "SWgp nN jc LiT"},
|
||||||
|
# { 1, "Turn off the light", "TkN eF jc LiT"},
|
||||||
|
# { 1, "Switch off the light", "SWgp eF jc LiT"},
|
||||||
|
# { 1, "Go dark", "Gb DnRK"},
|
||||||
|
# { 2, "Start fan", "STnRT FaN"},
|
||||||
|
# { 3, "Stop fan", "STnP FaN"},
|
||||||
|
# { 4, "Volume down", "VnLYoM DtN"},
|
||||||
|
# { 4, "Turn down", "TkN DtN"},
|
||||||
|
# { 5, "Mute sound", "MYoT StND"},
|
||||||
|
# { 6, "Next song", "NfKST Sel"},
|
||||||
|
# { 7, "Pause playback", "PeZ PLdBaK"},
|
||||||
|
# };
|
||||||
|
|
||||||
|
|
||||||
|
def english_g2p(text):
    """Turn English command phrases into C source for a speech-command table.

    ``text`` lists commands separated by ``;``. Each command may give several
    alternative phrases separated by ``,``. Every phrase of a command gets the
    same numeric ID (the command's zero-based position in ``text``), and only
    the first phrase of a command contributes an ``SR_CMD_*`` enum entry.

    Each phrase is passed to ``G2p`` and every label it yields (CMUdict-style
    phoneme labels such as ``AE1`` or ``DH``, plus ``" "`` between words) is
    translated to one character through a fixed table. The three stress
    variants of a vowel share one character. Labels missing from the table are
    reported on stdout and left out of the phoneme string.

    The generated enum and the ``sr_commands`` array are printed to stdout.

    Args:
        text: Command string, e.g. ``"Turn on the light,Switch on the light;Start fan"``.

    Returns:
        The ``sr_commands`` array definition as a string. The enum is only
        printed, not returned.
    """
    g2p = G2p()

    # Consonants written with a single letter map to themselves; the space
    # label keeps word boundaries inside the phoneme string.
    alphabet = {label: label for label in "NVLFSBRDGKWTMZPY"}
    alphabet[" "] = " "
    # Two-letter consonants are folded to a single lower-case character.
    alphabet.update({
        "HH": "h", "DH": "j", "NG": "l", "CH": "p",
        "JH": "q", "ZH": "r", "SH": "s", "TH": "v",
    })
    # Vowels carry a stress digit (0, 1 or 2); stress is ignored, so all three
    # variants of a vowel map to the same character.
    vowels = {
        "AE": "a", "OW": "b", "AH": "c", "EY": "d", "AO": "e",
        "EH": "f", "IH": "g", "AY": "i", "ER": "k", "IY": "m",
        "AA": "n", "UW": "o", "AW": "t", "OY": "u", "UH": "w",
    }
    for vowel, symbol in vowels.items():
        for stress in "012":
            alphabet[vowel + stress] = symbol

    out = "static const sr_cmd_t sr_commands[] = {\n"
    enum = "enum {\n"

    for cmd_id, command in enumerate(text.split(";")):
        for phrase_id, phrase in enumerate(command.split(",")):
            phoneme = ""
            for label in g2p(phrase):
                if label not in alphabet:
                    # Name the label being dropped; the message previously
                    # printed a literal "%s" because nothing was interpolated.
                    print("skip %s, not found in alphabet" % label)
                    continue
                phoneme += alphabet[label]

            out += " { " + str(cmd_id) + ', "' + phrase + '", "' + phoneme + '"},\n'
            # Only the first phrase of a command names its enum constant.
            if phrase_id == 0:
                enum += " SR_CMD_" + phrase.upper().replace(" ", "_") + ",\n"

    out += "};"
    enum += "};"

    print(enum)
    print(out)

    return out
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: takes the command string as the single
    # positional argument and prints the generated C tables.
    cli = argparse.ArgumentParser(prog="English Speech Commands G2P")
    cli.add_argument("text", type=str, default=None, help="input text")
    command_text = cli.parse_args().text

    if command_text is not None:
        english_g2p(command_text)
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
import nltk
|
||||||
|
# Fetch the NLTK "averaged_perceptron_tagger" resource so it is available locally.
# NOTE(review): presumably needed by g2p_en in gen_sr_commands.py — confirm.
nltk.download('averaged_perceptron_tagger')
|
||||||
Binary file not shown.
@@ -0,0 +1,76 @@
|
|||||||
|
The Carnegie Mellon Pronouncing Dictionary [cmudict.0.7a]
|
||||||
|
|
||||||
|
ftp://ftp.cs.cmu.edu/project/speech/dict/
|
||||||
|
https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a
|
||||||
|
|
||||||
|
Copyright (C) 1993-2008 Carnegie Mellon University. All rights reserved.
|
||||||
|
|
||||||
|
File Format: Each line consists of an uppercased word,
|
||||||
|
a counter (for alternative pronunciations), and a transcription.
|
||||||
|
Vowels are marked for stress (1=primary, 2=secondary, 0=no stress).
|
||||||
|
E.g.: NATURAL 1 N AE1 CH ER0 AH0 L
|
||||||
|
|
||||||
|
The dictionary contains 127069 entries. Of these, 119400 words are assigned
|
||||||
|
a unique pronunciation, 6830 words have two pronunciations, and 839 words have
|
||||||
|
three or more pronunciations. Many of these are fast-speech variants.
|
||||||
|
|
||||||
|
Phonemes: There are 39 phonemes, as shown below:
|
||||||
|
|
||||||
|
Phoneme Example Translation Phoneme Example Translation
|
||||||
|
------- ------- ----------- ------- ------- -----------
|
||||||
|
AA odd AA D AE at AE T
|
||||||
|
AH hut HH AH T AO ought AO T
|
||||||
|
AW cow K AW AY hide HH AY D
|
||||||
|
B be B IY CH cheese CH IY Z
|
||||||
|
D dee D IY DH thee DH IY
|
||||||
|
EH Ed EH D ER hurt HH ER T
|
||||||
|
EY ate EY T F fee F IY
|
||||||
|
G green G R IY N HH he HH IY
|
||||||
|
IH it IH T IY eat IY T
|
||||||
|
JH gee JH IY K key K IY
|
||||||
|
L lee L IY M me M IY
|
||||||
|
N knee N IY NG ping P IH NG
|
||||||
|
OW oat OW T OY toy T OY
|
||||||
|
P pee P IY R read R IY D
|
||||||
|
S sea S IY SH she SH IY
|
||||||
|
T tea T IY TH theta TH EY T AH
|
||||||
|
UH hood HH UH D UW two T UW
|
||||||
|
V vee V IY W we W IY
|
||||||
|
Y yield Y IY L D Z zee Z IY
|
||||||
|
ZH seizure S IY ZH ER
|
||||||
|
|
||||||
|
(For NLTK, entries have been sorted so that, e.g. FIRE 1 and FIRE 2
|
||||||
|
are contiguous, and not separated by FIRE'S 1.)
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
The contents of this file are deemed to be source code.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
|
||||||
|
This work was supported in part by funding from the Defense Advanced
|
||||||
|
Research Projects Agency, the Office of Naval Research and the National
|
||||||
|
Science Foundation of the United States of America, and by member
|
||||||
|
companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge
|
||||||
|
the contributions of many volunteers to the expansion and improvement of
|
||||||
|
this dictionary.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
||||||
|
ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
||||||
|
NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
+1
@@ -0,0 +1 @@
|
|||||||
|
[".", "(", ")", ":", "''", "EX", "JJS", "WRB", "VBG", "VBP", "NN", "SYM", "VB", "UH", "NNPS", "NNP", "``", "$", "NNS", "JJR", "MD", "RP", "VBD", "DT", "POS", "RBR", ",", "VBZ", "PDT", "VBN", "WP$", "WDT", "WP", "PRP$", "CD", "IN", "#", "CC", "RB", "FW", "RBS", "PRP", "LS", "JJ", "TO"]
|
||||||
+1
File diff suppressed because one or more lines are too long
+1
File diff suppressed because one or more lines are too long
@@ -91,7 +91,7 @@ void Audio_Init(void)
|
|||||||
.mute_fn = audio_mute_function,
|
.mute_fn = audio_mute_function,
|
||||||
.write_fn = bsp_i2s_write,
|
.write_fn = bsp_i2s_write,
|
||||||
.clk_set_fn = bsp_i2s_reconfig_clk,
|
.clk_set_fn = bsp_i2s_reconfig_clk,
|
||||||
.priority = 5,
|
.priority = 3,
|
||||||
.coreID = 0 // 运行在0号核,避免与lvgl抢占资源
|
.coreID = 0 // 运行在0号核,避免与lvgl抢占资源
|
||||||
};
|
};
|
||||||
ret = audio_player_new(config);
|
ret = audio_player_new(config);
|
||||||
|
|||||||
@@ -344,7 +344,7 @@ void MIC_Speech_init()
|
|||||||
afe_config.pcm_config.mic_num = 1;
|
afe_config.pcm_config.mic_num = 1;
|
||||||
afe_config.pcm_config.ref_num = 1;
|
afe_config.pcm_config.ref_num = 1;
|
||||||
afe_config.pcm_config.sample_rate = 16000;
|
afe_config.pcm_config.sample_rate = 16000;
|
||||||
afe_config.wakenet_model_name = esp_srmodel_filter(MIC_Speech.models, ESP_WN_PREFIX, NULL);
|
afe_config.wakenet_model_name = esp_srmodel_filter(MIC_Speech.models, ESP_WN_PREFIX, NULL); // 获取唤醒词模型
|
||||||
MIC_Speech.afe_data = MIC_Speech.afe_handle->create_from_config(&afe_config);
|
MIC_Speech.afe_data = MIC_Speech.afe_handle->create_from_config(&afe_config);
|
||||||
|
|
||||||
// 注意两个任务被分配了不同的核心与优先级,这是为了防止AFE(Audio Front-End)内部环形缓冲区溢出
|
// 注意两个任务被分配了不同的核心与优先级,这是为了防止AFE(Audio Front-End)内部环形缓冲区溢出
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ idf_component_register(SRCS "Bionic_sphere.c"
|
|||||||
"../Bionic_Core/ToolsClass/AudioOutput/AudioOutput.cpp" # 音频输出类库
|
"../Bionic_Core/ToolsClass/AudioOutput/AudioOutput.cpp" # 音频输出类库
|
||||||
"../Bionic_Core/ToolsClass/LVGL_Render/LVGLRender.cpp" # LVGL渲染类库
|
"../Bionic_Core/ToolsClass/LVGL_Render/LVGLRender.cpp" # LVGL渲染类库
|
||||||
"../Bionic_Core/ToolsClass/SDFileManager/SDFileManager.cpp" # SD文件管理类库
|
"../Bionic_Core/ToolsClass/SDFileManager/SDFileManager.cpp" # SD文件管理类库
|
||||||
|
"../Bionic_Core/ToolsClass/SpeechRecognizer/SpeechRecognizer.cpp" # 语音识别类库
|
||||||
"../Bionic_Core/ToolsClass/WifiConnectors/WifiConnectors.cpp" # WIFI连接类库
|
"../Bionic_Core/ToolsClass/WifiConnectors/WifiConnectors.cpp" # WIFI连接类库
|
||||||
"../Bionic_Core/ToolsClass/ThreadManager/ThreadManager.cpp" # 线程管理类库
|
"../Bionic_Core/ToolsClass/ThreadManager/ThreadManager.cpp" # 线程管理类库
|
||||||
"../Bionic_Core/CppHandle/CppHandle.cpp" # C++&C兼容库
|
"../Bionic_Core/CppHandle/CppHandle.cpp" # C++&C兼容库
|
||||||
@@ -73,6 +74,7 @@ idf_component_register(SRCS "Bionic_sphere.c"
|
|||||||
"../Bionic_Core/ToolsClass/AudioOutput"
|
"../Bionic_Core/ToolsClass/AudioOutput"
|
||||||
"../Bionic_Core/ToolsClass/LVGL_Render"
|
"../Bionic_Core/ToolsClass/LVGL_Render"
|
||||||
"../Bionic_Core/ToolsClass/SDFileManager"
|
"../Bionic_Core/ToolsClass/SDFileManager"
|
||||||
|
"../Bionic_Core/ToolsClass/SpeechRecognizer"
|
||||||
"../Bionic_Core/ToolsClass/WifiConnectors"
|
"../Bionic_Core/ToolsClass/WifiConnectors"
|
||||||
"../Bionic_Core/ToolsClass/ThreadManager"
|
"../Bionic_Core/ToolsClass/ThreadManager"
|
||||||
"../Bionic_Core/CppHandle"
|
"../Bionic_Core/CppHandle"
|
||||||
|
|||||||
@@ -560,26 +560,26 @@ CONFIG_SR_MN_EN_NONE=y
|
|||||||
#
|
#
|
||||||
# Add Chinese speech commands
|
# Add Chinese speech commands
|
||||||
#
|
#
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID0="da kai kong tiao"
|
CONFIG_CN_SPEECH_COMMAND_ID0=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID1="guan bi kong tiao"
|
CONFIG_CN_SPEECH_COMMAND_ID1=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID2="zeng da feng su"
|
CONFIG_CN_SPEECH_COMMAND_ID2=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID3="jian xiao feng su"
|
CONFIG_CN_SPEECH_COMMAND_ID3=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID4="sheng gao yi du"
|
CONFIG_CN_SPEECH_COMMAND_ID4=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID5="jiang di yi du"
|
CONFIG_CN_SPEECH_COMMAND_ID5=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID6="zhi re mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID6=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID7="zhi leng mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID7=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID8="song feng mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID8=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID9="jie neng mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID9=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID10="chu shi mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID10=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID11="jian kang mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID11=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID12="shui mian mo shi"
|
CONFIG_CN_SPEECH_COMMAND_ID12=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID13="da kai lan ya"
|
CONFIG_CN_SPEECH_COMMAND_ID13=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID14="guan bi lan ya"
|
CONFIG_CN_SPEECH_COMMAND_ID14=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID15="kai shi bo fang"
|
CONFIG_CN_SPEECH_COMMAND_ID15=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID16="zan ting bo fang"
|
CONFIG_CN_SPEECH_COMMAND_ID16=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID17="ding shi yi xiao shi"
|
CONFIG_CN_SPEECH_COMMAND_ID17=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID18="da kai dian deng"
|
CONFIG_CN_SPEECH_COMMAND_ID18=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID19="guan bi dian deng"
|
CONFIG_CN_SPEECH_COMMAND_ID19=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID20=""
|
CONFIG_CN_SPEECH_COMMAND_ID20=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID21=""
|
CONFIG_CN_SPEECH_COMMAND_ID21=""
|
||||||
CONFIG_CN_SPEECH_COMMAND_ID22=""
|
CONFIG_CN_SPEECH_COMMAND_ID22=""
|
||||||
|
|||||||
+2
-2
@@ -1679,7 +1679,7 @@ CONFIG_ESP_SYSTEM_MEMPROT_FEATURE_LOCK=y
|
|||||||
|
|
||||||
CONFIG_ESP_SYSTEM_EVENT_QUEUE_SIZE=32
|
CONFIG_ESP_SYSTEM_EVENT_QUEUE_SIZE=32
|
||||||
CONFIG_ESP_SYSTEM_EVENT_TASK_STACK_SIZE=2304
|
CONFIG_ESP_SYSTEM_EVENT_TASK_STACK_SIZE=2304
|
||||||
CONFIG_ESP_MAIN_TASK_STACK_SIZE=3584
|
CONFIG_ESP_MAIN_TASK_STACK_SIZE=8192
|
||||||
CONFIG_ESP_MAIN_TASK_AFFINITY_CPU0=y
|
CONFIG_ESP_MAIN_TASK_AFFINITY_CPU0=y
|
||||||
# CONFIG_ESP_MAIN_TASK_AFFINITY_CPU1 is not set
|
# CONFIG_ESP_MAIN_TASK_AFFINITY_CPU1 is not set
|
||||||
# CONFIG_ESP_MAIN_TASK_AFFINITY_NO_AFFINITY is not set
|
# CONFIG_ESP_MAIN_TASK_AFFINITY_NO_AFFINITY is not set
|
||||||
@@ -3221,7 +3221,7 @@ CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y
|
|||||||
CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ=240
|
CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ=240
|
||||||
CONFIG_SYSTEM_EVENT_QUEUE_SIZE=32
|
CONFIG_SYSTEM_EVENT_QUEUE_SIZE=32
|
||||||
CONFIG_SYSTEM_EVENT_TASK_STACK_SIZE=2304
|
CONFIG_SYSTEM_EVENT_TASK_STACK_SIZE=2304
|
||||||
CONFIG_MAIN_TASK_STACK_SIZE=3584
|
CONFIG_MAIN_TASK_STACK_SIZE=8192
|
||||||
CONFIG_CONSOLE_UART_DEFAULT=y
|
CONFIG_CONSOLE_UART_DEFAULT=y
|
||||||
# CONFIG_CONSOLE_UART_CUSTOM is not set
|
# CONFIG_CONSOLE_UART_CUSTOM is not set
|
||||||
# CONFIG_CONSOLE_UART_NONE is not set
|
# CONFIG_CONSOLE_UART_NONE is not set
|
||||||
|
|||||||
@@ -224,3 +224,11 @@
|
|||||||
- [x] 1. 历时两天,完整且完美的设计了宠物类,使用到了多种设计模式,完成了低耦合,高内聚的完美代码,测试也完美通过。
|
- [x] 1. 历时两天,完整且完美的设计了宠物类,使用到了多种设计模式,完成了低耦合,高内聚的完美代码,测试也完美通过。
|
||||||
|
|
||||||
- [x] 2. 顺便完善了底层通信类的封装,基于websocket,基准测试通过,但存在一点很小的线程bug,似乎是来自于esp32 idf底层的问题,待解决
|
- [x] 2. 顺便完善了底层通信类的封装,基于websocket,基准测试通过,但存在一点很小的线程bug,似乎是来自于esp32 idf底层的问题,待解决
|
||||||
|
|
||||||
|
#### Day15 2025.9.16
|
||||||
|
##### 主要目标:完成具体业务开发&各种优化
|
||||||
|
实际完成任务:
|
||||||
|
- [x] 1. 完成了语音识别的C++业务层封装,测试通过
|
||||||
|
|
||||||
|
- [x] 2. 试着测试了一下LVGL_GIF渲染+音乐播放+语音识别的组合简单优化后,
|
||||||
|
发现lvgl渲染略显卡顿,语音识别有缓冲区空警告,不过无伤大雅,还需要进一步深度优化。
|
||||||
|
|||||||
Reference in New Issue
Block a user