1. 进一步拓展了语音识别,目前可以从sd卡导入模型,避免了model文件占用flash分区大小

2. 进一步修改了语音识别,关闭了关键词唤醒功能,只保留了指令识别功能
3. 构建了业务层的基本框架(增加了底层驱动对 C++ 的兼容),业务代码采用C++编写,启用了RTTI(运行时类型识别)
This commit is contained in:
Misaki
2025-09-03 00:19:14 +08:00
parent ce0998c1c6
commit 5d79f88918
42 changed files with 822 additions and 100 deletions
+9 -1
View File
@@ -1,5 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_log.h"
#include "esp_check.h"
#include "unity.h"
@@ -51,4 +55,8 @@ void Music_pause(void);
uint32_t Music_Duration(void);
uint32_t Music_Elapsed(void);
uint16_t Music_Energy(void);
void Volume_adjustment(uint8_t Volume);
void Volume_adjustment(uint8_t Volume);
#ifdef __cplusplus
}
#endif
+9 -1
View File
@@ -1,5 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_log.h"
#include "esp_adc/adc_oneshot.h"
#include "esp_adc/adc_cali.h"
@@ -18,4 +22,8 @@
extern float BAT_analogVolts;
void BAT_Init(void);
float BAT_Get_Volts(void);
float BAT_Get_Volts(void);
#ifdef __cplusplus
}
#endif
+10
View File
@@ -1,4 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_err.h"
#include "esp_log.h"
#include <stdio.h>
@@ -68,3 +73,8 @@ void LCD_addWindow(uint16_t Xstart, uint16_t Ystart, uint16_t Xend, uint16_t Yen
void Backlight_Init(void); // Initialize the LCD backlight, which has been called in the LCD_Init function, ignore it
void Set_Backlight(uint8_t Light); // Call this function to adjust the brightness of the backlight. The value of the parameter Light ranges from 0 to 100
#ifdef __cplusplus
}
#endif
+11 -1
View File
@@ -1,4 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
@@ -22,4 +27,9 @@ void example_lvgl_flush_cb(lv_disp_drv_t *drv, const lv_area_t *area, lv_color_t
void example_lvgl_port_update_callback(lv_disp_drv_t *drv);
void example_increase_lvgl_tick(void *arg);
void LVGL_Init(void); // Call this function to initialize the screen (must be called in the main function) !!!!!
void LVGL_Init(void); // Call this function to initialize the screen (must be called in the main function) !!!!!
#ifdef __cplusplus
}
#endif
+10 -1
View File
@@ -1,5 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "lvgl.h"
#include "demos/lv_demos.h"
@@ -16,4 +20,9 @@
void Backlight_adjustment_event_cb(lv_event_t * e);
void Lvgl_Example1(void);
void LVGL_Backlight_adjustment(uint8_t Backlight);
void LVGL_Backlight_adjustment(uint8_t Backlight);
#ifdef __cplusplus
}
#endif
+9
View File
@@ -1,4 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/*********************
* INCLUDES
*********************/
@@ -67,3 +72,7 @@ void LVGL_Resume_Music();
void LVGL_Pause_Music();
void LVGL_Play_Music(uint32_t ID);
void LVGL_volume_adjustment(uint8_t Volume);
#ifdef __cplusplus
}
#endif
+10
View File
@@ -4,6 +4,12 @@
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
@@ -68,3 +74,7 @@ esp_err_t esp_lcd_touch_new_i2c_cst816s(const esp_lcd_panel_io_handle_t io, cons
extern esp_lcd_touch_handle_t tp;
void Touch_Init(void);
#ifdef __cplusplus
}
#endif
+11
View File
@@ -1,5 +1,10 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include "I2C_Driver.h"
@@ -42,3 +47,9 @@ void Set_Toggle(uint8_t Pin); // Flip the level of
void TCA9554PWR_Init(uint8_t PinState); // Set the seven pins to PinState state, for example :PinState=0x23, 0010 0011 State (the highest bit is not used) (Output mode or input mode) 0= Output mode 1= Input mode. The default value is output mode
esp_err_t EXIO_Init(void);
#ifdef __cplusplus
}
#endif
+10 -1
View File
@@ -1,5 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h> // For memcpy
#include "esp_log.h"
@@ -20,4 +24,9 @@
void I2C_Init(void);
// Reg addr is 8 bit
esp_err_t I2C_Write(uint8_t Driver_addr, uint8_t Reg_addr, const uint8_t *Reg_data, uint32_t Length);
esp_err_t I2C_Read(uint8_t Driver_addr, uint8_t Reg_addr, uint8_t *Reg_data, uint32_t Length);
esp_err_t I2C_Read(uint8_t Driver_addr, uint8_t Reg_addr, uint8_t *Reg_data, uint32_t Length);
#ifdef __cplusplus
}
#endif
+55
View File
@@ -0,0 +1,55 @@
### 如你所见,这是一份帮助文档,此类文档遍布于项目的各个子目录当中
下面把这段 `afe_config_t` 里的 **每一个成员** 按“它是干什么的 → 典型取值 → 建议” 三步法讲清。
看完你就知道哪些可以大胆改、哪些最好别动。
---
### 1 开关类(true/false)
| 成员 | 作用 | 典型取值 | 建议 |
|---|---|---|---|
| **aec_init** | 是否启用 **回声消除**(AEC)。<br>用于“喇叭→麦克风”回声场景。 | true:有扬声器<br>false:无扬声器 | 无喇叭就关 |
| **se_init** | 是否启用 **稳态噪声抑制**(降噪)。<br>抑制风扇/空调/胎噪等。 | true:嘈杂环境<br>false:安静环境 | 按场景开关 |
| **vad_init** | 是否启用 **语音活动检测**。<br>只把“有人说话”的区段送给 MultiNet,降低误识别。 | true:怕误触发<br>false:想最省电 | 见上一贴 |
| **wakenet_init** | 是否启用 **唤醒词**(WakeNet)。 | true:需要“Hi, Lexin”唤醒<br>false:仅指令识别 | 本项目关 |
| **voice_communication_init** | 是否把 AFE 调成“通话模式”<br>(打开 WebRTC 级 AEC/NS/AGC)。 | true:VoIP/会议<br>false:离线语音识别 | 99% 场景关 |
| **voice_communication_agc_init** | 通话模式下才生效的 AGC 开关。 | 同上 | 同上 |
| **debug_init** | 打开 AFE 内部调试日志。 | true:调试<br>false:量产 | 量产关 |
---
### 2 数值/枚举类
| 成员 | 作用 | 取值范围 & 含义 | 建议 |
|---|---|---|---|
| **vad_mode** | VAD 的灵敏度等级。<br>`VAD_MODE_0` 最松,`VAD_MODE_3` 最严。 | 0~3 | 0=最松,1=轻,2=中,3=最严 |
| **wakenet_mode** | WakeNet 工作模式(决定通道数/检测阈值)。 | `DET_MODE_0`=关<br>`DET_MODE_2CH_90`=2 麦 90% 阈值 | 关唤醒就设 0 |
| **afe_mode** | AFE 整体运行档。<br>`SR_MODE_HIGH_PERF` 最准但吃资源,`SR_MODE_LOW_COST` 最省。 | LOW_COST / HIGH_PERF | ESP32-S3 建议 LOW_COST |
| **afe_perferred_core** | feed/fetch 任务优先跑在哪个核。 | 0 / 1 | 与业务任务错峰即可 |
| **afe_perferred_priority** | AFE 内部线程优先级(5~15)。 | 5=低,15=高 | 一般 5 就够 |
| **afe_ringbuf_size** | AFE 内部环形缓冲帧数。<br>越大越抗抖动,越大吃 RAM。 | 10~100 | 不开唤醒 10~20 即可 |
| **memory_alloc_mode** | 模型/缓冲放在 PSRAM 还是内部 SRAM。 | `AFE_MEMORY_ALLOC_MORE_PSRAM` → 优先 PSRAM,省内部 RAM | 有 PSRAM 就开 |
| **afe_linear_gain** | 线性数字增益(1.0=不变)。 | 0.1~4.0 | 麦克风灵敏度低可调到 1.5 |
| **agc_mode** | 自动增益控制策略。 | `AFE_MN_PEAK_AGC_MODE_0/1/2` | 默认 2 即可 |
| **voice_communication_agc_gain** | 通话模式下 AGC 目标增益 dB。 | 0~31 | 仅通话模式生效 |
---
### 3 子结构体 `pcm_config`
| 成员 | 作用 | 典型取值 | 说明 |
|---|---|---|---|
| **total_ch_num** | 前端接收的 **总通道数**(mic + ref 之和)。 | 2 | I²S 数据里一共几路 |
| **mic_num** | 其中 **麦克风通道** 数量。 | 1 | 单麦就写 1 |
| **ref_num** | **参考通道**(回声参考、噪声参考)数量。 | 1 | 无回声可 0,有回声就 1 |
| **sample_rate** | 采样率。 | 16000 | MultiNet 固定 16 kHz |
---
### 4 一句话总结
- **想最省电/省 RAM**
`aec/se/vad/wakenet` 全关,ringbuf 10,LOW_COST,优先 PSRAM。
- **想最稳最抗噪**
`vad_init=true, vad_mode=2, se_init=true`,ringbuf 50。
+144 -29
View File
@@ -107,6 +107,117 @@ static void detect_hander(AppSpeech *self)
break;
}
// if (res->wakeup_state == WAKENET_DETECTED) {
// ESP_LOGI(TAG, "WAKEWORD DETECTED\n");
// multinet->clean(model_data); // clean all status of multinet
// LCD_Backlight_original = LCD_Backlight;
// } else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
// ESP_LOGI(TAG, "AFE_FETCH_CHANNEL_VERIFIED, channel index: %d\n", res->trigger_channel_id);
// ESP_LOGI(TAG, ">>> Say your command <<<");
// self->detected = true;
// self->afe_handle->disable_wakenet(afe_data);
// LCD_Backlight = 35;
//
// }
esp_mn_state_t mn_state = multinet->detect(model_data, res->data);
if (mn_state == ESP_MN_STATE_DETECTING) {
self->command = COMMAND_NOT_DETECTED;
continue;
} else if (mn_state == ESP_MN_STATE_DETECTED) {
esp_mn_results_t *mn_result = multinet->get_results(model_data);
// for (int i = 0; i < mn_result->num; i++) {
// ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
// i+1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->string, mn_result->prob[i]);
// }
ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
1, mn_result->command_id[0], mn_result->phrase_id[0], mn_result->string, mn_result->prob[0]);
switch (mn_result->command_id[0]) {
case 0:
LCD_Backlight = 100;
break;
case 1:
LCD_Backlight = 30;
break;
case 2:
LCD_Backlight = 0;
break;
case 3:
LCD_Backlight = 100;
break;
case 4:
play_Music_Flag = 1;
break;
default: printf("Unknown Command!\r\n"); break;
}
self->command = (command_word_t)mn_result->command_id[0];
// self->afe_handle->enable_wakenet(afe_data);
// self->detected = false;
// self->afe_handle->disable_wakenet(afe_data); // 停止唤醒
self->detected = true;
ESP_LOGI(TAG, ">>> Say your command <<<");
self->command = COMMAND_TIMEOUT;
} else if (mn_state == ESP_MN_STATE_TIMEOUT) {
esp_mn_results_t *mn_result = multinet->get_results(model_data);
ESP_LOGI(TAG, "timeout, string:%s\n", mn_result->string);
self->command = COMMAND_TIMEOUT;
// self->afe_handle->enable_wakenet(afe_data);
self->detected = false;
ESP_LOGI(TAG, ">>> Waiting to be waken up <<<");
LCD_Backlight = LCD_Backlight_original;
if(play_Music_Flag){
play_Music_Flag = 0;
if(ACTIVE_TRACK_CNT)
_lv_demo_music_resume();
else
printf("No MP3 file found in SD card!\r\n");
}
}
}
if (model_data) {
multinet->destroy(model_data);
model_data = NULL;
}
self->afe_handle->destroy(afe_data);
vTaskDelete(NULL);
}
// 下面的函数是上面的备份,使用前需要在idf.py menuconfig中先配置打开唤醒模型
static void detect_handler_continuous(AppSpeech *self)
{
esp_afe_sr_data_t *afe_data = self->afe_data;
int afe_chunksize = self->afe_handle->get_fetch_chunksize(afe_data);
#if defined(CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8) || defined(CONFIG_SR_MN_CN_MULTINET6_QUANT) || defined(CONFIG_SR_MN_CN_MULTINET6_AC_QUANT)
char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_CHINESE);
#else
char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
#endif // CONFIG_IDF_TARGET_ESP32S3
ESP_LOGI(TAG, "multinet:%s\n", mn_name);
esp_mn_iface_t *multinet = esp_mn_handle_from_name(mn_name);
model_iface_data_t *model_data = multinet->create(mn_name, 6000);
esp_mn_commands_update_from_sdkconfig(multinet, model_data); // Add speech commands from sdkconfig
int mu_chunksize = multinet->get_samp_chunksize(model_data);
assert(mu_chunksize == afe_chunksize);
// FILE *fp = fopen("/sdcard/out", "w");
// if (fp == NULL) ESP_LOGE(TAG,"can not open file\n");
//print active speech commands
multinet->print_active_speech_commands(model_data);
ESP_LOGI(TAG, "Ready");
self->detected = false;
while (true)
{
afe_fetch_result_t* res = self->afe_handle->fetch(afe_data);
if (!res || res->ret_value == ESP_FAIL) {
ESP_LOGE(TAG, "fetch error!\n");
break;
}
if (res->wakeup_state == WAKENET_DETECTED) {
ESP_LOGI(TAG, "WAKEWORD DETECTED\n");
multinet->clean(model_data); // clean all status of multinet
@@ -117,7 +228,7 @@ static void detect_hander(AppSpeech *self)
self->detected = true;
self->afe_handle->disable_wakenet(afe_data);
LCD_Backlight = 35;
}
if (self->detected) {
@@ -129,33 +240,33 @@ static void detect_hander(AppSpeech *self)
} else if (mn_state == ESP_MN_STATE_DETECTED) {
esp_mn_results_t *mn_result = multinet->get_results(model_data);
// for (int i = 0; i < mn_result->num; i++) {
// ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
// ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
// i+1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->string, mn_result->prob[i]);
// }
ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
1, mn_result->command_id[0], mn_result->phrase_id[0], mn_result->string, mn_result->prob[0]);
switch (mn_result->command_id[0]) {
case 0:
LCD_Backlight = 100;
case 0:
LCD_Backlight = 100;
break;
case 1:
LCD_Backlight = 30;
case 1:
LCD_Backlight = 30;
break;
case 2:
LCD_Backlight = 0;
case 2:
LCD_Backlight = 0;
break;
case 3:
LCD_Backlight = 100;
case 3:
LCD_Backlight = 100;
break;
case 4:
play_Music_Flag = 1;
case 4:
play_Music_Flag = 1;
break;
default: printf("Unknown Command!\r\n"); break;
}
self->command = (command_word_t)mn_result->command_id[0];
// self->afe_handle->enable_wakenet(afe_data);
// self->detected = false;
self->afe_handle->disable_wakenet(afe_data);
self->detected = true;
ESP_LOGI(TAG, ">>> Say your command <<<");
@@ -171,9 +282,9 @@ static void detect_hander(AppSpeech *self)
if(play_Music_Flag){
play_Music_Flag = 0;
if(ACTIVE_TRACK_CNT)
_lv_demo_music_resume();
_lv_demo_music_resume();
else
printf("No MP3 file found in SD card!\r\n");
printf("No MP3 file found in SD card!\r\n");
}
}
}
@@ -187,32 +298,32 @@ static void detect_hander(AppSpeech *self)
}
// 初始化
void MIC_Speech_init()
{
MIC_Speech.afe_handle = &ESP_AFE_SR_HANDLE;
MIC_Speech.detected = false;
MIC_Speech.command = COMMAND_TIMEOUT;
MIC_Speech.models = esp_srmodel_init("model"); // 这边配置为SD卡当中的文件路径
MIC_Speech.models = esp_srmodel_init("/sdcard/srmodels"); // 这边配置为SD卡当中的文件路径
i2s_init(I2S_NUM_1, 16000, 2, 32);
// sd_card_mount("/sdcard");
afe_config_t afe_config = {
.aec_init = true,
.se_init = true,
.vad_init = true,
.wakenet_init = true,
.aec_init = true, // 回声消除(当用户在播放音频的时候使用语音识别可以有效提告识别率)
.se_init = true, // 降噪
.vad_init = true, // VDA(语音活动检测),用于检测当前是否处于说话状态,如果是,就将音频数据发送给 multinet
.wakenet_init = false, // 关闭唤醒词
.voice_communication_init = false,
.voice_communication_agc_init = false,
.voice_communication_agc_gain = 15,
.vad_mode = VAD_MODE_3,
.wakenet_model_name = NULL,
.vad_mode = VAD_MODE_0, /*VAD_MODE_3,*/ // VAD 灵敏度等级
.wakenet_model_name = NULL, // 不再指定 wakenet
.wakenet_model_name_2 = NULL,
.wakenet_mode = DET_MODE_2CH_90,
.wakenet_mode = DET_MODE_2CH_90, // 0 = 关闭
.afe_mode = SR_MODE_LOW_COST,
.afe_perferred_core = 0,
.afe_perferred_priority = 5,
.afe_ringbuf_size = 50,
.afe_ringbuf_size = 50, // AFE ringbuffer 环形缓冲区大小
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
.afe_linear_gain = 1.0,
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
@@ -222,7 +333,7 @@ void MIC_Speech_init()
.ref_num = 1,
.sample_rate = 16000,
},
.debug_init = false,
.debug_init = false, // afe内部调试
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}},
};
afe_config.aec_init = false;
@@ -235,6 +346,10 @@ void MIC_Speech_init()
afe_config.pcm_config.sample_rate = 16000;
afe_config.wakenet_model_name = esp_srmodel_filter(MIC_Speech.models, ESP_WN_PREFIX, NULL);
MIC_Speech.afe_data = MIC_Speech.afe_handle->create_from_config(&afe_config);
xTaskCreatePinnedToCore((TaskFunction_t)feed_handler, "App/SR/Feed", 4 * 1024, &MIC_Speech, 5, NULL, 1);
xTaskCreatePinnedToCore((TaskFunction_t)detect_hander, "App/SR/Detect", 5 * 1024, &MIC_Speech, 5, NULL, 1);
// 注意两个任务被分配了不同的核心与优先级,这是为了防止AFE(Audio Front-End)内部环形缓冲区溢出
// 也就是“喂数据线程” 比 “取数据线程” 跑得快,生产 > 消费,经典的生产者消费者问题
// 但即使这么做了,由于i2s在开始读取数据的时候,识别模型还没加载完成,因此在开始阶段必然会出现环形缓冲区满的警告,问题不大
xTaskCreatePinnedToCore((TaskFunction_t)feed_handler, "App/SR/Feed", 4 * 1024, &MIC_Speech, 4, NULL, 0);
xTaskCreatePinnedToCore((TaskFunction_t)detect_hander, "App/SR/Detect", 5 * 1024, &MIC_Speech, 6, NULL, 1);
}
+9
View File
@@ -1,5 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_afe_sr_iface.h"
#include "esp_process_sdkconfig.h"
#include "model_path.h"
@@ -45,3 +49,8 @@ typedef struct {
void MIC_Speech_init();
#ifdef __cplusplus
}
#endif
+11 -1
View File
@@ -1,5 +1,10 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "I2C_Driver.h"
@@ -99,4 +104,9 @@ void datetime_to_str(char *datetime_str,datetime_t time);
// 3 - wednesday
// 4 - thursday
// 5 - friday
// 6 - saturday
// 6 - saturday
#ifdef __cplusplus
}
#endif
+12 -1
View File
@@ -1,4 +1,10 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "ST77916.h"
#define PWR_KEY_Input_PIN 6 // 电源按键输入引脚(GPIO6)
@@ -13,4 +19,9 @@ void Shutdown(void);
void Restart(void);
void PWR_Init(void);
void PWR_Loop(void);
void PWR_Loop(void);
#ifdef __cplusplus
}
#endif
+12 -1
View File
@@ -1,5 +1,11 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "I2C_Driver.h"
//device address
@@ -158,4 +164,9 @@ float getGyroX();
float getGyroY();
float getGyroZ();
void getAccelerometer(void);
void getGyroscope(void);
void getGyroscope(void);
#ifdef __cplusplus
}
#endif
+10 -2
View File
@@ -1,6 +1,9 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <string.h>
#include <sys/unistd.h>
#include <sys/stat.h>
@@ -35,4 +38,9 @@ extern uint32_t Flash_Size;
void SD_Init(void);
void Flash_Searching(void);
FILE* Open_File(const char *file_path);
uint16_t Folder_retrieval(const char* directory, const char* fileExtension, char File_Name[][100],uint16_t maxFiles);
uint16_t Folder_retrieval(const char* directory, const char* fileExtension, char File_Name[][100],uint16_t maxFiles);
#ifdef __cplusplus
}
#endif
+10 -2
View File
@@ -1,11 +1,14 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_log.h"
#include "freertos/FreeRTOS.h"
#include "esp_wifi.h"
#include "nvs_flash.h"
#include "esp_log.h"
#include <stdio.h>
#include <string.h> // For memcpy
#include "esp_system.h"
@@ -42,4 +45,9 @@ uint16_t BLE_Scan(void);
uint16_t wireless_get_wifi_ap_list(wifi_ap_info_t *ap_list, uint16_t max_aps);
uint16_t wireless_get_ble_device_list(ble_device_info_t *device_list, uint16_t max_devices);
void wireless_print_wifi_aps(void);
void wireless_print_ble_devices(void);
void wireless_print_ble_devices(void);
#ifdef __cplusplus
}
#endif