4cc761aab3
2. 试着测试了一下LVGL_GIF渲染+音乐播放+语音识别的组合简单优化后,
发现lvgl渲染略显卡顿,语音识别有缓冲区空警告,不过无伤大雅,还需要进一步深度优化。
356 lines
15 KiB
C
356 lines
15 KiB
C
#include "MIC_Speech.h"
|
|
|
|
#include "driver/gpio.h"
|
|
#include "driver/i2s_std.h"
|
|
#include "driver/i2s_tdm.h"
|
|
#include "soc/soc_caps.h"
|
|
#include "esp_err.h"
|
|
#include "esp_log.h"
|
|
|
|
#include "esp_wn_iface.h"
|
|
#include "esp_wn_models.h"
|
|
#include "esp_afe_sr_models.h"
|
|
#include "esp_mn_iface.h"
|
|
#include "esp_mn_models.h"
|
|
|
|
#define I2S_CHANNEL_NUM 1
|
|
|
|
static const char *TAG = "App/Speech";
|
|
|
|
static i2s_chan_handle_t rx_handle = NULL; // I2S rx channel handler
|
|
static AppSpeech MIC_Speech;
|
|
bool play_Music_Flag = 0;
|
|
uint8_t LCD_Backlight_original = 0;
|
|
|
|
|
|
/**
 * Create, configure and enable the I2S receive channel used for audio capture.
 *
 * The channel is configured in standard mode through the project's
 * I2S_CONFIG_DEFAULT macro with fixed settings (16000 Hz, mono, 32-bit data
 * width) and the right slot selected. On success the handle is stored in the
 * file-scope rx_handle; on failure rx_handle is left NULL.
 *
 * @param i2s_num         I2S port on which the channel is allocated.
 * @param sample_rate     Currently unused: the rate is fixed at 16000.
 * @param channel_format  Currently unused: the slot mode is fixed to mono.
 * @param bits_per_chan   Currently unused: the data width is fixed to 32 bit.
 * @return ESP_OK on success, otherwise the error code of the first driver
 *         call that failed.
 */
static esp_err_t i2s_init(i2s_port_t i2s_num, uint32_t sample_rate, int channel_format, int bits_per_chan)
{
    // Kept in the signature for existing callers; the configuration below is fixed.
    (void)sample_rate;
    (void)channel_format;
    (void)bits_per_chan;

    i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(i2s_num, I2S_ROLE_MASTER);

    // Only an RX channel is requested (the TX handle argument is NULL).
    esp_err_t err = i2s_new_channel(&chan_cfg, NULL, &rx_handle);
    if (err != ESP_OK) {
        ESP_LOGE(TAG, "i2s_new_channel failed: %s", esp_err_to_name(err));
        rx_handle = NULL;
        return err;
    }

    i2s_std_config_t std_cfg = I2S_CONFIG_DEFAULT(16000, I2S_SLOT_MODE_MONO, I2S_DATA_BIT_WIDTH_32BIT);
    std_cfg.slot_cfg.slot_mask = I2S_STD_SLOT_RIGHT;

    err = i2s_channel_init_std_mode(rx_handle, &std_cfg);
    if (err == ESP_OK) {
        err = i2s_channel_enable(rx_handle);
    }

    if (err != ESP_OK) {
        // Do not leave a half-configured channel behind: release it and make
        // the failure visible to anything that later inspects rx_handle.
        ESP_LOGE(TAG, "I2S RX channel setup failed: %s", esp_err_to_name(err));
        i2s_del_channel(rx_handle);
        rx_handle = NULL;
    }

    return err;
}
|
|
|
|
/**
 * Task body: read audio from the I2S RX channel and feed it to the AFE.
 *
 * Runs until an I2S read fails, then frees its buffer and deletes the calling
 * task. The AFE instance is intentionally NOT destroyed here: the detect task
 * tears it down on its own exit path, and destroying it from both tasks would
 * free it twice.
 *
 * @param self  Speech state providing afe_handle and afe_data.
 */
static void feed_handler(AppSpeech *self)
{
    esp_afe_sr_data_t *afe_data = self->afe_data;

    // One AFE feed chunk is read per iteration, one 32-bit word per sample.
    const size_t samp_len = (size_t)self->afe_handle->get_feed_chunksize(afe_data);
    const size_t samp_len_bytes = samp_len * I2S_CHANNEL_NUM * sizeof(int32_t);

    int32_t *i2s_buff = malloc(samp_len_bytes);
    if (i2s_buff == NULL) {
        ESP_LOGE(TAG, "feed task: failed to allocate %u bytes", (unsigned)samp_len_bytes);
        vTaskDelete(NULL);
        return;
    }

    while (true) {
        size_t bytes_read = 0;
        esp_err_t err = i2s_channel_read(rx_handle, i2s_buff, samp_len_bytes, &bytes_read, portMAX_DELAY);
        if (err != ESP_OK || bytes_read != samp_len_bytes) {
            // Feeding a stale or partially filled buffer would inject garbage
            // into the recogniser, so stop instead.
            ESP_LOGE(TAG, "feed task: i2s read failed (%s), got %u of %u bytes",
                     esp_err_to_name(err), (unsigned)bytes_read, (unsigned)samp_len_bytes);
            break;
        }

        // Scale every 32-bit word down by 14 bits before the buffer is handed
        // to the AFE as int16_t data.
        // NOTE(review): with the 2-channel pcm_config set in MIC_Speech_init,
        // each shifted word is presumably consumed as one pair of 16-bit
        // samples - confirm against the AFE feed format.
        for (size_t i = 0; i < samp_len; ++i) {
            i2s_buff[i] >>= 14;
        }

        self->afe_handle->feed(afe_data, (int16_t *)i2s_buff);
    }

    free(i2s_buff);
    vTaskDelete(NULL);
}
|
|
|
|
/**
 * Task body: run multinet command recognition on every AFE output chunk,
 * without a wake word.
 *
 * Setup: selects a multinet model from self->models (Chinese when one of the
 * CN multinet Kconfig options is defined, English otherwise), creates it and
 * loads the speech commands from sdkconfig. If no model can be found or
 * created the task logs an error and deletes itself.
 *
 * Loop: fetches from the AFE and passes the data to multinet.
 *  - DETECTED: command ids 0..3 set LCD_Backlight to 100 / 30 / 0 / 100,
 *    id 4 sets play_Music_Flag, anything else prints "Unknown Command!".
 *  - TIMEOUT: LCD_Backlight is restored from LCD_Backlight_original and, if
 *    play_Music_Flag is set, it is cleared and music playback is resumed
 *    (or a message is printed when ACTIVE_TRACK_CNT is zero).
 *
 * The loop ends on a fetch error; the model and the AFE instance are then
 * destroyed and the task deletes itself.
 *
 * @param self  Speech state (afe_handle, afe_data, models, detected, command).
 */
static void detect_hander(AppSpeech *self)
{
    esp_afe_sr_data_t *afe_data = self->afe_data;
    int afe_chunksize = self->afe_handle->get_fetch_chunksize(afe_data);

#if defined(CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8) || defined(CONFIG_SR_MN_CN_MULTINET6_QUANT) || defined(CONFIG_SR_MN_CN_MULTINET6_AC_QUANT)
    char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_CHINESE);
#else
    char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
#endif
    if (mn_name == NULL) {
        ESP_LOGE(TAG, "no multinet model available, command recognition disabled");
        vTaskDelete(NULL);
        return;
    }
    ESP_LOGI(TAG, "multinet:%s\n", mn_name);

    esp_mn_iface_t *multinet = esp_mn_handle_from_name(mn_name);
    if (multinet == NULL) {
        ESP_LOGE(TAG, "no multinet interface for model %s", mn_name);
        vTaskDelete(NULL);
        return;
    }

    // NOTE(review): the second argument (6000) is presumably the command
    // timeout in milliseconds - confirm against the multinet API.
    model_iface_data_t *model_data = multinet->create(mn_name, 6000);
    if (model_data == NULL) {
        ESP_LOGE(TAG, "failed to create multinet model %s", mn_name);
        vTaskDelete(NULL);
        return;
    }

    esp_mn_commands_update_from_sdkconfig(multinet, model_data); // Add speech commands from sdkconfig

    // The AFE output is passed to multinet unchanged, so both chunk sizes must agree.
    int mu_chunksize = multinet->get_samp_chunksize(model_data);
    assert(mu_chunksize == afe_chunksize);
    (void)mu_chunksize;   // only used by the assert above
    (void)afe_chunksize;

    multinet->print_active_speech_commands(model_data);
    ESP_LOGI(TAG, "Ready");

    self->detected = false;

    while (true) {
        afe_fetch_result_t *res = self->afe_handle->fetch(afe_data);
        if (!res || res->ret_value == ESP_FAIL) {
            ESP_LOGE(TAG, "fetch error!\n");
            break;
        }

        esp_mn_state_t mn_state = multinet->detect(model_data, res->data);

        if (mn_state == ESP_MN_STATE_DETECTING) {
            self->command = COMMAND_NOT_DETECTED;
            continue;
        }

        if (mn_state == ESP_MN_STATE_DETECTED) {
            esp_mn_results_t *mn_result = multinet->get_results(model_data);

            // Only the top-ranked result is reported and acted upon.
            ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
                     1, mn_result->command_id[0], mn_result->phrase_id[0], mn_result->string, mn_result->prob[0]);

            switch (mn_result->command_id[0]) {
            case 0:
                LCD_Backlight = 100;
                break;
            case 1:
                LCD_Backlight = 30;
                break;
            case 2:
                LCD_Backlight = 0;
                break;
            case 3:
                LCD_Backlight = 100;
                break;
            case 4:
                // Playback itself is started later, in the timeout branch.
                play_Music_Flag = 1;
                break;
            default:
                printf("Unknown Command!\r\n");
                break;
            }

            self->command = (command_word_t)mn_result->command_id[0];
            self->detected = true;
            ESP_LOGI(TAG, ">>> Say your command <<<");
            // NOTE(review): this immediately overwrites the command id stored
            // just above, so a reader of self->command can practically never
            // observe the recognised command. Kept as in the original.
            self->command = COMMAND_TIMEOUT;
        } else if (mn_state == ESP_MN_STATE_TIMEOUT) {
            esp_mn_results_t *mn_result = multinet->get_results(model_data);
            ESP_LOGI(TAG, "timeout, string:%s\n", mn_result->string);

            self->command = COMMAND_TIMEOUT;
            self->detected = false;
            ESP_LOGI(TAG, ">>> Waiting to be waken up <<<");

            // NOTE(review): nothing in this function updates
            // LCD_Backlight_original, so this restores whatever value it was
            // given elsewhere (0 by default).
            LCD_Backlight = LCD_Backlight_original;

            if (play_Music_Flag) {
                play_Music_Flag = 0;
                if (ACTIVE_TRACK_CNT) {
                    _lv_demo_music_resume();
                } else {
                    printf("No MP3 file found in SD card!\r\n");
                }
            }
        }
    }

    multinet->destroy(model_data);
    model_data = NULL;

    // NOTE(review): the feed task may still be feeding this AFE instance when
    // it is destroyed here; the two tasks are not synchronised on shutdown.
    self->afe_handle->destroy(afe_data);
    vTaskDelete(NULL);
}
|
|
|
|
// 下面的函数是上面的备份,使用前需要在idf.py menuconfig中先配置打开唤醒模型
|
|
static void detect_handler_continuous(AppSpeech *self)
|
|
{
|
|
esp_afe_sr_data_t *afe_data = self->afe_data;
|
|
int afe_chunksize = self->afe_handle->get_fetch_chunksize(afe_data);
|
|
#if defined(CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8) || defined(CONFIG_SR_MN_CN_MULTINET6_QUANT) || defined(CONFIG_SR_MN_CN_MULTINET6_AC_QUANT)
|
|
char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_CHINESE);
|
|
#else
|
|
char *mn_name = esp_srmodel_filter(self->models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
|
|
#endif // CONFIG_IDF_TARGET_ESP32S3
|
|
ESP_LOGI(TAG, "multinet:%s\n", mn_name);
|
|
esp_mn_iface_t *multinet = esp_mn_handle_from_name(mn_name);
|
|
model_iface_data_t *model_data = multinet->create(mn_name, 6000);
|
|
esp_mn_commands_update_from_sdkconfig(multinet, model_data); // Add speech commands from sdkconfig
|
|
int mu_chunksize = multinet->get_samp_chunksize(model_data);
|
|
assert(mu_chunksize == afe_chunksize);
|
|
|
|
// FILE *fp = fopen("/sdcard/out", "w");
|
|
// if (fp == NULL) ESP_LOGE(TAG,"can not open file\n");
|
|
|
|
//print active speech commands
|
|
multinet->print_active_speech_commands(model_data);
|
|
ESP_LOGI(TAG, "Ready");
|
|
|
|
self->detected = false;
|
|
|
|
while (true)
|
|
{
|
|
afe_fetch_result_t* res = self->afe_handle->fetch(afe_data);
|
|
if (!res || res->ret_value == ESP_FAIL) {
|
|
ESP_LOGE(TAG, "fetch error!\n");
|
|
break;
|
|
}
|
|
|
|
if (res->wakeup_state == WAKENET_DETECTED) {
|
|
ESP_LOGI(TAG, "WAKEWORD DETECTED\n");
|
|
multinet->clean(model_data); // clean all status of multinet
|
|
LCD_Backlight_original = LCD_Backlight;
|
|
} else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
|
|
ESP_LOGI(TAG, "AFE_FETCH_CHANNEL_VERIFIED, channel index: %d\n", res->trigger_channel_id);
|
|
ESP_LOGI(TAG, ">>> Say your command <<<");
|
|
self->detected = true;
|
|
self->afe_handle->disable_wakenet(afe_data);
|
|
LCD_Backlight = 35;
|
|
|
|
}
|
|
|
|
if (self->detected) {
|
|
esp_mn_state_t mn_state = multinet->detect(model_data, res->data);
|
|
|
|
if (mn_state == ESP_MN_STATE_DETECTING) {
|
|
self->command = COMMAND_NOT_DETECTED;
|
|
continue;
|
|
} else if (mn_state == ESP_MN_STATE_DETECTED) {
|
|
esp_mn_results_t *mn_result = multinet->get_results(model_data);
|
|
// for (int i = 0; i < mn_result->num; i++) {
|
|
// ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
|
|
// i+1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->string, mn_result->prob[i]);
|
|
// }
|
|
ESP_LOGI(TAG, "TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
|
|
1, mn_result->command_id[0], mn_result->phrase_id[0], mn_result->string, mn_result->prob[0]);
|
|
switch (mn_result->command_id[0]) {
|
|
case 0:
|
|
LCD_Backlight = 100;
|
|
break;
|
|
case 1:
|
|
LCD_Backlight = 30;
|
|
break;
|
|
case 2:
|
|
LCD_Backlight = 0;
|
|
break;
|
|
case 3:
|
|
LCD_Backlight = 100;
|
|
break;
|
|
case 4:
|
|
play_Music_Flag = 1;
|
|
break;
|
|
default: printf("Unknown Command!\r\n"); break;
|
|
}
|
|
self->command = (command_word_t)mn_result->command_id[0];
|
|
// self->afe_handle->enable_wakenet(afe_data);
|
|
// self->detected = false;
|
|
|
|
self->afe_handle->disable_wakenet(afe_data);
|
|
self->detected = true;
|
|
ESP_LOGI(TAG, ">>> Say your command <<<");
|
|
self->command = COMMAND_TIMEOUT;
|
|
} else if (mn_state == ESP_MN_STATE_TIMEOUT) {
|
|
esp_mn_results_t *mn_result = multinet->get_results(model_data);
|
|
ESP_LOGI(TAG, "timeout, string:%s\n", mn_result->string);
|
|
self->command = COMMAND_TIMEOUT;
|
|
self->afe_handle->enable_wakenet(afe_data);
|
|
self->detected = false;
|
|
ESP_LOGI(TAG, ">>> Waiting to be waken up <<<");
|
|
LCD_Backlight = LCD_Backlight_original;
|
|
if(play_Music_Flag){
|
|
play_Music_Flag = 0;
|
|
if(ACTIVE_TRACK_CNT)
|
|
_lv_demo_music_resume();
|
|
else
|
|
printf("No MP3 file found in SD card!\r\n");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (model_data) {
|
|
multinet->destroy(model_data);
|
|
model_data = NULL;
|
|
}
|
|
self->afe_handle->destroy(afe_data);
|
|
vTaskDelete(NULL);
|
|
}
|
|
|
|
|
|
// 初始化
|
|
void MIC_Speech_init()
|
|
{
|
|
MIC_Speech.afe_handle = &ESP_AFE_SR_HANDLE;
|
|
|
|
MIC_Speech.detected = false;
|
|
MIC_Speech.command = COMMAND_TIMEOUT;
|
|
MIC_Speech.models = esp_srmodel_init("/sdcard/srmodels"); // 这边配置为SD卡当中的文件路径
|
|
i2s_init(I2S_NUM_1, 16000, 2, 32);
|
|
// sd_card_mount("/sdcard");
|
|
afe_config_t afe_config = {
|
|
.aec_init = true, // 回声消除(当用户在播放音频的时候使用语音识别可以有效提告识别率)
|
|
.se_init = true, // 降噪
|
|
.vad_init = true, // VDA(语音活动检测),用于检测当前是否处于说话状态,如果是,就将音频数据发送给 multinet
|
|
.wakenet_init = false, // 关闭唤醒词
|
|
.voice_communication_init = false,
|
|
.voice_communication_agc_init = false,
|
|
.voice_communication_agc_gain = 15,
|
|
.vad_mode = VAD_MODE_0, /*VAD_MODE_3,*/ // VAD 灵敏度等级
|
|
.wakenet_model_name = NULL, // 不再指定 wakenet
|
|
.wakenet_model_name_2 = NULL,
|
|
.wakenet_mode = DET_MODE_2CH_90, // 0 = 关闭
|
|
.afe_mode = SR_MODE_LOW_COST,
|
|
.afe_perferred_core = 0,
|
|
.afe_perferred_priority = 5,
|
|
.afe_ringbuf_size = 50, // AFE ringbuffer 环形缓冲区大小
|
|
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
|
.afe_linear_gain = 1.0,
|
|
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
|
.pcm_config = {
|
|
.total_ch_num = 3,
|
|
.mic_num = 2,
|
|
.ref_num = 1,
|
|
.sample_rate = 16000,
|
|
},
|
|
.debug_init = false, // afe内部调试
|
|
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}},
|
|
};
|
|
afe_config.aec_init = false;
|
|
afe_config.se_init = false;
|
|
afe_config.vad_init = false;
|
|
afe_config.afe_ringbuf_size = 10;
|
|
afe_config.pcm_config.total_ch_num = 2;
|
|
afe_config.pcm_config.mic_num = 1;
|
|
afe_config.pcm_config.ref_num = 1;
|
|
afe_config.pcm_config.sample_rate = 16000;
|
|
afe_config.wakenet_model_name = esp_srmodel_filter(MIC_Speech.models, ESP_WN_PREFIX, NULL); // 获取唤醒词模型
|
|
MIC_Speech.afe_data = MIC_Speech.afe_handle->create_from_config(&afe_config);
|
|
|
|
// 注意两个任务被分配了不同的核心与优先级,这是为了防止AFE(Audio Front-End)内部环形缓冲区溢出
|
|
// 也就是“喂数据线程” 比 “取数据线程” 跑得快,生产 > 消费,经典的生产者消费者问题
|
|
// 但即使这么做了,由于i2s在开始读取数据的时候,识别模型还没加载完成,因此在开始阶段必然会出现环形缓冲区满的警告,问题不大
|
|
xTaskCreatePinnedToCore((TaskFunction_t)feed_handler, "App/SR/Feed", 4 * 1024, &MIC_Speech, 4, NULL, 0);
|
|
xTaskCreatePinnedToCore((TaskFunction_t)detect_hander, "App/SR/Detect", 5 * 1024, &MIC_Speech, 6, NULL, 1);
|
|
}
|