/* * SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Unlicense OR CC0-1.0 */ #include #include #include #include #include #include "freertos/FreeRTOS.h" #include "freertos/task.h" #include "esp_task_wdt.h" #include "esp_check.h" #include "esp_err.h" #include "esp_log.h" #include "app_sr.h" #include "esp_mn_speech_commands.h" #include "esp_process_sdkconfig.h" #include "esp_afe_sr_models.h" #include "esp_mn_models.h" #include "esp_wn_iface.h" #include "esp_wn_models.h" #include "esp_afe_sr_iface.h" #include "esp_mn_iface.h" #include "model_path.h" #include "bsp_board.h" #include "app_audio.h" #include "app_wifi.h" static const char *TAG = "app_sr"; static esp_afe_sr_iface_t *afe_handle = NULL; static srmodel_list_t *models = NULL; static bool manul_detect_flag = false; sr_data_t *g_sr_data = NULL; #define I2S_CHANNEL_NUM 2 extern bool record_flag; extern uint32_t record_total_len; static void audio_feed_task(void *arg) { ESP_LOGI(TAG, "Feed Task"); size_t bytes_read = 0; esp_afe_sr_data_t *afe_data = (esp_afe_sr_data_t *) arg; int audio_chunksize = afe_handle->get_feed_chunksize(afe_data); int feed_channel = 3; ESP_LOGI(TAG, "audio_chunksize=%d, feed_channel=%d", audio_chunksize, feed_channel); /* Allocate audio buffer and check for result */ int16_t *audio_buffer = heap_caps_malloc(audio_chunksize * sizeof(int16_t) * feed_channel, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); assert(audio_buffer); g_sr_data->afe_in_buffer = audio_buffer; while (true) { if (g_sr_data->event_group && xEventGroupGetBits(g_sr_data->event_group)) { xEventGroupSetBits(g_sr_data->event_group, FEED_DELETED); vTaskDelete(NULL); } /* Read audio data from I2S bus */ bsp_i2s_read((char *)audio_buffer, audio_chunksize * I2S_CHANNEL_NUM * sizeof(int16_t), &bytes_read, portMAX_DELAY); /* Channel Adjust */ for (int i = audio_chunksize - 1; i >= 0; i--) { audio_buffer[i * 3 + 2] = 0; audio_buffer[i * 3 + 1] = audio_buffer[i * 2 + 1]; audio_buffer[i * 3 + 0] = audio_buffer[i * 2 + 0]; } /* Checking if WIFI is connected */ if (WIFI_STATUS_CONNECTED_OK == wifi_connected_already()) { /* Feed samples of an audio stream to the AFE_SR */ afe_handle->feed(afe_data, audio_buffer); } audio_record_save(audio_buffer, audio_chunksize); } } static void audio_detect_task(void *arg) { ESP_LOGI(TAG, "Detection task"); static afe_vad_state_t local_state; static uint8_t frame_keep = 0; bool detect_flag = false; esp_afe_sr_data_t *afe_data = arg; while (true) { if (NEED_DELETE && xEventGroupGetBits(g_sr_data->event_group)) { xEventGroupSetBits(g_sr_data->event_group, DETECT_DELETED); vTaskDelete(g_sr_data->handle_task); vTaskDelete(NULL); } afe_fetch_result_t *res = afe_handle->fetch(afe_data); if (!res || res->ret_value == ESP_FAIL) { ESP_LOGW(TAG, "AFE Fetch Fail"); continue; } if (res->wakeup_state == WAKENET_DETECTED) { ESP_LOGI(TAG, LOG_BOLD(LOG_COLOR_GREEN) "wakeword detected"); sr_result_t result = { .wakenet_mode = WAKENET_DETECTED, .state = ESP_MN_STATE_DETECTING, .command_id = 0, }; xQueueSend(g_sr_data->result_que, &result, 0); } else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED || manul_detect_flag) { detect_flag = true; if (manul_detect_flag) { manul_detect_flag = false; sr_result_t result = { .wakenet_mode = WAKENET_DETECTED, .state = ESP_MN_STATE_DETECTING, .command_id = 0, }; xQueueSend(g_sr_data->result_que, &result, 0); } frame_keep = 0; g_sr_data->afe_handle->disable_wakenet(afe_data); ESP_LOGI(TAG, LOG_BOLD(LOG_COLOR_GREEN) "AFE_FETCH_CHANNEL_VERIFIED, channel index: %d\n", res->trigger_channel_id); } if (true == detect_flag) { if (local_state != res->vad_state) { local_state = res->vad_state; frame_keep = 0; } else { frame_keep++; } if ((100 == frame_keep) && (AFE_VAD_SILENCE == res->vad_state)) { sr_result_t result = { .wakenet_mode = WAKENET_NO_DETECT, .state = ESP_MN_STATE_TIMEOUT, .command_id = 0, }; xQueueSend(g_sr_data->result_que, &result, 0); g_sr_data->afe_handle->enable_wakenet(afe_data); detect_flag = false; continue; } } } /* Task never returns */ vTaskDelete(NULL); } esp_err_t app_sr_set_language(sr_language_t new_lang) { ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, TAG, "SR is not running"); if (new_lang == g_sr_data->lang) { ESP_LOGW(TAG, "nothing to do"); return ESP_OK; } else { g_sr_data->lang = new_lang; } ESP_LOGI(TAG, "Set language %s", SR_LANG_EN == g_sr_data->lang ? "EN" : "CN"); if (g_sr_data->model_data) { g_sr_data->multinet->destroy(g_sr_data->model_data); } char *wn_name = esp_srmodel_filter(models, ESP_WN_PREFIX, ""); ESP_LOGI(TAG, "load wakenet:%s", wn_name); g_sr_data->afe_handle->set_wakenet(g_sr_data->afe_data, wn_name); return ESP_OK; } esp_err_t app_sr_start(bool record_en) { esp_err_t ret = ESP_OK; ESP_RETURN_ON_FALSE(NULL == g_sr_data, ESP_ERR_INVALID_STATE, TAG, "SR already running"); g_sr_data = heap_caps_calloc(1, sizeof(sr_data_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_NO_MEM, TAG, "Failed create sr data"); g_sr_data->result_que = xQueueCreate(3, sizeof(sr_result_t)); ESP_GOTO_ON_FALSE(NULL != g_sr_data->result_que, ESP_ERR_NO_MEM, err, TAG, "Failed create result queue"); g_sr_data->event_group = xEventGroupCreate(); ESP_GOTO_ON_FALSE(NULL != g_sr_data->event_group, ESP_ERR_NO_MEM, err, TAG, "Failed create event_group"); BaseType_t ret_val; models = esp_srmodel_init("model"); afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE; afe_config_t afe_config = AFE_CONFIG_DEFAULT(); afe_config.wakenet_model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL); afe_config.aec_init = false; esp_afe_sr_data_t *afe_data = afe_handle->create_from_config(&afe_config); g_sr_data->afe_handle = afe_handle; g_sr_data->afe_data = afe_data; g_sr_data->lang = SR_LANG_MAX; ret = app_sr_set_language(SR_LANG_EN); ESP_GOTO_ON_FALSE(ESP_OK == ret, ESP_FAIL, err, TAG, "Failed to set language"); ret_val = xTaskCreatePinnedToCore(&audio_feed_task, "Feed Task", 8 * 1024, (void *)afe_data, 5, &g_sr_data->feed_task, 0); ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, TAG, "Failed create audio feed task"); ret_val = xTaskCreatePinnedToCore(&audio_detect_task, "Detect Task", 10 * 1024, (void *)afe_data, 5, &g_sr_data->detect_task, 1); ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, TAG, "Failed create audio detect task"); ret_val = xTaskCreatePinnedToCore(&sr_handler_task, "SR Handler Task", 8 * 1024, NULL, 5, &g_sr_data->handle_task, 0); ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, TAG, "Failed create audio handler task"); audio_record_init(); return ESP_OK; err: app_sr_stop(); return ret; } esp_err_t app_sr_stop(void) { ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, TAG, "SR is not running"); xEventGroupSetBits(g_sr_data->event_group, NEED_DELETE); xEventGroupWaitBits(g_sr_data->event_group, NEED_DELETE | FEED_DELETED | DETECT_DELETED | HANDLE_DELETED, 1, 1, portMAX_DELAY); if (g_sr_data->result_que) { vQueueDelete(g_sr_data->result_que); g_sr_data->result_que = NULL; } if (g_sr_data->event_group) { vEventGroupDelete(g_sr_data->event_group); g_sr_data->event_group = NULL; } if (g_sr_data->fp) { fclose(g_sr_data->fp); g_sr_data->fp = NULL; } if (g_sr_data->model_data) { g_sr_data->multinet->destroy(g_sr_data->model_data); } if (g_sr_data->afe_data) { g_sr_data->afe_handle->destroy(g_sr_data->afe_data); } if (g_sr_data->afe_in_buffer) { heap_caps_free(g_sr_data->afe_in_buffer); } if (g_sr_data->afe_out_buffer) { heap_caps_free(g_sr_data->afe_out_buffer); } heap_caps_free(g_sr_data); g_sr_data = NULL; return ESP_OK; } esp_err_t app_sr_get_result(sr_result_t *result, TickType_t xTicksToWait) { ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, TAG, "SR is not running"); xQueueReceive(g_sr_data->result_que, result, xTicksToWait); return ESP_OK; } esp_err_t app_sr_start_once(void) { ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, TAG, "SR is not running"); manul_detect_flag = true; return ESP_OK; }