You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
253 lines
8.8 KiB
C
253 lines
8.8 KiB
C
/*
|
|
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
|
|
*
|
|
* SPDX-License-Identifier: CC0-1.0
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include "freertos/FreeRTOS.h"
|
|
#include "freertos/task.h"
|
|
#include "freertos/event_groups.h"
|
|
#include "esp_system.h"
|
|
#include "esp_log.h"
|
|
#include "esp_check.h"
|
|
#include "nvs_flash.h"
|
|
#include "app_ui_ctrl.h"
|
|
#include "OpenAI.h"
|
|
#include "audio_player.h"
|
|
#include "app_sr.h"
|
|
#include "bsp/esp-bsp.h"
|
|
#include "bsp_board.h"
|
|
#include "app_audio.h"
|
|
#include "app_wifi.h"
|
|
#include "settings.h"
|
|
#include "cJSON.h"
|
|
#include "utils/utils.h"
|
|
|
|
/* Pause, in seconds, between starting reply playback and starting to scroll
 * the reply text (converted to milliseconds at the point of use). */
#define SCROLL_START_DELAY_S            (1.5)

/* Second argument handed to ui_ctrl_show_panel() when an error message is
 * shown and the UI then falls back to the sleep panel. */
#define LISTEN_SPEAK_PANEL_DELAY_MS     2000

/* User-facing status strings (Chinese). English glosses, in order:
 * "service error", "unknown request error", "sorry, I did not understand",
 * "OpenAI credentials invalid". */
#define SERVER_ERROR                    "服务错误"
#define INVALID_REQUEST_ERROR           "未知请求错误"
#define SORRY_CANNOT_UNDERSTAND         "抱歉,我没听懂"
#define API_KEY_NOT_VALID               "OpenAI凭据无效"
|
|
|
|
static char *TAG = "app_main";
|
|
static sys_param_t *sys_param = NULL;
|
|
|
|
/* program flow. This function is called in app_audio.c */
|
|
esp_err_t start_openai(uint8_t *audio, int audio_len)
|
|
{
|
|
|
|
esp_err_t ret = ESP_OK;
|
|
static OpenAI_t *openai = NULL;
|
|
static OpenAI_AudioTranscription_t *audioTranscription = NULL;
|
|
static OpenAI_ChatCompletion_t *chatCompletion = NULL;
|
|
static OpenAI_AudioSpeech_t *audioSpeech = NULL;
|
|
|
|
OpenAI_SpeechResponse_t *speechresult = NULL;
|
|
OpenAI_StringResponse_t *result = NULL;
|
|
FILE *fp = NULL;
|
|
|
|
if (openai == NULL) {
|
|
openai = OpenAICreate(sys_param->key);
|
|
ESP_RETURN_ON_FALSE(NULL != openai, ESP_ERR_INVALID_ARG, TAG, "OpenAICreate faield");
|
|
|
|
OpenAIChangeBaseURL(openai, sys_param->url);
|
|
|
|
audioTranscription = openai->audioTranscriptionCreate(openai);
|
|
chatCompletion = openai->chatCreate(openai);
|
|
audioSpeech = openai->audioSpeechCreate(openai);
|
|
|
|
audioTranscription->setResponseFormat(audioTranscription, OPENAI_AUDIO_RESPONSE_FORMAT_JSON);
|
|
audioTranscription->setLanguage(audioTranscription, "en");
|
|
audioTranscription->setTemperature(audioTranscription, 0.2);
|
|
|
|
chatCompletion->setModel(chatCompletion, "gpt-3.5-turbo");
|
|
chatCompletion->setSystem(chatCompletion, "user");
|
|
chatCompletion->setMaxTokens(chatCompletion, CONFIG_MAX_TOKEN);
|
|
chatCompletion->setTemperature(chatCompletion, 0.2);
|
|
chatCompletion->setStop(chatCompletion, "\r");
|
|
chatCompletion->setPresencePenalty(chatCompletion, 0);
|
|
chatCompletion->setFrequencyPenalty(chatCompletion, 0);
|
|
chatCompletion->setUser(chatCompletion, "OpenAI-ESP32");
|
|
|
|
audioSpeech->setModel(audioSpeech, "tts-1");
|
|
audioSpeech->setVoice(audioSpeech, "nova");
|
|
audioSpeech->setResponseFormat(audioSpeech, OPENAI_AUDIO_OUTPUT_FORMAT_MP3);
|
|
audioSpeech->setSpeed(audioSpeech, 1.0);
|
|
}
|
|
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_GET, 0);
|
|
|
|
// OpenAI Audio Transcription
|
|
// char *text = audioTranscription->stt((uint8_t *)audio, audio_len);
|
|
char *text = Utils_STT((uint8_t *)audio, audio_len);
|
|
ESP_LOGW(TAG, "[Main.c] get resp - %s", text);
|
|
if (NULL == text) {
|
|
ret = ESP_ERR_INVALID_RESPONSE;
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, INVALID_REQUEST_ERROR);
|
|
ESP_GOTO_ON_ERROR(ret, err, TAG, "[audioTranscription]: invalid url");
|
|
}
|
|
|
|
if (strstr(text, "\"code\": ")) {
|
|
ret = ESP_ERR_INVALID_RESPONSE;
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, text);
|
|
ESP_GOTO_ON_ERROR(ret, err, TAG, "[audioTranscription]: invalid response");
|
|
}
|
|
|
|
if (strcmp(text, INVALID_REQUEST_ERROR) == 0 || strcmp(text, SERVER_ERROR) == 0) {
|
|
ret = ESP_ERR_INVALID_RESPONSE;
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, SORRY_CANNOT_UNDERSTAND);
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_SLEEP, LISTEN_SPEAK_PANEL_DELAY_MS);
|
|
ESP_GOTO_ON_ERROR(ret, err, TAG, "[audioTranscription]: invalid response");
|
|
}
|
|
|
|
// 解析JSON字符串
|
|
cJSON *json = cJSON_Parse(text);
|
|
if (json == NULL) {
|
|
ret = ESP_ERR_INVALID_RESPONSE;
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, SORRY_CANNOT_UNDERSTAND);
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_SLEEP, LISTEN_SPEAK_PANEL_DELAY_MS);
|
|
ESP_GOTO_ON_ERROR(ret, err, TAG, "[chatCompletion]: Error parsing JSON");
|
|
}
|
|
|
|
exampleFunction();
|
|
cJSON *said = cJSON_GetObjectItemCaseSensitive(json, "said");
|
|
cJSON *reply = cJSON_GetObjectItemCaseSensitive(json, "data");
|
|
cJSON *msg_id = cJSON_GetObjectItemCaseSensitive(json, "msg_id");
|
|
|
|
// UI listen success
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_REPLY_QUESTION, said->valuestring);
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, reply->valuestring);
|
|
|
|
if (strcmp(text, INVALID_REQUEST_ERROR) == 0) {
|
|
ret = ESP_ERR_INVALID_RESPONSE;
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_LISTEN_SPEAK, SORRY_CANNOT_UNDERSTAND);
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_SLEEP, LISTEN_SPEAK_PANEL_DELAY_MS);
|
|
ESP_GOTO_ON_ERROR(ret, err, TAG, "[chatCompletion]: invalid response");
|
|
}
|
|
|
|
ui_ctrl_label_show_text(UI_CTRL_LABEL_REPLY_CONTENT, reply->valuestring);
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_REPLY, 0);
|
|
|
|
uint8_t *audioData = NULL;
|
|
size_t audioLen = 0;
|
|
esp_err_t status = ESP_FAIL;
|
|
// 请求声音数据
|
|
esp_err_t err = Utils_GetAudio(msg_id->valuestring, &audioData, &audioLen);
|
|
if (err == ESP_OK) {
|
|
ESP_LOGI(TAG, "Audio data fetched successfully, length: %d", audioLen);
|
|
// 在这里处理音频数据,例如播放或存储
|
|
fp = fmemopen((void *)audioData, audioLen, "rb");
|
|
if (fp) {
|
|
status = audio_player_play(fp);
|
|
}
|
|
free(audioData); // 使用完音频数据后释放内存
|
|
} else {
|
|
ESP_LOGE(TAG, "Failed to fetch audio data");
|
|
}
|
|
|
|
// OpenAI Speech Response
|
|
// speechresult = audioSpeech->speech(audioSpeech, reply->valuestring);
|
|
// if (NULL == speechresult) {
|
|
// ret = ESP_ERR_INVALID_RESPONSE;
|
|
// ui_ctrl_show_panel(UI_CTRL_PANEL_SLEEP, 5 * LISTEN_SPEAK_PANEL_DELAY_MS);
|
|
// fp = fopen("/spiffs/tts_failed.mp3", "r");
|
|
// if (fp) {
|
|
// audio_player_play(fp);
|
|
// }
|
|
// ESP_GOTO_ON_ERROR(ret, err, TAG, "[audioSpeech]: invalid response");
|
|
// }
|
|
|
|
// uint32_t dataLength = speechresult->getLen(speechresult);
|
|
// char *speechptr = speechresult->getData(speechresult);
|
|
// esp_err_t status = ESP_FAIL;
|
|
// fp = fmemopen((void *)speechptr, dataLength, "rb");
|
|
// if (fp) {
|
|
// status = audio_player_play(fp);
|
|
// }
|
|
|
|
if (status != ESP_OK) {
|
|
ESP_LOGE(TAG, "Error creating ChatGPT request: %s\n", esp_err_to_name(status));
|
|
// UI reply audio fail
|
|
ui_ctrl_show_panel(UI_CTRL_PANEL_SLEEP, 0);
|
|
} else {
|
|
// Wait a moment before starting to scroll the reply content
|
|
vTaskDelay(pdMS_TO_TICKS(SCROLL_START_DELAY_S * 1000));
|
|
ui_ctrl_reply_set_audio_start_flag(true);
|
|
}
|
|
|
|
err:
|
|
// Clearing resources
|
|
if (speechresult) {
|
|
speechresult->deleteResponse (speechresult);
|
|
}
|
|
|
|
if (result) {
|
|
result->deleteResponse (result);
|
|
}
|
|
|
|
if (text) {
|
|
free(text);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* play audio function */
|
|
|
|
static void audio_play_finish_cb(void)
|
|
{
|
|
ESP_LOGI(TAG, "replay audio end");
|
|
if (ui_ctrl_reply_get_audio_start_flag()) {
|
|
ui_ctrl_reply_set_audio_end_flag(true);
|
|
}
|
|
}
|
|
|
|
void app_main()
|
|
{
|
|
//Initialize NVS
|
|
esp_err_t ret = nvs_flash_init();
|
|
if (ret == ESP_ERR_NVS_NO_FREE_PAGES || ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
|
|
ESP_ERROR_CHECK(nvs_flash_erase());
|
|
ret = nvs_flash_init();
|
|
}
|
|
ESP_ERROR_CHECK(ret);
|
|
ESP_ERROR_CHECK(settings_read_parameter_from_nvs());
|
|
sys_param = settings_get_parameter();
|
|
|
|
bsp_spiffs_mount();
|
|
bsp_i2c_init();
|
|
|
|
bsp_display_cfg_t cfg = {
|
|
.lvgl_port_cfg = ESP_LVGL_PORT_INIT_CONFIG(),
|
|
.buffer_size = BSP_LCD_H_RES * CONFIG_BSP_LCD_DRAW_BUF_HEIGHT,
|
|
.double_buffer = 0,
|
|
.flags = {
|
|
.buff_dma = true,
|
|
}
|
|
};
|
|
bsp_display_start_with_config(&cfg);
|
|
bsp_board_init();
|
|
|
|
ESP_LOGI(TAG, "Display LVGL demo");
|
|
bsp_display_backlight_on();
|
|
ui_ctrl_init();
|
|
app_network_start();
|
|
|
|
ESP_LOGI(TAG, "speech recognition start");
|
|
app_sr_start(false);
|
|
audio_register_play_finish_cb(audio_play_finish_cb);
|
|
|
|
while (true) {
|
|
|
|
ESP_LOGD(TAG, "\tDescription\tInternal\tSPIRAM");
|
|
ESP_LOGD(TAG, "Current Free Memory\t%d\t\t%d",
|
|
heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
|
|
heap_caps_get_free_size(MALLOC_CAP_SPIRAM));
|
|
ESP_LOGD(TAG, "Min. Ever Free Size\t%d\t\t%d",
|
|
heap_caps_get_minimum_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
|
|
heap_caps_get_minimum_free_size(MALLOC_CAP_SPIRAM));
|
|
vTaskDelay(pdMS_TO_TICKS(5 * 1000));
|
|
}
|
|
}
|