Skip to content

Commit

Permalink
Merge branch 'feature/update_algorithm_ns_model' into 'master'
Browse files Browse the repository at this point in the history
algorithm_stream: Add nsnet related configurations

See merge request adf/esp-adf-internal!1299
  • Loading branch information
jason-mao committed Aug 7, 2024
2 parents 5478bce + 9712588 commit 501921b
Show file tree
Hide file tree
Showing 13 changed files with 126 additions and 40 deletions.
71 changes: 54 additions & 17 deletions components/audio_stream/algorithm_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
#include "esp_afe_sr_iface.h"
#include "esp_afe_sr_models.h"

#ifdef CONFIG_USE_NSNET
#include "esp_nsn_models.h"
#include "model_path.h"
#endif /* CONFIG_USE_NSNET */

#define ALGORITHM_CHUNK_MAX_SIZE (1024)
#define ALGORITHM_FETCH_TASK_STACK_SIZE (3 * 1024)
#define ALGORITHM_GET_REFERENCE_TIMEOUT (32 / portTICK_PERIOD_MS)
Expand All @@ -45,23 +50,26 @@ static const char *TAG = "ALGORITHM_STREAM";
const int FETCH_STOPPED_BIT = BIT0;

typedef struct {
int16_t *record;
int16_t *reference;
int16_t *aec_buff;
int8_t algo_mask;
int8_t mic_ch;
bool afe_fetch_run;
int sample_rate;
int rec_linear_factor;
int ref_linear_factor;
int16_t *record;
int16_t *reference;
int16_t *aec_buff;
int8_t algo_mask;
int8_t mic_ch;
bool afe_fetch_run;
int sample_rate;
int rec_linear_factor;
int ref_linear_factor;
algorithm_stream_input_type_t input_type;
const esp_afe_sr_iface_t *afe_handle;
esp_afe_sr_data_t *afe_data;
EventGroupHandle_t state;
bool debug_input;
bool swap_ch;
bool aec_low_cost;
int agc_gain;
const esp_afe_sr_iface_t *afe_handle;
esp_afe_sr_data_t *afe_data;
EventGroupHandle_t state;
bool debug_input;
bool swap_ch;
bool aec_low_cost;
int agc_gain;
#ifdef CONFIG_USE_NSNET
srmodel_list_t *models;
#endif /* CONFIG_USE_NSNET */
} algo_stream_t;

esp_err_t algorithm_mono_fix(uint8_t *sbuff, uint32_t len)
Expand All @@ -87,7 +95,12 @@ static esp_err_t _algo_close(audio_element_handle_t self)
while (xEventGroupWaitBits(algo->state, FETCH_STOPPED_BIT, false, true, 10 / portTICK_PERIOD_MS) != FETCH_STOPPED_BIT) {
algo->afe_handle->feed(algo->afe_data, algo->aec_buff);
}
return ESP_OK;
}

static esp_err_t _algo_destroy(audio_element_handle_t self)
{
algo_stream_t *algo = (algo_stream_t *)audio_element_getdata(self);
if (algo->afe_data) {
algo->afe_handle->destroy(algo->afe_data);
algo->afe_data = NULL;
Expand All @@ -114,6 +127,12 @@ static esp_err_t _algo_close(audio_element_handle_t self)
vEventGroupDelete(algo->state);
}

#ifdef CONFIG_USE_NSNET
if (algo->models) {
esp_srmodel_deinit(algo->models);
}
#endif /* CONFIG_USE_NSNET */

if (algo) {
audio_free(algo);
algo = NULL;
Expand Down Expand Up @@ -163,6 +182,11 @@ static esp_err_t _algo_open(audio_element_handle_t self)
afe_config.pcm_config.mic_num = algo->mic_ch;
afe_config.pcm_config.ref_num = 1;
afe_config.pcm_config.total_ch_num = algo->mic_ch + 1;
#ifdef CONFIG_USE_NSNET
char *model_name = esp_srmodel_filter(algo->models, ESP_NSNET_PREFIX, NULL);
afe_config.afe_ns_mode = NS_MODE_NET;
afe_config.afe_ns_model_name = model_name;
#endif /* CONFIG_USE_NSNET */

if (!algo->aec_low_cost) {
afe_config.pcm_config.sample_rate = algo->sample_rate;
Expand Down Expand Up @@ -198,7 +222,7 @@ static esp_err_t _algo_open(audio_element_handle_t self)
xEventGroupSetBits(algo->state, FETCH_STOPPED_BIT);
} else {
audio_thread_create(NULL, "algo_fetch", _algo_fetch_task, (void *)self, ALGORITHM_FETCH_TASK_STACK_SIZE,
ALGORITHM_STREAM_TASK_PERIOD, true, ALGORITHM_STREAM_PINNED_TO_CORE);
ALGORITHM_STREAM_TASK_PERIOD, true, ALGORITHM_STREAM_PINNED_TO_CORE);
}

AUDIO_NULL_CHECK(TAG, algo->afe_data, {
Expand Down Expand Up @@ -316,6 +340,7 @@ audio_element_handle_t algo_stream_init(algorithm_stream_cfg_t *config)
cfg.open = _algo_open;
cfg.close = _algo_close;
cfg.process = _algo_process;
cfg.destroy = _algo_destroy;
cfg.task_stack = config->task_stack;
cfg.task_prio = config->task_prio;
cfg.task_core = config->task_core;
Expand Down Expand Up @@ -348,6 +373,18 @@ audio_element_handle_t algo_stream_init(algorithm_stream_cfg_t *config)
_success &= ((algo->reference = audio_calloc(1, ALGORITHM_CHUNK_MAX_SIZE)) != NULL);
}

#ifdef CONFIG_USE_NSNET
algo->models = esp_srmodel_init(config->partition_label);
if (algo->models != NULL) {
for (int i = 0; i < algo->models->num; i++) {
ESP_LOGI(TAG, "Load: %s", algo->models->model_name[i]);
}
} else {
ESP_LOGE(TAG, "Failed to load models");
_success = false;
}
#endif /* CONFIG_USE_NSNET */

AUDIO_NULL_CHECK(TAG, _success, {
ESP_LOGE(TAG, "Error occured");
_algo_close(el);
Expand Down
5 changes: 3 additions & 2 deletions components/audio_stream/include/algorithm_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ typedef struct {
int sample_rate; /*!< The sampling rate of the input PCM (in Hz) */
int mic_ch; /*!< MIC channel num */
int agc_gain; /*!< AGC gain(dB) for voice communication */
bool aec_low_cost; /*!< AEC uses less cpu and ram resources,
but has poor suppression of nonlinear distortion */
bool aec_low_cost; /*!< AEC uses less cpu and ram resources, but has poor suppression of nonlinear distortion */
char *partition_label; /*!< Partition label which stored the model data */
} algorithm_stream_cfg_t;

#define ALGORITHM_STREAM_DEFAULT_MASK (ALGORITHM_STREAM_USE_AEC | ALGORITHM_STREAM_USE_NS)
Expand All @@ -147,6 +147,7 @@ typedef struct {
.mic_ch = ALGORITHM_STREAM_DEFAULT_MIC_CHANNELS, \
.agc_gain = ALGORITHM_STREAM_DEFAULT_AGC_GAIN_DB, \
.aec_low_cost = false, \
.partition_label = "model", \
}

/**
Expand Down
2 changes: 1 addition & 1 deletion components/esp-sr
Submodule esp-sr updated 149 files
1 change: 1 addition & 0 deletions examples/advanced_examples/algorithm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Load and run the example:
- The board will start playing automatically.
- After finish, you can open `/sdcard/aec_out.wav` to hear the recorded file.

NSNET provides better noise suppression. To use NSNET, enable `ESP Speech Recognition -> USE_NSNET` in menuconfig.

### Build and Flash
Build the project and flash it to the board, then run monitor tool to view serial output (replace `PORT` with your board's serial port name):
Expand Down
1 change: 1 addition & 0 deletions examples/advanced_examples/algorithm/README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
- 开发板上电后自动运行例程。
- 例程完成后,你可以打开 microSD 卡目录 `/sdcard/aec_out.wav` 收听录音文件。

NSNET 具备更好的噪声抑制效果。若使用 NSNET,请在 menuconfig 中使能 `ESP Speech Recognition -> USE_NSNET`。

### 编译和下载

Expand Down
3 changes: 2 additions & 1 deletion examples/advanced_examples/algorithm/partitions_algo.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
# Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild
nvs, data, nvs, 0x9000, 0x6000,
phy_init, data, phy, 0xf000, 0x1000,
factory, app, factory, 0x10000, 3M,
factory, app, factory, 0x10000, 2M,
model, data, spiffs, , 5168K,
18 changes: 18 additions & 0 deletions examples/advanced_examples/algorithm/sdkconfig.defaults.esp32
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,21 @@ CONFIG_ESP_LYRAT_V4_3_BOARD=y
#
CONFIG_ESP32_DEFAULT_CPU_FREQ_240=y
CONFIG_ESP32_DEFAULT_CPU_FREQ_MHZ=240

#
# Serial flasher config
#
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
CONFIG_ESPTOOLPY_FLASH_SAMPLE_MODE_STR=y
CONFIG_ESPTOOLPY_FLASHMODE="dio"
CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
CONFIG_ESPTOOLPY_FLASHFREQ="80m"
CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y
CONFIG_ESPTOOLPY_FLASHSIZE="8MB"
CONFIG_ESPTOOLPY_HEADER_FLASHSIZE_UPDATE=y
CONFIG_ESPTOOLPY_BEFORE_RESET=y
CONFIG_ESPTOOLPY_BEFORE="default_reset"
CONFIG_ESPTOOLPY_AFTER_RESET=y
CONFIG_ESPTOOLPY_AFTER="hard_reset"
CONFIG_ESPTOOLPY_MONITOR_BAUD=115200
# end of Serial flasher config
41 changes: 41 additions & 0 deletions examples/advanced_examples/algorithm/sdkconfig.defaults.esp32s3
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,44 @@ CONFIG_ESP_SYSTEM_EVENT_TASK_STACK_SIZE=3072
# Audio HAL
#
CONFIG_ESP32_S3_KORVO2_V3_BOARD=y

#
# Serial flasher config
#
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
CONFIG_ESPTOOLPY_FLASH_SAMPLE_MODE_STR=y
CONFIG_ESPTOOLPY_FLASHMODE="dio"
CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
CONFIG_ESPTOOLPY_FLASHFREQ="80m"
CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y
CONFIG_ESPTOOLPY_FLASHSIZE="8MB"
CONFIG_ESPTOOLPY_HEADER_FLASHSIZE_UPDATE=y
CONFIG_ESPTOOLPY_BEFORE_RESET=y
CONFIG_ESPTOOLPY_BEFORE="default_reset"
CONFIG_ESPTOOLPY_AFTER_RESET=y
CONFIG_ESPTOOLPY_AFTER="hard_reset"
CONFIG_ESPTOOLPY_MONITOR_BAUD=115200
# end of Serial flasher config

#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_FLASH=y
CONFIG_USE_AFE=y
CONFIG_AFE_INTERFACE_V1=y
CONFIG_USE_WAKENET=y
CONFIG_SR_WN_WN9_HILEXIN=y
CONFIG_USE_MULTINET=y
CONFIG_SR_MN_CN_MULTINET6_QUANT=y
CONFIG_SR_MN_EN_NONE=y
# end of ESP Speech Recognition

#
# Partition Table
#
CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions_algo.csv"
CONFIG_PARTITION_TABLE_FILENAME="partitions_algo.csv"
CONFIG_PARTITION_TABLE_OFFSET=0x8000
CONFIG_PARTITION_TABLE_MD5=y
# end of Partition Table
2 changes: 1 addition & 1 deletion examples/dueros/sdkconfig.defaults.esp32s3
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ CONFIG_ESP32_S3_KORVO2_V3_BOARD=y
#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_SPIFFS=y
CONFIG_MODEL_IN_FLASH=y
# CONFIG_MODEL_IN_SDCARD is not set
CONFIG_USE_WAKENET=y
CONFIG_SR_WN_WN9_HILEXIN=y
Expand Down
2 changes: 1 addition & 1 deletion examples/protocols/esp-rtc/sdkconfig.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ CONFIG_LWIP_TCPIP_RECVMBOX_SIZE=64
#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_SPIFFS=y
CONFIG_MODEL_IN_FLASH=y
CONFIG_USE_AFE=y
CONFIG_AFE_INTERFACE_V1=y
# CONFIG_USE_WAKENET is not set
Expand Down
2 changes: 1 addition & 1 deletion examples/protocols/esp-rtsp/sdkconfig.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ CONFIG_LWIP_TCPIP_RECVMBOX_SIZE=64
#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_SPIFFS=y
CONFIG_MODEL_IN_FLASH=y
CONFIG_USE_AFE=y
CONFIG_AFE_INTERFACE_V1=y
# CONFIG_USE_WAKENET is not set
Expand Down
2 changes: 1 addition & 1 deletion examples/protocols/voip/sdkconfig.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ CONFIG_LWIP_TCPIP_RECVMBOX_SIZE=64
#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_SPIFFS=y
CONFIG_MODEL_IN_FLASH=y
CONFIG_USE_AFE=y
CONFIG_AFE_INTERFACE_V1=y
# CONFIG_USE_WAKENET is not set
Expand Down
16 changes: 1 addition & 15 deletions examples/speech_recognition/wwe/sdkconfig.defaults.esp32s3
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,14 @@ CONFIG_AFE_MIC_NUM=2
#
# ESP Speech Recognition
#
CONFIG_MODEL_IN_SPIFFS=y
# CONFIG_MODEL_IN_SDCARD is not set
CONFIG_MODEL_IN_FLASH=y
CONFIG_USE_AFE=y
CONFIG_AFE_INTERFACE_V1=y
CONFIG_USE_WAKENET=y
# CONFIG_SR_WN_WN8_ALEXA is not set
CONFIG_SR_WN_WN9_HILEXIN=y
# CONFIG_SR_WN_WN9_XIAOAITONGXUE is not set
# CONFIG_SR_WN_WN9_ALEXA is not set
# CONFIG_SR_WN_WN9_HIESP is not set
# CONFIG_SR_WN_WN9_NIHAOXIAOZHI is not set
# CONFIG_SR_WN_WN9_CUSTOMWORD is not set
# CONFIG_SR_WN_LOAD_MULIT_WORD is not set
CONFIG_USE_MULTINET=y
# CONFIG_SR_MN_CN_NONE is not set
# CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8 is not set
CONFIG_SR_MN_CN_MULTINET6_QUANT=y
# CONFIG_SR_MN_CN_MULTINET6_AC_QUANT is not set
CONFIG_SR_MN_EN_NONE=y
# CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8 is not set
# CONFIG_SR_MN_EN_MULTINET6_QUANT is not set
# CONFIG_SR_MN_EN_MULTINET7_QUANT is not set
# end of ESP Speech Recognition

#
Expand Down

0 comments on commit 501921b

Please sign in to comment.