From 481fd8ced874623c5400089bbc77f7c8f8bd1df7 Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 10 Dec 2025 09:47:20 -0800 Subject: [PATCH 01/12] Refactor player api to allow for multi-instance. Preserved legacy singleton audio_player_* functionality. --- audio_instance.h | 32 ++++++ audio_player.cpp | 246 +++++++++++++++++++++++++++-------------- include/audio_player.h | 5 + 3 files changed, 197 insertions(+), 86 deletions(-) create mode 100644 audio_instance.h diff --git a/audio_instance.h b/audio_instance.h new file mode 100644 index 0000000..b532b05 --- /dev/null +++ b/audio_instance.h @@ -0,0 +1,32 @@ +#pragma once + +#include "esp_err.h" +#include "include/audio_player.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Opaque handle for a player instance. + * Used for multi-instance control in mixer + */ +typedef void* audio_instance_handle_t; + +#define CHECK_INSTANCE(i) \ + ESP_RETURN_ON_FALSE(i != NULL, ESP_ERR_INVALID_ARG, "audio_instance", "instance is NULL") + +audio_player_state_t audio_instance_get_state(audio_instance_handle_t h); +esp_err_t audio_instance_callback_register(audio_instance_handle_t h, audio_player_cb_t call_back, void *user_ctx); + +esp_err_t audio_instance_play(audio_instance_handle_t h, FILE *fp); +esp_err_t audio_instance_pause(audio_instance_handle_t h); +esp_err_t audio_instance_resume(audio_instance_handle_t h); +esp_err_t audio_instance_stop(audio_instance_handle_t h); + +esp_err_t audio_instance_new(audio_instance_handle_t *h, audio_player_config_t *config); +esp_err_t audio_instance_delete(audio_instance_handle_t h); + +#ifdef __cplusplus +} +#endif diff --git a/audio_player.cpp b/audio_player.cpp index 3dfb2d7..9736f34 100644 --- a/audio_player.cpp +++ b/audio_player.cpp @@ -35,6 +35,7 @@ #include "sdkconfig.h" #include "audio_player.h" +#include "audio_instance.h" #include "audio_wav.h" #include "audio_mp3.h" @@ -98,14 +99,14 @@ typedef struct audio_instance { format i2s_format; // last configured i2s format } 
audio_instance_t; -static audio_instance_t instance; +static audio_instance_t *g_instance = NULL; // when non-null, in legacy non-mixer mode -audio_player_state_t audio_player_get_state() { - return instance.state; +audio_player_state_t audio_instance_get_state(audio_instance_handle_t h) { + audio_instance_t *i = (audio_instance_t*)h; + return i ? i->state : AUDIO_PLAYER_STATE_IDLE; } -esp_err_t audio_player_callback_register(audio_player_cb_t call_back, void *user_ctx) -{ +esp_err_t audio_instance_callback_register(audio_instance_handle_t h, audio_player_cb_t call_back, void *user_ctx) { #if CONFIG_IDF_TARGET_ARCH_XTENSA ESP_RETURN_ON_FALSE(esp_ptr_executable(reinterpret_cast(call_back)), ESP_ERR_INVALID_ARG, TAG, "Not a valid call back"); @@ -113,8 +114,10 @@ esp_err_t audio_player_callback_register(audio_player_cb_t call_back, void *user ESP_RETURN_ON_FALSE(reinterpret_cast(call_back), ESP_ERR_INVALID_ARG, TAG, "Not a valid call back"); #endif - instance.s_audio_cb = call_back; - instance.audio_cb_usrt_ctx = user_ctx; + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + i->s_audio_cb = call_back; + i->audio_cb_usrt_ctx = user_ctx; return ESP_OK; } @@ -188,15 +191,15 @@ static void set_state(audio_instance_t *i, audio_player_state_t new_state) { } } -static void audio_instance_init(audio_instance_t &i) { - i.event_queue = NULL; - i.s_audio_cb = NULL; - i.audio_cb_usrt_ctx = NULL; - i.state = AUDIO_PLAYER_STATE_IDLE; +static void audio_instance_init(audio_instance_t *i) { + i->event_queue = NULL; + i->s_audio_cb = NULL; + i->audio_cb_usrt_ctx = NULL; + i->state = AUDIO_PLAYER_STATE_IDLE; + memset(&i->i2s_format, 0, sizeof(i->i2s_format)); } -static esp_err_t mono_to_stereo(uint32_t output_bits_per_sample, decode_data &adata) -{ +static esp_err_t mono_to_stereo(uint32_t output_bits_per_sample, decode_data &adata) { size_t data = adata.frame_count * (output_bits_per_sample / BITS_PER_BYTE); data *= 2; @@ -236,8 +239,7 @@ static esp_err_t 
mono_to_stereo(uint32_t output_bits_per_sample, decode_data &ad return ESP_OK; } -static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) -{ +static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) { LOGI_1("start to decode"); esp_err_t ret = ESP_OK; @@ -347,9 +349,9 @@ static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) // break out and exit if we aren't supposed to continue decoding if(decode_status == DECODE_STATUS_CONTINUE) { - // if mono, convert to stereo as es8311 requires stereo input + // if mono and force_stereo set, convert to stereo as es8311 requires stereo input // even though it is mono output - if(i->output.fmt.channels == 1) { + if(i->output.fmt.channels == 1 && i->config.force_stereo) { LOGI_3("c == 1, mono -> stereo"); ret = mono_to_stereo(i->output.fmt.bits_per_sample, i->output); if(ret != ESP_OK) { @@ -358,17 +360,17 @@ static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) } /* Configure I2S clock if the output format changed */ - if ((instance.i2s_format.sample_rate != i->output.fmt.sample_rate) || - (instance.i2s_format.channels != i->output.fmt.channels) || - (instance.i2s_format.bits_per_sample != i->output.fmt.bits_per_sample)) { - instance.i2s_format = i->output.fmt; + if ((i->i2s_format.sample_rate != i->output.fmt.sample_rate) || + (i->i2s_format.channels != i->output.fmt.channels) || + (i->i2s_format.bits_per_sample != i->output.fmt.bits_per_sample)) { + i->i2s_format = i->output.fmt; LOGI_1("format change: sr=%d, bit=%lu, ch=%lu", - instance.i2s_format.sample_rate, - instance.i2s_format.bits_per_sample, - instance.i2s_format.channels); - i2s_slot_mode_t channel_setting = (instance.i2s_format.channels == 1) ? I2S_SLOT_MODE_MONO : I2S_SLOT_MODE_STEREO; - ret = i->config.clk_set_fn(instance.i2s_format.sample_rate, - instance.i2s_format.bits_per_sample, + i->i2s_format.sample_rate, + i->i2s_format.bits_per_sample, + i->i2s_format.channels); + i2s_slot_mode_t channel_setting = (i->i2s_format.channels == 1) ? 
I2S_SLOT_MODE_MONO : I2S_SLOT_MODE_STEREO; + ret = i->config.clk_set_fn(i->i2s_format.sample_rate, + i->i2s_format.bits_per_sample, channel_setting); ESP_GOTO_ON_ERROR(ret, clean_up, TAG, "i2s_set_clk"); } @@ -379,17 +381,22 @@ static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) * audio decoding to occur while the previous set of samples is finishing playback, in order * to ensure playback without interruption. */ - size_t i2s_bytes_written = 0; - size_t bytes_to_write = i->output.frame_count * i->output.fmt.channels * (instance.i2s_format.bits_per_sample / 8); + size_t bytes_written = 0; + size_t bytes_to_write = i->output.frame_count * i->output.fmt.channels * (i->i2s_format.bits_per_sample / 8); LOGI_2("c %d, bps %d, bytes %d, frame_count %d", i->output.fmt.channels, i2s_format.bits_per_sample, bytes_to_write, i->output.frame_count); - i->config.write_fn(i->output.samples, bytes_to_write, &i2s_bytes_written, portMAX_DELAY); - if(bytes_to_write != i2s_bytes_written) { - ESP_LOGE(TAG, "to write %d != written %d", bytes_to_write, i2s_bytes_written); + // NOTE: to aid transition in api, using write_fn2 based on write_ctx assignment + if (i->config.write_ctx) + i->config.write_fn2(i->output.samples, bytes_to_write, &bytes_written, portMAX_DELAY, i->config.write_ctx); + else + i->config.write_fn(i->output.samples, bytes_to_write, &bytes_written, portMAX_DELAY); + + if(bytes_to_write != bytes_written) { + ESP_LOGE(TAG, "to write %d != written %d", bytes_to_write, bytes_written); } } else if(decode_status == DECODE_STATUS_NO_DATA_CONTINUE) { @@ -404,8 +411,7 @@ static esp_err_t aplay_file(audio_instance_t *i, FILE *fp) return ret; } -static void audio_task(void *pvParam) -{ +static void audio_task(void *pvParam) { audio_instance_t *i = static_cast(pvParam); audio_player_event_t audio_event; @@ -450,13 +456,13 @@ static void audio_task(void *pvParam) } } - i->config.mute_fn(AUDIO_PLAYER_UNMUTE); + if (i->config.mute_fn) i->config.mute_fn(AUDIO_PLAYER_UNMUTE); 
esp_err_t ret_val = aplay_file(i, audio_event.fp); if(ret_val != ESP_OK) { ESP_LOGE(TAG, "aplay_file() %d", ret_val); } - i->config.mute_fn(AUDIO_PLAYER_MUTE); + if (i->config.mute_fn) i->config.mute_fn(AUDIO_PLAYER_MUTE); if(audio_event.fp) fclose(audio_event.fp); } @@ -475,130 +481,155 @@ static esp_err_t audio_send_event(audio_instance_t *i, audio_player_event_t even return ESP_OK; } -esp_err_t audio_player_play(FILE *fp) -{ +/* ================= New multi-instance API ================= */ + +esp_err_t audio_instance_play(audio_instance_handle_t h, FILE *fp) { + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + LOGI_1("%s", __FUNCTION__); audio_player_event_t event = { .type = AUDIO_PLAYER_REQUEST_PLAY, .fp = fp }; - return audio_send_event(&instance, event); + return audio_send_event(i, event); } -esp_err_t audio_player_pause(void) -{ +esp_err_t audio_instance_pause(audio_instance_handle_t h) { + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + LOGI_1("%s", __FUNCTION__); audio_player_event_t event = { .type = AUDIO_PLAYER_REQUEST_PAUSE, .fp = NULL }; - return audio_send_event(&instance, event); + return audio_send_event(i, event); } -esp_err_t audio_player_resume(void) -{ +esp_err_t audio_instance_resume(audio_instance_handle_t h) { + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + LOGI_1("%s", __FUNCTION__); audio_player_event_t event = { .type = AUDIO_PLAYER_REQUEST_RESUME, .fp = NULL }; - return audio_send_event(&instance, event); + return audio_send_event(i, event); } -esp_err_t audio_player_stop(void) -{ +esp_err_t audio_instance_stop(audio_instance_handle_t h) { + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + LOGI_1("%s", __FUNCTION__); audio_player_event_t event = { .type = AUDIO_PLAYER_REQUEST_STOP, .fp = NULL }; - return audio_send_event(&instance, event); + return audio_send_event(i, event); } /** * Can only shut down the playback thread if the thread is not presently playing 
audio. * Call audio_player_stop() */ -static esp_err_t _internal_audio_player_shutdown_thread(void) -{ +static esp_err_t _internal_audio_player_shutdown_thread(audio_instance_t *i) { + CHECK_INSTANCE(i); + LOGI_1("%s", __FUNCTION__); audio_player_event_t event = { .type = AUDIO_PLAYER_REQUEST_SHUTDOWN_THREAD, .fp = NULL }; - return audio_send_event(&instance, event); + return audio_send_event(i, event); } -static void cleanup_memory(audio_instance_t &i) -{ +static void cleanup_memory(audio_instance_t *i) { #if defined(CONFIG_AUDIO_PLAYER_ENABLE_MP3) - if(i.mp3_decoder) MP3FreeDecoder(i.mp3_decoder); - if(i.mp3_data.data_buf) free(i.mp3_data.data_buf); + if(i->mp3_decoder) MP3FreeDecoder(i->mp3_decoder); + if(i->mp3_data.data_buf) free(i->mp3_data.data_buf); #endif - if(i.output.samples) free(i.output.samples); + if(i->output.samples) free(i->output.samples); - vQueueDelete(i.event_queue); + vQueueDelete(i->event_queue); } -esp_err_t audio_player_new(audio_player_config_t config) -{ +esp_err_t audio_instance_new(audio_instance_handle_t *h, audio_player_config_t *config) { BaseType_t task_val; - audio_instance_init(instance); + ESP_RETURN_ON_FALSE(h != NULL, ESP_ERR_INVALID_ARG, TAG, "handle pointer is NULL"); + ESP_RETURN_ON_FALSE(*h == NULL, ESP_ERR_INVALID_ARG, TAG, "instance is not NULL"); + ESP_RETURN_ON_FALSE(config, ESP_ERR_INVALID_ARG, TAG, "null config"); - instance.config = config; + audio_instance_t *i = (audio_instance_t*)calloc(1, sizeof(audio_instance_t)); + if (i == NULL) return ESP_ERR_NO_MEM; + + audio_instance_init(i); + + i->config = *config; /* Audio control event queue */ - instance.event_queue = xQueueCreate(4, sizeof(audio_player_event_t)); - ESP_RETURN_ON_FALSE(NULL != instance.event_queue, -1, TAG, "xQueueCreate"); + i->event_queue = xQueueCreate(4, sizeof(audio_player_event_t)); + ESP_RETURN_ON_FALSE(NULL != i->event_queue, -1, TAG, "xQueueCreate"); /** See 
https://github.com/ultraembedded/libhelix-mp3/blob/0a0e0673f82bc6804e5a3ddb15fb6efdcde747cd/testwrap/main.c#L74 */ - instance.output.samples_capacity = MAX_NCHAN * MAX_NGRAN * MAX_NSAMP; - instance.output.samples_capacity_max = instance.output.samples_capacity * 2; - instance.output.samples = static_cast(malloc(instance.output.samples_capacity_max)); - LOGI_1("samples_capacity %d bytes", instance.output.samples_capacity_max); + i->output.samples_capacity = MAX_NCHAN * MAX_NGRAN * MAX_NSAMP; + i->output.samples_capacity_max = i->output.samples_capacity * 2; + i->output.samples = static_cast(malloc(i->output.samples_capacity_max)); + LOGI_1("samples_capacity %d bytes", i->output.samples_capacity_max); int ret = ESP_OK; - ESP_GOTO_ON_FALSE(NULL != instance.output.samples, ESP_ERR_NO_MEM, cleanup, + ESP_GOTO_ON_FALSE(NULL != i->output.samples, ESP_ERR_NO_MEM, cleanup, TAG, "Failed allocate output buffer"); #if defined(CONFIG_AUDIO_PLAYER_ENABLE_MP3) - instance.mp3_data.data_buf_size = MAINBUF_SIZE * 3; - instance.mp3_data.data_buf = static_cast(malloc(instance.mp3_data.data_buf_size)); - ESP_GOTO_ON_FALSE(NULL != instance.mp3_data.data_buf, ESP_ERR_NO_MEM, cleanup, + i->mp3_data.data_buf_size = MAINBUF_SIZE * 3; + i->mp3_data.data_buf = static_cast(malloc(i->mp3_data.data_buf_size)); + ESP_GOTO_ON_FALSE(NULL != i->mp3_data.data_buf, ESP_ERR_NO_MEM, cleanup, TAG, "Failed allocate mp3 data buffer"); - instance.mp3_decoder = MP3InitDecoder(); - ESP_GOTO_ON_FALSE(NULL != instance.mp3_decoder, ESP_ERR_NO_MEM, cleanup, + i->mp3_decoder = MP3InitDecoder(); + ESP_GOTO_ON_FALSE(NULL != i->mp3_decoder, ESP_ERR_NO_MEM, cleanup, TAG, "Failed create MP3 decoder"); #endif - memset(&instance.i2s_format, 0, sizeof(instance.i2s_format)); + memset(&i->i2s_format, 0, sizeof(i->i2s_format)); - instance.running = true; + i->running = true; task_val = xTaskCreatePinnedToCore( (TaskFunction_t) audio_task, "Audio Task", 4 * 1024, - &instance, - (UBaseType_t) instance.config.priority, + i, + 
(UBaseType_t) i->config.priority, (TaskHandle_t *) NULL, - (BaseType_t) instance.config.coreID); + (BaseType_t) i->config.coreID); ESP_GOTO_ON_FALSE(pdPASS == task_val, ESP_ERR_NO_MEM, cleanup, TAG, "Failed create audio task"); // start muted - instance.config.mute_fn(AUDIO_PLAYER_MUTE); + if (i->config.mute_fn) + i->config.mute_fn(AUDIO_PLAYER_MUTE); + *h = i; return ret; // At the moment when we run cppcheck there is a lack of esp-idf header files this // means cppcheck doesn't know that ESP_GOTO_ON_FALSE() etc are making use of this label // cppcheck-suppress unusedLabelConfiguration cleanup: - cleanup_memory(instance); + cleanup_memory(i); + free(i); + i = NULL; return ret; } -esp_err_t audio_player_delete() { +esp_err_t audio_instance_delete(audio_instance_handle_t h) { + audio_instance_t *i = (audio_instance_t*)h; + CHECK_INSTANCE(i); + const int MAX_RETRIES = 5; int retries = MAX_RETRIES; - while(instance.running && retries) { + while(i->running && retries) { // stop any playback and shutdown the thread - audio_player_stop(); - _internal_audio_player_shutdown_thread(); + audio_instance_stop(i); + _internal_audio_player_shutdown_thread(i); vTaskDelay(pdMS_TO_TICKS(100)); retries--; } - cleanup_memory(instance); + cleanup_memory(i); + free(i); + i = NULL; // if we ran out of retries, return fail code if(retries == 0) { @@ -607,3 +638,46 @@ esp_err_t audio_player_delete() { return ESP_OK; } + +/* ================= Legacy API implemented via default instance ================= */ + +audio_player_state_t audio_player_get_state() { + return audio_instance_get_state(g_instance); +} + +esp_err_t audio_player_callback_register(audio_player_cb_t call_back, void *user_ctx) { + return audio_instance_callback_register(g_instance, call_back, user_ctx); +} + +esp_err_t audio_player_play(FILE *fp) { + return audio_instance_play(g_instance, fp); +} + +esp_err_t audio_player_pause() { + return audio_instance_pause(g_instance); +} + +esp_err_t audio_player_resume() { + return 
audio_instance_resume(g_instance); +} + +esp_err_t audio_player_stop() { + return audio_instance_stop(g_instance); +} + +esp_err_t audio_player_new(audio_player_config_t config) { + if (g_instance) return ESP_OK; + config.force_stereo = true; // preserve legacy behavior + audio_instance_handle_t h = NULL; + ESP_RETURN_ON_ERROR(audio_instance_new(&h, &config), TAG, "failed to create new audio instance"); + g_instance = (audio_instance_t*)h; + return ESP_OK; +} + +esp_err_t audio_player_delete() { + if (g_instance) { + audio_instance_delete(g_instance); + g_instance = NULL; + } + return ESP_OK; +} diff --git a/include/audio_player.h b/include/audio_player.h index fd849f0..f21fc1e 100644 --- a/include/audio_player.h +++ b/include/audio_player.h @@ -152,6 +152,7 @@ typedef enum { typedef esp_err_t (*audio_player_mute_fn)(AUDIO_PLAYER_MUTE_SETTING setting); typedef esp_err_t (*audio_reconfig_std_clock)(uint32_t rate, uint32_t bits_cfg, i2s_slot_mode_t ch); typedef esp_err_t (*audio_player_write_fn)(void *audio_buffer, size_t len, size_t *bytes_written, uint32_t timeout_ms); +typedef esp_err_t (*audio_player_write_fn2)(void *audio_buffer, size_t len, size_t *bytes_written, uint32_t timeout_ms, void *ctx); typedef struct { audio_player_mute_fn mute_fn; @@ -159,6 +160,10 @@ typedef struct { audio_player_write_fn write_fn; UBaseType_t priority; /*< FreeRTOS task priority */ BaseType_t coreID; /*< ESP32 core ID */ + bool force_stereo; /*< upmix mono -> stereo */ + + audio_player_write_fn2 write_fn2; + void *write_ctx; } audio_player_config_t; /** From f1ce28c4ff7a3cedc2fa3d752fcab8f2eaa5a69e Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 10 Dec 2025 09:49:08 -0800 Subject: [PATCH 02/12] Add concept of audio stream (wraps audio player using multi instance api); Add support for audio mixer of N streams. 
--- CMakeLists.txt | 1 + audio_mixer.cpp | 405 +++++++++++++++++++++++++++++++++++++++++ include/audio_mixer.h | 60 ++++++ include/audio_stream.h | 42 +++++ 4 files changed, 508 insertions(+) create mode 100644 audio_mixer.cpp create mode 100644 include/audio_mixer.h create mode 100644 include/audio_stream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e9c414e..f3c6e43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ set(srcs "audio_player.cpp" + "audio_mixer.cpp" ) set(includes diff --git a/audio_mixer.cpp b/audio_mixer.cpp new file mode 100644 index 0000000..440b0a5 --- /dev/null +++ b/audio_mixer.cpp @@ -0,0 +1,405 @@ +/** + * @file audio_mixer.cpp + */ + +#include +#include +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/ringbuf.h" +#include "freertos/semphr.h" +#include "esp_check.h" +#include "esp_log.h" + +#include "audio_mixer.h" +#include "audio_player.h" +#include "audio_instance.h" +#include "audio_stream.h" + +static const char *TAG = "audio_mixer"; + +static TaskHandle_t s_mixer_task = nullptr; +static audio_mixer_config_t s_cfg = {}; +static volatile bool s_running = false; +static audio_mixer_cb_t s_mixer_user_cb = NULL; + +typedef struct audio_stream { + char name[16]; + audio_instance_handle_t instance; + QueueHandle_t file_queue; + RingbufHandle_t pcm_rb; + + SLIST_ENTRY(audio_stream) next; +} audio_stream_t; + +SLIST_HEAD(audio_stream_list, audio_stream); +static audio_stream_list s_stream_list = SLIST_HEAD_INITIALIZER(s_stream_list); +static uint32_t s_stream_name_counter = 0; // counter for unique naming (monotonic) +static uint32_t s_active_streams = 0; // counter for stream counting +static SemaphoreHandle_t s_stream_mutex = NULL; + + +static int16_t sat_add16(int32_t a, int32_t b) { + int32_t s = a + b; + if (s > INT16_MAX) return INT16_MAX; + if (s < INT16_MIN) return INT16_MIN; + return (int16_t)s; +} + +static void mixer_task(void *arg) { + const size_t frames = 
512; // tune as needed + const size_t bytes = frames * s_cfg.i2s_format.channels * sizeof(int16_t); + + int16_t *mix = (int16_t*)heap_caps_malloc(bytes, MALLOC_CAP_8BIT); + if (!mix) { vTaskDelete(nullptr); return; } + + while (s_running) { + memset(mix, 0, bytes); + + audio_mixer_lock(); + + audio_stream_t *stream; + SLIST_FOREACH(stream, &s_stream_list, next) { + if (!stream->pcm_rb) continue; + + size_t received_bytes = 0; + void *item = xRingbufferReceiveUpTo(stream->pcm_rb, &received_bytes, pdMS_TO_TICKS(5), bytes); + + if (item && received_bytes > 0) { + int16_t *samples = (int16_t*)item; + size_t count = received_bytes / sizeof(int16_t); + + for (size_t k = 0; k < count; ++k) { + mix[k] = sat_add16(mix[k], samples[k]); + } + vRingbufferReturnItem(stream->pcm_rb, item); + } else if (item) { + vRingbufferReturnItem(stream->pcm_rb, item); + } + } + + audio_mixer_unlock(); + + size_t written = 0; + if (s_cfg.write_fn) { + s_cfg.write_fn(mix, bytes, &written, portMAX_DELAY); + if (written != bytes) { + ESP_LOGW(TAG, "mixer short write %u/%u", (unsigned)written, (unsigned)bytes); + } + } + } + + free(mix); + vTaskDelete(nullptr); +} + +IRAM_ATTR static esp_err_t mixer_stream_write(void *data, size_t size, size_t *bytes_written, uint32_t timeout, void *stream) { + audio_stream_t *s = (audio_stream_t*)stream; + if (!s || !s->pcm_rb) { + if (bytes_written) *bytes_written = 0; + return ESP_ERR_INVALID_ARG; + } + + /* send data to the stream's ring buffer */ + BaseType_t res = xRingbufferSend(s->pcm_rb, data, size, timeout); + if (res == pdTRUE) { + if (bytes_written) *bytes_written = size; + } else { + if (bytes_written) *bytes_written = 0; + ESP_LOGW(TAG, "stream ringbuf full"); + } + return ESP_OK; +} + +static esp_err_t mixer_stream_clk_set_fn(uint32_t rate, uint32_t bits_cfg, i2s_slot_mode_t ch) { + if (rate != s_cfg.i2s_format.sample_rate) { + ESP_LOGE(TAG, "stream sample rate mismatch: %lu Hz (mixer expects %u Hz)", rate, s_cfg.i2s_format.sample_rate); + return 
ESP_ERR_INVALID_ARG; + } + + if (bits_cfg != s_cfg.i2s_format.bits_per_sample) { + ESP_LOGE(TAG, "stream bit depth mismatch: %lu bits (mixer expects %lu bits)", bits_cfg, s_cfg.i2s_format.bits_per_sample); + return ESP_ERR_INVALID_ARG; + } + + if (ch != s_cfg.i2s_format.channels) { + ESP_LOGE(TAG, "stream channels mismatch: %u (mixer expects %lu)", ch, s_cfg.i2s_format.channels); + return ESP_ERR_INVALID_ARG; + } + + return ESP_OK; +} + +static void mixer_stream_event_handler(audio_player_cb_ctx_t *ctx) { + if (!ctx || !ctx->user_ctx) return; + + audio_stream_t *s = (audio_stream_t *)ctx->user_ctx; + + // handle auto-queueing + if (ctx->audio_event == AUDIO_PLAYER_CALLBACK_EVENT_IDLE) { + if (s_stream_mutex) xSemaphoreTake(s_stream_mutex, portMAX_DELAY); + + // Check if there is anything in the queue to play next + FILE *next_fp = NULL; + if (xQueueReceive(s->file_queue, &next_fp, 0) == pdTRUE) { + ESP_LOGD(TAG, "stream '%s' auto-advancing queue", s->name); + audio_instance_play(s->instance, next_fp); + } + audio_mixer_unlock(); + } + + // service callback + if (s_mixer_user_cb) { + s_mixer_user_cb(ctx); + } +} + +static void mixer_free_stream_resources(audio_stream_t *s) { + if (s->instance) audio_instance_delete(s->instance); + if (s->pcm_rb) vRingbufferDelete(s->pcm_rb); + if (s->file_queue) { + FILE *fp = NULL; + while(xQueueReceive(s->file_queue, &fp, 0) == pdTRUE) { + if (fp) fclose(fp); + } + vQueueDelete(s->file_queue); + } + free(s); +} + +///////////////////////////// + +inline uint8_t audio_mixer_stream_count() { + return s_active_streams; +} + +inline void audio_mixer_lock() { + if (s_stream_mutex) xSemaphoreTake(s_stream_mutex, portMAX_DELAY); +} + +inline void audio_mixer_unlock() { + if (s_stream_mutex) xSemaphoreGive(s_stream_mutex); +} + +void audio_mixer_add_stream(audio_stream_handle_t stream) { + audio_mixer_lock(); + SLIST_INSERT_HEAD(&s_stream_list, (audio_stream_t*)stream, next); + s_active_streams++; + audio_mixer_unlock(); +} + +void 
audio_mixer_remove_stream(audio_stream_handle_t stream) { + audio_mixer_lock(); + SLIST_REMOVE(&s_stream_list, (audio_stream_t*)stream, audio_stream, next); + if (s_active_streams > 0) s_active_streams--; + audio_mixer_unlock(); +} + +void audio_mixer_get_output_format(uint32_t *sample_rate, uint32_t *bits_per_sample, uint32_t *channels) { + if (sample_rate) *sample_rate = s_cfg.i2s_format.sample_rate; + if (bits_per_sample) *bits_per_sample = s_cfg.i2s_format.bits_per_sample; + if (channels) *channels = s_cfg.i2s_format.channels; +} + +void audio_mixer_callback_register(audio_mixer_cb_t cb) { + s_mixer_user_cb = cb; +} + +esp_err_t audio_mixer_init(audio_mixer_config_t *cfg) { + if (s_running) return ESP_OK; + ESP_RETURN_ON_FALSE(cfg && cfg->write_fn && cfg->clk_set_fn, ESP_ERR_INVALID_ARG, TAG, "invalid mixer config"); + s_cfg = *cfg; + + i2s_slot_mode_t channel_setting = (s_cfg.i2s_format.channels == 1) ? I2S_SLOT_MODE_MONO : I2S_SLOT_MODE_STEREO; + ESP_RETURN_ON_ERROR(s_cfg.clk_set_fn(s_cfg.i2s_format.sample_rate, s_cfg.i2s_format.bits_per_sample, channel_setting), TAG, "clk set failed"); + + s_running = true; + if (!s_stream_mutex) s_stream_mutex = xSemaphoreCreateMutex(); + + SLIST_INIT(&s_stream_list); + + BaseType_t ok = xTaskCreatePinnedToCore(mixer_task, "audio_mixer", 4096, NULL, s_cfg.priority, &s_mixer_task, s_cfg.coreID); + ESP_RETURN_ON_FALSE(ok == pdPASS, ESP_FAIL, TAG, "failed to start mixer"); + + ESP_LOGD(TAG, "mixer started"); + return ESP_OK; +} + +void audio_mixer_deinit() { + if (!s_running) return; + + s_running = false; + // Task will exit on next loop; no join primitive in FreeRTOS here. 
+ + // Clean up any remaining channels (safe teardown) + audio_mixer_lock(); + + while (!SLIST_EMPTY(&s_stream_list)) { + audio_stream_t *it = SLIST_FIRST(&s_stream_list); + SLIST_REMOVE_HEAD(&s_stream_list, next); + mixer_free_stream_resources(it); + } + s_active_streams = 0; + + audio_mixer_unlock(); +} + +/* ================= Stream (mixer channel) API ================= */ + +static void stream_purge_ringbuf(audio_stream_t *s) { + if (!s || !s->pcm_rb) return; + + size_t item_size; + void *item; + while ((item = xRingbufferReceive(s->pcm_rb, &item_size, 0)) != NULL) { + vRingbufferReturnItem(s->pcm_rb, item); + } +} + +esp_err_t audio_stream_play(audio_stream_handle_t sh, FILE *fp) { + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + + // stop current playback? + if (audio_instance_get_state(s->instance) == AUDIO_PLAYER_STATE_PLAYING) + audio_stream_stop(s); + + return audio_instance_play(s->instance, fp); +} + +esp_err_t audio_stream_queue(audio_stream_handle_t sh, FILE *fp, bool play_now) { + if (play_now) { + return audio_stream_play(sh, fp); + } + + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + + audio_mixer_lock(); + + // add to queue + if (xQueueSend(s->file_queue, &fp, 0) != pdTRUE) { + ESP_LOGE(TAG, "stream '%s' queue full", s->name); + fclose(fp); // Take ownership and close if we can't queue + audio_mixer_unlock(); + return ESP_FAIL; + } + + // if stream is IDLE, we need to kickstart it + if (audio_instance_get_state(s->instance) == AUDIO_PLAYER_STATE_IDLE) { + FILE *next_fp = NULL; + // pop the one we just pushed (or the one at head) + if (xQueueReceive(s->file_queue, &next_fp, 0) == pdTRUE) { + audio_instance_play(s->instance, next_fp); + } + } + + audio_mixer_unlock(); + return ESP_OK; +} + +esp_err_t audio_stream_stop(audio_stream_handle_t sh) { + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + esp_err_t err; + + // clear any pending queue items + FILE *pending = NULL; + while (xQueueReceive(s->file_queue, 
&pending, 0) == pdTRUE) { + if (pending) fclose(pending); + } + + err = audio_instance_stop(s->instance); + stream_purge_ringbuf(s); + return err; +} + +esp_err_t audio_stream_pause(audio_stream_handle_t sh) { + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + return audio_instance_pause(s->instance); +} + +esp_err_t audio_stream_resume(audio_stream_handle_t sh) { + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + return audio_instance_resume(s->instance); +} + +audio_player_state_t audio_stream_get_state(audio_stream_handle_t sh) { + audio_stream_t *s = (audio_stream_t*)sh; + if (!s) return AUDIO_PLAYER_STATE_IDLE; + return audio_instance_get_state(s->instance); +} + +audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { + ESP_RETURN_ON_FALSE(cfg, NULL, TAG, "null config"); + + audio_stream_t *stream = (audio_stream_t*)calloc(1, sizeof(audio_stream_t)); + + /* use provided name? */ + if (cfg->name[0] != '\0') { + strncpy(stream->name, cfg->name, sizeof(stream->name) - 1); + stream->name[sizeof(stream->name) - 1] = 0; + } + /* otherwise, generate a unique monotonic name */ + else { + snprintf(stream->name, sizeof(stream->name), "stream_%lu", s_stream_name_counter++); + } + + /* create player instance */ + audio_player_config_t instance_cfg; + instance_cfg.mute_fn = NULL; + instance_cfg.clk_set_fn = mixer_stream_clk_set_fn; + instance_cfg.coreID = cfg->coreID; + instance_cfg.priority = cfg->priority; + instance_cfg.force_stereo = false; + instance_cfg.write_fn2 = mixer_stream_write; + instance_cfg.write_ctx = stream; + + audio_instance_handle_t h = NULL; + esp_err_t err = audio_instance_new(&h, &instance_cfg); + + if (err != ESP_OK) { + free(stream); + return NULL; + } + stream->instance = h; + + /* create stream's ringbuffer, file queue */ + stream->file_queue = xQueueCreate(4, sizeof(FILE*)); + stream->pcm_rb = xRingbufferCreate(16 * 1024, RINGBUF_TYPE_BYTEBUF); + if (!stream->file_queue || !stream->pcm_rb) { + if 
(stream->file_queue) vQueueDelete(stream->file_queue); + if (stream->pcm_rb) vRingbufferDelete(stream->pcm_rb); + free(stream); + return NULL; + } + + audio_instance_callback_register(stream->instance, mixer_stream_event_handler, stream); + + /* add to stream tracking */ + audio_mixer_add_stream(stream); + + ESP_LOGI(TAG, "Created stream '%s' (active: %u)", stream->name, audio_mixer_stream_count()); + + return (audio_stream_handle_t)stream; +} + +esp_err_t audio_stream_delete(audio_stream_handle_t sh) { + audio_stream_t *s = (audio_stream_t*)sh; + CHECK_STREAM(s); + + /* remove from stream tracking */ + audio_mixer_remove_stream(s); + + /* cleanup stream */ + mixer_free_stream_resources(s); + + ESP_LOGI(TAG, "Deleted stream '%s' (active: %u)", s->name, audio_mixer_stream_count()); + + return ESP_OK; +} diff --git a/include/audio_mixer.h b/include/audio_mixer.h new file mode 100644 index 0000000..9f172a2 --- /dev/null +++ b/include/audio_mixer.h @@ -0,0 +1,60 @@ +/** + * @file audio_mixer.h + * Mixer interface for esp-audio-player. Provides a global mixer that accepts + * PCM from multiple sources via FreeRTOS ring buffers and writes mixed PCM to I2S. + */ +#pragma once + +#include + +#include "esp_err.h" + +#include "audio_player.h" +#include "../audio_decode_types.h" // FIXME: leaks out +#include "audio_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + audio_player_mute_fn mute_fn; + audio_reconfig_std_clock clk_set_fn; + audio_player_write_fn write_fn; + UBaseType_t priority; /*< FreeRTOS task priority */ + BaseType_t coreID; /*< ESP32 core ID */ + + format i2s_format; +} audio_mixer_config_t; + +typedef audio_player_cb_t audio_mixer_cb_t; + + +uint8_t audio_mixer_stream_count(); + +/** Lock the mixer's main mutex. Call this before modifying stream state (busy flags, queues). */ +void audio_mixer_lock(); + +/** Unlock the mixer's main mutex. */ +void audio_mixer_unlock(); + +/** Add a stream to the mixer's processing list (Thread safe). 
*/ +void audio_mixer_add_stream(audio_stream_handle_t stream); + +/** Remove a stream from the mixer's processing list (Thread safe). */ +void audio_mixer_remove_stream(audio_stream_handle_t stream); + +/** Query the current mixer output format. Returns zeros if not initialized. */ +void audio_mixer_get_output_format(uint32_t *sample_rate, uint32_t *bits_per_sample, uint32_t *channels); + +void audio_mixer_callback_register(audio_mixer_cb_t cb); + +/** Initialize the mixer with fixed output format and start the mixer task. */ +esp_err_t audio_mixer_init(audio_mixer_config_t *cfg); + +/** Deinitialize the mixer task. */ +void audio_mixer_deinit(); + +#ifdef __cplusplus +} +#endif diff --git a/include/audio_stream.h b/include/audio_stream.h new file mode 100644 index 0000000..936638e --- /dev/null +++ b/include/audio_stream.h @@ -0,0 +1,42 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct audio_stream; +typedef struct audio_stream* audio_stream_handle_t; + +#define CHECK_STREAM(s) \ + ESP_RETURN_ON_FALSE(s != NULL, ESP_ERR_INVALID_ARG, "audio_stream", "stream is NULL") + +typedef struct { + char name[16]; /*< Optional: Name of the stream (e.g. "sfx", "bgm"). Auto-generated if empty. */ + UBaseType_t priority; /*< FreeRTOS task priority */ + BaseType_t coreID; /*< ESP32 core ID */ +} audio_stream_config_t; + +#define DEFAULT_AUDIO_STREAM_CONFIG(_name) { \ + .name = _name, \ + .priority = tskIDLE_PRIORITY + 1, \ + .coreID = tskNO_AFFINITY \ + } + +/** + * Stream API — create/delete logical playback streams and control them. + * These streams own their decode task and submit PCM to the mixer. 
+ */ +audio_player_state_t audio_stream_get_state(audio_stream_handle_t s); + +esp_err_t audio_stream_play(audio_stream_handle_t s, FILE *fp); +esp_err_t audio_stream_queue(audio_stream_handle_t s, FILE *fp, bool play_now); +esp_err_t audio_stream_stop(audio_stream_handle_t s); +esp_err_t audio_stream_pause(audio_stream_handle_t s); +esp_err_t audio_stream_resume(audio_stream_handle_t s); + +audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg); +esp_err_t audio_stream_delete(audio_stream_handle_t sh); + +#ifdef __cplusplus +} +#endif From 1b7a8af68f569bec769d28c197a3fcb685fd6b96 Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 10 Dec 2025 12:23:20 -0800 Subject: [PATCH 03/12] rename usages of audio_stream_handle_t to 'h' for consistency --- audio_mixer.cpp | 45 +++++++++++++++++++++--------------------- include/audio_mixer.h | 4 ++-- include/audio_stream.h | 14 ++++++------- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/audio_mixer.cpp b/audio_mixer.cpp index 440b0a5..c9fef19 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -184,16 +184,16 @@ inline void audio_mixer_unlock() { if (s_stream_mutex) xSemaphoreGive(s_stream_mutex); } -void audio_mixer_add_stream(audio_stream_handle_t stream) { +void audio_mixer_add_stream(audio_stream_handle_t h) { audio_mixer_lock(); - SLIST_INSERT_HEAD(&s_stream_list, (audio_stream_t*)stream, next); + SLIST_INSERT_HEAD(&s_stream_list, (audio_stream_t*)h, next); s_active_streams++; audio_mixer_unlock(); } -void audio_mixer_remove_stream(audio_stream_handle_t stream) { +void audio_mixer_remove_stream(audio_stream_handle_t h) { audio_mixer_lock(); - SLIST_REMOVE(&s_stream_list, (audio_stream_t*)stream, audio_stream, next); + SLIST_REMOVE(&s_stream_list, (audio_stream_t*)h, audio_stream, next); if (s_active_streams > 0) s_active_streams--; audio_mixer_unlock(); } @@ -259,8 +259,13 @@ static void stream_purge_ringbuf(audio_stream_t *s) { } } -esp_err_t audio_stream_play(audio_stream_handle_t sh, 
FILE *fp) { - audio_stream_t *s = (audio_stream_t*)sh; +audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { + if (!h) return AUDIO_PLAYER_STATE_IDLE; + return audio_instance_get_state((audio_stream_t*)h->instance); +} + +esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp) { + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); // stop current playback? @@ -270,12 +275,12 @@ esp_err_t audio_stream_play(audio_stream_handle_t sh, FILE *fp) { return audio_instance_play(s->instance, fp); } -esp_err_t audio_stream_queue(audio_stream_handle_t sh, FILE *fp, bool play_now) { +esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now) { if (play_now) { - return audio_stream_play(sh, fp); + return audio_stream_play(h, fp); } - audio_stream_t *s = (audio_stream_t*)sh; + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); audio_mixer_lock(); @@ -301,8 +306,8 @@ esp_err_t audio_stream_queue(audio_stream_handle_t sh, FILE *fp, bool play_now) return ESP_OK; } -esp_err_t audio_stream_stop(audio_stream_handle_t sh) { - audio_stream_t *s = (audio_stream_t*)sh; +esp_err_t audio_stream_stop(audio_stream_handle_t h) { + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); esp_err_t err; @@ -317,24 +322,18 @@ esp_err_t audio_stream_stop(audio_stream_handle_t sh) { return err; } -esp_err_t audio_stream_pause(audio_stream_handle_t sh) { - audio_stream_t *s = (audio_stream_t*)sh; +esp_err_t audio_stream_pause(audio_stream_handle_t h) { + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); return audio_instance_pause(s->instance); } -esp_err_t audio_stream_resume(audio_stream_handle_t sh) { - audio_stream_t *s = (audio_stream_t*)sh; +esp_err_t audio_stream_resume(audio_stream_handle_t h) { + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); return audio_instance_resume(s->instance); } -audio_player_state_t audio_stream_get_state(audio_stream_handle_t sh) { - audio_stream_t *s = (audio_stream_t*)sh; - if (!s) return 
AUDIO_PLAYER_STATE_IDLE; - return audio_instance_get_state(s->instance); -} - audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { ESP_RETURN_ON_FALSE(cfg, NULL, TAG, "null config"); @@ -389,8 +388,8 @@ audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { return (audio_stream_handle_t)stream; } -esp_err_t audio_stream_delete(audio_stream_handle_t sh) { - audio_stream_t *s = (audio_stream_t*)sh; +esp_err_t audio_stream_delete(audio_stream_handle_t h) { + audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); /* remove from stream tracking */ diff --git a/include/audio_mixer.h b/include/audio_mixer.h index 9f172a2..ae84cfd 100644 --- a/include/audio_mixer.h +++ b/include/audio_mixer.h @@ -39,10 +39,10 @@ void audio_mixer_lock(); void audio_mixer_unlock(); /** Add a stream to the mixer's processing list (Thread safe). */ -void audio_mixer_add_stream(audio_stream_handle_t stream); +void audio_mixer_add_stream(audio_stream_handle_t h); /** Remove a stream from the mixer's processing list (Thread safe). */ -void audio_mixer_remove_stream(audio_stream_handle_t stream); +void audio_mixer_remove_stream(audio_stream_handle_t h); /** Query the current mixer output format. Returns zeros if not initialized. */ void audio_mixer_get_output_format(uint32_t *sample_rate, uint32_t *bits_per_sample, uint32_t *channels); diff --git a/include/audio_stream.h b/include/audio_stream.h index 936638e..3554a98 100644 --- a/include/audio_stream.h +++ b/include/audio_stream.h @@ -26,16 +26,16 @@ typedef struct { * Stream API — create/delete logical playback streams and control them. * These streams own their decode task and submit PCM to the mixer. 
*/ -audio_player_state_t audio_stream_get_state(audio_stream_handle_t s); +audio_player_state_t audio_stream_get_state(audio_stream_handle_t h); -esp_err_t audio_stream_play(audio_stream_handle_t s, FILE *fp); -esp_err_t audio_stream_queue(audio_stream_handle_t s, FILE *fp, bool play_now); -esp_err_t audio_stream_stop(audio_stream_handle_t s); -esp_err_t audio_stream_pause(audio_stream_handle_t s); -esp_err_t audio_stream_resume(audio_stream_handle_t s); +esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp); +esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now); +esp_err_t audio_stream_stop(audio_stream_handle_t h); +esp_err_t audio_stream_pause(audio_stream_handle_t h); +esp_err_t audio_stream_resume(audio_stream_handle_t h); audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg); -esp_err_t audio_stream_delete(audio_stream_handle_t sh); +esp_err_t audio_stream_delete(audio_stream_handle_t h); #ifdef __cplusplus } From 26994a504d14983a99ad7f7a06f7c5d44368cd4a Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 10 Dec 2025 15:23:09 -0800 Subject: [PATCH 04/12] Add support for writing pcm audio directly to stream -- introduce audio_stream_type_t enum --- audio_mixer.cpp | 131 +++++++++++++++++++++++++++++++---------- include/audio_stream.h | 22 ++++++- 2 files changed, 118 insertions(+), 35 deletions(-) diff --git a/audio_mixer.cpp b/audio_mixer.cpp index c9fef19..761b6f1 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -21,12 +21,13 @@ static const char *TAG = "audio_mixer"; -static TaskHandle_t s_mixer_task = nullptr; +static TaskHandle_t s_mixer_task = NULL; static audio_mixer_config_t s_cfg = {}; static volatile bool s_running = false; static audio_mixer_cb_t s_mixer_user_cb = NULL; typedef struct audio_stream { + audio_stream_type_t type; char name[16]; audio_instance_handle_t instance; QueueHandle_t file_queue; @@ -54,7 +55,7 @@ static void mixer_task(void *arg) { const size_t bytes = frames * 
s_cfg.i2s_format.channels * sizeof(int16_t); int16_t *mix = (int16_t*)heap_caps_malloc(bytes, MALLOC_CAP_8BIT); - if (!mix) { vTaskDelete(nullptr); return; } + ESP_ERROR_CHECK(mix == NULL); while (s_running) { memset(mix, 0, bytes); @@ -93,7 +94,7 @@ static void mixer_task(void *arg) { } free(mix); - vTaskDelete(nullptr); + vTaskDelete(NULL); } IRAM_ATTR static esp_err_t mixer_stream_write(void *data, size_t size, size_t *bytes_written, uint32_t timeout, void *stream) { @@ -260,14 +261,43 @@ static void stream_purge_ringbuf(audio_stream_t *s) { } audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { - if (!h) return AUDIO_PLAYER_STATE_IDLE; - return audio_instance_get_state((audio_stream_t*)h->instance); + audio_stream_t *s = (audio_stream_t*)h; + if (!s) return AUDIO_PLAYER_STATE_IDLE; + + /* DECODER stream? defer to the instance state */ + if (s->type == AUDIO_STREAM_TYPE_DECODER) { + return audio_instance_get_state(s->instance); + } + + /* RAW stream? check if ringbuf has data */ + if (s->type == AUDIO_STREAM_TYPE_RAW) { + if (!s->pcm_rb) return AUDIO_PLAYER_STATE_IDLE; + + // peek for any bytes + UBaseType_t items_waiting = 0; + vRingbufferGetInfo(s->pcm_rb, NULL, NULL, NULL, NULL, &items_waiting); + + if (items_waiting > 0) + return AUDIO_PLAYER_STATE_PLAYING; + } + + return AUDIO_PLAYER_STATE_IDLE; +} + +audio_stream_type_t audio_stream_get_type(audio_stream_handle_t h) { + if (!h) return AUDIO_STREAM_TYPE_UNKNOWN; + return ((audio_stream_t*)h)->type; } esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp) { audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); + if (s->type != AUDIO_STREAM_TYPE_DECODER) { + ESP_LOGE(TAG, "stream '%s' is not a decoder stream", s->name); + return ESP_ERR_NOT_SUPPORTED; + } + // stop current playback? 
if (audio_instance_get_state(s->instance) == AUDIO_PLAYER_STATE_PLAYING) audio_stream_stop(s); @@ -283,6 +313,11 @@ esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now) { audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); + if (s->type != AUDIO_STREAM_TYPE_DECODER) { + ESP_LOGE(TAG, "stream '%s' is not a decoder stream", s->name); + return ESP_ERR_NOT_SUPPORTED; + } + audio_mixer_lock(); // add to queue @@ -309,15 +344,18 @@ esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now) { esp_err_t audio_stream_stop(audio_stream_handle_t h) { audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); - esp_err_t err; + esp_err_t err = ESP_OK; + + if (s->type == AUDIO_STREAM_TYPE_DECODER) { + // clear any pending queue items + FILE *pending = NULL; + while (xQueueReceive(s->file_queue, &pending, 0) == pdTRUE) { + if (pending) fclose(pending); + } - // clear any pending queue items - FILE *pending = NULL; - while (xQueueReceive(s->file_queue, &pending, 0) == pdTRUE) { - if (pending) fclose(pending); + err = audio_instance_stop(s->instance); } - err = audio_instance_stop(s->instance); stream_purge_ringbuf(s); return err; } @@ -325,19 +363,42 @@ esp_err_t audio_stream_stop(audio_stream_handle_t h) { esp_err_t audio_stream_pause(audio_stream_handle_t h) { audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); + if (s->type != AUDIO_STREAM_TYPE_DECODER) return ESP_ERR_NOT_SUPPORTED; return audio_instance_pause(s->instance); } esp_err_t audio_stream_resume(audio_stream_handle_t h) { audio_stream_t *s = (audio_stream_t*)h; CHECK_STREAM(s); + if (s->type != AUDIO_STREAM_TYPE_DECODER) return ESP_ERR_NOT_SUPPORTED; return audio_instance_resume(s->instance); } +esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t size, uint32_t timeout_ms) { + audio_stream_t *s = (audio_stream_t*)h; + CHECK_STREAM(s); + + if (s->type != AUDIO_STREAM_TYPE_RAW) { + ESP_LOGE(TAG, "stream '%s' is not a raw stream", s->name); + 
return ESP_ERR_NOT_SUPPORTED; + } + + if (!s->pcm_rb) return ESP_ERR_INVALID_STATE; + + // Send data to the ring buffer (BYTEBUF type) + BaseType_t res = xRingbufferSend(s->pcm_rb, data, size, pdMS_TO_TICKS(timeout_ms)); + if (res != pdTRUE) { + ESP_LOGW(TAG, "stream '%s' overflow", s->name); + return ESP_FAIL; + } + return ESP_OK; +} + audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { ESP_RETURN_ON_FALSE(cfg, NULL, TAG, "null config"); audio_stream_t *stream = (audio_stream_t*)calloc(1, sizeof(audio_stream_t)); + stream->type = cfg->type; /* use provided name? */ if (cfg->name[0] != '\0') { @@ -349,37 +410,43 @@ audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { snprintf(stream->name, sizeof(stream->name), "stream_%lu", s_stream_name_counter++); } - /* create player instance */ - audio_player_config_t instance_cfg; - instance_cfg.mute_fn = NULL; - instance_cfg.clk_set_fn = mixer_stream_clk_set_fn; - instance_cfg.coreID = cfg->coreID; - instance_cfg.priority = cfg->priority; - instance_cfg.force_stereo = false; - instance_cfg.write_fn2 = mixer_stream_write; - instance_cfg.write_ctx = stream; - - audio_instance_handle_t h = NULL; - esp_err_t err = audio_instance_new(&h, &instance_cfg); + /* DECODER type stream? 
create a player instance and queue */ + if (cfg->type == AUDIO_STREAM_TYPE_DECODER) { + // new player instance + audio_player_config_t instance_cfg; + instance_cfg.mute_fn = NULL; + instance_cfg.clk_set_fn = mixer_stream_clk_set_fn; + instance_cfg.coreID = cfg->coreID; + instance_cfg.priority = cfg->priority; + instance_cfg.force_stereo = false; + instance_cfg.write_fn2 = mixer_stream_write; + instance_cfg.write_ctx = stream; + + audio_instance_handle_t h = NULL; + esp_err_t err = audio_instance_new(&h, &instance_cfg); + + if (err != ESP_OK) { + free(stream); + return NULL; + } + stream->instance = h; - if (err != ESP_OK) { - free(stream); - return NULL; + // create file queue & attach event handler + stream->file_queue = xQueueCreate(4, sizeof(FILE*)); + audio_instance_callback_register(stream->instance, mixer_stream_event_handler, stream); } - stream->instance = h; - /* create stream's ringbuffer, file queue */ - stream->file_queue = xQueueCreate(4, sizeof(FILE*)); + /* always create a ringbuffer */ stream->pcm_rb = xRingbufferCreate(16 * 1024, RINGBUF_TYPE_BYTEBUF); - if (!stream->file_queue || !stream->pcm_rb) { + + if (!stream->pcm_rb || (cfg->type == AUDIO_STREAM_TYPE_DECODER && !stream->file_queue)) { if (stream->file_queue) vQueueDelete(stream->file_queue); if (stream->pcm_rb) vRingbufferDelete(stream->pcm_rb); + if (stream->instance) audio_instance_delete(stream->instance); free(stream); return NULL; } - audio_instance_callback_register(stream->instance, mixer_stream_event_handler, stream); - /* add to stream tracking */ audio_mixer_add_stream(stream); diff --git a/include/audio_stream.h b/include/audio_stream.h index 3554a98..3106aa9 100644 --- a/include/audio_stream.h +++ b/include/audio_stream.h @@ -10,13 +10,21 @@ typedef struct audio_stream* audio_stream_handle_t; #define CHECK_STREAM(s) \ ESP_RETURN_ON_FALSE(s != NULL, ESP_ERR_INVALID_ARG, "audio_stream", "stream is NULL") +typedef enum { + AUDIO_STREAM_TYPE_UNKNOWN = 0, + AUDIO_STREAM_TYPE_DECODER, 
+ AUDIO_STREAM_TYPE_RAW +} audio_stream_type_t; + typedef struct { - char name[16]; /*< Optional: Name of the stream (e.g. "sfx", "bgm"). Auto-generated if empty. */ - UBaseType_t priority; /*< FreeRTOS task priority */ - BaseType_t coreID; /*< ESP32 core ID */ + audio_stream_type_t type; /*< Type of stream */ + char name[16]; /*< Optional: Name of the stream (e.g. "sfx", "bgm"). Auto-generated if empty. */ + UBaseType_t priority; /*< FreeRTOS task priority */ + BaseType_t coreID; /*< ESP32 core ID */ } audio_stream_config_t; #define DEFAULT_AUDIO_STREAM_CONFIG(_name) { \ + .type = AUDIO_STREAM_TYPE_DECODER, \ .name = _name, \ .priority = tskIDLE_PRIORITY + 1, \ .coreID = tskNO_AFFINITY \ @@ -26,7 +34,9 @@ typedef struct { * Stream API — create/delete logical playback streams and control them. * These streams own their decode task and submit PCM to the mixer. */ + audio_player_state_t audio_stream_get_state(audio_stream_handle_t h); +audio_stream_type_t audio_stream_get_type(audio_stream_handle_t h); esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp); esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now); @@ -34,6 +44,12 @@ esp_err_t audio_stream_stop(audio_stream_handle_t h); esp_err_t audio_stream_pause(audio_stream_handle_t h); esp_err_t audio_stream_resume(audio_stream_handle_t h); +/** + * Direct write raw PCM data to a stream (For RAW streams). + * Data format must match the mixer configuration (e.g. 44.1kHz, 16-bit, mono). 
+ */ +esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t size, uint32_t timeout_ms); + audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg); esp_err_t audio_stream_delete(audio_stream_handle_t h); From 70802a28949b29deadd36d2153952a69d10ba67a Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Fri, 12 Dec 2025 09:54:45 -0800 Subject: [PATCH 05/12] mixer: add is_initialized method --- audio_mixer.cpp | 7 ++++++- include/audio_mixer.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/audio_mixer.cpp b/audio_mixer.cpp index 761b6f1..ae80212 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -229,11 +229,16 @@ esp_err_t audio_mixer_init(audio_mixer_config_t *cfg) { return ESP_OK; } +bool audio_mixer_is_initialized() { + return s_mixer_task != NULL; +} + void audio_mixer_deinit() { if (!s_running) return; - s_running = false; // Task will exit on next loop; no join primitive in FreeRTOS here. + s_running = false; + s_mixer_task = NULL; // Clean up any remaining channels (safe teardown) audio_mixer_lock(); diff --git a/include/audio_mixer.h b/include/audio_mixer.h index ae84cfd..ad6484d 100644 --- a/include/audio_mixer.h +++ b/include/audio_mixer.h @@ -49,6 +49,8 @@ void audio_mixer_get_output_format(uint32_t *sample_rate, uint32_t *bits_per_sam void audio_mixer_callback_register(audio_mixer_cb_t cb); +bool audio_mixer_is_initialized(); + /** Initialize the mixer with fixed output format and start the mixer task. */ esp_err_t audio_mixer_init(audio_mixer_config_t *cfg); From 7819aee5b264b8fac81f8ca81aea28b324637354 Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Fri, 12 Dec 2025 13:35:23 -0800 Subject: [PATCH 06/12] stream: raw streams need state reporting and separate from mixer task, for now adding a shim to set state by event on a raw stream. 
Callback is fired upon setting (on caller's task) --- audio_mixer.cpp | 69 ++++++++++++++++++++++++++++++++++++++----- audio_player.cpp | 2 +- include/audio_stream.h | 1 + 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/audio_mixer.cpp b/audio_mixer.cpp index ae80212..16a6762 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -32,6 +32,7 @@ typedef struct audio_stream { audio_instance_handle_t instance; QueueHandle_t file_queue; RingbufHandle_t pcm_rb; + audio_player_state_t state; // used only for RAW stream types. SLIST_ENTRY(audio_stream) next; } audio_stream_t; @@ -255,6 +256,26 @@ void audio_mixer_deinit() { /* ================= Stream (mixer channel) API ================= */ +extern const char* event_to_string(audio_player_callback_event_t event); // from audio_player.c +extern audio_player_callback_event_t state_to_event(audio_player_state_t state); // from audio_player.c + + +static void dispatch_callback(audio_stream_t *s, audio_player_callback_event_t event) { + ESP_LOGD(TAG, "event '%s'", event_to_string(event)); + +#if CONFIG_IDF_TARGET_ARCH_XTENSA + if (esp_ptr_executable(reinterpret_cast<void*>(s_mixer_user_cb))) { +#else + if (reinterpret_cast<void*>(s_mixer_user_cb)) { +#endif + audio_player_cb_ctx_t ctx = { + .audio_event = event, + .user_ctx = s, + }; + s_mixer_user_cb(&ctx); + } +} + static void stream_purge_ringbuf(audio_stream_t *s) { if (!s || !s->pcm_rb) return; @@ -265,6 +286,37 @@ static void stream_purge_ringbuf(audio_stream_t *s) { } } +esp_err_t audio_stream_raw_send_event(audio_stream_handle_t h, audio_player_callback_event_t event) { + audio_stream_t *s = (audio_stream_t*)h; + CHECK_STREAM(s); + + if (s->type != AUDIO_STREAM_TYPE_RAW) return ESP_ERR_NOT_SUPPORTED; + + // NOTE: essentially made event_to_state() + audio_player_state_t new_state = AUDIO_PLAYER_STATE_IDLE; + switch (event) { + case AUDIO_PLAYER_CALLBACK_EVENT_IDLE: + new_state = AUDIO_PLAYER_STATE_IDLE; + break; + case AUDIO_PLAYER_CALLBACK_EVENT_PLAYING: + case 
AUDIO_PLAYER_CALLBACK_EVENT_COMPLETED_PLAYING_NEXT: + new_state = AUDIO_PLAYER_STATE_PLAYING; + break; + case AUDIO_PLAYER_CALLBACK_EVENT_SHUTDOWN: + new_state = AUDIO_PLAYER_STATE_SHUTDOWN; + break; + default: + new_state = AUDIO_PLAYER_STATE_IDLE; + break; + } + + if(s->state != new_state) { + s->state = new_state; + dispatch_callback(s, event); + } + return ESP_OK; +} + audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { audio_stream_t *s = (audio_stream_t*)h; if (!s) return AUDIO_PLAYER_STATE_IDLE; @@ -276,14 +328,15 @@ audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { /* RAW stream? check if ringbuf has data */ if (s->type == AUDIO_STREAM_TYPE_RAW) { - if (!s->pcm_rb) return AUDIO_PLAYER_STATE_IDLE; - - // peek for any bytes - UBaseType_t items_waiting = 0; - vRingbufferGetInfo(s->pcm_rb, NULL, NULL, NULL, NULL, &items_waiting); - - if (items_waiting > 0) - return AUDIO_PLAYER_STATE_PLAYING; + // if (!s->pcm_rb) return AUDIO_PLAYER_STATE_IDLE; + // + // // peek for any bytes + // UBaseType_t items_waiting = 0; + // vRingbufferGetInfo(s->pcm_rb, NULL, NULL, NULL, NULL, &items_waiting); + // + // if (items_waiting > 0) + // return AUDIO_PLAYER_STATE_PLAYING; + return s->state; } return AUDIO_PLAYER_STATE_IDLE; diff --git a/audio_player.cpp b/audio_player.cpp index 9736f34..453e06f 100644 --- a/audio_player.cpp +++ b/audio_player.cpp @@ -146,7 +146,7 @@ const char* event_to_string(audio_player_callback_event_t event) { return "unknown event"; } -static audio_player_callback_event_t state_to_event(audio_player_state_t state) { +audio_player_callback_event_t state_to_event(audio_player_state_t state) { audio_player_callback_event_t event = AUDIO_PLAYER_CALLBACK_EVENT_UNKNOWN; switch(state) { diff --git a/include/audio_stream.h b/include/audio_stream.h index 3106aa9..6306d26 100644 --- a/include/audio_stream.h +++ b/include/audio_stream.h @@ -49,6 +49,7 @@ esp_err_t audio_stream_resume(audio_stream_handle_t h); * Data format 
must match the mixer configuration (e.g. 44.1kHz, 16-bit, mono). */ esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t size, uint32_t timeout_ms); +esp_err_t audio_stream_raw_send_event(audio_stream_handle_t h, audio_player_callback_event_t event); audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg); esp_err_t audio_stream_delete(audio_stream_handle_t h); From 80cc4aa54f9ea0804b5c49f2abce099099216c8c Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 21 Jan 2026 15:30:33 -0800 Subject: [PATCH 07/12] stream: fix include --- include/audio_stream.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/audio_stream.h b/include/audio_stream.h index 6306d26..0bfea0d 100644 --- a/include/audio_stream.h +++ b/include/audio_stream.h @@ -1,5 +1,7 @@ #pragma once +#include "audio_player.h" + #ifdef __cplusplus extern "C" { #endif From fa3066223c4a98fd3a7738820a22ad4e0816944f Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 21 Jan 2026 15:33:27 -0800 Subject: [PATCH 08/12] cmake: extend support for idf 5.3-6.0 --- CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3c6e43..a6e3644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,13 @@ set(includes "include" ) -set(requires "") +set(requires) + +if("${IDF_VERSION_MAJOR}.${IDF_VERSION_MINOR}" VERSION_GREATER_EQUAL "5.3") + list(APPEND requires esp_driver_i2s esp_ringbuf) +else() + list(APPEND requires driver) +endif() if(CONFIG_AUDIO_PLAYER_ENABLE_MP3) list(APPEND srcs "audio_mp3.cpp") @@ -22,7 +28,6 @@ if(CONFIG_AUDIO_PLAYER_ENABLE_WAV) endif() idf_component_register(SRCS "${srcs}" - REQUIRES "${requires}" INCLUDE_DIRS "${includes}" - REQUIRES driver + REQUIRES "${requires}" ) From 1fe6cf4f7d61a5a4b93faa24ed79cd42bc0f239b Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Wed, 21 Jan 2026 18:40:40 -0800 Subject: [PATCH 09/12] Add audio mixer tests, add docstrings for audio 
mixer/stream headers --- audio_mixer.cpp | 2 + include/audio_mixer.h | 86 ++++++++++-- include/audio_stream.h | 151 ++++++++++++++++++-- test/CMakeLists.txt | 2 +- test/audio_mixer_test.c | 300 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 513 insertions(+), 28 deletions(-) create mode 100644 test/audio_mixer_test.c diff --git a/audio_mixer.cpp b/audio_mixer.cpp index 16a6762..de1e1cf 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -328,6 +328,8 @@ audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { /* RAW stream? check if ringbuf has data */ if (s->type == AUDIO_STREAM_TYPE_RAW) { + // TODO: determine if checking ringbuf is valuable vs. having a stream emit its own state + // using the method audio_stream_raw_send_event(). // if (!s->pcm_rb) return AUDIO_PLAYER_STATE_IDLE; // // // peek for any bytes diff --git a/include/audio_mixer.h b/include/audio_mixer.h index ad6484d..a66ce15 100644 --- a/include/audio_mixer.h +++ b/include/audio_mixer.h @@ -1,6 +1,6 @@ /** * @file audio_mixer.h - * Mixer interface for esp-audio-player. Provides a global mixer that accepts + * @brief Mixer interface for esp-audio-player. Provides a global mixer that accepts * PCM from multiple sources via FreeRTOS ring buffers and writes mixed PCM to I2S. 
*/ #pragma once @@ -17,44 +17,100 @@ extern "C" { #endif +/** + * @brief Configuration structure for the audio mixer + */ typedef struct { - audio_player_mute_fn mute_fn; - audio_reconfig_std_clock clk_set_fn; - audio_player_write_fn write_fn; - UBaseType_t priority; /*< FreeRTOS task priority */ - BaseType_t coreID; /*< ESP32 core ID */ + audio_player_mute_fn mute_fn; /**< Function to mute/unmute audio */ + audio_reconfig_std_clock clk_set_fn; /**< Function to reconfigure I2S clock */ + audio_player_write_fn write_fn; /**< Function to write PCM data to I2S */ + UBaseType_t priority; /**< FreeRTOS task priority for the mixer task */ + BaseType_t coreID; /**< ESP32 core ID for the mixer task */ - format i2s_format; + format i2s_format; /**< Fixed output format for the mixer */ } audio_mixer_config_t; +/** + * @brief Mixer callback function type + */ typedef audio_player_cb_t audio_mixer_cb_t; - +/** + * @brief Get the number of active streams in the mixer + * + * @return Number of active streams + */ uint8_t audio_mixer_stream_count(); -/** Lock the mixer's main mutex. Call this before modifying stream state (busy flags, queues). */ +/** + * @brief Lock the mixer's main mutex + * + * Call this before modifying stream state (busy flags, queues). + */ void audio_mixer_lock(); -/** Unlock the mixer's main mutex. */ +/** + * @brief Unlock the mixer's main mutex + */ void audio_mixer_unlock(); -/** Add a stream to the mixer's processing list (Thread safe). */ +/** + * @brief Add a stream to the mixer's processing list + * + * This function is thread-safe. + * + * @param h Handle of the stream to add + */ void audio_mixer_add_stream(audio_stream_handle_t h); -/** Remove a stream from the mixer's processing list (Thread safe). */ +/** + * @brief Remove a stream from the mixer's processing list + * + * This function is thread-safe. 
+ * + * @param h Handle of the stream to remove + */ void audio_mixer_remove_stream(audio_stream_handle_t h); -/** Query the current mixer output format. Returns zeros if not initialized. */ +/** + * @brief Query the current mixer output format + * + * Returns zeros if the mixer is not initialized. + * + * @param[out] sample_rate Pointer to store the sample rate + * @param[out] bits_per_sample Pointer to store the bits per sample + * @param[out] channels Pointer to store the number of channels + */ void audio_mixer_get_output_format(uint32_t *sample_rate, uint32_t *bits_per_sample, uint32_t *channels); +/** + * @brief Register a global callback for mixer events + * + * @param cb Callback function to register + */ void audio_mixer_callback_register(audio_mixer_cb_t cb); +/** + * @brief Check if the mixer is initialized + * + * @return true if initialized, false otherwise + */ bool audio_mixer_is_initialized(); -/** Initialize the mixer with fixed output format and start the mixer task. */ +/** + * @brief Initialize the mixer and start the mixer task + * + * @param cfg Pointer to the mixer configuration structure + * @return + * - ESP_OK: Success + * - ESP_ERR_INVALID_ARG: Invalid configuration + * - Others: Fail + */ esp_err_t audio_mixer_init(audio_mixer_config_t *cfg); -/** Deinitialize the mixer task. */ +/** + * @brief Deinitialize the mixer and stop the mixer task + */ void audio_mixer_deinit(); #ifdef __cplusplus diff --git a/include/audio_stream.h b/include/audio_stream.h index 0bfea0d..86fe56b 100644 --- a/include/audio_stream.h +++ b/include/audio_stream.h @@ -1,3 +1,8 @@ +/** + * @file audio_stream.h + * @brief Stream API — create/delete logical playback streams and control them. + * These streams own their decode task and submit PCM to the mixer. 
+ */ #pragma once #include "audio_player.h" @@ -7,24 +12,41 @@ extern "C" { #endif struct audio_stream; +/** + * @brief Audio stream handle + */ typedef struct audio_stream* audio_stream_handle_t; +/** + * @brief Macro to check if a stream handle is valid + */ #define CHECK_STREAM(s) \ ESP_RETURN_ON_FALSE(s != NULL, ESP_ERR_INVALID_ARG, "audio_stream", "stream is NULL") +/** + * @brief Audio stream types + */ typedef enum { - AUDIO_STREAM_TYPE_UNKNOWN = 0, - AUDIO_STREAM_TYPE_DECODER, - AUDIO_STREAM_TYPE_RAW + AUDIO_STREAM_TYPE_UNKNOWN = 0, /**< Unknown stream type */ + AUDIO_STREAM_TYPE_DECODER, /**< Stream that decodes audio (e.g., MP3, WAV) */ + AUDIO_STREAM_TYPE_RAW /**< Stream that accepts raw PCM data */ } audio_stream_type_t; +/** + * @brief Configuration structure for an audio stream + */ typedef struct { - audio_stream_type_t type; /*< Type of stream */ - char name[16]; /*< Optional: Name of the stream (e.g. "sfx", "bgm"). Auto-generated if empty. */ - UBaseType_t priority; /*< FreeRTOS task priority */ - BaseType_t coreID; /*< ESP32 core ID */ + audio_stream_type_t type; /**< Type of stream */ + char name[16]; /**< Optional: Name of the stream (e.g. "sfx", "bgm"). Auto-generated if empty. */ + UBaseType_t priority; /**< FreeRTOS task priority for the stream's decoder task (if applicable) */ + BaseType_t coreID; /**< ESP32 core ID for the stream's decoder task (if applicable) */ } audio_stream_config_t; +/** + * @brief Default configuration for an audio decoder stream + * + * @param _name Name of the stream + */ #define DEFAULT_AUDIO_STREAM_CONFIG(_name) { \ .type = AUDIO_STREAM_TYPE_DECODER, \ .name = _name, \ @@ -33,27 +55,132 @@ typedef struct { } /** - * Stream API — create/delete logical playback streams and control them. - * These streams own their decode task and submit PCM to the mixer. 
+ * @brief Get the current state of a stream + * + * @param h Handle of the stream + * @return Current audio_player_state_t of the stream */ - audio_player_state_t audio_stream_get_state(audio_stream_handle_t h); + +/** + * @brief Get the type of a stream + * + * @param h Handle of the stream + * @return audio_stream_type_t of the stream + */ audio_stream_type_t audio_stream_get_type(audio_stream_handle_t h); +/** + * @brief Play an audio file on a stream + * + * Only supported for DECODER type streams. + * + * @param h Handle of the stream + * @param fp File pointer to the audio file + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a decoder stream + * - Others: Fail + */ esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp); + +/** + * @brief Queue an audio file to be played on a stream + * + * Only supported for DECODER type streams. + * + * @param h Handle of the stream + * @param fp File pointer to the audio file + * @param play_now If true, start playing immediately (interrupting current playback) + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a decoder stream + * - Others: Fail + */ esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now); + +/** + * @brief Stop playback on a stream + * + * @param h Handle of the stream + * @return + * - ESP_OK: Success + * - Others: Fail + */ esp_err_t audio_stream_stop(audio_stream_handle_t h); + +/** + * @brief Pause playback on a stream + * + * Only supported for DECODER type streams. + * + * @param h Handle of the stream + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a decoder stream + * - Others: Fail + */ esp_err_t audio_stream_pause(audio_stream_handle_t h); + +/** + * @brief Resume playback on a stream + * + * Only supported for DECODER type streams. 
+ * + * @param h Handle of the stream + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a decoder stream + * - Others: Fail + */ esp_err_t audio_stream_resume(audio_stream_handle_t h); /** - * Direct write raw PCM data to a stream (For RAW streams). - * Data format must match the mixer configuration (e.g. 44.1kHz, 16-bit, mono). + * @brief Direct write raw PCM data to a stream + * + * Only supported for RAW type streams. + * Data format must match the mixer configuration (e.g. 44.1kHz, 16-bit, mono/stereo). + * + * @param h Handle of the stream + * @param data Pointer to the PCM data + * @param size Size of the data in bytes + * @param timeout_ms Timeout in milliseconds to wait for space in the stream's buffer + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a raw stream + * - Others: Fail */ esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t size, uint32_t timeout_ms); + +/** + * @brief Send an event to a raw stream's callback + * + * Allows manual state management for raw streams. 
+ * + * @param h Handle of the stream + * @param event Event to send + * @return + * - ESP_OK: Success + * - ESP_ERR_NOT_SUPPORTED: Stream is not a raw stream + */ esp_err_t audio_stream_raw_send_event(audio_stream_handle_t h, audio_player_callback_event_t event); +/** + * @brief Create a new audio stream + * + * @param cfg Pointer to the stream configuration structure + * @return Handle to the new stream, or NULL if failed + */ audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg); + +/** + * @brief Delete an audio stream and free its resources + * + * @param h Handle of the stream to delete + * @return + * - ESP_OK: Success + * - Others: Fail + */ esp_err_t audio_stream_delete(audio_stream_handle_t h); #ifdef __cplusplus diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 640d071..fb7bbcb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -idf_component_register(SRC_DIRS "." +idf_component_register(SRCS "audio_player_test.c" "audio_mixer_test.c" PRIV_INCLUDE_DIRS "." 
PRIV_REQUIRES unity test_utils audio_player EMBED_TXTFILES gs-16b-1c-44100hz.mp3) diff --git a/test/audio_mixer_test.c b/test/audio_mixer_test.c new file mode 100644 index 0000000..4b5ccdc --- /dev/null +++ b/test/audio_mixer_test.c @@ -0,0 +1,300 @@ +#include "esp_log.h" +#include "esp_check.h" +#include "unity.h" +#include "audio_player.h" +#include "audio_mixer.h" +#include "audio_stream.h" +#include "driver/gpio.h" +#include "test_utils.h" +#include "freertos/semphr.h" + +static const char *TAG = "AUDIO MIXER TEST"; + +#define CONFIG_BSP_I2S_NUM 1 + +/* Audio Pins (same as in audio_player_test.c) */ +#define BSP_I2S_SCLK (GPIO_NUM_17) +#define BSP_I2S_MCLK (GPIO_NUM_2) +#define BSP_I2S_LCLK (GPIO_NUM_47) +#define BSP_I2S_DOUT (GPIO_NUM_15) +#define BSP_I2S_DSIN (GPIO_NUM_16) +#define BSP_POWER_AMP_IO (GPIO_NUM_46) + +#define BSP_I2S_GPIO_CFG \ + { \ + .mclk = BSP_I2S_MCLK, \ + .bclk = BSP_I2S_SCLK, \ + .ws = BSP_I2S_LCLK, \ + .dout = BSP_I2S_DOUT, \ + .din = BSP_I2S_DSIN, \ + .invert_flags = { \ + .mclk_inv = false, \ + .bclk_inv = false, \ + .ws_inv = false, \ + }, \ + } + +static i2s_chan_handle_t i2s_tx_chan; +static i2s_chan_handle_t i2s_rx_chan; + +static esp_err_t bsp_i2s_write(void * audio_buffer, size_t len, size_t *bytes_written, uint32_t timeout_ms) +{ + return i2s_channel_write(i2s_tx_chan, (char *)audio_buffer, len, bytes_written, timeout_ms); +} + +static esp_err_t bsp_i2s_reconfig_clk(uint32_t rate, uint32_t bits_cfg, i2s_slot_mode_t ch) +{ + i2s_std_config_t std_cfg = { + .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(rate), + .slot_cfg = I2S_STD_PHILIP_SLOT_DEFAULT_CONFIG((i2s_data_bit_width_t)bits_cfg, (i2s_slot_mode_t)ch), + .gpio_cfg = BSP_I2S_GPIO_CFG, + }; + + i2s_channel_disable(i2s_tx_chan); + i2s_channel_reconfig_std_clock(i2s_tx_chan, &std_cfg.clk_cfg); + i2s_channel_reconfig_std_slot(i2s_tx_chan, &std_cfg.slot_cfg); + return i2s_channel_enable(i2s_tx_chan); +} + +static esp_err_t bsp_audio_init(const i2s_std_config_t *i2s_config) +{ + 
i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(CONFIG_BSP_I2S_NUM, I2S_ROLE_MASTER); + chan_cfg.auto_clear = true; + ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, &i2s_tx_chan, &i2s_rx_chan)); + ESP_ERROR_CHECK(i2s_channel_init_std_mode(i2s_tx_chan, i2s_config)); + ESP_ERROR_CHECK(i2s_channel_enable(i2s_tx_chan)); + return ESP_OK; +} + +static void bsp_audio_deinit() +{ + i2s_channel_disable(i2s_tx_chan); + i2s_del_channel(i2s_tx_chan); + i2s_del_channel(i2s_rx_chan); +} + +TEST_CASE("audio mixer can be initialized and deinitialized", "[audio mixer]") +{ + i2s_std_config_t std_cfg = { + .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100), + .slot_cfg = I2S_STD_PHILIP_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO), + .gpio_cfg = BSP_I2S_GPIO_CFG, + }; + TEST_ESP_OK(bsp_audio_init(&std_cfg)); + + audio_mixer_config_t mixer_cfg = { + .write_fn = bsp_i2s_write, + .clk_set_fn = bsp_i2s_reconfig_clk, + .priority = 5, + .coreID = 0, + .i2s_format = { + .sample_rate = 44100, + .bits_per_sample = 16, + .channels = 2 + } + }; + + TEST_ESP_OK(audio_mixer_init(&mixer_cfg)); + TEST_ASSERT_TRUE(audio_mixer_is_initialized()); + + audio_mixer_deinit(); + TEST_ASSERT_FALSE(audio_mixer_is_initialized()); + + bsp_audio_deinit(); +} + +TEST_CASE("audio streams can be created and deleted", "[audio mixer]") +{ + i2s_std_config_t std_cfg = { + .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100), + .slot_cfg = I2S_STD_PHILIP_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO), + .gpio_cfg = BSP_I2S_GPIO_CFG, + }; + TEST_ESP_OK(bsp_audio_init(&std_cfg)); + + audio_mixer_config_t mixer_cfg = { + .write_fn = bsp_i2s_write, + .clk_set_fn = bsp_i2s_reconfig_clk, + .priority = 5, + .coreID = 0, + .i2s_format = { + .sample_rate = 44100, + .bits_per_sample = 16, + .channels = 2 + } + }; + TEST_ESP_OK(audio_mixer_init(&mixer_cfg)); + + // Create a decoder stream + audio_stream_config_t stream_cfg = DEFAULT_AUDIO_STREAM_CONFIG("decoder"); + audio_stream_handle_t 
decoder_stream = audio_stream_new(&stream_cfg); + TEST_ASSERT_NOT_NULL(decoder_stream); + TEST_ASSERT_EQUAL(AUDIO_STREAM_TYPE_DECODER, audio_stream_get_type(decoder_stream)); + TEST_ASSERT_EQUAL(1, audio_mixer_stream_count()); + + // Create a raw stream + audio_stream_config_t raw_cfg = { + .type = AUDIO_STREAM_TYPE_RAW, + .name = "raw", + .priority = 5, + .coreID = 0 + }; + audio_stream_handle_t raw_stream = audio_stream_new(&raw_cfg); + TEST_ASSERT_NOT_NULL(raw_stream); + TEST_ASSERT_EQUAL(AUDIO_STREAM_TYPE_RAW, audio_stream_get_type(raw_stream)); + TEST_ASSERT_EQUAL(2, audio_mixer_stream_count()); + + // Delete streams + TEST_ESP_OK(audio_stream_delete(decoder_stream)); + TEST_ASSERT_EQUAL(1, audio_mixer_stream_count()); + + TEST_ESP_OK(audio_stream_delete(raw_stream)); + TEST_ASSERT_EQUAL(0, audio_mixer_stream_count()); + + audio_mixer_deinit(); + bsp_audio_deinit(); +} + +TEST_CASE("audio mixer handles multiple streams and output format", "[audio mixer]") +{ + i2s_std_config_t std_cfg = { + .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100), + .slot_cfg = I2S_STD_PHILIP_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO), + .gpio_cfg = BSP_I2S_GPIO_CFG, + }; + TEST_ESP_OK(bsp_audio_init(&std_cfg)); + + audio_mixer_config_t mixer_cfg = { + .write_fn = bsp_i2s_write, + .clk_set_fn = bsp_i2s_reconfig_clk, + .priority = 5, + .coreID = 0, + .i2s_format = { + .sample_rate = 48000, + .bits_per_sample = 16, + .channels = 2 + } + }; + TEST_ESP_OK(audio_mixer_init(&mixer_cfg)); + + uint32_t rate, bits, ch; + audio_mixer_get_output_format(&rate, &bits, &ch); + TEST_ASSERT_EQUAL(48000, rate); + TEST_ASSERT_EQUAL(16, bits); + TEST_ASSERT_EQUAL(2, ch); + + audio_stream_config_t s1_cfg = DEFAULT_AUDIO_STREAM_CONFIG("s1"); + audio_stream_handle_t s1 = audio_stream_new(&s1_cfg); + audio_stream_config_t s2_cfg = DEFAULT_AUDIO_STREAM_CONFIG("s2"); + audio_stream_handle_t s2 = audio_stream_new(&s2_cfg); + + TEST_ASSERT_EQUAL(2, audio_mixer_stream_count()); + + 
audio_mixer_deinit(); // Should also clean up streams + TEST_ASSERT_EQUAL(0, audio_mixer_stream_count()); + + bsp_audio_deinit(); +} + +TEST_CASE("audio stream raw can send events", "[audio mixer]") +{ + audio_stream_config_t raw_cfg = { + .type = AUDIO_STREAM_TYPE_RAW, + .name = "raw_event", + .priority = 5, + .coreID = 0 + }; + audio_stream_handle_t raw_stream = audio_stream_new(&raw_cfg); + TEST_ASSERT_NOT_NULL(raw_stream); + + TEST_ASSERT_EQUAL(AUDIO_PLAYER_STATE_IDLE, audio_stream_get_state(raw_stream)); + + TEST_ESP_OK(audio_stream_raw_send_event(raw_stream, AUDIO_PLAYER_CALLBACK_EVENT_PLAYING)); + TEST_ASSERT_EQUAL(AUDIO_PLAYER_STATE_PLAYING, audio_stream_get_state(raw_stream)); + + TEST_ESP_OK(audio_stream_raw_send_event(raw_stream, AUDIO_PLAYER_CALLBACK_EVENT_IDLE)); + TEST_ASSERT_EQUAL(AUDIO_PLAYER_STATE_IDLE, audio_stream_get_state(raw_stream)); + + TEST_ESP_OK(audio_stream_delete(raw_stream)); +} + +static QueueHandle_t mixer_event_queue; + +static void mixer_callback(audio_player_cb_ctx_t *ctx) +{ + if (ctx->audio_event == AUDIO_PLAYER_CALLBACK_EVENT_PLAYING || + ctx->audio_event == AUDIO_PLAYER_CALLBACK_EVENT_IDLE) { + xQueueSend(mixer_event_queue, &(ctx->audio_event), 0); + } +} + +TEST_CASE("audio mixer plays sample mp3 on multiple streams", "[audio mixer]") +{ + i2s_std_config_t std_cfg = { + .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100), + .slot_cfg = I2S_STD_PHILIP_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO), + .gpio_cfg = BSP_I2S_GPIO_CFG, + }; + TEST_ESP_OK(bsp_audio_init(&std_cfg)); + + audio_mixer_config_t mixer_cfg = { + .write_fn = bsp_i2s_write, + .clk_set_fn = bsp_i2s_reconfig_clk, + .priority = 5, + .coreID = 0, + .i2s_format = { + .sample_rate = 44100, + .bits_per_sample = 16, + .channels = 2 + } + }; + TEST_ESP_OK(audio_mixer_init(&mixer_cfg)); + + mixer_event_queue = xQueueCreate(10, sizeof(audio_player_callback_event_t)); + TEST_ASSERT_NOT_NULL(mixer_event_queue); + 
audio_mixer_callback_register(mixer_callback); + + extern const char mp3_start[] asm("_binary_gs_16b_1c_44100hz_mp3_start"); + extern const char mp3_end[] asm("_binary_gs_16b_1c_44100hz_mp3_end"); + size_t mp3_size = (mp3_end - mp3_start) - 1; + + // Create two streams + audio_stream_config_t s1_cfg = DEFAULT_AUDIO_STREAM_CONFIG("stream1"); + audio_stream_handle_t s1 = audio_stream_new(&s1_cfg); + TEST_ASSERT_NOT_NULL(s1); + + audio_stream_config_t s2_cfg = DEFAULT_AUDIO_STREAM_CONFIG("stream2"); + audio_stream_handle_t s2 = audio_stream_new(&s2_cfg); + TEST_ASSERT_NOT_NULL(s2); + + // Play on stream 1 + FILE *f1 = fmemopen((void*)mp3_start, mp3_size, "rb"); + TEST_ASSERT_NOT_NULL(f1); + TEST_ESP_OK(audio_stream_play(s1, f1)); + + // Play on stream 2 + FILE *f2 = fmemopen((void*)mp3_start, mp3_size, "rb"); + TEST_ASSERT_NOT_NULL(f2); + TEST_ESP_OK(audio_stream_play(s2, f2)); + + audio_player_callback_event_t event; + // We expect two PLAYING events (one for each stream) + int playing_count = 0; + while (playing_count < 2 && xQueueReceive(mixer_event_queue, &event, pdMS_TO_TICKS(500)) == pdPASS) { + if (event == AUDIO_PLAYER_CALLBACK_EVENT_PLAYING) { + playing_count++; + } + } + TEST_ASSERT_EQUAL(2, playing_count); + + // Let it play for a few seconds + vTaskDelay(pdMS_TO_TICKS(2000)); + + // Stop streams + TEST_ESP_OK(audio_stream_stop(s1)); + TEST_ESP_OK(audio_stream_stop(s2)); + + audio_mixer_deinit(); + vQueueDelete(mixer_event_queue); + bsp_audio_deinit(); +} From cf80913faa009b971c0becaf275a2f54f6b1dd1b Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Thu, 22 Jan 2026 09:35:16 -0800 Subject: [PATCH 10/12] Address cppcheck errors --- audio_instance.h | 3 +++ audio_mixer.cpp | 42 +++++++++++++++++++---------------------- audio_player.cpp | 18 +++++++++--------- test/audio_mixer_test.c | 5 ++++- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/audio_instance.h b/audio_instance.h index b532b05..76bae1c 100644 --- a/audio_instance.h +++ 
b/audio_instance.h @@ -16,6 +16,9 @@ typedef void* audio_instance_handle_t; #define CHECK_INSTANCE(i) \ ESP_RETURN_ON_FALSE(i != NULL, ESP_ERR_INVALID_ARG, "audio_instance", "instance is NULL") +const char* event_to_string(audio_player_callback_event_t event); +audio_player_callback_event_t state_to_event(audio_player_state_t state); + audio_player_state_t audio_instance_get_state(audio_instance_handle_t h); esp_err_t audio_instance_callback_register(audio_instance_handle_t h, audio_player_cb_t call_back, void *user_ctx); diff --git a/audio_mixer.cpp b/audio_mixer.cpp index de1e1cf..a836b1e 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -55,7 +55,7 @@ static void mixer_task(void *arg) { const size_t frames = 512; // tune as needed const size_t bytes = frames * s_cfg.i2s_format.channels * sizeof(int16_t); - int16_t *mix = (int16_t*)heap_caps_malloc(bytes, MALLOC_CAP_8BIT); + int16_t *mix = static_cast<int16_t*>(heap_caps_malloc(bytes, MALLOC_CAP_8BIT)); ESP_ERROR_CHECK(mix == NULL); while (s_running) { @@ -71,7 +71,7 @@ static void mixer_task(void *arg) { void *item = xRingbufferReceiveUpTo(stream->pcm_rb, &received_bytes, pdMS_TO_TICKS(5), bytes); if (item && received_bytes > 0) { - int16_t *samples = (int16_t*)item; + int16_t *samples = static_cast<int16_t*>(item); size_t count = received_bytes / sizeof(int16_t); for (size_t k = 0; k < count; ++k) { @@ -99,7 +99,7 @@ static void mixer_task(void *arg) { } IRAM_ATTR static esp_err_t mixer_stream_write(void *data, size_t size, size_t *bytes_written, uint32_t timeout, void *stream) { - audio_stream_t *s = (audio_stream_t*)stream; + audio_stream_t *s = static_cast<audio_stream_t*>(stream); if (!s || !s->pcm_rb) { if (bytes_written) *bytes_written = 0; return ESP_ERR_INVALID_ARG; @@ -138,7 +138,7 @@ static esp_err_t mixer_stream_clk_set_fn(uint32_t rate, uint32_t bits_cfg, i2s_s static void mixer_stream_event_handler(audio_player_cb_ctx_t *ctx) { if (!ctx || !ctx->user_ctx) return; - audio_stream_t *s = (audio_stream_t *)ctx->user_ctx; +
audio_stream_t *s = static_cast<audio_stream_t *>(ctx->user_ctx); // handle auto-queueing if (ctx->audio_event == AUDIO_PLAYER_CALLBACK_EVENT_IDLE) { @@ -188,14 +188,14 @@ inline void audio_mixer_unlock() { void audio_mixer_add_stream(audio_stream_handle_t h) { audio_mixer_lock(); - SLIST_INSERT_HEAD(&s_stream_list, (audio_stream_t*)h, next); + SLIST_INSERT_HEAD(&s_stream_list, static_cast<audio_stream_t*>(h), next); s_active_streams++; audio_mixer_unlock(); } void audio_mixer_remove_stream(audio_stream_handle_t h) { audio_mixer_lock(); - SLIST_REMOVE(&s_stream_list, (audio_stream_t*)h, audio_stream, next); + SLIST_REMOVE(&s_stream_list, static_cast<audio_stream_t*>(h), audio_stream, next); if (s_active_streams > 0) s_active_streams--; audio_mixer_unlock(); } @@ -256,10 +256,6 @@ void audio_mixer_deinit() { /* ================= Stream (mixer channel) API ================= */ -extern const char* event_to_string(audio_player_callback_event_t event); // from audio_player.c -extern audio_player_callback_event_t state_to_event(audio_player_state_t state); // from audio_player.c - - static void dispatch_callback(audio_stream_t *s, audio_player_callback_event_t event) { ESP_LOGD(TAG, "event '%s'", event_to_string(event)); @@ -287,7 +283,7 @@ static void stream_purge_ringbuf(audio_stream_t *s) { } esp_err_t audio_stream_raw_send_event(audio_stream_handle_t h, audio_player_callback_event_t event) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_RAW) return ESP_ERR_NOT_SUPPORTED; @@ -318,7 +314,7 @@ esp_err_t audio_stream_raw_send_event(audio_stream_handle_t h, audio_player_call } audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; if (!s) return AUDIO_PLAYER_STATE_IDLE; /* DECODER stream?
defer to the instance state */ @@ -346,11 +342,11 @@ audio_player_state_t audio_stream_get_state(audio_stream_handle_t h) { audio_stream_type_t audio_stream_get_type(audio_stream_handle_t h) { if (!h) return AUDIO_STREAM_TYPE_UNKNOWN; - return ((audio_stream_t*)h)->type; + return h->type; } esp_err_t audio_stream_play(audio_stream_handle_t h, FILE *fp) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_DECODER) { @@ -370,7 +366,7 @@ esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now) { return audio_stream_play(h, fp); } - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_DECODER) { @@ -402,7 +398,7 @@ esp_err_t audio_stream_queue(audio_stream_handle_t h, FILE *fp, bool play_now) { } esp_err_t audio_stream_stop(audio_stream_handle_t h) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); esp_err_t err = ESP_OK; @@ -421,21 +417,21 @@ esp_err_t audio_stream_stop(audio_stream_handle_t h) { } esp_err_t audio_stream_pause(audio_stream_handle_t h) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_DECODER) return ESP_ERR_NOT_SUPPORTED; return audio_instance_pause(s->instance); } esp_err_t audio_stream_resume(audio_stream_handle_t h) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_DECODER) return ESP_ERR_NOT_SUPPORTED; return audio_instance_resume(s->instance); } esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t size, uint32_t timeout_ms) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); if (s->type != AUDIO_STREAM_TYPE_RAW) { @@ -457,7 +453,7 @@ esp_err_t audio_stream_write_pcm(audio_stream_handle_t h, void *data, size_t siz audio_stream_handle_t audio_stream_new(audio_stream_config_t 
*cfg) { ESP_RETURN_ON_FALSE(cfg, NULL, TAG, "null config"); - audio_stream_t *stream = (audio_stream_t*)calloc(1, sizeof(audio_stream_t)); + audio_stream_t *stream = static_cast<audio_stream_t*>(calloc(1, sizeof(audio_stream_t))); stream->type = cfg->type; /* use provided name? */ @@ -467,7 +463,7 @@ audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { } /* otherwise, generate a unique monotonic name */ else { - snprintf(stream->name, sizeof(stream->name), "stream_%lu", s_stream_name_counter++); + snprintf(stream->name, sizeof(stream->name), "stream_%lu", static_cast<unsigned long>(s_stream_name_counter++)); } /* DECODER type stream? create a player instance and queue */ @@ -512,11 +508,11 @@ audio_stream_handle_t audio_stream_new(audio_stream_config_t *cfg) { ESP_LOGI(TAG, "Created stream '%s' (active: %u)", stream->name, audio_mixer_stream_count()); - return (audio_stream_handle_t)stream; + return stream; } esp_err_t audio_stream_delete(audio_stream_handle_t h) { - audio_stream_t *s = (audio_stream_t*)h; + audio_stream_t *s = h; CHECK_STREAM(s); /* remove from stream tracking */ diff --git a/audio_player.cpp b/audio_player.cpp index 453e06f..3725693 100644 --- a/audio_player.cpp +++ b/audio_player.cpp @@ -102,7 +102,7 @@ typedef struct audio_instance { static audio_instance_t *g_instance = NULL; // when non-null, in legacy non-mixer mode audio_player_state_t audio_instance_get_state(audio_instance_handle_t h) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); return i ?
i->state : AUDIO_PLAYER_STATE_IDLE; } @@ -114,7 +114,7 @@ esp_err_t audio_instance_callback_register(audio_instance_handle_t h, audio_play ESP_RETURN_ON_FALSE(reinterpret_cast<void*>(call_back), ESP_ERR_INVALID_ARG, TAG, "Not a valid call back"); #endif - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); i->s_audio_cb = call_back; i->audio_cb_usrt_ctx = user_ctx; @@ -484,7 +484,7 @@ static esp_err_t audio_send_event(audio_instance_t *i, audio_player_event_t even /* ================= New multi-instance API ================= */ esp_err_t audio_instance_play(audio_instance_handle_t h, FILE *fp) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); LOGI_1("%s", __FUNCTION__); @@ -493,7 +493,7 @@ esp_err_t audio_instance_play(audio_instance_handle_t h, FILE *fp) { } esp_err_t audio_instance_pause(audio_instance_handle_t h) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); LOGI_1("%s", __FUNCTION__); @@ -502,7 +502,7 @@ esp_err_t audio_instance_pause(audio_instance_handle_t h) { } esp_err_t audio_instance_resume(audio_instance_handle_t h) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); LOGI_1("%s", __FUNCTION__); @@ -511,7 +511,7 @@ esp_err_t audio_instance_resume(audio_instance_handle_t h) { } esp_err_t audio_instance_stop(audio_instance_handle_t h) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); LOGI_1("%s", __FUNCTION__); @@ -548,7 +548,7 @@ esp_err_t audio_instance_new(audio_instance_handle_t *h, audio_player_config_t * ESP_RETURN_ON_FALSE(*h == NULL, ESP_ERR_INVALID_ARG, TAG, "instance is not NULL"); ESP_RETURN_ON_FALSE(config, ESP_ERR_INVALID_ARG, TAG, "null config"); - audio_instance_t *i = (audio_instance_t*)calloc(1, sizeof(audio_instance_t)); + audio_instance_t *i = static_cast<audio_instance_t*>(calloc(1,
sizeof(audio_instance_t))); if (i == NULL) return ESP_ERR_NO_MEM; audio_instance_init(i); @@ -613,7 +613,7 @@ esp_err_t audio_instance_new(audio_instance_handle_t *h, audio_player_config_t * } esp_err_t audio_instance_delete(audio_instance_handle_t h) { - audio_instance_t *i = (audio_instance_t*)h; + audio_instance_t *i = static_cast<audio_instance_t*>(h); CHECK_INSTANCE(i); const int MAX_RETRIES = 5; @@ -670,7 +670,7 @@ esp_err_t audio_player_new(audio_player_config_t config) { config.force_stereo = true; // preserve legacy behavior audio_instance_handle_t h = NULL; ESP_RETURN_ON_ERROR(audio_instance_new(&h, &config), TAG, "failed to create new audio instance"); - g_instance = (audio_instance_t*)h; + g_instance = static_cast<audio_instance_t*>(h); return ESP_OK; } diff --git a/test/audio_mixer_test.c b/test/audio_mixer_test.c index 4b5ccdc..d741f3e 100644 --- a/test/audio_mixer_test.c +++ b/test/audio_mixer_test.c @@ -1,3 +1,4 @@ +#include <stdint.h> #include "esp_log.h" #include "esp_check.h" #include "unity.h" @@ -185,8 +186,10 @@ TEST_CASE("audio mixer handles multiple streams and output format", "[audio mixe audio_stream_config_t s1_cfg = DEFAULT_AUDIO_STREAM_CONFIG("s1"); audio_stream_handle_t s1 = audio_stream_new(&s1_cfg); + (void)s1; audio_stream_config_t s2_cfg = DEFAULT_AUDIO_STREAM_CONFIG("s2"); audio_stream_handle_t s2 = audio_stream_new(&s2_cfg); + (void)s2; TEST_ASSERT_EQUAL(2, audio_mixer_stream_count()); @@ -256,7 +259,7 @@ TEST_CASE("audio mixer plays sample mp3 on multiple streams", "[audio mixer]") extern const char mp3_start[] asm("_binary_gs_16b_1c_44100hz_mp3_start"); extern const char mp3_end[] asm("_binary_gs_16b_1c_44100hz_mp3_end"); - size_t mp3_size = (mp3_end - mp3_start) - 1; + size_t mp3_size = (size_t)((uintptr_t)mp3_end - (uintptr_t)mp3_start); // Create two streams audio_stream_config_t s1_cfg = DEFAULT_AUDIO_STREAM_CONFIG("stream1"); From f0080d7669cffe20f6e254a5dd5f1195c8ee49fa Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Thu, 22 Jan 2026 09:46:06 -0800 Subject: [PATCH
11/12] Address cppcheck errors --- audio_player.cpp | 3 --- test/audio_mixer_test.c | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/audio_player.cpp b/audio_player.cpp index 3725693..08ca1da 100644 --- a/audio_player.cpp +++ b/audio_player.cpp @@ -122,9 +122,6 @@ esp_err_t audio_instance_callback_register(audio_instance_handle_t h, audio_play return ESP_OK; } -// This function is used in some optional logging functions so we don't want to -// have a cppcheck warning here -// cppcheck-suppress unusedFunction const char* event_to_string(audio_player_callback_event_t event) { switch(event) { case AUDIO_PLAYER_CALLBACK_EVENT_IDLE: diff --git a/test/audio_mixer_test.c b/test/audio_mixer_test.c index d741f3e..cd9ff61 100644 --- a/test/audio_mixer_test.c +++ b/test/audio_mixer_test.c @@ -301,3 +301,53 @@ TEST_CASE("audio mixer plays sample mp3 on multiple streams", "[audio mixer]") vQueueDelete(mixer_event_queue); bsp_audio_deinit(); } + +TEST_CASE("audio stream pause and resume", "[audio mixer]") +{ + audio_stream_config_t stream_cfg = DEFAULT_AUDIO_STREAM_CONFIG("pause_resume"); + audio_stream_handle_t s = audio_stream_new(&stream_cfg); + TEST_ASSERT_NOT_NULL(s); + + TEST_ESP_OK(audio_stream_pause(s)); + TEST_ASSERT_EQUAL(AUDIO_PLAYER_STATE_PAUSE, audio_stream_get_state(s)); + + TEST_ESP_OK(audio_stream_resume(s)); + TEST_ASSERT_EQUAL(AUDIO_PLAYER_STATE_PLAYING, audio_stream_get_state(s)); + + TEST_ESP_OK(audio_stream_delete(s)); +} + +TEST_CASE("audio stream queue", "[audio mixer]") +{ + audio_stream_config_t stream_cfg = DEFAULT_AUDIO_STREAM_CONFIG("queue"); + audio_stream_handle_t s = audio_stream_new(&stream_cfg); + TEST_ASSERT_NOT_NULL(s); + + extern const char mp3_start[] asm("_binary_gs_16b_1c_44100hz_mp3_start"); + extern const char mp3_end[] asm("_binary_gs_16b_1c_44100hz_mp3_end"); + size_t mp3_size = (size_t)((uintptr_t)mp3_end - (uintptr_t)mp3_start); + + FILE *f1 = fmemopen((void*)mp3_start, 
mp3_size, "rb"); + TEST_ASSERT_NOT_NULL(f1); + + TEST_ESP_OK(audio_stream_queue(s, f1, false)); + + TEST_ESP_OK(audio_stream_delete(s)); +} + +TEST_CASE("audio stream write pcm", "[audio mixer]") +{ + audio_stream_config_t raw_cfg = { + .type = AUDIO_STREAM_TYPE_RAW, + .name = "raw_write", + .priority = 5, + .coreID = 0 + }; + audio_stream_handle_t s = audio_stream_new(&raw_cfg); + TEST_ASSERT_NOT_NULL(s); + + int16_t dummy_pcm[128] = {0}; + TEST_ESP_OK(audio_stream_write_pcm(s, dummy_pcm, sizeof(dummy_pcm), 100)); + + TEST_ESP_OK(audio_stream_delete(s)); +} From 510fad297b6f0602ba4ef38ff3738960c7dcf620 Mon Sep 17 00:00:00 2001 From: Robert Alfaro Date: Thu, 22 Jan 2026 10:03:56 -0800 Subject: [PATCH 12/12] Update README --- README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/README.md b/README.md index 3740ca2..e77bebc 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ * MP3 decoding (via libhelix-mp3) * Wav/wave file decoding +* Audio mixing (multiple concurrent streams) ## Who is this for? @@ -49,6 +50,40 @@ For MP3 support you'll need the [esp-libhelix-mp3](https://github.com/chmorgan/e Unity tests are implemented in the [test/](../test) folder. + +## Audio Mixer + +The Audio Mixer allows for concurrent playback of multiple audio streams. It supports two types of streams: + +* **Decoder Streams**: For playing MP3 or WAV files. Each stream runs its own decoding task. +* **Raw PCM Streams**: For writing raw PCM data directly to the mixer. + +### Basic Mixer Usage + +1. Initialize the mixer with output format and I2S write functions. +2. Create one or more streams using `audio_stream_new()`. +3. Start playback on the streams. + +```c +audio_mixer_config_t mixer_cfg = { + .write_fn = bsp_i2s_write, + .clk_set_fn = bsp_i2s_reconfig_clk, + .i2s_format = { + .sample_rate = 44100, + .bits_per_sample = 16, + .channels = 2 + }, + // ... 
+}; +audio_mixer_init(&mixer_cfg); + +audio_stream_config_t stream_cfg = DEFAULT_AUDIO_STREAM_CONFIG("bgm"); +audio_stream_handle_t bgm_stream = audio_stream_new(&stream_cfg); + +FILE *f = fopen("/sdcard/music.mp3", "rb"); +audio_stream_play(bgm_stream, f); +``` + ## States ```mermaid