ESPHome 2026.5.0b1
Loading...
Searching...
No Matches
audio_resampler.cpp
Go to the documentation of this file.
1#include "audio_resampler.h"
2
3#ifdef USE_ESP32
4
5#include "esphome/core/hal.h"
6
7#include <cstring>
8
9namespace esphome::audio {
10
// Timeout, in milliseconds, used by resample() for each transfer to the sink and from the source, and as the delay
// while output is paused.
11static const uint32_t READ_WRITE_TIMEOUT_MS = 20;
12
13AudioResampler::AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
14 : input_buffer_size_(input_buffer_size), output_buffer_size_(output_buffer_size) {
17}
18
19esp_err_t AudioResampler::add_source(std::weak_ptr<ring_buffer::RingBuffer> &input_ring_buffer) {
20 if (this->input_transfer_buffer_ != nullptr) {
21 this->input_transfer_buffer_->set_source(input_ring_buffer);
22 return ESP_OK;
23 }
24 return ESP_ERR_NO_MEM;
25}
26
27esp_err_t AudioResampler::add_sink(std::weak_ptr<ring_buffer::RingBuffer> &output_ring_buffer) {
28 if (this->output_transfer_buffer_ != nullptr) {
29 this->output_transfer_buffer_->set_sink(output_ring_buffer);
30 return ESP_OK;
31 }
32 return ESP_ERR_NO_MEM;
33}
34
#ifdef USE_SPEAKER
/// @brief Adds a speaker as the sink for resampled audio. Only compiled when USE_SPEAKER is defined.
///
/// NOTE(review): the function header was missing from this listing and has been reconstructed to mirror the ring
/// buffer add_sink() overload; confirm the parameter type against the declaration in audio_resampler.h.
/// @param speaker Speaker handed to the output transfer buffer as its sink
/// @return ESP_OK once the sink is set, ESP_ERR_NO_MEM if the output transfer buffer does not exist
esp_err_t AudioResampler::add_sink(speaker::Speaker *speaker) {
  if (this->output_transfer_buffer_ != nullptr) {
    this->output_transfer_buffer_->set_sink(speaker);
    return ESP_OK;
  }
  return ESP_ERR_NO_MEM;
}
#endif
44
45esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info,
46 uint16_t number_of_taps, uint16_t number_of_filters) {
47 this->input_stream_info_ = input_stream_info;
48 this->output_stream_info_ = output_stream_info;
49
50 if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
51 return ESP_ERR_NO_MEM;
52 }
53
54 if ((input_stream_info.get_bits_per_sample() > 32) || (output_stream_info.get_bits_per_sample() > 32) ||
55 (input_stream_info_.get_channels() != output_stream_info.get_channels())) {
56 return ESP_ERR_NOT_SUPPORTED;
57 }
58
59 if ((input_stream_info.get_sample_rate() != output_stream_info.get_sample_rate()) ||
60 (input_stream_info.get_bits_per_sample() != output_stream_info.get_bits_per_sample())) {
61 this->resampler_ = make_unique<esp_audio_libs::resampler::Resampler>(
62 input_stream_info.bytes_to_samples(this->input_buffer_size_),
63 output_stream_info.bytes_to_samples(this->output_buffer_size_));
64
65 // Use cascaded biquad filters when downsampling to avoid aliasing
66 bool use_pre_filter = output_stream_info.get_sample_rate() < input_stream_info.get_sample_rate();
67
68 esp_audio_libs::resampler::ResamplerConfiguration resample_config = {
69 .source_sample_rate = static_cast<float>(input_stream_info.get_sample_rate()),
70 .target_sample_rate = static_cast<float>(output_stream_info.get_sample_rate()),
71 .source_bits_per_sample = input_stream_info.get_bits_per_sample(),
72 .target_bits_per_sample = output_stream_info.get_bits_per_sample(),
73 .channels = input_stream_info_.get_channels(),
74 .use_pre_or_post_filter = use_pre_filter,
75 .subsample_interpolate = false, // Doubles the CPU load. Using more filters is a better alternative
76 .number_of_taps = number_of_taps,
77 .number_of_filters = number_of_filters,
78 };
79
80 if (!this->resampler_->initialize(resample_config)) {
81 // Failed to allocate the resampler's internal buffers
82 return ESP_ERR_NO_MEM;
83 }
84 }
85
86 return ESP_OK;
87}
88
89AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_differential) {
90 if (stop_gracefully) {
91 if (!this->input_transfer_buffer_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) {
93 }
94 }
95
96 if (!this->pause_output_) {
97 // Move audio data to the sink without shifting the data in the output transfer buffer to avoid unnecessary, slow
98 // data moves
99 this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
100 } else {
101 // If paused, block to avoid wasting CPU resources
102 delay(READ_WRITE_TIMEOUT_MS);
103 }
104
105 this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
106
107 if (this->input_transfer_buffer_->available() == 0) {
108 // No samples available to process
110 }
111
112 const size_t bytes_free = this->output_transfer_buffer_->free();
113 const uint32_t frames_free = this->output_stream_info_.bytes_to_frames(bytes_free);
114
115 const size_t bytes_available = this->input_transfer_buffer_->available();
116 const uint32_t frames_available = this->input_stream_info_.bytes_to_frames(bytes_available);
117
118 if ((this->input_stream_info_.get_sample_rate() != this->output_stream_info_.get_sample_rate()) ||
119 (this->input_stream_info_.get_bits_per_sample() != this->output_stream_info_.get_bits_per_sample())) {
120 // Adjust gain by -3 dB to avoid clipping due to the resampling process
121 esp_audio_libs::resampler::ResamplerResults results =
122 this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(),
123 this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3);
124
125 this->input_transfer_buffer_->decrease_buffer_length(this->input_stream_info_.frames_to_bytes(results.frames_used));
126 this->output_transfer_buffer_->increase_buffer_length(
127 this->output_stream_info_.frames_to_bytes(results.frames_generated));
128
129 // Resampling causes slight differences in the durations used versus generated. Computes the difference in
130 // millisconds. The callback function passing the played audio duration uses the difference to convert from output
131 // duration to input duration.
132 this->accumulated_frames_used_ += results.frames_used;
133 this->accumulated_frames_generated_ += results.frames_generated;
134
135 const int32_t used_ms =
137 const int32_t generated_ms =
139
140 *ms_differential = used_ms - generated_ms;
141
142 } else {
143 // No resampling required, copy samples directly to the output transfer buffer
144 *ms_differential = 0;
145
146 const size_t bytes_to_transfer = std::min(this->output_stream_info_.frames_to_bytes(frames_free),
147 this->input_stream_info_.frames_to_bytes(frames_available));
148
149 std::memcpy((void *) this->output_transfer_buffer_->get_buffer_end(),
150 (void *) this->input_transfer_buffer_->get_buffer_start(), bytes_to_transfer);
151
152 this->input_transfer_buffer_->decrease_buffer_length(bytes_to_transfer);
153 this->output_transfer_buffer_->increase_buffer_length(bytes_to_transfer);
154 }
155
157}
158
159} // namespace esphome::audio
160
161#endif
AudioResamplerState resample(bool stop_gracefully, int32_t *ms_differential)
Resamples audio from the ring buffer source and writes to the sink.
std::unique_ptr< esp_audio_libs::resampler::Resampler > resampler_
esp_err_t add_source(std::weak_ptr< ring_buffer::RingBuffer > &input_ring_buffer)
Adds a source ring buffer for audio data.
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
esp_err_t add_sink(std::weak_ptr< ring_buffer::RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for resampled audio.
AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
esp_err_t start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info, uint16_t number_of_taps, uint16_t number_of_filters)
Sets up the class to resample.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition audio.h:52
uint8_t get_bits_per_sample() const
Definition audio.h:27
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition audio.h:42
uint8_t get_channels() const
Definition audio.h:28
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition audio.cpp:29
uint32_t get_sample_rate() const
Definition audio.h:29
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:47
void HOT delay(uint32_t ms)
Definition hal.cpp:82
static void uint32_t