3#if defined(ESP_PLATFORM)
11#if CONFIG_IDF_TARGET_ESP32S3
13#include "esp_async_memcpy.h"
16#include "esp_memory_utils.h"
17#include "freertos/task.h"
18#include "roo_logging.h"
/// Alignment, in bytes, required for buffers that are not in external RAM
/// (see dma_align_for_ptr()).
constexpr size_t kDmaAlign = 16;

/// Alignment, in bytes, required for buffers located in external RAM
/// (see dma_align_for_ptr()).
constexpr size_t kExtDmaAlign = 32;

/// Statistics are logged each time the total request count is a multiple of
/// this value (see maybe_log_stats()).
constexpr uint32_t kStatsLogInterval = 120;

/// Tag identifying this module.
/// NOTE(review): presumably passed to the logging calls; confirm at call sites.
constexpr const char* kTag = "async_blit";
/// Counters describing how async_blit() requests were serviced.
struct AsyncBlitStats {
  /// Number of async_blit() calls that passed the argument checks.
  uint32_t total_requests = 0;
  /// Requests submitted to the DMA engine as one contiguous transfer.
  uint32_t dma_contiguous = 0;
  /// Requests submitted to the DMA engine one row at a time.
  uint32_t dma_rowwise = 0;
  /// Requests copied synchronously because no DMA handle was installed.
  uint32_t fallback_no_handle_or_busy = 0;
  /// Requests copied synchronously because the buffers, sizes or strides did
  /// not satisfy the DMA capability/alignment checks.
  uint32_t fallback_alignment = 0;
  /// Requests copied synchronously after the DMA submission was not accepted.
  uint32_t fallback_esp_err = 0;
};
37struct AsyncBlitState {
38 async_memcpy_handle_t handle =
nullptr;
39 const roo::byte* src =
nullptr;
40 roo::byte* dst =
nullptr;
41 size_t src_stride = 0;
42 size_t dst_stride = 0;
44 size_t remaining_rows = 0;
45 TaskHandle_t caller_task =
nullptr;
61 void IRAM_ATTR notifyDoneFromISR() {
62 TaskHandle_t caller = caller_task;
63 caller_task =
nullptr;
64 if (caller !=
nullptr) {
65 BaseType_t high_wakeup = pdFALSE;
66 vTaskNotifyGiveFromISR(caller, &high_wakeup);
67 if (high_wakeup == pdTRUE) {
74AsyncBlitState& state() {
75 static AsyncBlitState s;
79void maybe_log_stats(AsyncBlitState& st) {
80 if (st.stats.total_requests % kStatsLogInterval != 0)
return;
82 "req=%u dma_contig=%u dma_rows=%u fb_busy=%u fb_align=%u fb_err=%u",
83 st.stats.total_requests, st.stats.dma_contiguous,
84 st.stats.dma_rowwise, st.stats.fallback_no_handle_or_busy,
85 st.stats.fallback_alignment, st.stats.fallback_esp_err);
88void IRAM_ATTR copy_remaining_rows_sync(AsyncBlitState& st) {
89 while (st.remaining_rows > 0) {
90 std::memcpy(st.dst, st.src, st.row_bytes);
91 st.dst += st.dst_stride;
92 st.src += st.src_stride;
97void copy_sync(
const roo::byte* src_ptr,
size_t src_stride, roo::byte* dst_ptr,
98 size_t dst_stride,
size_t width,
size_t height) {
99 if (src_stride == width && dst_stride == width) {
100 std::memcpy(dst_ptr, src_ptr, width * height);
104 const roo::byte* src_row = src_ptr;
105 roo::byte* dst_row = dst_ptr;
106 for (
size_t row = 0; row < height; ++row) {
107 std::memcpy(dst_row, src_row, width);
108 src_row += src_stride;
109 dst_row += dst_stride;
/// Reports whether the address held by `ptr` is a multiple of `align`.
///
/// @param ptr   Pointer whose numeric address is tested.
/// @param align Alignment in bytes; must be non-zero (it is used as a modulus).
/// @return true when the address is evenly divisible by `align`.
inline bool is_aligned(const void* ptr, size_t align) {
  const auto address = reinterpret_cast<uintptr_t>(ptr);
  return (address % align) == 0;
}
117inline size_t dma_align_for_ptr(
const void* ptr) {
118 return esp_ptr_external_ram(ptr) ? kExtDmaAlign : kDmaAlign;
121inline bool can_dma_transfer(
const void* src,
const void* dst,
size_t n) {
122 const bool src_dma_capable =
123 esp_ptr_dma_capable(src) || esp_ptr_dma_ext_capable(src);
124 const bool dst_dma_capable =
125 esp_ptr_dma_capable(dst) || esp_ptr_dma_ext_capable(dst);
126 const size_t required_align =
127 std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
128 return n > 0 && is_aligned(src, required_align) &&
129 is_aligned(dst, required_align) && (n % required_align) == 0 &&
130 src_dma_capable && dst_dma_capable;
133void log_dma_reject(
const char* mode,
const void* src,
const void* dst,
134 size_t bytes,
size_t src_stride,
size_t dst_stride,
136 const bool src_dma_capable =
137 esp_ptr_dma_capable(src) || esp_ptr_dma_ext_capable(src);
138 const bool dst_dma_capable =
139 esp_ptr_dma_capable(dst) || esp_ptr_dma_ext_capable(dst);
140 const size_t required_align =
141 std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
144 "reject %s src=%p dst=%p bytes=%u h=%u req_align=%u src_mod16=%u "
145 "dst_mod16=%u bytes_mod16=%u src_mod32=%u dst_mod32=%u bytes_mod32=%u "
146 "src_stride=%u dst_stride=%u src_stride_mod16=%u dst_stride_mod16=%u "
149 mode, src, dst,
static_cast<unsigned>(bytes),
150 static_cast<unsigned>(height),
static_cast<unsigned>(required_align),
151 static_cast<unsigned>(
reinterpret_cast<uintptr_t
>(src) % kDmaAlign),
152 static_cast<unsigned>(
reinterpret_cast<uintptr_t
>(dst) % kDmaAlign),
153 static_cast<unsigned>(bytes % kDmaAlign),
154 static_cast<unsigned>(
reinterpret_cast<uintptr_t
>(src) % kExtDmaAlign),
155 static_cast<unsigned>(
reinterpret_cast<uintptr_t
>(dst) % kExtDmaAlign),
156 static_cast<unsigned>(bytes % kExtDmaAlign),
157 static_cast<unsigned>(src_stride),
static_cast<unsigned>(dst_stride),
158 static_cast<unsigned>(src_stride % kDmaAlign),
159 static_cast<unsigned>(dst_stride % kDmaAlign),
160 static_cast<unsigned>(src_dma_capable),
161 static_cast<unsigned>(dst_dma_capable));
164inline bool are_strides_aligned(
size_t src_stride,
size_t dst_stride,
165 const void* src,
const void* dst) {
166 const size_t required_align =
167 std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
168 return (src_stride % required_align) == 0 &&
169 (dst_stride % required_align) == 0;
172bool IRAM_ATTR on_copy_done(async_memcpy_handle_t, async_memcpy_event_t*,
174 auto* st =
static_cast<AsyncBlitState*
>(cb_args);
175 if (st ==
nullptr)
return false;
177 if (st->remaining_rows == 0) {
178 st->notifyDoneFromISR();
182 st->src += st->src_stride;
183 st->dst += st->dst_stride;
184 --st->remaining_rows;
186 if (st->remaining_rows == 0) {
187 st->notifyDoneFromISR();
192 esp_async_memcpy(st->handle, st->dst,
const_cast<roo::byte*
>(st->src),
193 st->row_bytes, on_copy_done, st);
195 copy_remaining_rows_sync(*st);
196 st->notifyDoneFromISR();
204 AsyncBlitState& st = state();
205 if (st.handle !=
nullptr)
return;
207 async_memcpy_config_t cfg = ASYNC_MEMCPY_DEFAULT_CONFIG();
209 async_memcpy_handle_t handle =
nullptr;
210 if (esp_async_memcpy_install(&cfg, &handle) == ESP_OK) {
216 AsyncBlitState& st = state();
217 if (st.handle !=
nullptr && st.caller_task ==
nullptr) {
218 esp_async_memcpy_uninstall(st.handle);
224 AsyncBlitState& st = state();
225 if (st.caller_task ==
nullptr)
return;
227 TaskHandle_t me = xTaskGetCurrentTaskHandle();
228 CHECK(st.caller_task == me)
229 <<
"async_blit_await() must be called by the same task as async_blit()";
231 while (st.caller_task !=
nullptr) {
232 ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
236void async_blit(
const roo::byte* src_ptr,
size_t src_stride, roo::byte* dst_ptr,
237 size_t dst_stride,
size_t width,
size_t height) {
238 if (src_ptr ==
nullptr || dst_ptr ==
nullptr || width == 0 || height == 0) {
242 AsyncBlitState& st = state();
243 CHECK(st.caller_task ==
nullptr)
244 <<
"previous async_blit() still in progress; call async_blit_await()";
248 (void)ulTaskNotifyTake(pdTRUE, 0);
250 ++st.stats.total_requests;
252 const bool contiguous = (src_stride == width && dst_stride == width);
253 if (st.handle ==
nullptr) {
254 ++st.stats.fallback_no_handle_or_busy;
255 copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
260 if (!can_dma_transfer(src_ptr, dst_ptr, width * height)) {
261 ++st.stats.fallback_alignment;
262 log_dma_reject(
"contig", src_ptr, dst_ptr, width * height, src_stride,
264 copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
268 if (!can_dma_transfer(src_ptr, dst_ptr, width) ||
269 !are_strides_aligned(src_stride, dst_stride, src_ptr, dst_ptr)) {
270 ++st.stats.fallback_alignment;
271 log_dma_reject(
"row", src_ptr, dst_ptr, width, src_stride, dst_stride,
273 copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
280 st.src_stride = src_stride;
281 st.dst_stride = dst_stride;
282 st.row_bytes = width;
283 st.remaining_rows = height;
284 st.caller_task = xTaskGetCurrentTaskHandle();
288 ++st.stats.dma_contiguous;
289 st.remaining_rows = 0;
290 err = esp_async_memcpy(st.handle, st.dst,
const_cast<roo::byte*
>(st.src),
291 width * height, on_copy_done, &st);
293 ++st.stats.dma_rowwise;
294 err = esp_async_memcpy(st.handle, st.dst,
const_cast<roo::byte*
>(st.src),
295 st.row_bytes, on_copy_done, &st);
299 ++st.stats.fallback_esp_err;
300 st.caller_task =
nullptr;
301 copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
317void async_blit(
const roo::byte* src_ptr,
size_t src_stride, roo::byte* dst_ptr,
318 size_t dst_stride,
size_t width,
size_t height) {
319 if (src_ptr ==
nullptr || dst_ptr ==
nullptr || width == 0 || height == 0) {
323 if (src_stride == width && dst_stride == width) {
324 std::memcpy(dst_ptr, src_ptr, width * height);
326 const roo::byte* src_row = src_ptr;
327 roo::byte* dst_row = dst_ptr;
328 for (
size_t row = 0; row < height; ++row) {
329 std::memcpy(dst_row, src_row, width);
330 src_row += src_stride;
331 dst_row += dst_stride;
Defines 140 opaque HTML named colors.
void async_blit(const roo::byte *src_ptr, size_t src_stride, roo::byte *dst_ptr, size_t dst_stride, size_t width, size_t height)