roo_display
API Documentation for roo_display
Loading...
Searching...
No Matches
async_blit.cpp
Go to the documentation of this file.
2
3#if defined(ESP_PLATFORM)
4
5#include <algorithm>
6#include <cstdint>
7#include <cstring>
8
10
11#if CONFIG_IDF_TARGET_ESP32S3
12
13#include "esp_async_memcpy.h"
14#include "esp_err.h"
15#include "esp_log.h"
16#include "esp_memory_utils.h"
17#include "freertos/task.h"
18#include "roo_logging.h"
19
20namespace roo_display {
21namespace {
22
23constexpr size_t kDmaAlign = 16;
24constexpr size_t kExtDmaAlign = 32;
25constexpr uint32_t kStatsLogInterval = 120;
26constexpr const char* kTag = "async_blit";
27
// Counters describing how async blit requests were serviced. Dumped
// periodically via maybe_log_stats() every kStatsLogInterval requests.
struct AsyncBlitStats {
  uint32_t total_requests = 0;    // async_blit() calls that passed the arg checks.
  uint32_t dma_contiguous = 0;    // Served as a single whole-buffer DMA transfer.
  uint32_t dma_rowwise = 0;       // Served as chained per-row DMA transfers.
  // Synchronous fallback: no driver handle installed. (The name also
  // mentions "busy"; only the no-handle path is visible in this file.)
  uint32_t fallback_no_handle_or_busy = 0;
  uint32_t fallback_alignment = 0;  // Fallback: alignment/DMA-capability reject.
  uint32_t fallback_esp_err = 0;    // Fallback: esp_async_memcpy() returned an error.
};
36
// All mutable state shared between the task calling async_blit()/await()
// and the async-memcpy completion ISR (on_copy_done). A single in-flight
// operation at a time is supported; see the ownership notes below.
struct AsyncBlitState {
  async_memcpy_handle_t handle = nullptr;  // Driver handle; null = not installed.
  const roo::byte* src = nullptr;          // Next row to read (advanced by ISR).
  roo::byte* dst = nullptr;                // Next row to write (advanced by ISR).
  size_t src_stride = 0;                   // Bytes between consecutive src rows.
  size_t dst_stride = 0;                   // Bytes between consecutive dst rows.
  size_t row_bytes = 0;                    // Payload bytes per row (width).
  size_t remaining_rows = 0;  // Rows left; 0 during a contiguous transfer.
  TaskHandle_t caller_task = nullptr;  // Non-null iff an operation is in flight.
  AsyncBlitStats stats;

  // Completes the current operation from ISR context.
  //
  // Ownership model: caller_task is non-null iff an operation is in flight.
  // The ISR captures caller_task, clears it, and then notifies the captured
  // task. With this ordering, async_blit_await() is safe from lost wakeups:
  // - If await sees caller_task == nullptr, completion already happened.
  // - If await blocks, the notification is pending or will be sent by ISR.
  // This also relies on pointer-sized loads/stores being atomic on ESP32 for
  // caller_task handoff between task and ISR contexts.
  //
  // Note: because task notifications are counting, a completion token can be
  // left pending if await exits via the fast path. We explicitly drain any
  // stale token before starting a new operation in async_blit().
  void IRAM_ATTR notifyDoneFromISR() {
    TaskHandle_t caller = caller_task;
    caller_task = nullptr;
    if (caller != nullptr) {
      BaseType_t high_wakeup = pdFALSE;
      vTaskNotifyGiveFromISR(caller, &high_wakeup);
      if (high_wakeup == pdTRUE) {
        // Request a context switch on ISR exit if we woke a higher-priority
        // task.
        portYIELD_FROM_ISR();
      }
    }
  }
};
73
74AsyncBlitState& state() {
75 static AsyncBlitState s;
76 return s;
77}
78
79void maybe_log_stats(AsyncBlitState& st) {
80 if (st.stats.total_requests % kStatsLogInterval != 0) return;
81 ESP_LOGI(kTag,
82 "req=%u dma_contig=%u dma_rows=%u fb_busy=%u fb_align=%u fb_err=%u",
83 st.stats.total_requests, st.stats.dma_contiguous,
84 st.stats.dma_rowwise, st.stats.fallback_no_handle_or_busy,
85 st.stats.fallback_alignment, st.stats.fallback_esp_err);
86}
87
88void IRAM_ATTR copy_remaining_rows_sync(AsyncBlitState& st) {
89 while (st.remaining_rows > 0) {
90 std::memcpy(st.dst, st.src, st.row_bytes);
91 st.dst += st.dst_stride;
92 st.src += st.src_stride;
93 --st.remaining_rows;
94 }
95}
96
97void copy_sync(const roo::byte* src_ptr, size_t src_stride, roo::byte* dst_ptr,
98 size_t dst_stride, size_t width, size_t height) {
99 if (src_stride == width && dst_stride == width) {
100 std::memcpy(dst_ptr, src_ptr, width * height);
101 return;
102 }
103
104 const roo::byte* src_row = src_ptr;
105 roo::byte* dst_row = dst_ptr;
106 for (size_t row = 0; row < height; ++row) {
107 std::memcpy(dst_row, src_row, width);
108 src_row += src_stride;
109 dst_row += dst_stride;
110 }
111}
112
// True iff `ptr` lies on an `align`-byte boundary. Uses modulo (not a bit
// mask) so it is correct for any nonzero alignment, not just powers of two.
inline bool is_aligned(const void* ptr, size_t align) {
  const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
  return addr % align == 0;
}
116
117inline size_t dma_align_for_ptr(const void* ptr) {
118 return esp_ptr_external_ram(ptr) ? kExtDmaAlign : kDmaAlign;
119}
120
121inline bool can_dma_transfer(const void* src, const void* dst, size_t n) {
122 const bool src_dma_capable =
123 esp_ptr_dma_capable(src) || esp_ptr_dma_ext_capable(src);
124 const bool dst_dma_capable =
125 esp_ptr_dma_capable(dst) || esp_ptr_dma_ext_capable(dst);
126 const size_t required_align =
127 std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
128 return n > 0 && is_aligned(src, required_align) &&
129 is_aligned(dst, required_align) && (n % required_align) == 0 &&
130 src_dma_capable && dst_dma_capable;
131}
132
// Logs a diagnostic line explaining why a DMA transfer was rejected in
// favor of a synchronous copy. `mode` is the caller-supplied label
// ("contig" or "row" in async_blit()); `bytes` is the attempted transfer
// size (whole buffer, or one row for row-wise blits). The pointer/size
// residues are reported modulo both kDmaAlign (16) and kExtDmaAlign (32)
// so the violated constraint is visible in the log regardless of which
// alignment applied.
void log_dma_reject(const char* mode, const void* src, const void* dst,
                    size_t bytes, size_t src_stride, size_t dst_stride,
                    size_t height) {
  // Recompute the same capability/alignment facts can_dma_transfer() used.
  const bool src_dma_capable =
      esp_ptr_dma_capable(src) || esp_ptr_dma_ext_capable(src);
  const bool dst_dma_capable =
      esp_ptr_dma_capable(dst) || esp_ptr_dma_ext_capable(dst);
  const size_t required_align =
      std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
  // All integral arguments are cast to unsigned to match the "%u"
  // conversions exactly.
  ESP_LOGW(
      kTag,
      "reject %s src=%p dst=%p bytes=%u h=%u req_align=%u src_mod16=%u "
      "dst_mod16=%u bytes_mod16=%u src_mod32=%u dst_mod32=%u bytes_mod32=%u "
      "src_stride=%u dst_stride=%u src_stride_mod16=%u dst_stride_mod16=%u "
      "src_dma=%u "
      "dst_dma=%u",
      mode, src, dst, static_cast<unsigned>(bytes),
      static_cast<unsigned>(height), static_cast<unsigned>(required_align),
      static_cast<unsigned>(reinterpret_cast<uintptr_t>(src) % kDmaAlign),
      static_cast<unsigned>(reinterpret_cast<uintptr_t>(dst) % kDmaAlign),
      static_cast<unsigned>(bytes % kDmaAlign),
      static_cast<unsigned>(reinterpret_cast<uintptr_t>(src) % kExtDmaAlign),
      static_cast<unsigned>(reinterpret_cast<uintptr_t>(dst) % kExtDmaAlign),
      static_cast<unsigned>(bytes % kExtDmaAlign),
      static_cast<unsigned>(src_stride), static_cast<unsigned>(dst_stride),
      static_cast<unsigned>(src_stride % kDmaAlign),
      static_cast<unsigned>(dst_stride % kDmaAlign),
      static_cast<unsigned>(src_dma_capable),
      static_cast<unsigned>(dst_dma_capable));
}
163
164inline bool are_strides_aligned(size_t src_stride, size_t dst_stride,
165 const void* src, const void* dst) {
166 const size_t required_align =
167 std::max(dma_align_for_ptr(src), dma_align_for_ptr(dst));
168 return (src_stride % required_align) == 0 &&
169 (dst_stride % required_align) == 0;
170}
171
// Completion callback invoked by the async-memcpy driver in ISR context
// after each DMA transfer finishes. cb_args is the AsyncBlitState that
// started the operation. Chains the next row for row-wise blits, and hands
// completion back to the waiting task when the whole blit is done.
// Always returns false: any task wakeup/yield is performed inside
// notifyDoneFromISR(), not through the callback's return value.
bool IRAM_ATTR on_copy_done(async_memcpy_handle_t, async_memcpy_event_t*,
                            void* cb_args) {
  auto* st = static_cast<AsyncBlitState*>(cb_args);
  if (st == nullptr) return false;

  // Contiguous transfers set remaining_rows to 0 before starting (see
  // async_blit()), so the first callback means the entire blit completed.
  if (st->remaining_rows == 0) {
    st->notifyDoneFromISR();
    return false;
  }

  // Row-wise: the row at the current src/dst just finished; advance to the
  // next row and account for the completed one.
  st->src += st->src_stride;
  st->dst += st->dst_stride;
  --st->remaining_rows;

  if (st->remaining_rows == 0) {
    st->notifyDoneFromISR();
    return false;
  }

  // Chain the next row. If the driver refuses, finish the remaining rows on
  // the CPU right here so the waiting task is never left blocked forever.
  esp_err_t err =
      esp_async_memcpy(st->handle, st->dst, const_cast<roo::byte*>(st->src),
                       st->row_bytes, on_copy_done, st);
  if (err != ESP_OK) {
    copy_remaining_rows_sync(*st);
    st->notifyDoneFromISR();
  }
  return false;
}
200
201} // namespace
202
203void async_blit_init() {
204 AsyncBlitState& st = state();
205 if (st.handle != nullptr) return;
206
207 async_memcpy_config_t cfg = ASYNC_MEMCPY_DEFAULT_CONFIG();
208 cfg.backlog = 2;
209 async_memcpy_handle_t handle = nullptr;
210 if (esp_async_memcpy_install(&cfg, &handle) == ESP_OK) {
211 st.handle = handle;
212 }
213}
214
215void async_blit_deinit() {
216 AsyncBlitState& st = state();
217 if (st.handle != nullptr && st.caller_task == nullptr) {
218 esp_async_memcpy_uninstall(st.handle);
219 st.handle = nullptr;
220 }
221}
222
// Blocks until the in-flight async_blit() (if any) completes. Must be
// called by the same task that called async_blit(); enforced via CHECK.
//
// Fast path: if the completion ISR has already cleared caller_task, returns
// immediately. In that case a counting-notification token may remain
// pending for this task; async_blit() drains it before starting the next
// operation (see the ownership notes on AsyncBlitState).
void async_blit_await() {
  AsyncBlitState& st = state();
  if (st.caller_task == nullptr) return;

  TaskHandle_t me = xTaskGetCurrentTaskHandle();
  CHECK(st.caller_task == me)
      << "async_blit_await() must be called by the same task as async_blit()";

  // Re-check caller_task after each wake: the loop tolerates spurious or
  // unrelated notifications, and only exits once the ISR has cleared the
  // in-flight marker (which it does before notifying).
  while (st.caller_task != nullptr) {
    ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
  }
}
235
// Copies a `height` x `width`-byte rectangle from src to dst using the
// async-memcpy DMA engine when possible. When DMA is used this returns as
// soon as the transfer is started; the caller must call async_blit_await()
// before touching either buffer or issuing another async_blit(). Falls
// back to a blocking CPU copy (returning only when done) if the driver is
// not installed, the buffers fail the alignment/capability checks, or the
// driver reports an error.
//
// src_stride / dst_stride are in bytes; when both equal `width` the
// rectangle is treated as one contiguous buffer.
void async_blit(const roo::byte* src_ptr, size_t src_stride, roo::byte* dst_ptr,
                size_t dst_stride, size_t width, size_t height) {
  // Degenerate rectangles (and null pointers) are a no-op.
  if (src_ptr == nullptr || dst_ptr == nullptr || width == 0 || height == 0) {
    return;
  }

  AsyncBlitState& st = state();
  CHECK(st.caller_task == nullptr)
      << "previous async_blit() still in progress; call async_blit_await()";

  // Drain any stale completion token left from a previous operation.
  // This keeps notification state aligned 1:1 with newly started transfers.
  (void)ulTaskNotifyTake(pdTRUE, 0);

  ++st.stats.total_requests;
  maybe_log_stats(st);
  const bool contiguous = (src_stride == width && dst_stride == width);
  if (st.handle == nullptr) {
    // Driver not installed (async_blit_init() not called, or it failed).
    ++st.stats.fallback_no_handle_or_busy;
    copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
    return;
  }

  // Pre-flight checks: reject to a synchronous copy up front rather than
  // discovering an unusable transfer after DMA has been started.
  if (contiguous) {
    if (!can_dma_transfer(src_ptr, dst_ptr, width * height)) {
      ++st.stats.fallback_alignment;
      log_dma_reject("contig", src_ptr, dst_ptr, width * height, src_stride,
                     dst_stride, height);
      copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
      return;
    }
  } else {
    // Row-wise: each row must itself be DMA-transferable, and both strides
    // must keep every row start aligned.
    if (!can_dma_transfer(src_ptr, dst_ptr, width) ||
        !are_strides_aligned(src_stride, dst_stride, src_ptr, dst_ptr)) {
      ++st.stats.fallback_alignment;
      log_dma_reject("row", src_ptr, dst_ptr, width, src_stride, dst_stride,
                     height);
      copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
      return;
    }
  }

  // Publish the operation state before starting DMA: the completion ISR
  // (on_copy_done) reads these fields, and notifyDoneFromISR() hands
  // completion back through caller_task, so all of this must be visible
  // before esp_async_memcpy() can trigger the first callback.
  st.src = src_ptr;
  st.dst = dst_ptr;
  st.src_stride = src_stride;
  st.dst_stride = dst_stride;
  st.row_bytes = width;
  st.remaining_rows = height;
  st.caller_task = xTaskGetCurrentTaskHandle();

  esp_err_t err;
  if (contiguous) {
    ++st.stats.dma_contiguous;
    // Single whole-buffer transfer: remaining_rows == 0 tells on_copy_done
    // that the first completion finishes the entire blit.
    st.remaining_rows = 0;
    err = esp_async_memcpy(st.handle, st.dst, const_cast<roo::byte*>(st.src),
                           width * height, on_copy_done, &st);
  } else {
    ++st.stats.dma_rowwise;
    // Kick off the first row; on_copy_done chains the rest from ISR context.
    err = esp_async_memcpy(st.handle, st.dst, const_cast<roo::byte*>(st.src),
                           st.row_bytes, on_copy_done, &st);
  }

  if (err != ESP_OK) {
    // Driver refused the transfer, so no callback will fire: clear the
    // in-flight marker and complete the whole copy synchronously.
    ++st.stats.fallback_esp_err;
    st.caller_task = nullptr;
    copy_sync(src_ptr, src_stride, dst_ptr, dst_stride, width, height);
  }
}
304
305} // namespace roo_display
306
307#else
308
309namespace roo_display {
310
311void async_blit_init() {}
312
313void async_blit_deinit() {}
314
315void async_blit_await() {}
316
317void async_blit(const roo::byte* src_ptr, size_t src_stride, roo::byte* dst_ptr,
318 size_t dst_stride, size_t width, size_t height) {
319 if (src_ptr == nullptr || dst_ptr == nullptr || width == 0 || height == 0) {
320 return;
321 }
322
323 if (src_stride == width && dst_stride == width) {
324 std::memcpy(dst_ptr, src_ptr, width * height);
325 } else {
326 const roo::byte* src_row = src_ptr;
327 roo::byte* dst_row = dst_ptr;
328 for (size_t row = 0; row < height; ++row) {
329 std::memcpy(dst_row, src_row, width);
330 src_row += src_stride;
331 dst_row += dst_stride;
332 }
333 }
334}
335
336} // namespace roo_display
337
338#endif
339
340#endif // defined(ESP_PLATFORM)
@ CHECK
Definition inflate.h:47
Defines 140 opaque HTML named colors.
void async_blit_await()
void async_blit(const roo::byte *src_ptr, size_t src_stride, roo::byte *dst_ptr, size_t dst_stride, size_t width, size_t height)
void async_blit_deinit()
void async_blit_init()