So, in my actual firmware, RP2040 with pico-sdk 2.1.1
`CMakeLists.txt` and `blink_simple.c` (listed below). In both cases, I've uploaded using gdb and openocd (via `monitor program ...elf`), and then I've tried debugging: first by adding a breakpoint on `platform_entry` (due to copy_to_ram), then by breaking on `mutex_enter_blocking`. In both cases (my app (bad) and the minimal example (good)), the first call to `mutex_enter_blocking` is due to `__wrap_malloc`, ultimately initiated by `alarm_pool_create`; here it is for the minimal example:
The differences I've seen are:
Here is a comparison of stepping through `mutex_enter_blocking` in gdb:
Here is a comparison of the `<__wrap_malloc>` disassembly - I've added empty line separators to make it easier to see which sections are similar:
In the linked thread, the only suggestion is:
- I've made some single-core code, which worked fine
- Decided to merely enable `pico_multicore` in target_link_libraries in CMakeLists.txt and rebuild the project
- Firmware now freezes at start, with the same symptoms as in the thread "Deadlock in multicore when using malloc - Raspberry Pi Forums": an endless loop in `mutex_enter_blocking`, called by `__wrap_malloc` (called by, in my case, `alarm_pool_create`)
`CMakeLists.txt`
Code:
add_executable(blink_simple blink_simple.c)pico_set_binary_type(blink_simple copy_to_ram)# pull in common dependenciestarget_link_libraries(blink_simple#pico_stdlib#pico_multicore pico_multicore hardware_regs hardware_timer hardware_clocks hardware_i2c hardware_uart hardware_watchdog hardware_resets hardware_dma hardware_spi hardware_flash pico_platform pico_standard_link pico_crt0 pico_runtime_headers pico_bootrom pico_runtime #pico_printf #pico_stdlib)# create map/bin/hex/uf2 file etc.pico_add_extra_outputs(blink_simple)# call pico_set_program_url to set path to example on github, so users can find the source for an example via picotoolexample_auto_set_url(blink_simple)Code:
/** * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. * * SPDX-License-Identifier: BSD-3-Clause *///#include "pico/stdlib.h"#include "hardware/gpio.h"#include "pico/multicore.h"#ifndef LED_DELAY_MS#define LED_DELAY_MS 250#endif#ifndef PICO_DEFAULT_LED_PIN#warning blink_simple example requires a board with a regular LED#endif// Initialize the GPIO for the LEDvoid pico_led_init(void) {#ifdef PICO_DEFAULT_LED_PIN // A device like Pico that uses a GPIO for the LED will define PICO_DEFAULT_LED_PIN // so we can use normal GPIO functionality to turn the led on and off gpio_init(PICO_DEFAULT_LED_PIN); gpio_set_dir(PICO_DEFAULT_LED_PIN, GPIO_OUT);#endif}// Turn the LED on or offvoid pico_set_led(bool led_on) {#if defined(PICO_DEFAULT_LED_PIN) // Just set the GPIO on or off gpio_put(PICO_DEFAULT_LED_PIN, led_on);#endif}int32_t tick_timer_alarm_id;alarm_pool_t* alarm_pool_core0 = NULL;int16_t alarm_pool_core0_corenum = -1;uint32_t num_ticks = 0;uint8_t blink_state = 0;int64_t __time_critical_func( tick_timer )( alarm_id_t id, void *user_data ){ num_ticks += 1; blink_state = !blink_state; if (blink_state) { pico_set_led(true); } else { pico_set_led(false); } int64_t ret_us = 250000; // 250 ms return ret_us;}void core1_entry(void){ while( 1 ) { sleep_us(500); // introduce a bit of a delay //tight_loop_contents(); }}int main() { pico_led_init(); multicore_launch_core1(core1_entry); uint hardware_alarm_num = 0; // "you should pass 0, 1 or 2 (since 3 is already used for the default alarm pool)" uint max_timers = 16; alarm_pool_core0 = alarm_pool_create( hardware_alarm_num, max_timers ); // "This method will hard assert if the hardware alarm is already claimed"; but turns out, for hardware_alarm_num value 2, here getting an assert and program stops - but for hardware_alarm_num value 1, this passes (and alarm_pool_core1_extra is a proper pointer) alarm_pool_core0_corenum = alarm_pool_core_num( alarm_pool_core0 ); tick_timer_alarm_id = alarm_pool_add_alarm_in_us( alarm_pool_core0, 
500, ( alarm_callback_t )tick_timer, NULL, true ); //sleep_us(5000); //multicore_launch_core1(core1_entry); while (true) { //pico_set_led(true); //sleep_ms(LED_DELAY_MS); //pico_set_led(false); //sleep_ms(LED_DELAY_MS); sleep_ms(1); }}Code:
Thread 1 hit Breakpoint 2, mutex_enter_blocking (mtx=mtx@entry=0x200096bc <malloc_mutex>) at /src/pico-sdk/src/common/pico_sync/mutex.c:62
62 void __time_critical_func(mutex_enter_blocking)(mutex_t *mtx) {
(gdb) bt
#0 mutex_enter_blocking (mtx=mtx@entry=0x200096bc <malloc_mutex>) at /src/pico-sdk/src/common/pico_sync/mutex.c:62
#1 0x20001e98 in __wrap_malloc (size=24) at /src/pico-sdk/src/rp2_common/pico_malloc/malloc.c:69
#2 0x200007ce in alarm_pool_create_on_timer (timer=0x40054000, hardware_alarm_num=hardware_alarm_num@entry=0, max_timers=max_timers@entry=16) at /src/pico-sdk/src/common/pico_time/time.c:112
#3 0x200001b6 in alarm_pool_create (max_timers=16, timer_alarm_num=0) at /src/pico-sdk/src/common/pico_time/include/pico/time.h:426
#4 main () at /src/pico-examples/blink_simple/blink_simple.c:75
- The good firmware shows the <malloc_mutex> mtx in `mutex_enter_blocking` as `{core = {spin_lock = 0xd0000140}, owner = -1 '\377'}` -- the bad firmware shows it as `{core = {spin_lock = 0x0}, owner = 0 '\000'}`
- The good firmware passes the `if (!lock_is_owner_id_valid(mtx->owner))` check and exits the endless loop -- the bad firmware cannot
- The good firmware has a somewhat different disassembly listing from the bad firmware (though not sure if this has any relevance)
Code:
# bad one
(gdb) p mtx
$8 = (mutex_t *) 0x2000aca4 <malloc_mutex>
(gdb) p *mtx
$9 = {core = {spin_lock = 0x0}, owner = 0 '\000'}
# good one
(gdb) p *mtx
$2 = {core = {spin_lock = 0xd0000140}, owner = -1 '\377'}
Code:
# good one
Thread 1 hit Breakpoint 2, mutex_enter_blocking (mtx=mtx@entry=0x200096bc <malloc_mutex>) at /src/pico-sdk/src/common/pico_sync/mutex.c:62
62 void __time_critical_func(mutex_enter_blocking)(mutex_t *mtx) {
(gdb) n
139 return (*(uint32_t *) (SIO_BASE + SIO_CPUID_OFFSET));
(gdb) n
71 uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
(gdb) p *mtx
$1 = {core = {spin_lock = 0xd0000140}, owner = -1 '\377'}
(gdb) n
72 if (!lock_is_owner_id_valid(mtx->owner)) {
(gdb) p lock_is_owner_id_valid(mtx->owner)
No symbol "lock_is_owner_id_valid" in current context.
(gdb) n
73 mtx->owner = caller;
(gdb) p caller
$3 = 0 '\000'
(gdb) n
74 spin_unlock(mtx->core.spin_lock, save);
(gdb)
75 break;
# bad one
Thread 2 received signal SIGINT, Interrupt.
mutex_enter_blocking (mtx=mtx@entry=0x2000ac94 <malloc_mutex>) at /src/pico-sdk/src/common/pico_sync/mutex.c:71
71 uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
(gdb) p mtx->core
$3 = {spin_lock = 0x0}
(gdb) n
72 if (!lock_is_owner_id_valid(mtx->owner)) {
(gdb) n
77 lock_internal_spin_unlock_with_wait(&mtx->core, save);
(gdb) n
71 uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
(gdb) n
72 if (!lock_is_owner_id_valid(mtx->owner)) {
(gdb) n
77 lock_internal_spin_unlock_with_wait(&mtx->core, save);
(gdb) n
71 uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
(gdb) n
72 if (!lock_is_owner_id_valid(mtx->owner)) {
Code:
# good one
20001e8c <__wrap_malloc>:
20001e8c: b570 push {r4, r5, r6, lr}
20001e8e: 0004 movs r4, r0
20001e90: 4e0a ldr r6, [pc, #40] ; (20001ebc <__wrap_malloc+0x30>)
20001e92: 0030 movs r0, r6
20001e94: f006 f872 bl 20007f7c <mutex_enter_blocking>
20001e98: 0020 movs r0, r4
20001e9a: f000 f8ef bl 2000207c <malloc>
20001e9e: 0005 movs r5, r0
20001ea0: 0030 movs r0, r6
20001ea2: f006 f88f bl 20007fc4 <mutex_exit>
20001ea6: 2d00 cmp r5, #0
20001ea8: d005 beq.n 20001eb6 <__wrap_malloc+0x2a>
20001eaa: 192c adds r4, r5, r4
20001eac: 4b04 ldr r3, [pc, #16] ; (20001ec0 <__wrap_malloc+0x34>)
20001eae: 429c cmp r4, r3
20001eb0: d801 bhi.n 20001eb6 <__wrap_malloc+0x2a>
20001eb2: 0028 movs r0, r5
20001eb4: bd70 pop {r4, r5, r6, pc}
20001eb6: 4803 ldr r0, [pc, #12] ; (20001ec4 <__wrap_malloc+0x38>)
20001eb8: f7fe fde6 bl 20000a88 <panic>
20001ebc: 200096bc .word 0x200096bc
20001ec0: 20040000 .word 0x20040000
20001ec4: 2000857c .word 0x2000857c
# bad one
20002e7c <__wrap_malloc>:
20002e7c: b570 push {r4, r5, r6, lr}
20002e7e: 0005 movs r5, r0
20002e80: 4e08 ldr r6, [pc, #32] @ (20002ea4 <__wrap_malloc+0x28>)
20002e82: 0030 movs r0, r6
20002e84: f006 fa8c bl 200093a0 <mutex_enter_blocking>
20002e88: 0028 movs r0, r5
20002e8a: f000 feb5 bl 20003bf8 <malloc>
20002e8e: 0004 movs r4, r0
20002e90: 0030 movs r0, r6
20002e92: f006 faa7 bl 200093e4 <mutex_exit>
20002e96: 0029 movs r1, r5
20002e98: 0020 movs r0, r4
20002e9a: f7ff ffdf bl 20002e5c <check_alloc>
20002e9e: 0020 movs r0, r4
20002ea0: bd70 pop {r4, r5, r6, pc}
20002ea2: 46c0 nop @ (mov r8, r8)
20002ea4: 2000aca4 .word 0x2000aca4
... but I do not really think that is it, because:
(Quoted suggestion from the linked thread:) "I think most likely it means an interrupt handler is calling malloc or free. Generally C standard library functions are not reentrant and malloc and free certainly aren't."
- If there was something else calling `mutex_enter_blocking`, such as an interrupt, wouldn't the debugger have caught it?
- It seems to me, in the bad case, the problem is more with the mtx mutex somehow ending up being used uninitialized (as a consequence of linking to pico_multicore - even though that makes no sense either, why would linking to pico_multicore have such an effect)
- Even so, why do the assembly listings for `<__wrap_malloc>` differ so much between these two cases (in certain parts)? The code is generated at build time, well before the program runs and before any interrupts could interfere
Statistics: Posted by sdbbs — Sun May 04, 2025 5:47 pm — Replies 0 — Views 37