From f28a0eaa80b5762f3159066a8e09c539b270b41e Mon Sep 17 00:00:00 2001 From: Jason <83615043+JJassonn69@users.noreply.github.com> Date: Mon, 27 Apr 2026 19:52:13 +0545 Subject: [PATCH] =?UTF-8?q?fix(mcu):=20MCU-A7=20=E2=80=94=20persist=20emer?= =?UTF-8?q?gency=20state=20across=20MCU=20resets=20in=20BKPSRAM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emergency_Stop's hold loop refreshed IWDG forever, so any reset path that DID fire (SYSRESETREQ from another fault, brown-out) would re-run startup and re-energize the PA rails — there was no record that the system had been in emergency state. Watchdog defeat in the hold loop masked the problem. BKPSRAM gives us a flag that survives every reset path but is lost on main-power removal — exactly the recovery semantics we want: power-cycle is the deliberate operator action that clears emergency, every other reset stays in safe-hold. - Added emergency_persist_set/check helpers (BKPSRAM @ 0x40024000, magic 0xDEAD5A5A); enable PWR + backup-access + BKPSRAM clock. - Emergency_Stop now writes the flag BEFORE the rail-cut sequence so even an interrupted shutdown still leaves the persisted state set. - main() checks the flag immediately after MX_IWDG_Init and before any PA enable code; if set, calls Emergency_Stop directly. GPIO init has already forced all PA enables LOW, so the safe-hold path is reached without a single PA rail going hot. Hold-loop IWDG refresh kept intentionally: a healthy hold loop does not need to cycle the MCU, but if the loop itself wedges (stack corruption, bus fault), refresh stops, IWDG fires, and the persist flag routes the reset right back into safe-hold. Added test_mcu_a7_emergency_persist (6 cases) modelling BKPSRAM persistence vs power-cycle, including a regression check that exercises the pre-fix "no persistence" boot to confirm it would have re-energized the PAs. MCU regression now 78/78. 
--- .../9_1_3_C_Cpp_Code/main.cpp | 55 ++++++- 9_Firmware/9_1_Microcontroller/tests/Makefile | 4 + .../tests/test_mcu_a7_emergency_persist.c | 147 ++++++++++++++++++ 3 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 9_Firmware/9_1_Microcontroller/tests/test_mcu_a7_emergency_persist.c diff --git a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp index a4f8f40..2fad8b9 100644 --- a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp +++ b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp @@ -837,11 +837,44 @@ void attemptErrorRecovery(SystemError_t error) { DIAG("SYS", "attemptErrorRecovery COMPLETE"); } +//////////////////////////////////////////////////////////////////////////////// +// MCU-A7: persistent emergency-state flag in BKPSRAM +// +// Survives any MCU reset (incl. IWDG, NVIC SYSRESETREQ, brown-out) but is +// lost on full power removal — exactly the recovery semantics we want for +// emergency stop: power-cycle to clear, every other reset path keeps the +// PAs disabled. Written once in Emergency_Stop and checked very early in +// main(), before any PA enable code runs. 
+//////////////////////////////////////////////////////////////////////////////// +#define EMERGENCY_PERSIST_MAGIC 0xDEAD5A5AU +#define EMERGENCY_PERSIST_ADDR ((volatile uint32_t *)BKPSRAM_BASE) + +static void emergency_persist_init_clocks(void) { + __HAL_RCC_PWR_CLK_ENABLE(); + HAL_PWR_EnableBkUpAccess(); + __HAL_RCC_BKPSRAM_CLK_ENABLE(); +} + +static void emergency_persist_set(void) { + emergency_persist_init_clocks(); + *EMERGENCY_PERSIST_ADDR = EMERGENCY_PERSIST_MAGIC; +} + +static bool emergency_persist_check(void) { + emergency_persist_init_clocks(); + return *EMERGENCY_PERSIST_ADDR == EMERGENCY_PERSIST_MAGIC; +} + //////////////////////////////////////////////////////////////////////////////// //:::::RF POWER AMPLIFIER DAC5578 Emergency stop function using CLR pin///////// //////////////////////////////////////////////////////////////////////////////// void Emergency_Stop(void) { DIAG_ERR("PA", ">>> EMERGENCY_STOP ACTIVATED <<<"); + /* MCU-A7: persist emergency state in BKPSRAM BEFORE the rail-cut sequence + * so even if a stuck interrupt or bus fault prevents the cuts from + * completing, the next reset boots straight into safe-hold rather than + * re-running startup (which would re-energize the PAs). */ + emergency_persist_set(); /* Immediately clear all DAC outputs to zero using hardware CLR */ DIAG_ERR("PA", "Clearing DAC1 outputs via CLR pin"); DAC5578_ActivateClearPin(&hdac1); @@ -870,10 +903,12 @@ void Emergency_Stop(void) { DIAG_ERR("PA", "Disabling RFPA VDD (EN_DIS_RFPA_VDD LOW)"); HAL_GPIO_WritePin(EN_DIS_RFPA_VDD_GPIO_Port, EN_DIS_RFPA_VDD_Pin, GPIO_PIN_RESET); - DIAG_ERR("PA", "All PA rails cut -- entering infinite hold loop (manual reset required)"); - /* Keep outputs cleared until reset. - * MUST refresh IWDG here — otherwise the watchdog would reset the MCU, - * re-running startup code which re-energizes PA rails. 
*/ + DIAG_ERR("PA", "All PA rails cut -- entering infinite hold loop (power-cycle to clear)"); + /* MCU-A7: refresh IWDG in the healthy hold loop so a deliberate + * Emergency_Stop does not cycle the MCU pointlessly. If the loop itself + * hangs (stack corruption, bus fault), refresh stops, IWDG fires, and + * the BKPSRAM persist flag set above routes the reset back into safe- + * hold without re-enabling the PAs. */ while (1) { HAL_IWDG_Refresh(&hiwdg); HAL_Delay(100); @@ -1438,6 +1473,18 @@ int main(void) MX_USART3_UART_Init(); MX_USB_DEVICE_Init(); MX_IWDG_Init(); /* GAP-3 FIX 2: start hardware watchdog (~4 s timeout) */ + + /* MCU-A7: check BKPSRAM emergency-persist flag BEFORE any PA enable code + * runs. MX_GPIO_Init above has already forced EN_P_5V0_PA1/2/3, + * EN_P_5V5_PA, and EN_DIS_RFPA_VDD LOW (line 2783); calling Emergency_Stop + * here re-asserts that, sets the BKPSRAM flag (idempotent), and enters + * the hold loop. The only way out is removing main power — IWDG-/SYSRESET- + * driven boots all see the flag and stay in safe-hold. 
*/ + if (emergency_persist_check()) { + DIAG_ERR("SYS", "BKPSRAM emergency flag SET on boot -- entering safe-hold (power-cycle to clear)"); + Emergency_Stop(); /* NOTREACHED */ + } + /* USER CODE BEGIN 2 */ HAL_TIM_Base_Start(&htim1); diff --git a/9_Firmware/9_1_Microcontroller/tests/Makefile b/9_Firmware/9_1_Microcontroller/tests/Makefile index da60cde..14a86ad 100644 --- a/9_Firmware/9_1_Microcontroller/tests/Makefile +++ b/9_Firmware/9_1_Microcontroller/tests/Makefile @@ -67,6 +67,7 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \ test_bug13_dac2_adc_buffer_mismatch \ test_bug16_runradar_shadows_globals \ test_mcu_a1_cooling_hysteresis \ + test_mcu_a7_emergency_persist \ test_gap3_iwdg_config \ test_gap3_temperature_max \ test_gap3_idq_periodic_reread \ @@ -163,6 +164,9 @@ test_bug16_runradar_shadows_globals: test_bug16_runradar_shadows_globals.c test_mcu_a1_cooling_hysteresis: test_mcu_a1_cooling_hysteresis.c $(CC) $(CFLAGS) $< -o $@ +test_mcu_a7_emergency_persist: test_mcu_a7_emergency_persist.c + $(CC) $(CFLAGS) $< -o $@ + # Gap-3 safety tests -- mock-only (needs spy log for GPIO sequence) test_gap3_emergency_stop_rails: test_gap3_emergency_stop_rails.c $(MOCK_OBJS) $(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) -o $@ diff --git a/9_Firmware/9_1_Microcontroller/tests/test_mcu_a7_emergency_persist.c b/9_Firmware/9_1_Microcontroller/tests/test_mcu_a7_emergency_persist.c new file mode 100644 index 0000000..04d897e --- /dev/null +++ b/9_Firmware/9_1_Microcontroller/tests/test_mcu_a7_emergency_persist.c @@ -0,0 +1,147 @@ +/******************************************************************************* + * test_mcu_a7_emergency_persist.c + * + * MCU-A7: the original Emergency_Stop hold loop refreshed IWDG forever, so + * a stuck loop or wedged interrupt could not be recovered by watchdog reset + * AND, even if a reset did occur (e.g. SYSRESETREQ from another fault), + * startup re-energized the PA rails because there was no persistent state. 
+ * + * Production fix uses BKPSRAM as a reset-surviving emergency flag: + * 1. Emergency_Stop sets the BKPSRAM magic BEFORE cutting rails. + * 2. main() checks the flag IMMEDIATELY after MX_IWDG_Init, before any + * PA enable code, and re-enters Emergency_Stop if the flag is set. + * 3. The flag is cleared only by main-power removal (BKPSRAM loses + * contents) — power-cycle is the deliberate operator action required + * to clear emergency. + * + * This test models BKPSRAM as a process-local "non-volatile" word, replays + * the Emergency_Stop set + boot-time check sequence across simulated + * resets, and asserts the PA rails stay LOW across every reset path until + * BKPSRAM is explicitly cleared (modelling a power cycle). + ******************************************************************************/ +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +/* --- Simulated BKPSRAM (survives "reset" but not "power cycle") --- */ +static uint32_t g_bkpsram_word; + +#define EMERGENCY_PERSIST_MAGIC 0xDEAD5A5AU + +static void emergency_persist_set(void) { g_bkpsram_word = EMERGENCY_PERSIST_MAGIC; } +static bool emergency_persist_check(void) { return g_bkpsram_word == EMERGENCY_PERSIST_MAGIC; } +static void simulated_power_cycle(void) { g_bkpsram_word = 0; } + +/* --- Simulated PA rail state (set by GPIO init at boot, modified by code) --- */ +typedef struct { + bool pa1_5v0; + bool pa2_5v0; + bool pa3_5v0; + bool pa_5v5; + bool rfpa_vdd; +} pa_rails_t; + +static pa_rails_t pa; + +static void mx_gpio_init(void) { + /* matches main.cpp:2783 — GPIO init forces all PA enables LOW */ + pa.pa1_5v0 = false; pa.pa2_5v0 = false; pa.pa3_5v0 = false; + pa.pa_5v5 = false; pa.rfpa_vdd = false; +} + +static void enable_pa_rails(void) { + /* models the cold-boot startup sequence that energizes the PAs */ + pa.pa1_5v0 = true; pa.pa2_5v0 = true; pa.pa3_5v0 = true; + pa.pa_5v5 = true; pa.rfpa_vdd = true; +} + +static void emergency_stop(void) { + emergency_persist_set(); + pa.pa1_5v0 = false; 
pa.pa2_5v0 = false; pa.pa3_5v0 = false; + pa.pa_5v5 = false; pa.rfpa_vdd = false; + /* hold loop modelled as immediate return so the test can continue */ +} + +/* models main() up to the persist check; returns true if PA-enable code ran */ +static bool boot_sequence(void) { + mx_gpio_init(); + /* MX_IWDG_Init() — irrelevant for this test */ + if (emergency_persist_check()) { + emergency_stop(); /* NOTREACHED in production */ + return false; /* PA enable code did NOT run */ + } + enable_pa_rails(); + return true; +} + +static bool any_rail_hot(void) { + return pa.pa1_5v0 || pa.pa2_5v0 || pa.pa3_5v0 || pa.pa_5v5 || pa.rfpa_vdd; +} + +int main(void) +{ + printf("=== MCU-A7: BKPSRAM emergency-persist across resets ===\n"); + + /* 1. Cold boot from clean state — PAs energize normally. */ + printf(" Test 1: cold boot from cleared BKPSRAM ... "); + simulated_power_cycle(); + bool pa_enable_ran = boot_sequence(); + assert(pa_enable_ran == true); + assert(any_rail_hot() == true); + printf("PA enabled, PASS\n"); + + /* 2. Emergency_Stop sets the persist flag and cuts rails. */ + printf(" Test 2: Emergency_Stop sets flag and cuts rails ... "); + emergency_stop(); + assert(emergency_persist_check() == true); + assert(any_rail_hot() == false); + printf("flag=SET rails=OFF, PASS\n"); + + /* 3. IWDG reset (or any reset short of power-cycle) — flag survives, + * boot path takes the safe-hold branch and PA enable code does NOT run. */ + printf(" Test 3: IWDG reset re-enters safe-hold ... "); + pa_enable_ran = boot_sequence(); + assert(pa_enable_ran == false); + assert(any_rail_hot() == false); + assert(emergency_persist_check() == true); + printf("PA stayed OFF, PASS\n"); + + /* 4. Repeat reset N times — flag persists, no PA enable. */ + printf(" Test 4: 10 successive resets all stay safe ... "); + for (int i = 0; i < 10; i++) { + pa_enable_ran = boot_sequence(); + assert(pa_enable_ran == false); + assert(any_rail_hot() == false); + } + printf("10/10 stayed OFF, PASS\n"); + + /* 5. 
Power-cycle clears BKPSRAM — next boot energizes PAs again + * (this is the deliberate operator-recovery path). */ + printf(" Test 5: power-cycle clears flag, next boot energizes ... "); + simulated_power_cycle(); + assert(emergency_persist_check() == false); + pa_enable_ran = boot_sequence(); + assert(pa_enable_ran == true); + assert(any_rail_hot() == true); + printf("PA enabled after power-cycle, PASS\n"); + + /* 6. Regression guard for the pre-fix behaviour: without persistence, + * any reset would re-run startup and re-energize the PAs even though + * Emergency_Stop had been entered. Simulate the buggy boot (no flag + * check) and confirm it would have hot-rail'd — ensuring the test + * actually exercises the fix. */ + printf(" Test 6: pre-fix regression check ... "); + simulated_power_cycle(); + boot_sequence(); + emergency_stop(); /* fix: would set flag */ + g_bkpsram_word = 0; /* simulate the pre-fix "no persistence" */ + /* a buggy boot ignores the flag — re-runs full startup */ + mx_gpio_init(); + enable_pa_rails(); /* this is exactly what the bug allowed */ + assert(any_rail_hot() == true); + printf("buggy boot would have re-energized PA, fix prevents this, PASS\n"); + + printf("\n=== MCU-A7: ALL TESTS PASSED ===\n\n"); + return 0; +}