fix(mcu): MCU-A1 — replace 25 C cooling stub with 70/60 C hysteresis

Cooling-fan trip in main.cpp's periodic temperature block was a 25 C dev
stub that latched the fan ON at room temperature on every boot. Replaced
with production thermal control: ON at 70 C, OFF at 60 C. The 10 C
dead-band prevents relay/fan chatter near the threshold; the 70 C ON
point sits below the 75 C SAFE-mode gate in checkSystemHealth() so the
fan engages before the system shuts down.

Driven from the existing `temperature` global (max of 8 sensors,
populated just above by the GAP-3 fix) instead of re-OR'ing the eight
Temperature_N variables — single source of truth, and the diag now
prints the actual peak temperature on each transition.

Added test_mcu_a1_cooling_hysteresis (9 cases) covering cold-start,
upward crossing, dead-band hold, downward crossing, and a regression
guard at 30 C that would have engaged the fan under the old stub.
MCU regression now 77/77.
This commit is contained in:
Jason
2026-04-27 19:42:42 +05:45
parent 0b8b933e27
commit df0b2fd469
3 changed files with 116 additions and 8 deletions
@@ -2179,16 +2179,25 @@ int main(void)
DIAG("PA", "System temperature (max of 8 sensors) = %.1f C", (double)temperature);
}
//(20 mV/°C on TMP37) QPA2962 RF amplifier Operating Temp. Range, TBASE min40 normal+25 max+85 °C
int Max_Temp = 25;
if((Temperature_1>Max_Temp)||(Temperature_2>Max_Temp)||(Temperature_3>Max_Temp)||(Temperature_4>Max_Temp)
||(Temperature_5>Max_Temp)||(Temperature_6>Max_Temp)||(Temperature_7>Max_Temp)||(Temperature_8>Max_Temp))
{
// MCU-A1: production thermal limits for QPA2962 (TBASE max +85 °C).
// Cooling fan turns ON at 70 °C and OFF at 60 °C — 10 °C hysteresis
// prevents relay/fan chatter near the threshold. The system-level
// hard overtemp gate at checkSystemHealth() (>75 °C → SAFE mode)
// sits above the cooling ON point so the fan gets a chance to act
// before shutdown. Was previously a 25 °C dev-bench stub that kept
// the fan latched on at room temperature.
static bool cooling_on = false;
const float COOLING_ON_C = 70.0f;
const float COOLING_OFF_C = 60.0f;
float t_max = temperature; // populated above as max of 8 sensors
if (!cooling_on && t_max > COOLING_ON_C) {
cooling_on = true;
HAL_GPIO_WritePin(EN_DIS_COOLING_GPIO_Port, EN_DIS_COOLING_Pin, GPIO_PIN_SET);
DIAG_WARN("PA", "Over-temp detected (>%d C) -- cooling ENABLED", Max_Temp);
}
else{
DIAG_WARN("PA", "Over-temp %.1f C > %.0f C -- cooling ENABLED", (double)t_max, (double)COOLING_ON_C);
} else if (cooling_on && t_max < COOLING_OFF_C) {
cooling_on = false;
HAL_GPIO_WritePin(EN_DIS_COOLING_GPIO_Port, EN_DIS_COOLING_Pin, GPIO_PIN_RESET);
DIAG("PA", "Temp %.1f C < %.0f C -- cooling DISABLED", (double)t_max, (double)COOLING_OFF_C);
}
/* [GAP-3 FIX 4] Periodic IDQ re-read — the Idq_reading[] array was only
@@ -66,6 +66,7 @@ TESTS_MOCK_ONLY := test_bug2_ad9523_double_setup \
TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \
test_bug13_dac2_adc_buffer_mismatch \
test_bug16_runradar_shadows_globals \
test_mcu_a1_cooling_hysteresis \
test_gap3_iwdg_config \
test_gap3_temperature_max \
test_gap3_idq_periodic_reread \
@@ -159,6 +160,9 @@ test_bug13_dac2_adc_buffer_mismatch: test_bug13_dac2_adc_buffer_mismatch.c
test_bug16_runradar_shadows_globals: test_bug16_runradar_shadows_globals.c
$(CC) $(CFLAGS) $< -o $@
test_mcu_a1_cooling_hysteresis: test_mcu_a1_cooling_hysteresis.c
$(CC) $(CFLAGS) $< -o $@
# Gap-3 safety tests -- mock-only (needs spy log for GPIO sequence)
test_gap3_emergency_stop_rails: test_gap3_emergency_stop_rails.c $(MOCK_OBJS)
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) -o $@
@@ -0,0 +1,95 @@
/*******************************************************************************
* test_mcu_a1_cooling_hysteresis.c
*
* MCU-A1: cooling-fan threshold was a 25 C dev stub that latched the fan ON
* at room temperature. Production fix raises the threshold to 70 C with a
* 60 C off point (10 C hysteresis) so the fan does not chatter near the
* trip point.
*
* This test replays the fixed cooling-control logic against a temperature
* sweep and asserts (a) the fan stays off below the ON threshold from cold,
* (b) it engages crossing 70 C upward, (c) it stays on through the 60-70 C
* dead-band on the way down, and (d) it disengages below 60 C.
******************************************************************************/
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
/* Cooling-fan thresholds in degrees C. The fan switches on only for a reading
 * strictly above COOLING_ON_C and off only for a reading strictly below
 * COOLING_OFF_C; the gap between the two is the hysteresis dead-band. */
static const float COOLING_ON_C = 70.0f;
static const float COOLING_OFF_C = 60.0f;

/*
 * Stand-alone replay of the post-fix cooling control in main.cpp.
 *
 * cooling_on  current fan state before this sample
 * t_max       temperature sample in degrees C
 *
 * Returns the fan state after the sample has been applied. Both comparisons
 * are strict, so a reading exactly on a threshold leaves the state unchanged.
 */
static bool step_cooling(bool cooling_on, float t_max)
{
    if (cooling_on) {
        /* Fan running: keep it on unless the reading is below the OFF point. */
        return !(t_max < COOLING_OFF_C);
    }

    /* Fan idle: start it only once the reading exceeds the ON point. */
    return t_max > COOLING_ON_C;
}
int main(void)
{
printf("=== MCU-A1: cooling-fan hysteresis (70 C ON / 60 C OFF) ===\n");
bool fan = false;
/* 1. Cold start: room temperature must NOT engage the fan
* (this is the bug the 25 C stub caused). */
printf(" Test 1: 25 C from cold ... ");
fan = step_cooling(fan, 25.0f);
assert(fan == false);
printf("OFF, PASS\n");
/* 2. Walking up through the dead band must not engage. */
printf(" Test 2: 65 C from cold ... ");
fan = step_cooling(fan, 65.0f);
assert(fan == false);
printf("OFF, PASS\n");
/* 3. At the exact threshold (>, not >=) still off. */
printf(" Test 3: 70.0 C exactly ... ");
fan = step_cooling(fan, 70.0f);
assert(fan == false);
printf("OFF, PASS\n");
/* 4. Crossing the trip point upward engages. */
printf(" Test 4: 70.5 C ... ");
fan = step_cooling(fan, 70.5f);
assert(fan == true);
printf("ON, PASS\n");
/* 5. Cooling off into the dead band — fan must stay on. */
printf(" Test 5: 65 C while ON ... ");
fan = step_cooling(fan, 65.0f);
assert(fan == true);
printf("ON (hysteresis), PASS\n");
/* 6. At the OFF threshold exactly, still on (uses <, not <=). */
printf(" Test 6: 60.0 C exactly while ON ... ");
fan = step_cooling(fan, 60.0f);
assert(fan == true);
printf("ON, PASS\n");
/* 7. Crossing the OFF point disengages. */
printf(" Test 7: 59.5 C while ON ... ");
fan = step_cooling(fan, 59.5f);
assert(fan == false);
printf("OFF, PASS\n");
/* 8. Spike-and-recover above the system overtemp gate (75 C) — the
* fan engages well before checkSystemHealth() trips SAFE mode. */
printf(" Test 8: 76 C engages cooling before 75 C SAFE-mode gate ... ");
fan = step_cooling(fan, 76.0f);
assert(fan == true);
printf("ON, PASS\n");
/* 9. The pre-fix 25 C stub would have set fan=true here. Confirm the
* fixed logic does not. */
printf(" Test 9: 30 C does NOT engage (regression guard for 25 C stub) ... ");
fan = false;
fan = step_cooling(fan, 30.0f);
assert(fan == false);
printf("OFF, PASS\n");
printf("\n=== MCU-A1: ALL TESTS PASSED ===\n\n");
return 0;
}