fix(fpga): PR-AA — doppler_mag 1-cell shift in usb emit FSM

The WR_DOPPLER_DATA emit advanced mag_rd_addr at end of phase 1 (LSB byte)
but BRAM has 1-cycle read latency, so phase 0 of the next pair re-read
the prior cell. Result: wire pair K = (HIGH(bram[K-1]), LOW(bram[K])) —
each cell was emitted with the previous cell's high byte, which corrupted
the value whenever adjacent high bytes differed. Footprint was 30 of 24576
cells (peak rows + high-byte transitions in the noise floor); max diff
6656 LSB on the target row.

Fix: advance the BRAM read address at end of phase 0 (MSB) so BRAM has
2 cycles between addr-set and the next pair's MSB read. The same pattern
existed in WR_RANGE_DATA, where it went undetected because the regression
skips the range stream; it is fixed there as well for symmetry. After the
fix, both iverilog and remote Vivado 2025.2 xsim emit a bit-exact match
against the Python golden.

Tighten E12.14 / E12.6.b to strict zero tolerance and reword the
"PR-AA pending" notes to indicate the fix has landed. The target-cell
check (E12.15) now tests the exact (rb, db) bin instead of a 3-bin
doppler window.

Verification:
  * iverilog A6 in-TB: doppler_mismatches=0/24576 (16/16 PASS)
  * iverilog A6 parse strict: 28/28 PASS
  * Vivado 2025.2 xsim A6 in-TB: doppler_mismatches=0/24576 (16/16 PASS)
  * Vivado 2025.2 xsim A6 parse strict: 28/28 PASS
  * Full regression: 41 passed, 0 failed, 0 skipped / 41 total
This commit is contained in:
Jason
2026-05-06 10:30:54 +05:45
parent d9e7a5becf
commit 98ec9cb6a5
2 changed files with 35 additions and 34 deletions
@@ -59,11 +59,11 @@ DOPPLER_OFFSET = HEADER_BYTES # 9
CFAR_OFFSET = DOPPLER_OFFSET + DOPPLER_MAG_BYTES # 49161
FOOTER_OFFSET = CFAR_OFFSET + CFAR_DENSE_BYTES # 55305
# Doppler_mag 1-cell shift is a separate but related production bug (see
# `project_aeris10_usb_cfar_stale_bin_2026-05-05.md` — "Related cosmetic
# finding"). Until PR-AA investigates, allow up to this many byte
# differences in the doppler_mag section so the regression stays green.
DOPPLER_MAG_BYTE_DIFF_TOLERANCE = 80
# PR-AA fix: doppler_mag is now bit-exact against the Python golden — the
# WR_DOPPLER_DATA emit FSM advances mag_rd_addr at end of phase 0 (MSB) so
# BRAM has 2 cycles between addr-set and the next pair's MSB read. Strict
# zero-byte tolerance from here; any drift is a real regression.
DOPPLER_MAG_BYTE_DIFF_TOLERANCE = 0
# ============================================================================
@@ -156,7 +156,7 @@ def main() -> int:
# ---- Per-section compare against expected_frame.bin ----
# E12.6 is split into 4 sub-checks so diffs are isolated:
# .a header (strict) .b doppler_mag (tolerance — PR-AA pending)
# .a header (strict) .b doppler_mag (strict, post-PR-AA)
# .c cfar_dense (strict) .d footer (strict)
if len(captured) == len(expected):
# .a header
@@ -164,12 +164,10 @@ def main() -> int:
state.check('E12.6.a: header bytes == expected (strict)',
hdr_diff == 0, f"{hdr_diff} differing bytes")
# .b doppler_mag — relaxed tolerance until PR-AA fix
# .b doppler_mag — strict bit-exact (PR-AA fix landed)
dop_diffs = [i for i in range(DOPPLER_OFFSET, CFAR_OFFSET)
if captured[i] != expected[i]]
state.check('E12.6.b: doppler_mag bytes within '
f'tol={DOPPLER_MAG_BYTE_DIFF_TOLERANCE} '
'(PR-AA: 1-cell-shift bug)',
state.check('E12.6.b: doppler_mag bytes == expected (strict)',
len(dop_diffs) <= DOPPLER_MAG_BYTE_DIFF_TOLERANCE,
f"{len(dop_diffs)} differing bytes; "
f"first 5 at {dop_diffs[:5]}")
@@ -217,26 +215,18 @@ def main() -> int:
state.check('E12.13: doppler_mag shape (512, 48)',
doppler_mag is not None and doppler_mag.shape == (NUM_RANGE_BINS, NUM_DOPPLER_BINS))
if doppler_mag is not None:
# Diff distribution drives BOTH a cell-count and a max-diff bound.
# Until PR-AA investigates the doppler 1-cell-shift bug, allow up
# to ~50 cells to differ; once the shift is fixed, this should
# tighten back to "max diff <= 1 LSB".
# Strict bit-exact post-PR-AA. Any drift fails.
diff = np.abs(doppler_mag.astype(np.int64) - expected_mag.astype(np.int64))
max_diff = int(diff.max())
n_diff = int((diff > 0).sum())
state.check('E12.14: doppler_mag cell-diff <= 50 cells '
'(PR-AA: 1-cell-shift bug)',
n_diff <= 50,
state.check('E12.14: doppler_mag bit-exact vs Python golden',
n_diff == 0,
f"max_diff={max_diff} ({n_diff} of {diff.size} cells differ)")
# Specific target cells — magnitude > 0 (E9). The 1-cell shift can
# nudge the peak's exact bin, so check the 3-cell neighborhood
# instead of the single expected cell.
# Specific target cells — magnitude at exact expected (rb,db) bin.
for (rb, db) in EXPECTED_TARGETS:
window = doppler_mag[rb, max(0, db-1):db+2]
peak = int(window.max())
state.check(f'E12.15.{rb}.{db}: peak in 3-bin doppler '
f'window {tuple(range(max(0,db-1), db+2))} > 1000',
peak = int(doppler_mag[rb, db])
state.check(f'E12.15.{rb}.{db}: doppler_mag[{rb},{db}] > 1000',
peak > 1000, f"got {peak}")
# ---- CFAR dense — E10 ----
@@ -1056,17 +1056,19 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin
ft_data_oe <= 1'b1;
ft_wr_n <= 1'b0;
// BRAM read has 1-cycle latency. We pre-loaded range_rd_addr.
// On phase 0: output MSB of range_rd_data (read on prev cycle)
// On phase 1: output LSB, advance to next address
// PR-AA: addr advance lives at end of phase 0 (MSB emit), not
// phase 1 (LSB emit). With BRAM 1-cycle read latency, a 2-byte
// pair needs 2 cycles between addr-set and the next pair's MSB
// read; advancing at phase 1 (1 cycle gap) leaves the next MSB
// reading the prior cell's high byte. See WR_DOPPLER_DATA below.
if (!wr_byte_phase) begin
ft_data_out <= range_rd_data[15:8];
wr_byte_phase <= 1'b1;
range_rd_idx <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1};
range_rd_addr <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1};
end else begin
ft_data_out <= range_rd_data[7:0];
wr_byte_phase <= 1'b0;
range_rd_idx <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1};
range_rd_addr <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1};
end
wr_byte_idx <= wr_byte_idx + 16'd1;
@@ -1102,14 +1104,20 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin
ft_data_oe <= 1'b1;
ft_wr_n <= 1'b0;
// PR-AA fix: BRAM read has 1-cycle latency. A 2-byte pair
// emits MSB then LSB from the SAME cell, so addr must advance
// at end of phase 0 (MSB) — that gives BRAM 2 cycles before
// the next pair's MSB needs the new cell:
// cycle K (phase 0): data=bram[addr_{K-1}]=bram[N], emit H(N), advance addr<=N+1
// cycle K+1 (phase 1): data=bram[addr_K]=bram[N], emit L(N)
// cycle K+2 (phase 0): data=bram[addr_{K+1}]=bram[N+1], emit H(N+1)
// Previous (broken) pattern advanced at phase 1, so phase 0 of
// the next pair re-read bram[N] and emitted H(N) again, leaving
// the wire pair-K = (HIGH(bram[K-1]), LOW(bram[K])).
if (!wr_byte_phase) begin
ft_data_out <= mag_rd_data[15:8];
wr_byte_phase <= 1'b1;
end else begin
ft_data_out <= mag_rd_data[7:0];
wr_byte_phase <= 1'b0;
// Pre-load mag_rd_addr 1 cell ahead (BRAM 1-cycle
// read latency). Address layout: {range[8:0], doppler[5:0]}.
// Address layout: {range[8:0], doppler[5:0]}.
if (dop_doppler_idx == DOP_BIN_LAST) begin
dop_doppler_idx <= {DOPPLER_BIN_BITS{1'b0}};
dop_range_idx <= dop_range_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1};
@@ -1120,6 +1128,9 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin
mag_rd_addr <= {dop_range_idx,
dop_doppler_idx + {{(DOPPLER_BIN_BITS-1){1'b0}}, 1'b1}};
end
end else begin
ft_data_out <= mag_rd_data[7:0];
wr_byte_phase <= 1'b0;
end
wr_byte_idx <= wr_byte_idx + 16'd1;