From 98ec9cb6a5b7a7aa0d2aef17e6fd757500b221b7 Mon Sep 17 00:00:00 2001 From: Jason <83615043+JJassonn69@users.noreply.github.com> Date: Wed, 6 May 2026 10:30:54 +0545 Subject: [PATCH] =?UTF-8?q?fix(fpga):=20PR-AA=20=E2=80=94=20doppler=5Fmag?= =?UTF-8?q?=201-cell=20shift=20in=20usb=20emit=20FSM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WR_DOPPLER_DATA emit advanced mag_rd_addr at end of phase 1 (LSB byte) but BRAM has 1-cycle read latency, so phase 0 of the next pair re-read the prior cell. Result: wire pair K = (HIGH(bram[K-1]), LOW(bram[K])) — each cell silently carried the high byte of the preceding cell, corrupting its value whenever the two high bytes differed. Footprint was 30 of 24576 cells (peak rows + high-byte transitions in the noise floor); max diff 6656 LSB on the target row. Fix: advance the BRAM read address at end of phase 0 (MSB) so BRAM has 2 cycles between addr-set and the next pair's MSB read. Same pattern existed in WR_RANGE_DATA — silently broken (regression skips range stream); fixed for symmetry. After fix, both iverilog and remote Vivado 2025.2 xsim emit a bit-exact match against the Python golden. Tighten E12.14 / E12.6.b to strict zero tolerance and reword the "PR-AA pending" notes to indicate the fix has landed. The target-cell check (E12.15) drops its 3-bin doppler window and now reads the exact (rb, db) bin. 
Verification: * iverilog A6 in-TB: doppler_mismatches=0/24576 (16/16 PASS) * iverilog A6 parse strict: 28/28 PASS * Vivado 2025.2 xsim A6 in-TB: doppler_mismatches=0/24576 (16/16 PASS) * Vivado 2025.2 xsim A6 parse strict: 28/28 PASS * Full regression: 41 passed, 0 failed, 0 skipped / 41 total --- .../tb/cosim/tb_e2e_dsp_to_host_parse.py | 38 +++++++------------ .../9_2_FPGA/usb_data_interface_ft2232h.v | 31 ++++++++++----- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/9_Firmware/9_2_FPGA/tb/cosim/tb_e2e_dsp_to_host_parse.py b/9_Firmware/9_2_FPGA/tb/cosim/tb_e2e_dsp_to_host_parse.py index c96fafe..59eecb9 100644 --- a/9_Firmware/9_2_FPGA/tb/cosim/tb_e2e_dsp_to_host_parse.py +++ b/9_Firmware/9_2_FPGA/tb/cosim/tb_e2e_dsp_to_host_parse.py @@ -59,11 +59,11 @@ DOPPLER_OFFSET = HEADER_BYTES # 9 CFAR_OFFSET = DOPPLER_OFFSET + DOPPLER_MAG_BYTES # 49161 FOOTER_OFFSET = CFAR_OFFSET + CFAR_DENSE_BYTES # 55305 -# Doppler_mag 1-cell shift is a separate but related production bug (see -# `project_aeris10_usb_cfar_stale_bin_2026-05-05.md` — "Related cosmetic -# finding"). Until PR-AA investigates, allow up to this many byte -# differences in the doppler_mag section so the regression stays green. -DOPPLER_MAG_BYTE_DIFF_TOLERANCE = 80 +# PR-AA fix: doppler_mag is now bit-exact against the Python golden — the +# WR_DOPPLER_DATA emit FSM advances mag_rd_addr at end of phase 0 (MSB) so +# BRAM has 2 cycles between addr-set and the next pair's MSB read. Strict +# zero-byte tolerance from here; any drift is a real regression. 
+DOPPLER_MAG_BYTE_DIFF_TOLERANCE = 0 # ============================================================================ @@ -156,7 +156,7 @@ def main() -> int: # ---- Per-section compare against expected_frame.bin ---- # E12.6 is split into 4 sub-checks so diffs are isolated: - # .a header (strict) .b doppler_mag (tolerance — PR-AA pending) + # .a header (strict) .b doppler_mag (strict, post-PR-AA) # .c cfar_dense (strict) .d footer (strict) if len(captured) == len(expected): # .a header @@ -164,12 +164,10 @@ def main() -> int: state.check('E12.6.a: header bytes == expected (strict)', hdr_diff == 0, f"{hdr_diff} differing bytes") - # .b doppler_mag — relaxed tolerance until PR-AA fix + # .b doppler_mag — strict bit-exact (PR-AA fix landed) dop_diffs = [i for i in range(DOPPLER_OFFSET, CFAR_OFFSET) if captured[i] != expected[i]] - state.check('E12.6.b: doppler_mag bytes within ' - f'tol={DOPPLER_MAG_BYTE_DIFF_TOLERANCE} ' - '(PR-AA: 1-cell-shift bug)', + state.check('E12.6.b: doppler_mag bytes == expected (strict)', len(dop_diffs) <= DOPPLER_MAG_BYTE_DIFF_TOLERANCE, f"{len(dop_diffs)} differing bytes; " f"first 5 at {dop_diffs[:5]}") @@ -217,26 +215,18 @@ def main() -> int: state.check('E12.13: doppler_mag shape (512, 48)', doppler_mag is not None and doppler_mag.shape == (NUM_RANGE_BINS, NUM_DOPPLER_BINS)) if doppler_mag is not None: - # Diff distribution drives BOTH a cell-count and a max-diff bound. - # Until PR-AA investigates the doppler 1-cell-shift bug, allow up - # to ~50 cells to differ; once the shift is fixed, this should - # tighten back to "max diff <= 1 LSB". + # Strict bit-exact post-PR-AA. Any drift fails. 
diff = np.abs(doppler_mag.astype(np.int64) - expected_mag.astype(np.int64)) max_diff = int(diff.max()) n_diff = int((diff > 0).sum()) - state.check('E12.14: doppler_mag cell-diff <= 50 cells ' - '(PR-AA: 1-cell-shift bug)', - n_diff <= 50, + state.check('E12.14: doppler_mag bit-exact vs Python golden', + n_diff == 0, f"max_diff={max_diff} ({n_diff} of {diff.size} cells differ)") - # Specific target cells — magnitude > 0 (E9). The 1-cell shift can - # nudge the peak's exact bin, so check the 3-cell neighborhood - # instead of the single expected cell. + # Specific target cells — magnitude at exact expected (rb,db) bin. for (rb, db) in EXPECTED_TARGETS: - window = doppler_mag[rb, max(0, db-1):db+2] - peak = int(window.max()) - state.check(f'E12.15.{rb}.{db}: peak in 3-bin doppler ' - f'window {tuple(range(max(0,db-1), db+2))} > 1000', + peak = int(doppler_mag[rb, db]) + state.check(f'E12.15.{rb}.{db}: doppler_mag[{rb},{db}] > 1000', peak > 1000, f"got {peak}") # ---- CFAR dense — E10 ---- diff --git a/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v b/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v index 77342dd..2248edc 100644 --- a/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v +++ b/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v @@ -1056,17 +1056,19 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin ft_data_oe <= 1'b1; ft_wr_n <= 1'b0; - // BRAM read has 1-cycle latency. We pre-loaded range_rd_addr. - // On phase 0: output MSB of range_rd_data (read on prev cycle) - // On phase 1: output LSB, advance to next address + // PR-AA: addr advance lives at end of phase 0 (MSB emit), not + // phase 1 (LSB emit). With BRAM 1-cycle read latency, a 2-byte + // pair needs 2 cycles between addr-set and the next pair's MSB + // read; advancing at phase 1 (1 cycle gap) leaves the next MSB + // reading the prior cell's high byte. See WR_DOPPLER_DATA below. 
if (!wr_byte_phase) begin ft_data_out <= range_rd_data[15:8]; wr_byte_phase <= 1'b1; + range_rd_idx <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; + range_rd_addr <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; end else begin ft_data_out <= range_rd_data[7:0]; wr_byte_phase <= 1'b0; - range_rd_idx <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; - range_rd_addr <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; end wr_byte_idx <= wr_byte_idx + 16'd1; @@ -1102,14 +1104,20 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin ft_data_oe <= 1'b1; ft_wr_n <= 1'b0; + // PR-AA fix: BRAM read has 1-cycle latency. A 2-byte pair + // emits MSB then LSB from the SAME cell, so addr must advance + // at end of phase 0 (MSB) — that gives BRAM 2 cycles before + // the next pair's MSB needs the new cell: + // cycle K (phase 0): data=bram[addr_{K-1}]=bram[N], emit H(N), advance addr<=N+1 + // cycle K+1 (phase 1): data=bram[addr_K]=bram[N], emit L(N) + // cycle K+2 (phase 0): data=bram[addr_{K+1}]=bram[N+1], emit H(N+1) + // Previous (broken) pattern advanced at phase 1, so phase 0 of + // the next pair re-read bram[N] and emitted H(N) again, leaving + // the wire pair-K = (HIGH(bram[K-1]), LOW(bram[K])). if (!wr_byte_phase) begin ft_data_out <= mag_rd_data[15:8]; wr_byte_phase <= 1'b1; - end else begin - ft_data_out <= mag_rd_data[7:0]; - wr_byte_phase <= 1'b0; - // Pre-load mag_rd_addr 1 cell ahead (BRAM 1-cycle - // read latency). Address layout: {range[8:0], doppler[5:0]}. + // Address layout: {range[8:0], doppler[5:0]}. 
if (dop_doppler_idx == DOP_BIN_LAST) begin dop_doppler_idx <= {DOPPLER_BIN_BITS{1'b0}}; dop_range_idx <= dop_range_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; @@ -1120,6 +1128,9 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin mag_rd_addr <= {dop_range_idx, dop_doppler_idx + {{(DOPPLER_BIN_BITS-1){1'b0}}, 1'b1}}; end + end else begin + ft_data_out <= mag_rd_data[7:0]; + wr_byte_phase <= 1'b0; end wr_byte_idx <= wr_byte_idx + 16'd1;