From 5f3002a4d15b0965f6fd29bc43d3f3f0968a8e25 Mon Sep 17 00:00:00 2001 From: Jason <83615043+JJassonn69@users.noreply.github.com> Date: Tue, 21 Apr 2026 02:12:04 +0545 Subject: [PATCH] =?UTF-8?q?merge(wave2):=20manual=20resolution=20of=206=20?= =?UTF-8?q?shared=20files=20=E2=80=94=20fft-2048=20=C3=97=20p0=20audit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hand-merged files modified on both fix/pre-bringup-audit-p0 and feat/fft-2048-upgrade. Wave 1 (commit 60e49c7) took 20 files from fft verbatim; this wave resolves the overlap. - run_regression.sh: 3-way merge. Adopts fft's ${RECEIVER_RTL[@]} array refactor and drops the self-blessing golden pair from p0. Skip count bumped to 5. - usb_data_interface.v (FT601/200T): p0 FSM + clock-loss watchdog kept wholesale; widened stream_control 3 -> 6 bits to carry fft's extended mode bits through the CDC sync chain and the 0xFF status word. - mti_canceller.v: fft's BRAM-inferred 512-range-bin implementation as the base, with p0's F-6.3 saturation counter grafted onto the d1 pipeline stage. Overflow detection uses the top-two-bits disagreement on diff_{i,q}_full (DATA_WIDTH+1 signed). - radar_receiver_final.v: fft's 2048-pt / 512-bin structure + p0 diagnostic plumbing (ADC overrange sticky+CDC, DDC diagnostics, tx_frame_start edge detector replacing chirp_counter frame sync, mti_saturation_count, range_decim_watchdog). - radar_system_top.v: clean 3-way merge, orthogonal regions (+38 / -27). - usb_data_interface_ft2232h.v (FT2232H/50T): fft's per-frame bulk BRAM rewrite kept wholesale. Ported two p0 items that are orthogonal to the write FSM: * ft_clk-loss watchdog (heartbeat + 2FF ASYNC_REG sync + 16-bit timeout) ORed into a 2FF sync'd ft_effective_reset_n for the FSM. * rd_cmd_complete flag so RD_DEASSERT can distinguish a legitimate 3-byte completion from an ft_rxf_n abort that also zeros rd_byte_cnt. Deliberately NOT taken from 2401f5f: cic_decimator_4x_enhanced.v and ddc_400m.v reset-strategy changes. Those conflict with p0's shipped registered-sync-reset + max_fanout=25 distribution, which is already timing-clean on the production build. --- 9_Firmware/9_2_FPGA/mti_canceller.v | 128 +- 9_Firmware/9_2_FPGA/radar_receiver_final.v | 114 +- 9_Firmware/9_2_FPGA/radar_system_top.v | 65 +- 9_Firmware/9_2_FPGA/run_regression.sh | 177 ++- 9_Firmware/9_2_FPGA/usb_data_interface.v | 20 +- .../9_2_FPGA/usb_data_interface_ft2232h.v | 1029 +++++++++++------ 6 files changed, 1031 insertions(+), 502 deletions(-) diff --git a/9_Firmware/9_2_FPGA/mti_canceller.v b/9_Firmware/9_2_FPGA/mti_canceller.v index 3191bf7..c7dbe2f 100644 --- a/9_Firmware/9_2_FPGA/mti_canceller.v +++ b/9_Firmware/9_2_FPGA/mti_canceller.v @@ -19,25 +19,33 @@ * mti_out_i[r] = current_i[r] - previous_i[r] * mti_out_q[r] = current_q[r] - previous_q[r] * - * The previous chirp's 64 range bins are stored in a small BRAM. + * The previous chirp's 512 range bins are stored in BRAM (inferred via + * sync-only read/write always blocks — NO async reset on memory arrays). * On the very first chirp after reset (or enable), there is no previous * data — output is zero (muted) for that first chirp. * - * When mti_enable=0, the module is a transparent pass-through with zero - * latency penalty (data goes straight through combinationally registered). + * When mti_enable=0, the module is a transparent pass-through. * - * Resources: - * - 2 BRAM18 (64 x 16-bit I + 64 x 16-bit Q) or distributed RAM - * - ~30 LUTs (subtract + mux) - * - ~40 FFs (pipeline + control) + * BRAM inference note: + * prev_i/prev_q arrays use dedicated sync-only always blocks for read + * and write. This ensures Vivado infers BRAM (RAMB18) instead of fabric + * FFs + mux trees. The registered read adds 1 cycle of latency, which + * is compensated by a pipeline stage on the input data path. + * + * Resources (target): + * - 2 BRAM18 (512 x 16-bit I + 512 x 16-bit Q) + * - ~30 LUTs (subtract + mux + saturation) + * - ~80 FFs (pipeline + control) * - 0 DSP48 * * Clock domain: clk (100 MHz) */ +`include "radar_params.vh" + module mti_canceller #( - parameter NUM_RANGE_BINS = 64, - parameter DATA_WIDTH = 16 + parameter NUM_RANGE_BINS = `RP_NUM_RANGE_BINS, // 512 + parameter DATA_WIDTH = `RP_DATA_WIDTH // 16 ) ( input wire clk, input wire reset_n, @@ -46,13 +54,13 @@ module mti_canceller #( input wire signed [DATA_WIDTH-1:0] range_i_in, input wire signed [DATA_WIDTH-1:0] range_q_in, input wire range_valid_in, - input wire [5:0] range_bin_in, + input wire [`RP_RANGE_BIN_BITS-1:0] range_bin_in, // 9-bit // ========== OUTPUT (to Doppler processor) ========== output reg signed [DATA_WIDTH-1:0] range_i_out, output reg signed [DATA_WIDTH-1:0] range_q_out, output reg range_valid_out, - output reg [5:0] range_bin_out, + output reg [`RP_RANGE_BIN_BITS-1:0] range_bin_out, // 9-bit // ========== CONFIGURATION ========== input wire mti_enable, // 1=MTI active, 0=pass-through @@ -67,30 +75,79 @@ module mti_canceller #( ); // ============================================================================ -// PREVIOUS CHIRP BUFFER (64 x 16-bit I, 64 x 16-bit Q) +// PREVIOUS CHIRP BUFFER (512 x 16-bit I, 512 x 16-bit Q) // ============================================================================ -// Small enough for distributed RAM on XC7A200T (64 entries). -// Using separate I/Q arrays for clean read/write. +// BRAM-inferred on XC7A50T/200T (512 entries, sync-only read/write). +// Using separate I/Q arrays for clean dual-port inference. -reg signed [DATA_WIDTH-1:0] prev_i [0:NUM_RANGE_BINS-1]; -reg signed [DATA_WIDTH-1:0] prev_q [0:NUM_RANGE_BINS-1]; +(* ram_style = "block" *) reg signed [DATA_WIDTH-1:0] prev_i [0:NUM_RANGE_BINS-1]; +(* ram_style = "block" *) reg signed [DATA_WIDTH-1:0] prev_q [0:NUM_RANGE_BINS-1]; + +// ============================================================================ +// INPUT PIPELINE STAGE (1 cycle delay to match BRAM read latency) +// ============================================================================ +// Declarations must precede the BRAM write block that references them. + +reg signed [DATA_WIDTH-1:0] range_i_d1, range_q_d1; +reg range_valid_d1; +reg [`RP_RANGE_BIN_BITS-1:0] range_bin_d1; +reg mti_enable_d1; + +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + range_i_d1 <= {DATA_WIDTH{1'b0}}; + range_q_d1 <= {DATA_WIDTH{1'b0}}; + range_valid_d1 <= 1'b0; + range_bin_d1 <= {`RP_RANGE_BIN_BITS{1'b0}}; + mti_enable_d1 <= 1'b0; + end else begin + range_i_d1 <= range_i_in; + range_q_d1 <= range_q_in; + range_valid_d1 <= range_valid_in; + range_bin_d1 <= range_bin_in; + mti_enable_d1 <= mti_enable; + end +end + +// ============================================================================ +// BRAM WRITE PORT (sync only — NO async reset for BRAM inference) +// ============================================================================ +// Writes the current chirp sample into prev_i/prev_q for next chirp's +// subtraction. Uses the delayed (d1) signals so the write happens 1 cycle +// after the read address is presented, avoiding RAW hazards. + +always @(posedge clk) begin + if (range_valid_d1) begin + prev_i[range_bin_d1] <= range_i_d1; + prev_q[range_bin_d1] <= range_q_d1; + end +end + +// ============================================================================ +// BRAM READ PORT (sync only — 1 cycle read latency) +// ============================================================================ +// Address is always driven by range_bin_in (cycle 0). Read data appears +// on prev_i_rd / prev_q_rd at cycle 1, aligned with the d1 pipeline stage. + +reg signed [DATA_WIDTH-1:0] prev_i_rd, prev_q_rd; + +always @(posedge clk) begin + prev_i_rd <= prev_i[range_bin_in]; + prev_q_rd <= prev_q[range_bin_in]; +end // Track whether we have valid previous data reg has_previous; // ============================================================================ -// MTI PROCESSING +// MTI PROCESSING (operates on d1 pipeline stage + BRAM read data) // ============================================================================ -// Read previous chirp data (combinational) -wire signed [DATA_WIDTH-1:0] prev_i_rd = prev_i[range_bin_in]; -wire signed [DATA_WIDTH-1:0] prev_q_rd = prev_q[range_bin_in]; - // Compute difference with saturation // Subtraction can produce DATA_WIDTH+1 bits; saturate back to DATA_WIDTH. -wire signed [DATA_WIDTH:0] diff_i_full = {range_i_in[DATA_WIDTH-1], range_i_in} +wire signed [DATA_WIDTH:0] diff_i_full = {range_i_d1[DATA_WIDTH-1], range_i_d1} - {prev_i_rd[DATA_WIDTH-1], prev_i_rd}; -wire signed [DATA_WIDTH:0] diff_q_full = {range_q_in[DATA_WIDTH-1], range_q_in} +wire signed [DATA_WIDTH:0] diff_q_full = {range_q_d1[DATA_WIDTH-1], range_q_d1} - {prev_q_rd[DATA_WIDTH-1], prev_q_rd}; // Saturate to DATA_WIDTH bits @@ -115,20 +172,21 @@ wire diff_i_overflow = (diff_i_full[DATA_WIDTH] != diff_i_full[DATA_WIDTH-1]); wire diff_q_overflow = (diff_q_full[DATA_WIDTH] != diff_q_full[DATA_WIDTH-1]); // ============================================================================ -// MAIN LOGIC +// MAIN OUTPUT LOGIC (operates on d1 pipeline stage) // ============================================================================ always @(posedge clk or negedge reset_n) begin if (!reset_n) begin range_i_out <= {DATA_WIDTH{1'b0}}; range_q_out <= {DATA_WIDTH{1'b0}}; range_valid_out <= 1'b0; - range_bin_out <= 6'd0; + range_bin_out <= {`RP_RANGE_BIN_BITS{1'b0}}; has_previous <= 1'b0; mti_first_chirp <= 1'b1; mti_saturation_count <= 8'd0; end else begin // Count saturated MTI-active samples (F-6.3). Clamp at 0xFF. - if (range_valid_in && mti_enable && has_previous + // Uses d1 pipeline stage to align with diff_i_full/diff_q_full. + if (range_valid_d1 && mti_enable_d1 && has_previous && (diff_i_overflow || diff_q_overflow) && (mti_saturation_count != 8'hFF)) begin mti_saturation_count <= mti_saturation_count + 8'd1; @@ -136,18 +194,14 @@ always @(posedge clk or negedge reset_n) begin // Default: no valid output range_valid_out <= 1'b0; - if (range_valid_in) begin - // Always store current sample as "previous" for next chirp - prev_i[range_bin_in] <= range_i_in; - prev_q[range_bin_in] <= range_q_in; + if (range_valid_d1) begin + // Output path — range_bin is from the delayed pipeline + range_bin_out <= range_bin_d1; - // Output path - range_bin_out <= range_bin_in; - - if (!mti_enable) begin + if (!mti_enable_d1) begin // Pass-through mode: no MTI processing - range_i_out <= range_i_in; - range_q_out <= range_q_in; + range_i_out <= range_i_d1; + range_q_out <= range_q_d1; range_valid_out <= 1'b1; // Reset first-chirp state when MTI is disabled has_previous <= 1'b0; @@ -161,7 +215,7 @@ always @(posedge clk or negedge reset_n) begin range_valid_out <= 1'b1; // After last range bin of first chirp, mark previous as valid - if (range_bin_in == NUM_RANGE_BINS - 1) begin + if (range_bin_d1 == NUM_RANGE_BINS - 1) begin has_previous <= 1'b1; mti_first_chirp <= 1'b0; end diff --git a/9_Firmware/9_2_FPGA/radar_receiver_final.v b/9_Firmware/9_2_FPGA/radar_receiver_final.v index f292c1c..6029c40 100644 --- a/9_Firmware/9_2_FPGA/radar_receiver_final.v +++ b/9_Firmware/9_2_FPGA/radar_receiver_final.v @@ -1,5 +1,7 @@ `timescale 1ns / 1ps +`include "radar_params.vh" + module radar_receiver_final ( input wire clk, // 100MHz input wire reset_n, @@ -13,26 +15,31 @@ module radar_receiver_final ( input wire adc_or_p, input wire adc_or_n, output wire adc_pwdn, - + // Chirp counter from transmitter (for matched filter indexing) input wire [5:0] chirp_counter, // Frame-start pulse from transmitter (CDC-synchronized, 1 clk_100m cycle) input wire tx_frame_start, - + output wire [31:0] doppler_output, output wire doppler_valid, output wire [4:0] doppler_bin, - output wire [5:0] range_bin, + output wire [`RP_RANGE_BIN_BITS-1:0] range_bin, // 9-bit - // Matched filter range profile output (for USB) + // Raw matched-filter output (debug/bring-up) output wire signed [15:0] range_profile_i_out, output wire signed [15:0] range_profile_q_out, output wire range_profile_valid_out, + + // Decimated 512-bin range profile (for USB bulk frames / downstream consumers) + output wire [15:0] decimated_range_mag_out, + output wire decimated_range_valid_out, // Host command inputs (Gap 4: USB Read Path, CDC-synchronized) // CDC-synchronized in radar_system_top.v before reaching here input wire [1:0] host_mode, // Radar mode: 00=STM32, 01=auto-scan, 10=single-chirp input wire host_trigger, // Single-chirp trigger pulse (1 clk cycle) + input wire [1:0] host_range_mode, // Range mode: 00=3km (short only), 01=long-range (dual chirp) // Gap 2: Host-configurable chirp timing (CDC-synchronized in radar_system_top.v) input wire [15:0] host_long_chirp_cycles, @@ -128,9 +135,9 @@ wire [7:0] gc_saturation_count; // Diagnostic: per-frame clipped sample counter wire [7:0] gc_peak_magnitude; // Diagnostic: per-frame peak magnitude wire [3:0] gc_current_gain; // Diagnostic: effective gain_shift -// Reference signals for the processing chain -wire [15:0] long_chirp_real, long_chirp_imag; -wire [15:0] short_chirp_real, short_chirp_imag; +// Reference signal for the processing chain (carries long OR short ref +// depending on use_long_chirp — selected by chirp_memory_loader_param) +wire [15:0] ref_chirp_real, ref_chirp_imag; // ========== DOPPLER PROCESSING SIGNALS ========== wire [31:0] range_data_32bit; @@ -142,20 +149,36 @@ wire [31:0] doppler_spectrum; wire doppler_spectrum_valid; wire [4:0] doppler_bin_out; wire doppler_processing; -wire doppler_frame_done; + +// frame_complete from doppler_processor is a LEVEL signal (high whenever +// state == S_IDLE && !frame_buffer_full). Downstream consumers (USB FT2232H, +// AGC, CFAR) expect a single-cycle PULSE. Convert here at the source so all +// consumers are safe. +wire doppler_frame_done_level; // raw level from doppler_processor +reg doppler_frame_done_prev; +wire doppler_frame_done; // rising-edge pulse (1 clk cycle) + +always @(posedge clk or negedge reset_n) begin + if (!reset_n) + doppler_frame_done_prev <= 1'b0; + else + doppler_frame_done_prev <= doppler_frame_done_level; +end + +assign doppler_frame_done = doppler_frame_done_level & ~doppler_frame_done_prev; assign doppler_frame_done_out = doppler_frame_done; // ========== RANGE BIN DECIMATOR SIGNALS ========== wire signed [15:0] decimated_range_i; wire signed [15:0] decimated_range_q; wire decimated_range_valid; -wire [5:0] decimated_range_bin; +wire [`RP_RANGE_BIN_BITS-1:0] decimated_range_bin; // 9-bit // ========== MTI CANCELLER SIGNALS ========== wire signed [15:0] mti_range_i; wire signed [15:0] mti_range_q; wire mti_range_valid; -wire [5:0] mti_range_bin; +wire [`RP_RANGE_BIN_BITS-1:0] mti_range_bin; // 9-bit wire mti_first_chirp; // ========== RADAR MODE CONTROLLER SIGNALS ========== @@ -173,6 +196,7 @@ radar_mode_controller rmc ( .clk(clk), .reset_n(reset_n), .mode(host_mode), // Controlled by host via USB (default: 2'b01 auto-scan) + .range_mode(host_range_mode), // Range mode: 00=3km, 01=long-range (drives chirp type) .stm32_new_chirp(stm32_new_chirp_rx), .stm32_new_elevation(stm32_new_elevation_rx), .stm32_new_azimuth(stm32_new_azimuth_rx), @@ -345,7 +369,7 @@ rx_gain_control gain_ctrl ( ); // 3. Dual Chirp Memory Loader -wire [9:0] sample_addr_from_chain; +wire [10:0] sample_addr_from_chain; chirp_memory_loader_param chirp_mem ( .clk(clk), @@ -359,20 +383,9 @@ chirp_memory_loader_param chirp_mem ( .mem_ready(mem_ready) ); -// Sample address generator -reg [9:0] sample_addr_reg; -always @(posedge clk or negedge reset_n) begin - if (!reset_n) begin - sample_addr_reg <= 0; - end else if (mem_request) begin - sample_addr_reg <= sample_addr_reg + 1; - if (sample_addr_reg == 1023) sample_addr_reg <= 0; - end -end -// sample_addr_wire removed — was unused implicit wire (synthesis warning) - // 4. CRITICAL: Reference Chirp Latency Buffer -// This aligns reference data with FFT output (2159 cycle delay) +// This aligns reference data with FFT output (3187 cycle delay) +// TODO: verify empirically during hardware bring-up with correlation test wire [15:0] delayed_ref_i, delayed_ref_q; wire mem_ready_delayed; @@ -388,11 +401,10 @@ latency_buffer #( .valid_out(mem_ready_delayed) ); -// Assign delayed reference signals -assign long_chirp_real = delayed_ref_i; -assign long_chirp_imag = delayed_ref_q; -assign short_chirp_real = delayed_ref_i; -assign short_chirp_imag = delayed_ref_q; +// Assign delayed reference signals (single pair — chirp_memory_loader_param +// selects long/short reference upstream via use_long_chirp) +assign ref_chirp_real = delayed_ref_i; +assign ref_chirp_imag = delayed_ref_q; // 5. Dual Chirp Matched Filter @@ -404,6 +416,12 @@ wire range_valid; assign range_profile_i_out = range_profile_i; assign range_profile_q_out = range_profile_q; assign range_profile_valid_out = range_valid; +// Manhattan magnitude: |I| + |Q|, saturated to 16 bits +wire [15:0] abs_mti_i = mti_range_i[15] ? (~mti_range_i + 16'd1) : mti_range_i; +wire [15:0] abs_mti_q = mti_range_q[15] ? (~mti_range_q + 16'd1) : mti_range_q; +wire [16:0] manhattan_sum = {1'b0, abs_mti_i} + {1'b0, abs_mti_q}; +assign decimated_range_mag_out = manhattan_sum[16] ? 16'hFFFF : manhattan_sum[15:0]; +assign decimated_range_valid_out = mti_range_valid; matched_filter_multi_segment mf_dual ( .clk(clk), @@ -416,10 +434,8 @@ matched_filter_multi_segment mf_dual ( .mc_new_chirp(mc_new_chirp), .mc_new_elevation(mc_new_elevation), .mc_new_azimuth(mc_new_azimuth), - .long_chirp_real(delayed_ref_i), // From latency buffer - .long_chirp_imag(delayed_ref_q), - .short_chirp_real(delayed_ref_i), // Same for short chirp - .short_chirp_imag(delayed_ref_q), + .ref_chirp_real(delayed_ref_i), // From latency buffer (long or short ref) + .ref_chirp_imag(delayed_ref_q), .segment_request(segment_request), .mem_request(mem_request), .sample_addr_out(sample_addr_from_chain), @@ -430,11 +446,11 @@ matched_filter_multi_segment mf_dual ( ); // ========== CRITICAL: RANGE BIN DECIMATOR ========== -// Convert 1024 range bins to 64 bins for Doppler +// Convert 2048 range bins to 512 bins for Doppler range_bin_decimator #( - .INPUT_BINS(1024), - .OUTPUT_BINS(64), - .DECIMATION_FACTOR(16) + .INPUT_BINS(`RP_FFT_SIZE), // 2048 + .OUTPUT_BINS(`RP_NUM_RANGE_BINS), // 512 + .DECIMATION_FACTOR(`RP_DECIMATION_FACTOR) // 4 ) range_decim ( .clk(clk), .reset_n(reset_n), @@ -446,7 +462,7 @@ range_bin_decimator #( .range_valid_out(decimated_range_valid), .range_bin_index(decimated_range_bin), .decimation_mode(2'b01), // Peak detection mode - .start_bin(10'd0), + .start_bin(11'd0), .watchdog_timeout(range_decim_watchdog) // Audit F-6.4 — plumbed out ); @@ -455,8 +471,8 @@ range_bin_decimator #( // H(z) = 1 - z^{-1} → null at DC Doppler, removes stationary clutter. // When host_mti_enable=0: transparent pass-through. mti_canceller #( - .NUM_RANGE_BINS(64), - .DATA_WIDTH(16) + .NUM_RANGE_BINS(`RP_NUM_RANGE_BINS), // 512 + .DATA_WIDTH(`RP_DATA_WIDTH) // 16 ) mti_inst ( .clk(clk), .reset_n(reset_n), @@ -476,11 +492,11 @@ mti_canceller #( // ========== FRAME SYNC FROM TRANSMITTER ========== // [FPGA-001 FIXED] Use the authoritative new_chirp_frame signal from the // transmitter (via plfm_chirp_controller_enhanced), CDC-synchronized to -// clk_100m in radar_system_top. Previous code tried to derive frame +// clk_100m in radar_system_top. Previous code tried to derive frame // boundaries from chirp_counter == 0, but that counter comes from the // transmitter path (plfm_chirp_controller_enhanced) which does NOT wrap // at chirps_per_elev — it overflows to N and only wraps at 6-bit rollover -// (64). This caused frame pulses at half the expected rate for N=32. +// (64). This caused frame pulses at half the expected rate for N=32. reg tx_frame_start_prev; reg new_frame_pulse; @@ -490,13 +506,13 @@ always @(posedge clk or negedge reset_n) begin new_frame_pulse <= 1'b0; end else begin new_frame_pulse <= 1'b0; - + // Edge detect: tx_frame_start is a toggle-CDC derived pulse that // may be 1 clock wide. Capture rising edge for clean 1-cycle pulse. if (tx_frame_start && !tx_frame_start_prev) begin new_frame_pulse <= 1'b1; end - + tx_frame_start_prev <= tx_frame_start; end end @@ -510,10 +526,10 @@ assign range_data_valid = mti_range_valid; // ========== DOPPLER PROCESSOR ========== doppler_processor_optimized #( - .DOPPLER_FFT_SIZE(16), - .RANGE_BINS(64), - .CHIRPS_PER_FRAME(32), - .CHIRPS_PER_SUBFRAME(16) + .DOPPLER_FFT_SIZE(`RP_DOPPLER_FFT_SIZE), // 16 + .RANGE_BINS(`RP_NUM_RANGE_BINS), // 512 + .CHIRPS_PER_FRAME(`RP_CHIRPS_PER_FRAME), // 32 + .CHIRPS_PER_SUBFRAME(`RP_CHIRPS_PER_SUBFRAME) // 16 ) doppler_proc ( .clk(clk), .reset_n(reset_n), @@ -529,7 +545,7 @@ doppler_processor_optimized #( // Status .processing_active(doppler_processing), - .frame_complete(doppler_frame_done), + .frame_complete(doppler_frame_done_level), .status() ); diff --git a/9_Firmware/9_2_FPGA/radar_system_top.v b/9_Firmware/9_2_FPGA/radar_system_top.v index 461af0f..41e5dde 100644 --- a/9_Firmware/9_2_FPGA/radar_system_top.v +++ b/9_Firmware/9_2_FPGA/radar_system_top.v @@ -1,5 +1,7 @@ `timescale 1ns / 1ps +`include "radar_params.vh" + /** * radar_system_top.v * @@ -125,7 +127,7 @@ module radar_system_top ( output wire [31:0] dbg_doppler_data, output wire dbg_doppler_valid, output wire [4:0] dbg_doppler_bin, - output wire [5:0] dbg_range_bin, + output wire [`RP_RANGE_BIN_BITS-1:0] dbg_range_bin, // System status output wire [3:0] system_status, @@ -179,11 +181,13 @@ wire tx_current_chirp_sync_valid; wire [31:0] rx_doppler_output; wire rx_doppler_valid; wire [4:0] rx_doppler_bin; -wire [5:0] rx_range_bin; -wire [31:0] rx_range_profile; -wire rx_range_valid; -wire [15:0] rx_doppler_real; -wire [15:0] rx_doppler_imag; +wire [`RP_RANGE_BIN_BITS-1:0] rx_range_bin; +wire [31:0] rx_range_profile; +wire rx_range_valid; +wire [15:0] rx_range_profile_decimated; +wire rx_range_profile_decimated_valid; +wire [15:0] rx_doppler_real; +wire [15:0] rx_doppler_imag; wire rx_doppler_data_valid; reg rx_detect_flag; // Threshold detection result (was rx_cfar_detection) reg rx_detect_valid; // Detection valid pulse (was rx_cfar_valid) @@ -239,7 +243,7 @@ wire [15:0] usb_cmd_value; reg [1:0] host_radar_mode; reg host_trigger_pulse; reg [15:0] host_detect_threshold; // (was host_cfar_threshold) -reg [2:0] host_stream_control; +reg [5:0] host_stream_control; // Fix 3: Digital gain control register // [3]=direction: 0=amplify, 1=attenuate. [2:0]=shift amount 0..7. @@ -266,13 +270,12 @@ reg host_status_request; // Opcode 0xFF (self-clearing pulse) localparam DOPPLER_FRAME_CHIRPS = 32; // Total chirps per Doppler frame reg chirps_mismatch_error; // Set if host tried to set chirps != FFT size -// Fix 7: Range-mode register (opcode 0x20) -// Future-proofing for 3km/10km antenna switching. -// 2'b00 = Auto (default — system selects based on scene) -// 2'b01 = Short-range (3km) -// 2'b10 = Long-range (10km) +// Range-mode register (opcode 0x20) +// Controls chirp type selection in the mode controller: +// 2'b00 = 3 km mode (all short chirps — long blind zone > max range) +// 2'b01 = Long-range (dual chirp: first half long, second half short) +// 2'b10 = Reserved // 2'b11 = Reserved -// Currently a configuration store only — antenna/timing switching TBD. reg [1:0] host_range_mode; // CFAR configuration registers (host-configurable via USB) @@ -539,14 +542,16 @@ radar_receiver_final rx_inst ( .doppler_bin(rx_doppler_bin), .range_bin(rx_range_bin), - // Matched filter range profile (for USB) - .range_profile_i_out(rx_range_profile[15:0]), - .range_profile_q_out(rx_range_profile[31:16]), - .range_profile_valid_out(rx_range_valid), + // Range-profile outputs + .range_profile_i_out(rx_range_profile[15:0]), + .range_profile_q_out(rx_range_profile[31:16]), + .range_profile_valid_out(rx_range_valid), + .decimated_range_mag_out(rx_range_profile_decimated), + .decimated_range_valid_out(rx_range_profile_decimated_valid), - // Host command inputs (Gap 4: USB Read Path) .host_mode(host_radar_mode), .host_trigger(host_trigger_pulse), + .host_range_mode(host_range_mode), // Gap 2: Host-configurable chirp timing .host_long_chirp_cycles(host_long_chirp_cycles), .host_long_listen_cycles(host_long_listen_cycles), @@ -624,7 +629,7 @@ assign dc_notch_active = (host_dc_notch_width != 3'd0) && wire [31:0] notched_doppler_data = dc_notch_active ? 32'd0 : rx_doppler_output; wire notched_doppler_valid = rx_doppler_valid; wire [4:0] notched_doppler_bin = rx_doppler_bin; -wire [5:0] notched_range_bin = rx_range_bin; +wire [`RP_RANGE_BIN_BITS-1:0] notched_range_bin = rx_range_bin; // ============================================================================ // CFAR DETECTOR (replaces simple threshold detector) @@ -635,7 +640,7 @@ wire [5:0] notched_range_bin = rx_range_bin; wire cfar_detect_flag; wire cfar_detect_valid; -wire [5:0] cfar_detect_range; +wire [`RP_RANGE_BIN_BITS-1:0] cfar_detect_range; wire [4:0] cfar_detect_doppler; wire [16:0] cfar_detect_magnitude; wire [16:0] cfar_detect_threshold; @@ -726,9 +731,10 @@ end // DATA PACKING FOR USB // ============================================================================ -// Range profile from matched filter output (wired through radar_receiver_final) -assign usb_range_profile = rx_range_profile; -assign usb_range_valid = rx_range_valid; +// USB range profile must match the advertised 512-bin frame payload, so source it +// from the decimated range stream that feeds Doppler rather than raw MF samples. +assign usb_range_profile = {16'd0, rx_range_profile_decimated}; +assign usb_range_valid = rx_range_profile_decimated_valid; assign usb_doppler_real = rx_doppler_real; assign usb_doppler_imag = rx_doppler_imag; @@ -831,6 +837,11 @@ end else begin : gen_ft2232h .cfar_detection(usb_detect_flag), .cfar_valid(usb_detect_valid), + // Bulk frame protocol inputs + .range_bin_in(notched_range_bin), + .doppler_bin_in(notched_doppler_bin), + .frame_complete(rx_frame_complete), + // FT2232H Interface .ft_data(ft_data), .ft_rxf_n(ft_rxf_n), @@ -952,7 +963,7 @@ always @(posedge clk_100m_buf or negedge sys_reset_n) begin host_radar_mode <= 2'b01; // Default: auto-scan host_trigger_pulse <= 1'b0; host_detect_threshold <= 16'd10000; // Default threshold - host_stream_control <= 3'b111; // Default: all streams enabled + host_stream_control <= `RP_STREAM_CTRL_DEFAULT; // Default: all streams, mag-only mode host_gain_shift <= 4'd0; // Default: pass-through (no gain change) // Gap 2: chirp timing defaults (match radar_mode_controller parameters) host_long_chirp_cycles <= 16'd3000; @@ -963,7 +974,7 @@ always @(posedge clk_100m_buf or negedge sys_reset_n) begin host_chirps_per_elev <= 6'd32; host_status_request <= 1'b0; chirps_mismatch_error <= 1'b0; - host_range_mode <= 2'b00; // Default: auto + host_range_mode <= 2'b00; // Default: 3 km mode (all short chirps) // CFAR defaults (disabled by default — backward-compatible) host_cfar_guard <= 4'd2; // 2 guard cells each side host_cfar_train <= 5'd8; // 8 training cells each side @@ -990,7 +1001,7 @@ always @(posedge clk_100m_buf or negedge sys_reset_n) begin 8'h01: host_radar_mode <= usb_cmd_value[1:0]; 8'h02: host_trigger_pulse <= 1'b1; 8'h03: host_detect_threshold <= usb_cmd_value; - 8'h04: host_stream_control <= usb_cmd_value[2:0]; + 8'h04: host_stream_control <= usb_cmd_value[5:0]; // Gap 2: chirp timing configuration 8'h10: host_long_chirp_cycles <= usb_cmd_value; 8'h11: host_long_listen_cycles <= usb_cmd_value; @@ -1013,7 +1024,7 @@ always @(posedge clk_100m_buf or negedge sys_reset_n) begin end end 8'h16: host_gain_shift <= usb_cmd_value[3:0]; // Fix 3: digital gain - 8'h20: host_range_mode <= usb_cmd_value[1:0]; // Fix 7: range mode + 8'h20: host_range_mode <= usb_cmd_value[1:0]; // Range mode // CFAR configuration opcodes 8'h21: host_cfar_guard <= usb_cmd_value[3:0]; 8'h22: host_cfar_train <= usb_cmd_value[4:0]; diff --git a/9_Firmware/9_2_FPGA/run_regression.sh b/9_Firmware/9_2_FPGA/run_regression.sh index 7ae9822..6b12ac3 100755 --- a/9_Firmware/9_2_FPGA/run_regression.sh +++ b/9_Firmware/9_2_FPGA/run_regression.sh @@ -264,11 +264,141 @@ run_lint_static() { fi } +# --------------------------------------------------------------------------- +# Helper: compile, run, and compare a matched-filter co-sim scenario +# run_mf_cosim +# --------------------------------------------------------------------------- +run_mf_cosim() { + local name="$1" + local define="$2" + local vvp="tb/tb_mf_cosim_${name}.vvp" + local scenario_lower="$name" + + printf " %-45s " "MF Co-Sim ($name)" + + # Compile — build command as string to handle optional define + local cmd="iverilog -g2001 -DSIMULATION" + if [[ -n "$define" ]]; then + cmd="$cmd $define" + fi + cmd="$cmd -o $vvp tb/tb_mf_cosim.v matched_filter_processing_chain.v fft_engine.v chirp_memory_loader_param.v" + + if ! eval "$cmd" 2>/tmp/iverilog_err_$$; then + echo -e "${RED}COMPILE FAIL${NC}" + ERRORS="$ERRORS\n MF Co-Sim ($name): compile error ($(head -1 /tmp/iverilog_err_$$))" + FAIL=$((FAIL + 1)) + return + fi + + # Run TB + local output + output=$(timeout 120 vvp "$vvp" 2>&1) || true + rm -f "$vvp" + + # Check TB internal pass/fail + local tb_fail + tb_fail=$(echo "$output" | grep -Ec '^\[FAIL' || true) + if [[ "$tb_fail" -gt 0 ]]; then + echo -e "${RED}FAIL${NC} (TB internal failure)" + ERRORS="$ERRORS\n MF Co-Sim ($name): TB internal failure" + FAIL=$((FAIL + 1)) + return + fi + + # Run Python compare + if command -v python3 >/dev/null 2>&1; then + local compare_out + local compare_rc=0 + compare_out=$(python3 tb/cosim/compare_mf.py "$scenario_lower" 2>&1) || compare_rc=$? + if [[ "$compare_rc" -ne 0 ]]; then + echo -e "${RED}FAIL${NC} (compare_mf.py mismatch)" + ERRORS="$ERRORS\n MF Co-Sim ($name): Python compare failed" + FAIL=$((FAIL + 1)) + return + fi + else + echo -e "${YELLOW}SKIP${NC} (RTL passed, python3 not found — compare skipped)" + SKIP=$((SKIP + 1)) + return + fi + + echo -e "${GREEN}PASS${NC} (RTL + Python compare)" + PASS=$((PASS + 1)) +} + +# --------------------------------------------------------------------------- +# Helper: compile, run, and compare a Doppler co-sim scenario +# run_doppler_cosim +# --------------------------------------------------------------------------- +run_doppler_cosim() { + local name="$1" + local define="$2" + local vvp="tb/tb_doppler_cosim_${name}.vvp" + + printf " %-45s " "Doppler Co-Sim ($name)" + + # Compile — build command as string to handle optional define + local cmd="iverilog -g2001 -DSIMULATION" + if [[ -n "$define" ]]; then + cmd="$cmd $define" + fi + cmd="$cmd -o $vvp tb/tb_doppler_cosim.v doppler_processor.v xfft_16.v fft_engine.v" + + if ! eval "$cmd" 2>/tmp/iverilog_err_$$; then + echo -e "${RED}COMPILE FAIL${NC}" + ERRORS="$ERRORS\n Doppler Co-Sim ($name): compile error ($(head -1 /tmp/iverilog_err_$$))" + FAIL=$((FAIL + 1)) + return + fi + + # Run TB + local output + output=$(timeout 120 vvp "$vvp" 2>&1) || true + rm -f "$vvp" + + # Check TB internal pass/fail + local tb_fail + tb_fail=$(echo "$output" | grep -Ec '^\[FAIL' || true) + if [[ "$tb_fail" -gt 0 ]]; then + echo -e "${RED}FAIL${NC} (TB internal failure)" + ERRORS="$ERRORS\n Doppler Co-Sim ($name): TB internal failure" + FAIL=$((FAIL + 1)) + return + fi + + # Run Python compare + if command -v python3 >/dev/null 2>&1; then + local compare_out + local compare_rc=0 + compare_out=$(python3 tb/cosim/compare_doppler.py "$name" 2>&1) || compare_rc=$? + if [[ "$compare_rc" -ne 0 ]]; then + echo -e "${RED}FAIL${NC} (compare_doppler.py mismatch)" + ERRORS="$ERRORS\n Doppler Co-Sim ($name): Python compare failed" + FAIL=$((FAIL + 1)) + return + fi + else + echo -e "${YELLOW}SKIP${NC} (RTL passed, python3 not found — compare skipped)" + SKIP=$((SKIP + 1)) + return + fi + + echo -e "${GREEN}PASS${NC} (RTL + Python compare)" + PASS=$((PASS + 1)) +} + # --------------------------------------------------------------------------- # Helper: compile and run a single testbench # run_test # --------------------------------------------------------------------------- run_test() { + # Optional: --timeout=N as first arg overrides default 120s + local timeout_secs=120 + if [[ "$1" == --timeout=* ]]; then + timeout_secs="${1#--timeout=}" + shift + fi + local name="$1" local vvp="$2" shift 2 @@ -286,7 +416,7 @@ run_test() { # Run local output - output=$(timeout 120 vvp "$vvp" 2>&1) || true + output=$(timeout "$timeout_secs" vvp "$vvp" 2>&1) || true # Count PASS/FAIL in output (testbenches use explicit [PASS]/[FAIL] markers) local test_pass test_fail @@ -378,9 +508,9 @@ run_test "Chirp Contract" \ tb/tb_chirp_ctr_reg.vvp \ tb/tb_chirp_contract.v plfm_chirp_controller.v -run_test "Doppler Processor (DSP48)" \ - tb/tb_doppler_reg.vvp \ - tb/tb_doppler_cosim.v doppler_processor.v xfft_16.v fft_engine.v +run_doppler_cosim "stationary" "" +run_doppler_cosim "moving" "-DSCENARIO_MOVING" +run_doppler_cosim "two_targets" "-DSCENARIO_TWO" run_test "Threshold Detector (detection bugs)" \ tb/tb_threshold_detector.vvp \ @@ -427,15 +557,16 @@ run_test "Full-Chain Real-Data (decim→Doppler, exact match)" \ doppler_processor.v xfft_16.v fft_engine.v if [[ "$QUICK" -eq 0 ]]; then - # Golden generate - run_test "Receiver (golden generate)" \ - tb/tb_rx_golden_reg.vvp \ - -DGOLDEN_GENERATE \ - tb/tb_radar_receiver_final.v "${RECEIVER_RTL[@]}" - - # Golden compare - run_test "Receiver (golden compare)" \ - tb/tb_rx_compare_reg.vvp \ + # Receiver integration (structural + bounds + pulse assertions). + # Replaces the earlier "Receiver golden generate/compare" pair, which was + # self-blessing (both passes ran identical RTL on identical stimulus, so + # it passed regardless of bugs). Real co-sim coverage is now provided by + # tb_doppler_realdata.v and tb_fullchain_realdata.v (Python goldens, + # exact match); this integration test exercises the full RX pipeline + # (ADC stub → DDC → MF → Decim → Doppler) and verifies that + # doppler_frame_done is a single-cycle pulse at module boundaries. + run_test --timeout=600 "Receiver Integration (tb_radar_receiver_final)" \ + tb/tb_rx_final_reg.vvp \ tb/tb_radar_receiver_final.v "${RECEIVER_RTL[@]}" # Full system top (monitoring-only, legacy) @@ -459,12 +590,28 @@ if [[ "$QUICK" -eq 0 ]]; then -DUSB_MODE_1 \ tb/tb_system_e2e.v "${SYSTEM_RTL[@]}" else - echo " (skipped receiver golden + system top + E2E — use without --quick)" - SKIP=$((SKIP + 6)) + echo " (skipped receiver integration + system top + E2E + USB_MODE=1 variants — use without --quick)" + SKIP=$((SKIP + 5)) fi echo "" +# =========================================================================== +# PHASE 2b: MATCHED FILTER CO-SIMULATION (RTL vs Python golden reference) +# Runs tb_mf_cosim.v for 4 scenarios, then compare_mf.py validates output +# against committed Python golden CSV files. In SIMULATION mode, thresholds +# are generous (behavioral vs fixed-point twiddles differ) — validates +# state machine mechanics, output count, and energy sanity. +# =========================================================================== +echo "--- PHASE 2b: Matched Filter Co-Sim ---" + +run_mf_cosim "chirp" "" +run_mf_cosim "dc" "-DSCENARIO_DC" +run_mf_cosim "impulse" "-DSCENARIO_IMPULSE" +run_mf_cosim "tone5" "-DSCENARIO_TONE5" + +echo "" + # =========================================================================== # PHASE 3: UNIT TESTS — Signal Processing # =========================================================================== diff --git a/9_Firmware/9_2_FPGA/usb_data_interface.v b/9_Firmware/9_2_FPGA/usb_data_interface.v index bcb0272..ae0e29e 100644 --- a/9_Firmware/9_2_FPGA/usb_data_interface.v +++ b/9_Firmware/9_2_FPGA/usb_data_interface.v @@ -76,14 +76,14 @@ module usb_data_interface ( // Bit 0 = range stream enable // Bit 1 = doppler stream enable // Bit 2 = cfar/detection stream enable - input wire [2:0] stream_control, + input wire [5:0] stream_control, // Gap 2: Status readback inputs (clk_100m domain, CDC'd internally) // When status_request pulses, the write FSM sends a status response // packet containing the current register values. input wire status_request, // 1-cycle pulse in clk_100m when 0xFF received input wire [15:0] status_cfar_threshold, // Current CFAR threshold - input wire [2:0] status_stream_ctrl, // Current stream control + input wire [5:0] status_stream_ctrl, // Current stream control (6-bit: [2:0]=stream en, [5:3]=format) input wire [1:0] status_radar_mode, // Current radar mode input wire [15:0] status_long_chirp, // Current long chirp cycles input wire [15:0] status_long_listen, // Current long listen cycles @@ -302,8 +302,8 @@ reg [11:0] sample_counter; // deadlock before host configures streams. With all streams enabled on // reset, the first range_valid triggers the write FSM which then blocks // forever on SEND_DOPPLER_DATA (Doppler hasn't produced data yet). -(* ASYNC_REG = "TRUE" *) reg [2:0] stream_ctrl_sync_0; -(* ASYNC_REG = "TRUE" *) reg [2:0] stream_ctrl_sync_1; +(* ASYNC_REG = "TRUE" *) reg [5:0] stream_ctrl_sync_0; +(* ASYNC_REG = "TRUE" *) reg [5:0] stream_ctrl_sync_1; wire stream_range_en = stream_ctrl_sync_1[0]; wire stream_doppler_en = stream_ctrl_sync_1[1]; wire stream_cfar_en = stream_ctrl_sync_1[2]; @@ -339,8 +339,8 @@ always @(posedge ft601_clk_in or negedge ft601_effective_reset_n) begin cfar_detection_cap <= 1'b0; range_data_ready <= 1'b0; // Fix #5: Default to range-only on reset (prevents write FSM deadlock) - stream_ctrl_sync_0 <= 3'b001; - stream_ctrl_sync_1 <= 3'b001; + stream_ctrl_sync_0 <= 6'b000_001; + stream_ctrl_sync_1 <= 6'b000_001; // Gap 2: status request CDC reset status_req_sync <= 2'b00; status_req_sync_prev <= 1'b0; @@ -362,9 +362,9 @@ always @(posedge ft601_clk_in or negedge ft601_effective_reset_n) begin // Gap 2: Capture status snapshot when request arrives in ft601 domain if (status_req_ft601) begin // Pack register values into 5x 32-bit status words - // Word 0: {0xFF[31:24], mode[23:22], stream[21:19], 3'b000[18:16], threshold[15:0]} + // Word 0: {0xFF[31:24], mode[23:22], stream[21:16], threshold[15:0]} status_words[0] <= {8'hFF, status_radar_mode, status_stream_ctrl, - 3'b000, status_cfar_threshold}; + status_cfar_threshold}; // Word 1: {long_chirp_cycles[15:0], long_listen_cycles[15:0]} status_words[1] <= {status_long_chirp, status_long_listen}; // Word 2: {guard_cycles[15:0], short_chirp_cycles[15:0]} @@ -554,7 +554,7 @@ always @(posedge ft601_clk_in or negedge ft601_effective_reset_n) begin data_pkt_word2 <= {stream_doppler_en ? doppler_imag_cap[7:0] : 8'd0, stream_cfar_en ? {(sample_counter == 12'd0), 6'b0, cfar_detection_cap} - : {(sample_counter == 12'd0), 7'd0}, + : {(sample_counter == 12'd0), 7'd0}, FOOTER, 8'h00}; // pad byte data_pkt_be2 <= 4'b1110; // 3 valid bytes + 1 pad @@ -637,7 +637,7 @@ always @(posedge ft601_clk_in or negedge ft601_effective_reset_n) begin if (sample_counter == NUM_CELLS - 12'd1) sample_counter <= 12'd0; else - sample_counter <= sample_counter + 12'd1; + sample_counter <= sample_counter + 12'd1; current_state <= IDLE; end endcase diff --git a/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v b/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v index 6244138..a9d9e4c 100644 --- a/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v +++ b/9_Firmware/9_2_FPGA/usb_data_interface_ft2232h.v @@ -1,48 +1,76 @@ `timescale 1ns / 1ps +`include "radar_params.vh" + /** * usb_data_interface_ft2232h.v * * FT2232H USB 2.0 Hi-Speed FIFO Interface (245 Synchronous FIFO Mode) * Channel A only — 8-bit data bus, 60 MHz CLKOUT from FT2232H. * - * This module is the 50T production board equivalent of usb_data_interface.v - * (FT601, 32-bit, USB 3.0). Both share the same internal interface signals - * so they can be swapped via a generate block in radar_system_top.v. + * BULK PER-FRAME PROTOCOL (replaces legacy per-sample 11-byte packets): * - * Data packet (FPGA→Host): 11 bytes - * Byte 0: 0xAA (header) - * Bytes 1-4: range_profile[31:0] = {range_q[15:0], range_i[15:0]} MSB first - * Bytes 5-6: doppler_real[15:0] MSB first - * Bytes 7-8: doppler_imag[15:0] MSB first - * Byte 9: {7'b0, cfar_detection} - * Byte 10: 0x55 (footer) + * Frame packet (FPGA→Host): variable length, up to ~35 KB + * Byte 0: 0xAA (frame start header) + * Byte 1: Format flags {2'b0, sparse_det, mag_only, stream_cfar, stream_doppler, stream_range} + * Bytes 2-3: Frame number (16-bit, MSB first) + * Bytes 4-5: Range bin count (16-bit, MSB first) = 512 + * Bytes 6-7: Doppler bin count (16-bit, MSB first) = 32 * - * Status packet (FPGA→Host): 26 bytes - * Byte 0: 0xBB (status header) - * Bytes 1-24: 6 × 32-bit status words, MSB first - * Byte 25: 0x55 (footer) + * [If stream_range (bit 0):] + * Next 1024 bytes: Range profile, 512 × 16-bit magnitude, MSB first * - * Command (Host→FPGA): 4 bytes received sequentially + * [If stream_doppler (bit 1) AND mag_only (bit 3):] + * Next 32768 bytes: Doppler magnitude, 512×32 × 16-bit, row-major, MSB first + * + * [If stream_doppler (bit 1) AND NOT mag_only:] + * Next 65536 bytes: Doppler I/Q, 512×32 × 32-bit (I16,Q16), row-major, MSB first + * + * [If stream_cfar (bit 2) AND NOT sparse_det (bit 4):] + * Next 2048 bytes: Detection flags, 512×32 bits packed into bytes, MSB-first bit order + * + * [If stream_cfar (bit 2) AND sparse_det (bit 4):] + * Next 2 bytes: Detection count N (16-bit, MSB first) + * Next N×6 bytes: Each = {range_bin[16], doppler_bin[16], magnitude[16]}, MSB first + * + * Last byte: 0x55 (frame end footer) + * + * Status packet (FPGA→Host): 26 bytes (unchanged from legacy) + * Byte 0: 0xBB (status header) + * Bytes 1-24: 6 × 32-bit status words, MSB first + * Byte 25: 0x55 (footer) + * + * Command (Host→FPGA): 4 bytes received sequentially (unchanged) * Byte 0: opcode[7:0] * Byte 1: addr[7:0] * Byte 2: value[15:8] * Byte 3: value[7:0] * - * CDC: Toggle CDC (not level sync) for all valid pulse crossings from - * 100 MHz → 60 MHz. Toggle CDC is guaranteed to work regardless of - * clock frequency ratio. + * MEMORY ARCHITECTURE: + * - Doppler magnitude BRAM: 512×32 = 16384 entries × 16-bit = 32 KB (~14 BRAM18) + * Written in clk (100 MHz) domain as Doppler cells arrive. + * Read in ft_clk (60 MHz) domain during USB bulk transfer. + * - Range profile buffer: 512 × 16-bit = 1 KB (~1 BRAM18) + * Written in clk domain from range_valid events. + * - Detection flag buffer: 512×32 = 16384 bits = 2048 bytes (~1 BRAM18) + * Written in clk domain from cfar_valid events. + * - Doppler I/Q BRAM: 16384 × 32-bit = 64 KB (~28 BRAM18) — only when mag_only=0 + * NOTE: For 50T (75 BRAM18 total), I/Q mode may not fit alongside processing + * chain BRAMs. Default to mag_only=1. I/Q mode is a stretch goal. + * + * BANDWIDTH BUDGET (mag_only=1, all streams): + * Header: 8 B + Range: 1024 B + Doppler: 32768 B + CFAR: 2048 B + Footer: 1 B + * = 35,849 bytes/frame × 178 fps = 6.38 MB/s (80% of USB 2.0 Hi-Speed 8 MB/s) + * + * CDC STRATEGY: + * - Frame data: Written to dual-port BRAM at 100 MHz, read at 60 MHz (inherently CDC-safe) + * - frame_ready flag: Toggle CDC (100 MHz → 60 MHz), same as status_request + * - stream_control: 2-stage level sync (changes infrequently) + * - Commands: Read FSM in ft_clk domain, output CDC'd by consumer (unchanged) * * Clock domains: * clk = 100 MHz system clock (radar data domain) * ft_clk = 60 MHz from FT2232H CLKOUT (USB FIFO domain) - * - * USB disconnect recovery: - * A clock-activity watchdog in the clk domain detects when ft_clk stops - * (USB cable unplugged). After ~0.65 ms of silence (65536 system clocks) - * it asserts ft_clk_lost, which is OR'd into the FT-domain reset so - * FSMs and FIFOs return to a clean state. When ft_clk resumes, a 2-stage - * reset synchronizer deasserts the reset cleanly in the ft_clk domain. */ module usb_data_interface_ft2232h ( @@ -51,7 +79,7 @@ module usb_data_interface_ft2232h ( input wire ft_reset_n, // FT2232H-domain synchronized reset // Radar data inputs (clk domain) - input wire [31:0] range_profile, + input wire [31:0] range_profile, // {range_q[15:0], range_i[15:0]} input wire range_valid, input wire [15:0] doppler_real, input wire [15:0] doppler_imag, @@ -59,6 +87,11 @@ module usb_data_interface_ft2232h ( input wire cfar_detection, input wire cfar_valid, + // New inputs for bulk frame protocol (clk domain) + input wire [`RP_RANGE_BIN_BITS-1:0] range_bin_in, // 9-bit range bin index + input wire [4:0] doppler_bin_in, // 5-bit doppler bin index + input wire frame_complete, // 1-cycle pulse from radar_receiver_final edge detector + // FT2232H Physical Interface (245 Synchronous FIFO mode) inout wire [7:0] ft_data, // 8-bit bidirectional data bus input wire ft_rxf_n, // Receive FIFO not empty (active low) @@ -66,9 +99,7 @@ module usb_data_interface_ft2232h ( output reg ft_rd_n, // Read strobe (active low) output reg ft_wr_n, // Write strobe (active low) output reg ft_oe_n, // Output enable (active low) — bus direction - output reg ft_siwu, // Send Immediate / WakeUp — UNUSED: held low. - // SIWU could flush the TX FIFO for lower latency - // but is not needed at current data rates. Deferred. + output reg ft_siwu, // Send Immediate / WakeUp // Clock from FT2232H (directly used — no ODDR forwarding needed) input wire ft_clk, // 60 MHz from FT2232H CLKOUT @@ -81,12 +112,12 @@ module usb_data_interface_ft2232h ( output reg [15:0] cmd_value, // Stream control input (clk domain, CDC'd internally) - input wire [2:0] stream_control, + input wire [5:0] stream_control, // Status readback inputs (clk domain, CDC'd internally) input wire status_request, input wire [15:0] status_cfar_threshold, - input wire [2:0] status_stream_ctrl, + input wire [5:0] status_stream_ctrl, input wire [1:0] status_radar_mode, input wire [15:0] status_long_chirp, input wire [15:0] status_long_listen, @@ -109,30 +140,45 @@ module usb_data_interface_ft2232h ( ); // ============================================================================ -// PACKET FORMAT CONSTANTS +// CONSTANTS // ============================================================================ localparam HEADER = 8'hAA; localparam FOOTER = 8'h55; localparam STATUS_HEADER = 8'hBB; -// Data packet: 11 bytes total -localparam DATA_PKT_LEN = 5'd11; -// Status packet: 26 bytes total (1 header + 24 data + 1 footer) -localparam STATUS_PKT_LEN = 5'd26; +localparam NUM_RANGE_BINS = `RP_NUM_RANGE_BINS; // 512 +localparam NUM_DOPPLER_BINS = `RP_NUM_DOPPLER_BINS; // 32 +localparam RANGE_BIN_BITS = `RP_RANGE_BIN_BITS; // 9 +localparam FRAME_CELLS = NUM_RANGE_BINS * NUM_DOPPLER_BINS; // 16384 + +// Frame header: 8 bytes (0xAA + flags + frame_num[2] + range_bins[2] + doppler_bins[2]) +localparam FRAME_HDR_BYTES = 8; +// Range profile section: 512 × 2 = 1024 bytes +localparam RANGE_SECTION_BYTES = NUM_RANGE_BINS * 2; +// Doppler mag section: 16384 × 2 = 32768 bytes +localparam DOPPLER_MAG_SECTION_BYTES = FRAME_CELLS * 2; +// Detection flag section: 16384 bits = 2048 bytes +localparam DETECT_SECTION_BYTES = FRAME_CELLS / 8; + +// Status packet: 26 bytes (unchanged) +localparam STATUS_PKT_LEN = 5'd26; // ============================================================================ -// WRITE FSM STATES (FPGA → Host) +// WRITE FSM STATES (FPGA → Host, ft_clk domain) // ============================================================================ -localparam [2:0] WR_IDLE = 3'd0, - WR_DATA_SEND = 3'd1, - WR_STATUS_SEND = 3'd2, - WR_DONE = 3'd3; +localparam [3:0] WR_IDLE = 4'd0, + WR_FRAME_HDR = 4'd1, + WR_RANGE_DATA = 4'd2, + WR_DOPPLER_DATA = 4'd3, + WR_DETECT_DATA = 4'd4, + WR_FRAME_FOOTER = 4'd5, + WR_STATUS_SEND = 4'd6, + WR_DONE = 4'd7; -reg [2:0] wr_state; -reg [4:0] wr_byte_idx; // Byte counter within packet (0..10 data, 0..25 status) +reg [3:0] wr_state; // ============================================================================ -// READ FSM STATES (Host → FPGA) +// READ FSM STATES (Host → FPGA, ft_clk domain — unchanged from legacy) // ============================================================================ localparam [2:0] RD_IDLE = 3'd0, RD_OE_ASSERT = 3'd1, @@ -141,67 +187,362 @@ localparam [2:0] RD_IDLE = 3'd0, RD_PROCESS = 3'd4; reg [2:0] rd_state; -reg [1:0] rd_byte_cnt; // 0..3 for 4-byte command word -reg [31:0] rd_shift_reg; // Shift register to assemble 4-byte command -reg rd_cmd_complete; // Set when all 4 bytes received (distinguishes from abort) +reg [1:0] rd_byte_cnt; +reg [31:0] rd_shift_reg; +reg rd_cmd_complete; // Set when all 4 bytes received (distinguishes complete from aborted) // ============================================================================ // DATA BUS DIRECTION CONTROL // ============================================================================ reg [7:0] ft_data_out; -reg ft_data_oe; // 1 = FPGA drives bus, 0 = FT2232H drives bus +reg ft_data_oe; assign ft_data = ft_data_oe ? ft_data_out : 8'hZZ; // ============================================================================ -// TOGGLE CDC: clk (100 MHz) → ft_clk (60 MHz) +// FRAME BRAM — Doppler Magnitude (clk write, ft_clk read) // ============================================================================ -// Toggle CDC is used instead of level synchronizers because a 10 ns pulse -// on clk_100m could be missed by the 16.67 ns ft_clk period. Toggle CDC -// converts pulses to level transitions, which are always captured. +// Simple dual-port BRAM: port A = write (100 MHz), port B = read (60 MHz). +// Xilinx infers true dual-port BRAM from this pattern. +// Address = {range_bin[8:0], doppler_bin[4:0]} = 14 bits, 16384 entries. +// Data = 16-bit Manhattan magnitude |I| + |Q|. -// --- Toggle registers (clk domain) --- -reg range_valid_toggle; -reg doppler_valid_toggle; -reg cfar_valid_toggle; -reg status_req_toggle; +(* ram_style = "block" *) reg [15:0] doppler_mag_bram [0:FRAME_CELLS-1]; -// --- Holding registers (clk domain) --- -// Data captured on valid pulse, held stable for ft_clk domain to read -reg [31:0] range_profile_hold; -reg [15:0] doppler_real_hold; -reg [15:0] doppler_imag_hold; -reg cfar_detection_hold; +// Write port (clk domain) +reg [13:0] mag_wr_addr; +reg [15:0] mag_wr_data; +reg mag_wr_en; + +always @(posedge clk) begin + if (mag_wr_en) + doppler_mag_bram[mag_wr_addr] <= mag_wr_data; +end + +// Read port (ft_clk domain) +reg [13:0] mag_rd_addr; +reg [15:0] mag_rd_data; + +always @(posedge ft_clk) begin + mag_rd_data <= doppler_mag_bram[mag_rd_addr]; +end + +// ============================================================================ +// RANGE PROFILE BRAM (clk write, ft_clk read) +// ============================================================================ +// 512 entries × 16-bit magnitude. Written as range bins arrive. +// range_profile input is {range_q[15:0], range_i[15:0]}. +// We store Manhattan magnitude: |I| + |Q|. + +(* ram_style = "block" *) reg [15:0] range_bram [0:NUM_RANGE_BINS-1]; + +reg [RANGE_BIN_BITS-1:0] range_wr_addr; +reg [15:0] range_wr_data; +reg range_wr_en; + +always @(posedge clk) begin + if (range_wr_en) + range_bram[range_wr_addr] <= range_wr_data; +end + +reg [RANGE_BIN_BITS-1:0] range_rd_addr; +reg [15:0] range_rd_data; + +always @(posedge ft_clk) begin + range_rd_data <= range_bram[range_rd_addr]; +end + +// ============================================================================ +// DETECTION FLAG BRAM (clk write, ft_clk read) +// ============================================================================ +// 16384 bits stored as 2048 × 8-bit bytes. +// Write: individual bit-set on cfar_valid with cfar_detection=1. +// Clear: bulk clear on frame_complete (start of new frame). +// Address = {range_bin[8:0], doppler_bin[4:2]} = byte address (11 bits, 2048 entries) +// Bit position = doppler_bin[1:0] within sub-byte ... actually let's use +// a simpler scheme: 16384 entries × 1-bit, but that doesn't map well to BRAM. +// +// Better: Store as 2048 × 8-bit. Each byte holds 8 consecutive detection bits. +// Bit address = {range_bin, doppler_bin} = 14-bit. Byte addr = bit_addr[13:3]. +// Bit position = bit_addr[2:0]. +// On write: read-modify-write (set bit). On frame clear: bulk zero. +// +// For simplicity and BRAM efficiency, we use a separate approach: +// Store detections in a small register file and pack during transfer. +// With 512×32=16384 bits, that's 2048 bytes — fits in 1 BRAM18. +// +// IMPLEMENTATION: We use the BRAM in byte-write mode. On cfar_valid, we do +// a 1-cycle read then 1-cycle write-back with the bit set. This works because +// CFAR outputs arrive one cell per clock cycle (sequential scan). + +(* ram_style = "block" *) reg [7:0] detect_bram [0:2047]; + +reg [10:0] detect_wr_addr; +reg [7:0] detect_wr_data; +reg detect_wr_en; + +always @(posedge clk) begin + if (detect_wr_en) + detect_bram[detect_wr_addr] <= detect_wr_data; +end + +reg [10:0] detect_rd_addr; +reg [7:0] detect_rd_data; + +always @(posedge ft_clk) begin + detect_rd_data <= detect_bram[detect_rd_addr]; +end + +// Detection BRAM read-modify-write pipeline (clk domain) +reg [10:0] detect_rmw_addr; +reg [7:0] detect_rmw_rd; +reg [2:0] detect_rmw_bit; +reg detect_rmw_value; +reg [1:0] detect_rmw_state; // 0=idle, 1=read, 2=write + +// Synchronous read for RMW (clk domain, separate from ft_clk read port) +// We need a second read port for RMW. Since Xilinx BRAM is true dual-port, +// we use port A for both RMW-read and write (same clock), port B for ft_clk read. +// Port A read: +reg [7:0] detect_rmw_rddata; +always @(posedge clk) begin + detect_rmw_rddata <= detect_bram[detect_rmw_addr]; +end + +// ============================================================================ +// MANHATTAN MAGNITUDE COMPUTATION (combinational) +// ============================================================================ +// |I| + |Q| with saturation to 16 bits. +// doppler_real and doppler_imag are signed 16-bit. + +wire [15:0] abs_doppler_i = doppler_real[15] ? (~doppler_real + 16'd1) : doppler_real; +wire [15:0] abs_doppler_q = doppler_imag[15] ? (~doppler_imag + 16'd1) : doppler_imag; +wire [16:0] manhattan_sum = {1'b0, abs_doppler_i} + {1'b0, abs_doppler_q}; +wire [15:0] manhattan_mag = manhattan_sum[16] ? 16'hFFFF : manhattan_sum[15:0]; + +// Range profile magnitude. +// Input range_profile is {16'd0, decimated_manhattan_mag} from radar_receiver_final, +// so range_profile[15:0] is already an unsigned Manhattan magnitude. +// We keep the full I/Q Manhattan path for future I/Q streaming support. +wire [15:0] range_i = range_profile[15:0]; +wire [15:0] range_q = range_profile[31:16]; +wire [15:0] abs_range_i = range_i[15] ? (~range_i + 16'd1) : range_i; +wire [15:0] abs_range_q = range_q[15] ? (~range_q + 16'd1) : range_q; +wire [16:0] range_manhattan = {1'b0, abs_range_i} + {1'b0, abs_range_q}; +wire [15:0] range_mag = range_manhattan[16] ? 16'hFFFF : range_manhattan[15:0]; + +// ============================================================================ +// FRAME WRITE LOGIC (clk domain, 100 MHz) +// ============================================================================ +// Accumulates one full frame of data into BRAMs. +// On frame_complete: toggles frame_ready signal for CDC to ft_clk domain. + +reg [15:0] frame_number; // Incrementing frame counter +reg frame_ready_toggle; // Toggle CDC: frame ready for USB transfer +reg frame_filling; // 1 = currently accumulating frame data +reg [13:0] detect_clear_addr; // Counter for bulk-clearing detection BRAM +reg detect_clearing; // 1 = bulk clear in progress + +// Range bin counter for range profile writes +// range_valid arrives with range_profile data; we need range_bin_in to address it. +// However, range_valid comes from the matched filter chain BEFORE Doppler processing, +// so range_bin_in may not be valid at that time. We use a simple counter instead, +// since range bins arrive sequentially (0, 1, 2, ..., 511) within each chirp. +// Actually, range_profile from radar_receiver_final comes with associated range index +// from the matched filter. But the USB interface receives usb_range_valid which is +// just the Doppler output re-packed. Let's use the range_bin_in input which is +// the Doppler processor's range_bin output — valid when doppler_valid is asserted. +// +// For the range profile, we accumulate the magnitude across chirps (sum of |I|+|Q| +// for each range bin across all Doppler bins). This gives a range profile that +// represents total energy per range bin. +// +// SIMPLER APPROACH: Just store the range_profile data directly using a sequential +// counter. range_valid fires once per range bin per chirp; we overwrite (last chirp wins). +// This matches the legacy behavior where range_profile was sent per-sample. + +reg [RANGE_BIN_BITS-1:0] range_write_counter; always @(posedge clk or negedge reset_n) begin if (!reset_n) begin - range_valid_toggle <= 1'b0; - doppler_valid_toggle <= 1'b0; - cfar_valid_toggle <= 1'b0; - status_req_toggle <= 1'b0; - range_profile_hold <= 32'd0; - doppler_real_hold <= 16'd0; - doppler_imag_hold <= 16'd0; - cfar_detection_hold <= 1'b0; + frame_number <= 16'd0; + frame_ready_toggle <= 1'b0; + frame_filling <= 1'b1; + mag_wr_en <= 1'b0; + mag_wr_addr <= 14'd0; + mag_wr_data <= 16'd0; + range_wr_en <= 1'b0; + range_wr_addr <= {RANGE_BIN_BITS{1'b0}}; + range_wr_data <= 16'd0; + detect_wr_en <= 1'b0; + detect_wr_addr <= 11'd0; + detect_wr_data <= 8'd0; + detect_clearing <= 1'b0; + detect_clear_addr <= 14'd0; + detect_rmw_state <= 2'd0; + detect_rmw_addr <= 11'd0; + detect_rmw_bit <= 3'd0; + detect_rmw_value <= 1'b0; + range_write_counter <= {RANGE_BIN_BITS{1'b0}}; end else begin - if (range_valid) begin - range_valid_toggle <= ~range_valid_toggle; - range_profile_hold <= range_profile; + // Default: deassert write enables + mag_wr_en <= 1'b0; + range_wr_en <= 1'b0; + detect_wr_en <= 1'b0; + + // === Detection BRAM bulk clear (runs after frame_complete) === + if (detect_clearing) begin + detect_wr_en <= 1'b1; + detect_wr_addr <= detect_clear_addr[13:3]; + detect_wr_data <= 8'd0; + if (detect_clear_addr[13:3] == 11'd2047) begin + detect_clearing <= 1'b0; + detect_clear_addr <= 14'd0; + end else begin + detect_clear_addr <= detect_clear_addr + 14'd8; // Step by 8 bits = 1 byte + end end - if (doppler_valid) begin - doppler_valid_toggle <= ~doppler_valid_toggle; - doppler_real_hold <= doppler_real; - doppler_imag_hold <= doppler_imag; + + // === Detection RMW state machine === + case (detect_rmw_state) + 2'd0: begin /* idle */ end + 2'd1: begin + // Read cycle completed (data available next cycle) + detect_rmw_state <= 2'd2; + end + 2'd2: begin + // Write back with bit set/cleared + detect_wr_en <= 1'b1; + detect_wr_addr <= detect_rmw_addr; + if (detect_rmw_value) + detect_wr_data <= detect_rmw_rddata | (8'd1 << detect_rmw_bit); + else + detect_wr_data <= detect_rmw_rddata & ~(8'd1 << detect_rmw_bit); + detect_rmw_state <= 2'd0; + end + default: detect_rmw_state <= 2'd0; + endcase + + // === Doppler magnitude write === + if (doppler_valid && frame_filling) begin + mag_wr_en <= 1'b1; + mag_wr_addr <= {range_bin_in, doppler_bin_in}; + mag_wr_data <= manhattan_mag; end - if (cfar_valid) begin - cfar_valid_toggle <= ~cfar_valid_toggle; - cfar_detection_hold <= cfar_detection; + + // === Range profile write === + if (range_valid && frame_filling) begin + range_wr_en <= 1'b1; + range_wr_addr <= range_write_counter; + range_wr_data <= range_mag; + range_write_counter <= range_write_counter + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; end + + // === CFAR detection write (read-modify-write) === + if (cfar_valid && frame_filling && detect_rmw_state == 2'd0 && !detect_clearing) begin + // Start RMW: compute byte address and bit position + // bit_addr = {range_bin_in, doppler_bin_in} = 14-bit + // byte_addr = bit_addr[13:3], bit_pos = bit_addr[2:0] + detect_rmw_addr <= {range_bin_in, doppler_bin_in[4:3]}; + detect_rmw_bit <= doppler_bin_in[2:0]; + detect_rmw_value <= cfar_detection; + detect_rmw_state <= 2'd1; + end + + // === Frame complete: latch frame, signal ft_clk domain === + if (frame_complete) begin + frame_ready_toggle <= ~frame_ready_toggle; + frame_number <= frame_number + 16'd1; + frame_filling <= 1'b0; // Stop writing until USB transfer starts + range_write_counter <= {RANGE_BIN_BITS{1'b0}}; + end + + // === Resume filling after ft_clk domain acknowledges frame transfer === + // (handled below via frame_ack_toggle CDC) + // For simplicity, we resume filling immediately — the USB transfer reads + // from the same BRAM while new data writes. Since the Doppler burst + // (0.164 ms) is much shorter than the inter-frame gap (5.6 ms), and USB + // transfer (0.875 ms) also fits within the gap, there's no overlap: + // t=0: frame_complete pulse (1 clk cycle, from edge detector) + // t=0-0.87ms: USB reads BRAM (old frame data is stable) + // t=5.6ms: next Doppler burst starts writing + // So single-buffer is safe. Re-enable filling after a short delay. + if (!frame_filling && !frame_complete) begin + // Re-enable after 1 cycle (frame_complete already deasserted) + frame_filling <= 1'b1; + detect_clearing <= 1'b1; // Clear detection BRAM for next frame + detect_clear_addr <= 14'd0; + end + end +end + +// ============================================================================ +// TOGGLE CDC: clk (100 MHz) → ft_clk (60 MHz) +// ============================================================================ + +// --- Toggle registers (clk domain) --- +reg status_req_toggle; + +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + status_req_toggle <= 1'b0; + end else begin if (status_request) status_req_toggle <= ~status_req_toggle; end end +// --- 3-stage synchronizers (ft_clk domain) --- +(* ASYNC_REG = "TRUE" *) reg [2:0] frame_ready_sync; +(* ASYNC_REG = "TRUE" *) reg [2:0] status_toggle_sync; + +reg frame_ready_prev; +reg status_toggle_prev; + +wire frame_ready_ft = frame_ready_sync[2] ^ frame_ready_prev; +wire status_req_ft = status_toggle_sync[2] ^ status_toggle_prev; + +// --- Stream control CDC (6-bit, 2-stage level sync) --- +(* ASYNC_REG = "TRUE" *) reg [5:0] stream_ctrl_sync_0; +(* ASYNC_REG = "TRUE" *) reg [5:0] stream_ctrl_sync_1; + +wire stream_range_en = stream_ctrl_sync_1[0]; +wire stream_doppler_en = stream_ctrl_sync_1[1]; +wire stream_cfar_en = stream_ctrl_sync_1[2]; +wire stream_mag_only = stream_ctrl_sync_1[3]; +wire stream_sparse_det = stream_ctrl_sync_1[4]; +// Bit 5 reserved +// NOTE: Phase 1 write FSM always sends magnitude-only range/Doppler and +// dense detection bitmap. The mag_only and sparse_det bits are included in +// the frame header for the Python parser but are not yet honored by the +// write FSM. Phase 2 will add I/Q and sparse detection paths. + +// --- Frame metadata snapshot (latched in clk domain, stable for ft_clk read) --- +reg [15:0] frame_number_snapshot; +reg [5:0] stream_flags_snapshot; + +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + frame_number_snapshot <= 16'd0; + stream_flags_snapshot <= `RP_STREAM_CTRL_DEFAULT; + end else if (frame_complete) begin + frame_number_snapshot <= frame_number; + stream_flags_snapshot <= stream_control; + end +end + +// --- Status snapshot (ft_clk domain) --- +reg [31:0] status_words [0:5]; + +// Byte counter for write FSM (needs to be wide enough for largest section) +reg [15:0] wr_byte_idx; + +// BRAM read address for frame transfer +reg [13:0] bram_rd_cell; // Cell index 0..16383 for doppler/detect +reg [RANGE_BIN_BITS-1:0] range_rd_idx; // Range bin index 0..511 +reg wr_byte_phase; // 0=MSB, 1=LSB for 16-bit values +reg [10:0] detect_rd_idx; // Byte index 0..2047 for detection flags + // ============================================================================ // CLOCK-ACTIVITY WATCHDOG (clk domain) // ============================================================================ @@ -211,9 +552,13 @@ end // cycles (~0.65 ms at 100 MHz), ft_clk_lost asserts. // // ft_clk_lost feeds into the effective reset for the ft_clk domain so that -// FSMs and capture registers return to a clean state automatically. +// the write FSM and BRAM read pointers return to a clean state automatically +// when the cable is unplugged. When ft_clk resumes, a 2-stage reset +// synchronizer deasserts the effective reset cleanly in the ft_clk domain. -// Toggle register: flips every ft_clk edge (ft_clk domain) +// Toggle register: flips every ft_clk edge (ft_clk domain). +// Uses raw ft_reset_n here (not ft_effective_reset_n) to avoid a +// combinational loop through ft_clk_lost. reg ft_heartbeat; always @(posedge ft_clk or negedge ft_reset_n) begin if (!ft_reset_n) @@ -222,7 +567,7 @@ always @(posedge ft_clk or negedge ft_reset_n) begin ft_heartbeat <= ~ft_heartbeat; end -// Synchronize heartbeat into clk domain (2-stage) +// Synchronize heartbeat into clk domain (2-stage) and watch for stalls (* ASYNC_REG = "TRUE" *) reg [1:0] ft_hb_sync; reg ft_hb_prev; reg [15:0] ft_clk_timeout; @@ -230,10 +575,10 @@ reg ft_clk_lost; always @(posedge clk or negedge reset_n) begin if (!reset_n) begin - ft_hb_sync <= 2'b00; - ft_hb_prev <= 1'b0; + ft_hb_sync <= 2'b00; + ft_hb_prev <= 1'b0; ft_clk_timeout <= 16'd0; - ft_clk_lost <= 1'b0; + ft_clk_lost <= 1'b0; end else begin ft_hb_sync <= {ft_hb_sync[0], ft_heartbeat}; ft_hb_prev <= ft_hb_sync[1]; @@ -251,236 +596,43 @@ always @(posedge clk or negedge reset_n) begin end end -// Effective FT-domain reset: asserted by global reset OR clock loss. +// Effective FT-domain reset: asserted by ft_reset_n OR clock loss. // Deassertion synchronized to ft_clk via 2-stage sync to avoid // metastability on the recovery edge. -(* ASYNC_REG = "TRUE" *) reg [1:0] ft_reset_sync; wire ft_reset_raw_n = ft_reset_n & ~ft_clk_lost; - +(* ASYNC_REG = "TRUE" *) reg [1:0] ft_reset_sync; always @(posedge ft_clk or negedge ft_reset_raw_n) begin if (!ft_reset_raw_n) ft_reset_sync <= 2'b00; else ft_reset_sync <= {ft_reset_sync[0], 1'b1}; end - wire ft_effective_reset_n = ft_reset_sync[1]; -// --- 3-stage synchronizers (ft_clk domain) --- -// 3 stages for better MTBF at 60 MHz - -(* ASYNC_REG = "TRUE" *) reg [2:0] range_toggle_sync; -(* ASYNC_REG = "TRUE" *) reg [2:0] doppler_toggle_sync; -(* ASYNC_REG = "TRUE" *) reg [2:0] cfar_toggle_sync; -(* ASYNC_REG = "TRUE" *) reg [2:0] status_toggle_sync; - -reg range_toggle_prev; -reg doppler_toggle_prev; -reg cfar_toggle_prev; -reg status_toggle_prev; - -// Edge-detected pulses in ft_clk domain -wire range_valid_ft = range_toggle_sync[2] ^ range_toggle_prev; -wire doppler_valid_ft = doppler_toggle_sync[2] ^ doppler_toggle_prev; -wire cfar_valid_ft = cfar_toggle_sync[2] ^ cfar_toggle_prev; -wire status_req_ft = status_toggle_sync[2] ^ status_toggle_prev; - -// --- Stream control CDC (per-bit 2-stage, changes infrequently) --- -(* ASYNC_REG = "TRUE" *) reg [2:0] stream_ctrl_sync_0; -(* ASYNC_REG = "TRUE" *) reg [2:0] stream_ctrl_sync_1; -wire stream_range_en = stream_ctrl_sync_1[0]; -wire stream_doppler_en = stream_ctrl_sync_1[1]; -wire stream_cfar_en = stream_ctrl_sync_1[2]; - -// --- Captured data in ft_clk domain --- -reg [31:0] range_profile_cap; -reg [15:0] doppler_real_cap; -reg [15:0] doppler_imag_cap; -reg cfar_detection_cap; - -// Data-pending flags (ft_clk domain) -reg doppler_data_pending; -reg cfar_data_pending; - -// 1-cycle delayed range trigger. range_valid_ft fires on the same clock -// edge that range_profile_cap is captured (non-blocking). If the FSM -// reads range_profile_cap on that same edge it sees the STALE value. -// Delaying the trigger by one cycle guarantees the capture register has -// settled before the byte mux reads it. -reg range_data_ready; - -// Frame sync: sample counter (ft_clk domain, wraps at NUM_CELLS) -// Bit 7 of detection byte is set when sample_counter == 0 (frame start). -// This allows the Python host to resynchronize without a protocol change. -localparam [11:0] NUM_CELLS = 12'd2048; // 64 range x 32 doppler -reg [11:0] sample_counter; - -// Status snapshot (ft_clk domain) -reg [31:0] status_words [0:5]; - -integer si; // status_words loop index +integer si; always @(posedge ft_clk or negedge ft_effective_reset_n) begin if (!ft_effective_reset_n) begin - range_toggle_sync <= 3'b000; - doppler_toggle_sync <= 3'b000; - cfar_toggle_sync <= 3'b000; - status_toggle_sync <= 3'b000; - range_toggle_prev <= 1'b0; - doppler_toggle_prev <= 1'b0; - cfar_toggle_prev <= 1'b0; - status_toggle_prev <= 1'b0; - range_profile_cap <= 32'd0; - doppler_real_cap <= 16'd0; - doppler_imag_cap <= 16'd0; - cfar_detection_cap <= 1'b0; - range_data_ready <= 1'b0; - // Default to range-only on reset (prevents write FSM deadlock) - stream_ctrl_sync_0 <= 3'b001; - stream_ctrl_sync_1 <= 3'b001; - // Explicit reset for status_words to avoid Synth 8-7137 + frame_ready_sync <= 3'b000; + status_toggle_sync <= 3'b000; + frame_ready_prev <= 1'b0; + status_toggle_prev <= 1'b0; + stream_ctrl_sync_0 <= `RP_STREAM_CTRL_DEFAULT; + stream_ctrl_sync_1 <= `RP_STREAM_CTRL_DEFAULT; for (si = 0; si < 6; si = si + 1) status_words[si] <= 32'd0; - end else begin - // 3-stage toggle synchronizers - range_toggle_sync <= {range_toggle_sync[1:0], range_valid_toggle}; - doppler_toggle_sync <= {doppler_toggle_sync[1:0], doppler_valid_toggle}; - cfar_toggle_sync <= {cfar_toggle_sync[1:0], cfar_valid_toggle}; - status_toggle_sync <= {status_toggle_sync[1:0], status_req_toggle}; - - // Previous toggle value for edge detection - range_toggle_prev <= range_toggle_sync[2]; - doppler_toggle_prev <= doppler_toggle_sync[2]; - cfar_toggle_prev <= cfar_toggle_sync[2]; - status_toggle_prev <= status_toggle_sync[2]; - - // Stream control CDC (2-stage) - stream_ctrl_sync_0 <= stream_control; - stream_ctrl_sync_1 <= stream_ctrl_sync_0; - - // Capture data on toggle edge - if (range_valid_ft) - range_profile_cap <= range_profile_hold; - if (doppler_valid_ft) begin - doppler_real_cap <= doppler_real_hold; - doppler_imag_cap <= doppler_imag_hold; - end - if (cfar_valid_ft) - cfar_detection_cap <= cfar_detection_hold; - - // 1-cycle delayed trigger: ensures range_profile_cap has settled - // before the FSM reads it via the byte mux. - range_data_ready <= range_valid_ft; - - // Status snapshot on request - if (status_req_ft) begin - // Word 0: {0xFF[31:24], mode[23:22], stream[21:19], 3'b000[18:16], threshold[15:0]} - status_words[0] <= {8'hFF, status_radar_mode, status_stream_ctrl, - 3'b000, status_cfar_threshold}; - status_words[1] <= {status_long_chirp, status_long_listen}; - status_words[2] <= {status_guard, status_short_chirp}; - status_words[3] <= {status_short_listen, 10'd0, status_chirps_per_elev}; - status_words[4] <= {status_agc_current_gain, // [31:28] - status_agc_peak_magnitude, // [27:20] - status_agc_saturation_count, // [19:12] - status_agc_enable, // [11] - 9'd0, // [10:2] reserved - status_range_mode}; // [1:0] - status_words[5] <= {7'd0, status_self_test_busy, - 8'd0, status_self_test_detail, - 3'd0, status_self_test_flags}; - end - end -end - -// ============================================================================ -// WRITE DATA MUX — byte selection for data packet (11 bytes) -// ============================================================================ -// Mux-based byte selection is simpler than a shift register and gives -// explicit byte ordering for synthesis. - -reg [7:0] data_pkt_byte; - -always @(*) begin - case (wr_byte_idx) - 5'd0: data_pkt_byte = HEADER; - 5'd1: data_pkt_byte = range_profile_cap[31:24]; // range MSB - 5'd2: data_pkt_byte = range_profile_cap[23:16]; - 5'd3: data_pkt_byte = range_profile_cap[15:8]; - 5'd4: data_pkt_byte = range_profile_cap[7:0]; // range LSB - // Doppler fields: zero when stream_doppler_en is off - 5'd5: data_pkt_byte = stream_doppler_en ? doppler_real_cap[15:8] : 8'd0; - 5'd6: data_pkt_byte = stream_doppler_en ? doppler_real_cap[7:0] : 8'd0; - 5'd7: data_pkt_byte = stream_doppler_en ? doppler_imag_cap[15:8] : 8'd0; - 5'd8: data_pkt_byte = stream_doppler_en ? doppler_imag_cap[7:0] : 8'd0; - // Detection field: zero when stream_cfar_en is off - // Bit 7 = frame_start flag (sample_counter == 0), bit 0 = cfar_detection - 5'd9: data_pkt_byte = stream_cfar_en - ? {(sample_counter == 12'd0), 6'b0, cfar_detection_cap} - : {(sample_counter == 12'd0), 7'd0}; - 5'd10: data_pkt_byte = FOOTER; - default: data_pkt_byte = 8'h00; - endcase -end - -// ============================================================================ -// WRITE DATA MUX — byte selection for status packet (26 bytes) -// ============================================================================ - -reg [7:0] status_pkt_byte; - -always @(*) begin - case (wr_byte_idx) - 5'd0: status_pkt_byte = STATUS_HEADER; - // Word 0 (bytes 1-4) - 5'd1: status_pkt_byte = status_words[0][31:24]; - 5'd2: status_pkt_byte = status_words[0][23:16]; - 5'd3: status_pkt_byte = status_words[0][15:8]; - 5'd4: status_pkt_byte = status_words[0][7:0]; - // Word 1 (bytes 5-8) - 5'd5: status_pkt_byte = status_words[1][31:24]; - 5'd6: status_pkt_byte = status_words[1][23:16]; - 5'd7: status_pkt_byte = status_words[1][15:8]; - 5'd8: status_pkt_byte = status_words[1][7:0]; - // Word 2 (bytes 9-12) - 5'd9: status_pkt_byte = status_words[2][31:24]; - 5'd10: status_pkt_byte = status_words[2][23:16]; - 5'd11: status_pkt_byte = status_words[2][15:8]; - 5'd12: status_pkt_byte = status_words[2][7:0]; - // Word 3 (bytes 13-16) - 5'd13: status_pkt_byte = status_words[3][31:24]; - 5'd14: status_pkt_byte = status_words[3][23:16]; - 5'd15: status_pkt_byte = status_words[3][15:8]; - 5'd16: status_pkt_byte = status_words[3][7:0]; - // Word 4 (bytes 17-20) - 5'd17: status_pkt_byte = status_words[4][31:24]; - 5'd18: status_pkt_byte = status_words[4][23:16]; - 5'd19: status_pkt_byte = status_words[4][15:8]; - 5'd20: status_pkt_byte = status_words[4][7:0]; - // Word 5 (bytes 21-24) - 5'd21: status_pkt_byte = status_words[5][31:24]; - 5'd22: status_pkt_byte = status_words[5][23:16]; - 5'd23: status_pkt_byte = status_words[5][15:8]; - 5'd24: status_pkt_byte = status_words[5][7:0]; - // Footer (byte 25) - 5'd25: status_pkt_byte = FOOTER; - default: status_pkt_byte = 8'h00; - endcase -end - -// ============================================================================ -// MAIN FSM (ft_clk domain) -// ============================================================================ -// Write FSM and Read FSM share the bus. Write FSM operates when Read FSM -// is idle. Read FSM takes priority when host has data available. - -always @(posedge ft_clk or negedge ft_effective_reset_n) begin - if (!ft_effective_reset_n) begin wr_state <= WR_IDLE; - wr_byte_idx <= 5'd0; - rd_state <= RD_IDLE; - rd_byte_cnt <= 2'd0; + wr_byte_idx <= 16'd0; + wr_byte_phase <= 1'b0; + bram_rd_cell <= 14'd0; + range_rd_idx <= {RANGE_BIN_BITS{1'b0}}; + range_rd_addr <= {RANGE_BIN_BITS{1'b0}}; + detect_rd_idx <= 11'd0; + detect_rd_addr <= 11'd0; + mag_rd_addr <= 14'd0; + rd_state <= RD_IDLE; + rd_byte_cnt <= 2'd0; rd_cmd_complete <= 1'b0; - rd_shift_reg <= 32'd0; + rd_shift_reg <= 32'd0; ft_data_out <= 8'd0; ft_data_oe <= 1'b0; ft_rd_n <= 1'b1; @@ -492,57 +644,70 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin cmd_opcode <= 8'd0; cmd_addr <= 8'd0; cmd_value <= 16'd0; - doppler_data_pending <= 1'b0; - cfar_data_pending <= 1'b0; - sample_counter <= 12'd0; end else begin - // Default: clear one-shot signals cmd_valid <= 1'b0; - // Data-pending flag management - if (doppler_valid_ft) - doppler_data_pending <= 1'b1; - if (cfar_valid_ft) - cfar_data_pending <= 1'b1; + // 3-stage toggle synchronizers + frame_ready_sync <= {frame_ready_sync[1:0], frame_ready_toggle}; + status_toggle_sync <= {status_toggle_sync[1:0], status_req_toggle}; + frame_ready_prev <= frame_ready_sync[2]; + status_toggle_prev <= status_toggle_sync[2]; + + // Stream control CDC + stream_ctrl_sync_0 <= stream_control; + stream_ctrl_sync_1 <= stream_ctrl_sync_0; + + // Status snapshot on request + if (status_req_ft) begin + // Word 0: {0xFF, mode[1:0], stream[5:0], threshold[15:0]} + // NOTE: stream_ctrl now 6-bit. Pack as: {0xFF, mode, stream[5:3], stream[2:0], threshold} + // Keep backward-compatible layout: {0xFF[31:24], mode[23:22], stream[21:16], threshold[15:0]} + status_words[0] <= {8'hFF, status_radar_mode, status_stream_ctrl, status_cfar_threshold}; + status_words[1] <= {status_long_chirp, status_long_listen}; + status_words[2] <= {status_guard, status_short_chirp}; + status_words[3] <= {status_short_listen, 10'd0, status_chirps_per_elev}; + status_words[4] <= {status_agc_current_gain, + status_agc_peak_magnitude, + status_agc_saturation_count, + status_agc_enable, + 9'd0, + status_range_mode}; + status_words[5] <= {7'd0, status_self_test_busy, + 8'd0, status_self_test_detail, + 3'd0, status_self_test_flags}; + end // ================================================================ - // READ FSM — Host → FPGA command path (4-byte sequential read) + // READ FSM — Host → FPGA command path (unchanged from legacy) // ================================================================ case (rd_state) RD_IDLE: begin - // Only start reading if write FSM is idle and host has data if (wr_state == WR_IDLE && !ft_rxf_n) begin - ft_oe_n <= 1'b0; // Assert OE: FT2232H drives bus - ft_data_oe <= 1'b0; // FPGA releases bus + ft_oe_n <= 1'b0; + ft_data_oe <= 1'b0; rd_state <= RD_OE_ASSERT; end end RD_OE_ASSERT: begin - // 1-cycle turnaround: OE asserted, bus settling if (!ft_rxf_n) begin - ft_rd_n <= 1'b0; // Assert RD: start reading + ft_rd_n <= 1'b0; rd_state <= RD_READING; end else begin - // Host withdrew data — abort ft_oe_n <= 1'b1; rd_state <= RD_IDLE; end end RD_READING: begin - // Sample byte and shift into command register - // Byte order: opcode, addr, value_hi, value_lo rd_shift_reg <= {rd_shift_reg[23:0], ft_data}; if (rd_byte_cnt == 2'd3) begin - // All 4 bytes received ft_rd_n <= 1'b1; rd_byte_cnt <= 2'd0; rd_cmd_complete <= 1'b1; rd_state <= RD_DEASSERT; end else begin rd_byte_cnt <= rd_byte_cnt + 2'd1; - // Keep reading if more data available if (ft_rxf_n) begin // Host ran out of data mid-command — abort ft_rd_n <= 1'b1; @@ -554,12 +719,11 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin end RD_DEASSERT: begin - // Deassert OE (1 cycle after RD deasserted) - ft_oe_n <= 1'b1; + ft_oe_n <= 1'b1; // Only process if we received a full 4-byte command if (rd_cmd_complete) begin rd_cmd_complete <= 1'b0; - rd_state <= RD_PROCESS; + rd_state <= RD_PROCESS; end else begin // Incomplete command — discard rd_state <= RD_IDLE; @@ -567,7 +731,6 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin end RD_PROCESS: begin - // Decode the assembled command word cmd_data <= rd_shift_reg; cmd_opcode <= rd_shift_reg[31:24]; cmd_addr <= rd_shift_reg[23:16]; @@ -580,76 +743,214 @@ always @(posedge ft_clk or negedge ft_effective_reset_n) begin endcase // ================================================================ - // WRITE FSM — FPGA → Host data streaming (byte-sequential) + // WRITE FSM — Bulk per-frame transfer (ft_clk domain) // ================================================================ if (rd_state == RD_IDLE) begin case (wr_state) WR_IDLE: begin ft_wr_n <= 1'b1; - ft_data_oe <= 1'b0; // Release data bus + ft_data_oe <= 1'b0; - // Status readback takes priority + // Status readback takes priority over frame data if (status_req_ft && ft_rxf_n) begin wr_state <= WR_STATUS_SEND; - wr_byte_idx <= 5'd0; + wr_byte_idx <= 16'd0; end - // Trigger on range_data_ready (1 cycle after range_valid_ft) - // so that range_profile_cap has settled from the CDC block. - // Gate on pending flags: only send when all enabled - // streams have fresh data (avoids stale doppler/CFAR) - else if (range_data_ready && stream_range_en - && (!stream_doppler_en || doppler_data_pending) - && (!stream_cfar_en || cfar_data_pending)) begin - if (ft_rxf_n) begin // No host read pending - wr_state <= WR_DATA_SEND; - wr_byte_idx <= 5'd0; - end + // New frame ready for transfer + else if (frame_ready_ft && ft_rxf_n) begin + wr_state <= WR_FRAME_HDR; + wr_byte_idx <= 16'd0; + bram_rd_cell <= 14'd0; + range_rd_idx <= {RANGE_BIN_BITS{1'b0}}; + range_rd_addr <= {RANGE_BIN_BITS{1'b0}}; // Pre-load first read addr + detect_rd_idx <= 11'd0; + detect_rd_addr <= 11'd0; + mag_rd_addr <= 14'd0; + wr_byte_phase <= 1'b0; end end - WR_DATA_SEND: begin + // ---- Frame header: 8 bytes ---- + WR_FRAME_HDR: begin if (!ft_txe_n) begin - // TXE# low = TX FIFO has room - ft_data_oe <= 1'b1; - ft_data_out <= data_pkt_byte; - ft_wr_n <= 1'b0; // Assert write strobe + ft_data_oe <= 1'b1; + ft_wr_n <= 1'b0; - if (wr_byte_idx == DATA_PKT_LEN - 5'd1) begin - // Last byte of data packet - wr_state <= WR_DONE; - wr_byte_idx <= 5'd0; + case (wr_byte_idx[2:0]) + 3'd0: ft_data_out <= HEADER; + 3'd1: ft_data_out <= {2'b00, stream_flags_snapshot}; + 3'd2: ft_data_out <= frame_number_snapshot[15:8]; + 3'd3: ft_data_out <= frame_number_snapshot[7:0]; + 3'd4: ft_data_out <= NUM_RANGE_BINS[15:8]; // 512 >> 8 = 2 + 3'd5: ft_data_out <= NUM_RANGE_BINS[7:0]; // 512 & 0xFF = 0 + 3'd6: ft_data_out <= NUM_DOPPLER_BINS[15:8]; // 32 >> 8 = 0 + 3'd7: ft_data_out <= NUM_DOPPLER_BINS[7:0]; // 32 & 0xFF = 32 + endcase + + if (wr_byte_idx[2:0] == 3'd7) begin + wr_byte_idx <= 16'd0; + wr_byte_phase <= 1'b0; + // Decide next section based on stream flags + if (stream_flags_snapshot[0]) // stream_range + wr_state <= WR_RANGE_DATA; + else if (stream_flags_snapshot[1]) // stream_doppler + wr_state <= WR_DOPPLER_DATA; + else if (stream_flags_snapshot[2]) // stream_cfar + wr_state <= WR_DETECT_DATA; + else + wr_state <= WR_FRAME_FOOTER; end else begin - wr_byte_idx <= wr_byte_idx + 5'd1; + wr_byte_idx <= wr_byte_idx + 16'd1; end end end + // ---- Range profile: 512 × 2 = 1024 bytes ---- + WR_RANGE_DATA: begin + if (!ft_txe_n) begin + ft_data_oe <= 1'b1; + ft_wr_n <= 1'b0; + + // BRAM read has 1-cycle latency. We pre-loaded range_rd_addr. + // On phase 0: output MSB of range_rd_data (read on prev cycle) + // On phase 1: output LSB, advance to next address + if (!wr_byte_phase) begin + ft_data_out <= range_rd_data[15:8]; + wr_byte_phase <= 1'b1; + end else begin + ft_data_out <= range_rd_data[7:0]; + wr_byte_phase <= 1'b0; + range_rd_idx <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; + range_rd_addr <= range_rd_idx + {{(RANGE_BIN_BITS-1){1'b0}}, 1'b1}; + end + + wr_byte_idx <= wr_byte_idx + 16'd1; + + if (wr_byte_idx == RANGE_SECTION_BYTES[15:0] - 16'd1) begin + wr_byte_idx <= 16'd0; + wr_byte_phase <= 1'b0; + bram_rd_cell <= 14'd0; + mag_rd_addr <= 14'd0; + if (stream_flags_snapshot[1]) + wr_state <= WR_DOPPLER_DATA; + else if (stream_flags_snapshot[2]) + wr_state <= WR_DETECT_DATA; + else + wr_state <= WR_FRAME_FOOTER; + end + end + end + + // ---- Doppler magnitude: 16384 × 2 = 32768 bytes (mag_only mode) ---- + // Row-major: range_bin varies slowest, doppler_bin varies fastest. + WR_DOPPLER_DATA: begin + if (!ft_txe_n) begin + ft_data_oe <= 1'b1; + ft_wr_n <= 1'b0; + + if (!wr_byte_phase) begin + ft_data_out <= mag_rd_data[15:8]; + wr_byte_phase <= 1'b1; + end else begin + ft_data_out <= mag_rd_data[7:0]; + wr_byte_phase <= 1'b0; + bram_rd_cell <= bram_rd_cell + 14'd1; + mag_rd_addr <= bram_rd_cell + 14'd1; + end + + wr_byte_idx <= wr_byte_idx + 16'd1; + + if (wr_byte_idx == DOPPLER_MAG_SECTION_BYTES[15:0] - 16'd1) begin + wr_byte_idx <= 16'd0; + wr_byte_phase <= 1'b0; + detect_rd_idx <= 11'd0; + detect_rd_addr <= 11'd0; + if (stream_flags_snapshot[2]) + wr_state <= WR_DETECT_DATA; + else + wr_state <= WR_FRAME_FOOTER; + end + end + end + + // ---- Detection flags: 2048 bytes (dense mode) ---- + WR_DETECT_DATA: begin + if (!ft_txe_n) begin + ft_data_oe <= 1'b1; + ft_wr_n <= 1'b0; + + // 1-byte per cycle (BRAM read latency handled by pre-loading addr) + ft_data_out <= detect_rd_data; + detect_rd_idx <= detect_rd_idx + 11'd1; + detect_rd_addr <= detect_rd_idx + 11'd1; + + wr_byte_idx <= wr_byte_idx + 16'd1; + + if (wr_byte_idx == DETECT_SECTION_BYTES[15:0] - 16'd1) begin + wr_byte_idx <= 16'd0; + wr_state <= WR_FRAME_FOOTER; + end + end + end + + // ---- Frame footer: 1 byte ---- + WR_FRAME_FOOTER: begin + if (!ft_txe_n) begin + ft_data_oe <= 1'b1; + ft_data_out <= FOOTER; + ft_wr_n <= 1'b0; + wr_state <= WR_DONE; + end + end + + // ---- Status packet: 26 bytes (unchanged from legacy) ---- WR_STATUS_SEND: begin if (!ft_txe_n) begin - ft_data_oe <= 1'b1; - ft_data_out <= status_pkt_byte; - ft_wr_n <= 1'b0; + ft_data_oe <= 1'b1; + ft_wr_n <= 1'b0; - if (wr_byte_idx == STATUS_PKT_LEN - 5'd1) begin + case (wr_byte_idx[4:0]) + 5'd0: ft_data_out <= STATUS_HEADER; + 5'd1: ft_data_out <= status_words[0][31:24]; + 5'd2: ft_data_out <= status_words[0][23:16]; + 5'd3: ft_data_out <= status_words[0][15:8]; + 5'd4: ft_data_out <= status_words[0][7:0]; + 5'd5: ft_data_out <= status_words[1][31:24]; + 5'd6: ft_data_out <= status_words[1][23:16]; + 5'd7: ft_data_out <= status_words[1][15:8]; + 5'd8: ft_data_out <= status_words[1][7:0]; + 5'd9: ft_data_out <= status_words[2][31:24]; + 5'd10: ft_data_out <= status_words[2][23:16]; + 5'd11: ft_data_out <= status_words[2][15:8]; + 5'd12: ft_data_out <= status_words[2][7:0]; + 5'd13: ft_data_out <= status_words[3][31:24]; + 5'd14: ft_data_out <= status_words[3][23:16]; + 5'd15: ft_data_out <= status_words[3][15:8]; + 5'd16: ft_data_out <= status_words[3][7:0]; + 5'd17: ft_data_out <= status_words[4][31:24]; + 5'd18: ft_data_out <= status_words[4][23:16]; + 5'd19: ft_data_out <= status_words[4][15:8]; + 5'd20: ft_data_out <= status_words[4][7:0]; + 5'd21: ft_data_out <= status_words[5][31:24]; + 5'd22: ft_data_out <= status_words[5][23:16]; + 5'd23: ft_data_out <= status_words[5][15:8]; + 5'd24: ft_data_out <= status_words[5][7:0]; + 5'd25: ft_data_out <= FOOTER; + default: ft_data_out <= 8'h00; + endcase + + if (wr_byte_idx[4:0] == STATUS_PKT_LEN - 5'd1) begin wr_state <= WR_DONE; - wr_byte_idx <= 5'd0; + wr_byte_idx <= 16'd0; end else begin - wr_byte_idx <= wr_byte_idx + 5'd1; + wr_byte_idx <= wr_byte_idx + 16'd1; end end end WR_DONE: begin ft_wr_n <= 1'b1; - ft_data_oe <= 1'b0; // Release data bus - // Clear pending flags — data consumed - doppler_data_pending <= 1'b0; - cfar_data_pending <= 1'b0; - // Advance frame sync counter - if (sample_counter == NUM_CELLS - 12'd1) - sample_counter <= 12'd0; - else - sample_counter <= sample_counter + 12'd1; + ft_data_oe <= 1'b0; wr_state <= WR_IDLE; end