feat(fpga): swap matched-filter chain to Xilinx LogiCORE FFT v9.1 IP

Replaces the in-house iterative fft_engine.v in the matched-filter chain
with the Pipelined Streaming Xilinx FFT IP, closing RX-NEW-3 (FFT chain
~11x too slow vs PRI budget).

Components:
  * ip/xfft_2048_ip/xfft_2048_ip.xci — committed IP definition
    (16-bit fixed point, BFP scaling, convergent rounding, natural order,
    pipelined-streaming, BRAM data/reorder/phase factors). Vivado
    regenerates .dcp / sim-netlist from this on each build.
  * scripts/50t/gen_xfft_2048_ip.tcl — IP-Catalog generation script
  * scripts/50t/run_xfft_xsim.sh — XSim batch runner for tb_xfft_2048_xsim
  * xfft_2048.v — AXI-Stream wrapper. FFT_USE_XILINX_IP define routes to
    real LogiCORE for synth/XSim; falls back to fft_engine batched
    one-shot for iverilog (unit coverage only).
  * fft_engine_axi_bridge.v — exposes legacy fft_engine port surface on
    top of the xfft_2048 AXI wrapper, so the chain swap is a 1-line
    module-name change.
  * matched_filter_processing_chain.v — fft_engine -> fft_engine_axi_bridge
  * scripts/50t/build_50t.tcl — read_ip + generate_target + synth_ip;
    adds FFT_USE_XILINX_IP to verilog defines.
  * tb/tb_xfft_2048_xsim.v — XSim verification (DC, impulse, tone bin 128).
    All 5 assertions PASS on remote with the real IP; tuser=0x0a (BLK_EXP=10)
    confirms BFP scaling working.

Local iverilog regression: 32/34 PASS — identical to baseline. Same two
RX-NEW-3 failures (Receiver Integration, Matched Filter Chain) — these
only resolve in remote XSim with the real IP, since iverilog uses the
fft_engine fallback inside xfft_2048 (~150K cycles/pass, not the
~2200-cycle Pipelined Streaming throughput). MF cosim 4/4 PASS confirms
bridge bit-exact in fallback mode.

Pending: remote XSim of tb_radar_receiver_final to demonstrate Doppler
frames produced within PRI budget; remote synth to confirm DSP/timing
post-IP.
This commit is contained in:
Jason
2026-04-23 12:39:33 +05:45
parent cc6691dec9
commit 5c8cc8c96a
10 changed files with 1256 additions and 6 deletions
+16 -1
View File
@@ -43,7 +43,22 @@ foreach f [glob -directory $rtl_dir *.v] {
}
# Select the top-level module for the current fileset. $top_module is set
# outside this excerpt.
set_property top $top_module [current_fileset]
# NOTE(review): the verilog_define assignment directly below is replaced by the
# second verilog_define assignment a few lines further down (same property,
# same fileset). It looks like the pre-change side of the diff — confirm that
# only the later assignment exists in the merged file.
set_property verilog_define {FFT_XPM_BRAM} [current_fileset]
# FFT_USE_XILINX_IP routes xfft_2048.v's wrapper to the LogiCORE FFT v9.1 IP
# (xfft_2048_ip) instead of the in-house fft_engine fallback. The IP closes
# RX-NEW-3 (~6600-cycle 3-FFT chain budget vs 16700-cycle PRI).
set_property verilog_define {FFT_XPM_BRAM FFT_USE_XILINX_IP} [current_fileset]
# ===== IP CATALOG =====
# Read the pre-generated xfft_2048_ip XCI (produced by gen_xfft_2048_ip.tcl).
# generate_target + synth_ip prepare its OOC netlist before launch_runs.
set xci_path [file join $project_root "ip" "xfft_2048_ip" "xfft_2048_ip.xci"]
# Require a regular file: a stray directory at this path would pass a plain
# existence check and only fail later inside read_ip with a less helpful error.
if {![file isfile $xci_path]} {
    # Keep the path and the remedy visibly separate so the hint is not read as
    # part of the file name.
    puts "ERROR: $xci_path is missing; run scripts/50t/gen_xfft_2048_ip.tcl first."
    exit 1
}
read_ip $xci_path
# Produce the synthesis, simulation and instantiation-template outputs, then
# synthesize the IP on its own so the top-level run can consume the result.
generate_target {synthesis simulation instantiation_template} [get_ips xfft_2048_ip]
synth_ip [get_ips xfft_2048_ip]
# Constraints — 50T XDC + MMCM supplement
add_files -fileset constrs_1 -norecurse [file join $project_root "constraints" "xc7a50t_ftg256.xdc"]
@@ -0,0 +1,75 @@
################################################################################
# gen_xfft_2048_ip.tcl — Generate Xilinx LogiCORE FFT (xfft_v9_1) for AERIS-10
#
# Produces ip/xfft_2048_ip/xfft_2048_ip.xci configured for the matched-filter chain:
# - Transform Length: 2048
# - Architecture: Pipelined Streaming I/O
# - Data Format: Fixed Point
# - Scaling: Block Floating Point (run-time auto-scale)
# - Rounding: Convergent (round-to-even)
# - Input Width: 16-bit per real/imag (matches DDC output, DATA_W in chain)
# - Phase Width: 16-bit
# - Output Ordering: Natural Order
# - Throttle Scheme: Non Real Time (allows downstream backpressure)
# - Memory: Block RAM for data, reorder, phase factors
#
# Usage (run on remote Vivado box):
# cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA
# vivado -mode batch -source scripts/50t/gen_xfft_2048_ip.tcl
#
# Output: ip/xfft_2048_ip/xfft_2048_ip.xci (committed; build_50t.tcl reads this)
# Note: IP module is named xfft_2048_ip to avoid collision with the wrapper
# module xfft_2048 in xfft_2048.v.
################################################################################
# --- Locations and target device ---------------------------------------------
# This script sits two directory levels below the project root; generated IP
# goes under <project_root>/ip.
set here [file dirname [file normalize [info script]]]
set project_root [file normalize [file join $here ".." ".."]]
set ip_dir [file join $project_root "ip"]
set target_part "xc7a50tftg256-2"
set module_name "xfft_2048_ip"
file mkdir $ip_dir

# --- Scratch project ----------------------------------------------------------
# An in-memory project is enough: it exists only to host the IP while it is
# created, configured and synthesized.
create_project -in_memory -part $target_part
set_property ip_repo_paths $ip_dir [current_project]

# --- Instantiate the FFT core -------------------------------------------------
# -force overwrites any previously generated instance with the same name.
create_ip -vendor xilinx.com -library ip -name xfft -version 9.1 \
    -module_name $module_name -dir $ip_dir -force
set ip [get_ips $module_name]

# --- Core configuration -------------------------------------------------------
# Flat name/value list handed to set_property -dict in a single call.
set xfft_config {
    CONFIG.transform_length {2048}
    CONFIG.implementation_options {pipelined_streaming_io}
    CONFIG.channels {1}
    CONFIG.data_format {fixed_point}
    CONFIG.scaling_options {block_floating_point}
    CONFIG.rounding_modes {convergent_rounding}
    CONFIG.input_width {16}
    CONFIG.phase_factor_width {16}
    CONFIG.output_ordering {natural_order}
    CONFIG.cyclic_prefix_insertion {false}
    CONFIG.throttle_scheme {nonrealtime}
    CONFIG.target_clock_frequency {100}
    CONFIG.target_data_throughput {50}
    CONFIG.complex_mult_type {use_mults_resources}
    CONFIG.butterfly_type {use_xtremedsp_slices}
    CONFIG.memory_options_data {block_ram}
    CONFIG.memory_options_reorder {block_ram}
    CONFIG.memory_options_phase_factors {block_ram}
    CONFIG.memory_options_hybrid {false}
}
set_property -dict $xfft_config $ip

# --- Output products ----------------------------------------------------------
# Both the synthesis and simulation targets are generated so that Vivado synth
# and XSim can each consume the core.
generate_target {synthesis simulation instantiation_template} $ip
synth_ip $ip

# --- Summary banner -----------------------------------------------------------
puts "================================================================"
puts " xfft_2048_ip IP generation complete"
puts " XCI: $ip_dir/xfft_2048_ip/xfft_2048_ip.xci"
puts " DCP: [get_property IP_OUTPUT_DIR $ip]/xfft_2048_ip.dcp"
puts "================================================================"

close_project
exit 0
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# ============================================================================
# run_xfft_xsim.sh — Compile + run xfft_2048 wrapper testbench in Vivado XSim
#
# Verifies the wrapper with the real LogiCORE FFT v9.1 (xfft_2048_ip).
# Cannot run in iverilog because the IP uses Xilinx primitives.
#
# Usage (on remote Vivado box):
#   cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA
#   bash scripts/50t/run_xfft_xsim.sh
#
# Environment:
#   XFFT_XSIM_LOG   Optional path for the simulation log
#                   (default: /tmp/xfft_xsim.log).
#
# Output: /tmp/xfft_xsim.log unless XFFT_XSIM_LOG is set
#         (success marker: "ALL TESTS PASSED")
#
# Exit status:
#   0  simulation ran and the log contains the success marker
#   1  a tool or input file is missing, a tool failed, or the marker is absent
# ============================================================================
set -euo pipefail

# Print an error to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

PROJ_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
readonly PROJ_ROOT
readonly IP_NETLIST="$PROJ_ROOT/ip/xfft_2048_ip/xfft_2048_ip_sim_netlist.v"
readonly WRAPPER="$PROJ_ROOT/xfft_2048.v"
readonly TB="$PROJ_ROOT/tb/tb_xfft_2048_xsim.v"
readonly WORK_DIR="$PROJ_ROOT/build_xsim_xfft"
readonly LOG_FILE="${XFFT_XSIM_LOG:-/tmp/xfft_xsim.log}"
# Success marker taken from the header of this script; NOTE(review): confirm it
# matches the exact string printed by tb_xfft_2048_xsim.
readonly PASS_MARKER="ALL TESTS PASSED"

# Fail early with a clear message instead of an opaque tool error later on.
for tool in xvlog xelab xsim; do
  command -v "$tool" >/dev/null \
    || die "$tool not found on PATH; set up the Vivado environment first"
done

for src in "$WRAPPER" "$TB"; do
  [[ -f "$src" ]] || die "missing source file: $src"
done

# The sim netlist is a generated artifact, so point at the script that makes it.
[[ -f "$IP_NETLIST" ]] \
  || die "missing $IP_NETLIST; run scripts/50t/gen_xfft_2048_ip.tcl first"

mkdir -p "$WORK_DIR"
cd "$WORK_DIR"

echo "===== Compiling Verilog sources ====="

# Wrapper + testbench with the IP-on define
xvlog -d FFT_USE_XILINX_IP "$WRAPPER" "$TB"

# IP simulation netlist — references unisim primitives
xvlog "$IP_NETLIST"

# fft_engine.v is intentionally not compiled: with FFT_USE_XILINX_IP defined
# the wrapper's fallback branch is excluded by the preprocessor. If xvlog ever
# reports an unresolved fft_engine reference, add it here:
#   xvlog "$PROJ_ROOT/fft_engine.v"

echo "===== Elaborating ====="

# `glbl` is a Vivado-supplied module that Xilinx primitives (FDRE etc.)
# reference for the global GSR/GTS signals. Elaborating it as a second top
# satisfies the unresolved-reference error xelab raises for the IP netlist.
xelab -L unisims_ver -L secureip --debug typical \
  tb_xfft_2048_xsim glbl -snapshot tb_xfft_2048_snap

echo "===== Running simulation ====="
xsim tb_xfft_2048_snap --runall --log "$LOG_FILE"

echo "===== Done. Tail of log: ====="
tail -40 "$LOG_FILE"

# xsim's exit status does not tell us whether the testbench checks passed, so
# derive the verdict from the log rather than relying on a manual read.
if grep -qF -- "$PASS_MARKER" "$LOG_FILE"; then
  echo "===== RESULT: $PASS_MARKER ====="
else
  die "'$PASS_MARKER' not found in $LOG_FILE"
fi