From 21c864444316583741896558272f07fd37185fa5 Mon Sep 17 00:00:00 2001 From: Mick <119439091+mickbrowns1@users.noreply.github.com> Date: Fri, 22 May 2026 12:07:03 -0400 Subject: [PATCH] Preserve parser_detected across syncs to prevent coverage regression Before re-creating ActiveSource rows, snapshot existing parser_detected values. When writing new rows, take max(new, previous) so a source that was once confirmed as parsed (event.type present in the data lake) never loses its Covered status due to a sampling gap, partial query result, or SDL PowerQuery timeout during Sync All. Co-Authored-By: Claude Sonnet 4.6 --- backend/routers/coverage.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/routers/coverage.py b/backend/routers/coverage.py index 96479ef..2f1bfb5 100644 --- a/backend/routers/coverage.py +++ b/backend/routers/coverage.py @@ -565,6 +565,13 @@ async def sync_sources(days: int = 7, db: Session = Depends(get_db)): if name: parsed_by_source[name] = row.get("parsed", 0) + # Preserve existing parser_detected values so a source once confirmed as + # parsed never loses its "Covered" status due to a sampling gap or timeout. + existing_detected: dict[str, int] = { + s.source_name: (s.parser_detected or 0) + for s in db.query(ActiveSource).all() + } + rows = volume_result.get("events", []) db.query(ActiveSource).delete() synced_at = datetime.utcnow() @@ -572,11 +579,14 @@ async def sync_sources(days: int = 7, db: Session = Depends(get_db)): for row in rows: name = row.get("dataSource.name") if name and name not in _S1_NATIVE_SOURCES: + # Keep the highest parser_detected ever seen for this source + new_detected = parsed_by_source.get(name, 0) + prev_detected = existing_detected.get(name, 0) db.add(ActiveSource( source_name=name, event_count=row.get("events", 0), synced_at=synced_at, - parser_detected=parsed_by_source.get(name, 0), + parser_detected=max(new_detected, prev_detected), )) seen += 1