Preserve parser_detected across syncs to prevent coverage regression

Before re-creating ActiveSource rows, snapshot existing parser_detected
values. When writing new rows, take max(new, previous) so a source that
was once confirmed as parsed (event.type present in the data lake) never
loses its Covered status due to a sampling gap, partial query result, or
SDL PowerQuery timeout during Sync All.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mick
2026-05-22 12:07:03 -04:00
parent 7620d1fcc8
commit 21c8644443
+11 -1
View File
@@ -565,6 +565,13 @@ async def sync_sources(days: int = 7, db: Session = Depends(get_db)):
if name:
parsed_by_source[name] = row.get("parsed", 0)
# Preserve existing parser_detected values so a source once confirmed as
# parsed never loses its "Covered" status due to a sampling gap or timeout.
existing_detected: dict[str, int] = {
s.source_name: (s.parser_detected or 0)
for s in db.query(ActiveSource).all()
}
rows = volume_result.get("events", [])
db.query(ActiveSource).delete()
synced_at = datetime.utcnow()
@@ -572,11 +579,14 @@ async def sync_sources(days: int = 7, db: Session = Depends(get_db)):
for row in rows:
name = row.get("dataSource.name")
if name and name not in _S1_NATIVE_SOURCES:
# Keep the highest parser_detected ever seen for this source
new_detected = parsed_by_source.get(name, 0)
prev_detected = existing_detected.get(name, 0)
db.add(ActiveSource(
source_name=name,
event_count=row.get("events", 0),
synced_at=synced_at,
parser_detected=parsed_by_source.get(name, 0),
parser_detected=max(new_detected, prev_detected),
))
seen += 1