mtg_python_deckbuilder/code/scripts/ratchet_description_thresholds.py

100 lines
4.2 KiB
Python
Raw Normal View History

"""Analyze description_fallback_history.jsonl and propose updated regression test thresholds.
Algorithm:
- Load all history records (JSON lines) that include generic_total (generic_pct is checked later, when the recent window is analyzed).
- Use the most recent N (default 5) snapshots to compute a smoothed (median) generic_pct.
- If median is at least 2 percentage points below current test ceiling OR
the latest generic_total is at least 10 below current ceiling, propose new targets.
- Output JSON with keys: current_total_ceiling, current_pct_ceiling,
proposed_total_ceiling, proposed_pct_ceiling, rationale.
Defaults assume current ceilings (update if test changes):
total <= 365, pct < 52.0
Usage:
python code/scripts/ratchet_description_thresholds.py \
--history config/themes/description_fallback_history.jsonl
You can override current thresholds:
--current-total 365 --current-pct 52.0
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from statistics import median
from typing import List, Dict, Any
def load_history(path: Path) -> List[Dict[str, Any]]:
    """Load fallback-history snapshots from a JSON-lines file.

    Each non-blank line is parsed as JSON. Only JSON objects that contain a
    ``generic_total`` key are kept; blank lines, malformed JSON and non-object
    values are skipped so that one bad line does not discard the rest.

    Args:
        path: Location of the JSON-lines history file.

    Returns:
        The retained records sorted by their ``timestamp`` string (ISO-8601
        timestamps sort chronologically when compared as text). Records whose
        timestamp is missing or not a string sort first, keeping file order.
        An empty list is returned when ``path`` is not an existing file.
    """
    # is_file() rather than exists(): a directory path would pass exists()
    # and then make read_text() raise.
    if not path.is_file():
        return []

    records: List[Dict[str, Any]] = []
    for raw_line in path.read_text(encoding='utf-8').splitlines():
        text = raw_line.strip()
        if not text:
            continue
        try:
            obj = json.loads(text)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Either way: skip just this line.
            continue
        if isinstance(obj, dict) and 'generic_total' in obj:
            records.append(obj)

    def _timestamp_key(record: Dict[str, Any]) -> str:
        # Normalise to str so a null/numeric timestamp cannot make the sort
        # raise TypeError when compared against string timestamps.
        stamp = record.get('timestamp')
        return stamp if isinstance(stamp, str) else ''

    # list.sort is stable, so records with equal keys keep their file order.
    records.sort(key=_timestamp_key)
    return records
def propose(history: List[Dict[str, Any]], current_total: int, current_pct: float, window: int) -> Dict[str, Any]:
    """Propose tightened regression ceilings from recent history snapshots.

    Args:
        history: Snapshot records, oldest first. ``generic_pct`` and
            ``generic_total`` are read from each record.
        current_total: Current ceiling for ``generic_total``.
        current_pct: Current ceiling for ``generic_pct``.
        window: How many of the most recent records to consider. A value
            that is zero, negative, or not smaller than ``len(history)``
            selects the whole history.

    Returns:
        A dict. On success it holds the current and proposed ceilings, the
        median of the recent ``generic_pct`` values, the latest valid
        ``generic_total``, the list of rationale strings (empty when nothing
        changes) and the number of records considered. When ``history`` is
        empty, or the window has no numeric ``generic_pct`` or no integer
        ``generic_total``, the dict holds an ``error`` message plus the
        current ceilings.
    """
    if not history:
        return {
            'error': 'No history records found',
            'current_total_ceiling': current_total,
            'current_pct_ceiling': current_pct,
        }

    # Slice only for a positive window shorter than the history: a negative
    # window would turn history[-window:] into "drop the oldest records"
    # rather than "take the newest".
    recent = history[-window:] if 0 < window < len(history) else history

    generic_pcts = [
        rec.get('generic_pct')
        for rec in recent
        if isinstance(rec.get('generic_pct'), (int, float))
    ]
    generic_totals = [
        rec.get('generic_total')
        for rec in recent
        if isinstance(rec.get('generic_total'), int)
    ]
    if not generic_pcts or not generic_totals:
        return {
            'error': 'Insufficient numeric data',
            'current_total_ceiling': current_total,
            'current_pct_ceiling': current_pct,
        }

    med_pct = median(generic_pcts)
    # Take the newest *validated* total; reading the raw last record could
    # yield None or a string and break the arithmetic below.
    latest_total = generic_totals[-1]

    # Proposed ceilings start as the current ones and are only ever lowered.
    proposed_total = current_total
    proposed_pct = current_pct
    rationale: List[str] = []

    # Condition 1: the median sits at least 2 percentage points under the ceiling.
    if med_pct + 2.0 <= current_pct:
        # Keep a buffer of one point or 2% of the median, whichever is larger.
        proposed_pct = round(max(med_pct + 1.0, med_pct * 1.02), 2)
        rationale.append(f"Median generic_pct {med_pct}% well below ceiling {current_pct}%")

    # Condition 2: the latest total sits at least 10 under the ceiling.
    if latest_total + 10 <= current_total:
        proposed_total = latest_total + 5  # leave a small absolute buffer
        rationale.append(f"Latest generic_total {latest_total} well below ceiling {current_total}")

    return {
        'current_total_ceiling': current_total,
        'current_pct_ceiling': current_pct,
        'median_recent_pct': med_pct,
        'latest_total': latest_total,
        'proposed_total_ceiling': proposed_total,
        'proposed_pct_ceiling': proposed_pct,
        'rationale': rationale,
        'records_considered': len(recent),
    }
def main():  # pragma: no cover (I/O tool)
    """Command-line entry point: read history, compute a proposal, print JSON.

    Parses ``--history``, ``--current-total``, ``--current-pct`` and
    ``--window``, loads the history file, and prints the resulting proposal
    dict to stdout as JSON with two-space indentation.
    """
    parser = argparse.ArgumentParser(
        description='Propose ratcheted generic description regression thresholds',
    )
    # Table of (flag, add_argument keyword arguments), registered in order.
    option_specs = (
        ('--history', {'type': str, 'default': 'config/themes/description_fallback_history.jsonl'}),
        ('--current-total', {'type': int, 'default': 365}),
        ('--current-pct', {'type': float, 'default': 52.0}),
        ('--window', {'type': int, 'default': 5, 'help': 'Number of most recent records to consider'}),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    options = parser.parse_args()

    records = load_history(Path(options.history))
    proposal = propose(
        records,
        options.current_total,
        options.current_pct,
        options.window,
    )
    print(json.dumps(proposal, indent=2))


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()