Mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git (synced 2025-12-17 08:00:13 +01:00)

feat: theme catalog optimization with tag search and faster enrichment

Parent: 952b151162 · Commit: 9e6c68f559
26 changed files with 5906 additions and 5688 deletions
Deleted file (203 lines): Protection tag audit script (M2 Phase 2)
@@ -1,203 +0,0 @@
"""
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).

Created: October 8, 2025
Purpose: Audit and validate Protection tag precision after implementing grant detection.
Can be re-run periodically to check tagging quality.

This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive

Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
"""

import sys
from pathlib import Path
import pandas as pd
import json

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from code.tagging.protection_grant_detection import (
    categorize_protection_card,
    get_kindred_protection_tags,
    is_granting_protection,
)


def load_all_cards():
    """Load all cards from color/identity CSV files."""
    csv_dir = project_root / 'csv_files'

    # Get all color/identity CSVs (not the raw cards.csv)
    csv_files = list(csv_dir.glob('*_cards.csv'))
    csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]

    all_cards = []
    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)
            all_cards.append(df)
        except Exception as e:
            print(f"Warning: Could not load {csv_file.name}: {e}")

    # Combine all DataFrames
    combined = pd.concat(all_cards, ignore_index=True)

    # Drop duplicates (cards appear in multiple color files)
    combined = combined.drop_duplicates(subset=['name'], keep='first')

    return combined


def audit_all_protection_cards():
    """Audit all Protection-tagged cards."""
    print("Loading all cards...")
    df = load_all_cards()

    print(f"Total cards loaded: {len(df)}")

    # Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
    df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()

    print(f"Protection-tagged cards: {len(df_prot)}")

    # Categorize each card
    categories = []
    grants_list = []
    kindred_tags_list = []

    for idx, row in df_prot.iterrows():
        name = row['name']
        text = str(row.get('text', '')).replace('\\n', '\n')  # Convert escaped newlines to real newlines
        keywords = str(row.get('keywords', ''))
        card_type = str(row.get('type', ''))

        # Categorize with kindred exclusion enabled
        category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)

        # Check if it grants broadly
        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)

        # Get kindred tags
        kindred_tags = get_kindred_protection_tags(text)

        categories.append(category)
        grants_list.append(grants_broad)
        kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')

    df_prot['category'] = categories
    df_prot['grants_broad'] = grants_list
    df_prot['kindred_tags'] = kindred_tags_list

    # Generate summary (convert numpy types to native Python for JSON serialization)
    summary = {
        'total': int(len(df_prot)),
        'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
        'grants_broad_count': int(df_prot['grants_broad'].sum()),
        'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
    }

    # Calculate keep vs remove
    keep_categories = {'Grant', 'Mixed'}
    kindred_only = df_prot[df_prot['category'] == 'Kindred']
    keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
    remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])

    summary['keep_main_tag'] = keep_count
    summary['kindred_metadata'] = len(kindred_only)
    summary['remove'] = remove_count
    summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0

    # Print summary
    print(f"\n{'='*60}")
    print("AUDIT SUMMARY")
    print(f"{'='*60}")
    print(f"Total Protection-tagged cards: {summary['total']}")
    print(f"\nCategories:")
    for cat, count in sorted(summary['categories'].items()):
        pct = (count / summary['total']) * 100
        print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")

    print(f"\n{'='*60}")
    print(f"Main Protection tag: {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
    print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
    print(f"Remove: {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
    print(f"{'='*60}")
    print(f"Precision estimate: {summary['precision_estimate']}%")
    print(f"{'='*60}\n")

    # Export results
    output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
    output_dir.mkdir(parents=True, exist_ok=True)

    # Export JSON summary
    with open(output_dir / 'm2_audit_v2.json', 'w') as f:
        json.dump({
            'summary': summary,
            'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
        }, f, indent=2)

    # Export CSVs by category
    export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']

    # Grant category
    df_grant = df_prot[df_prot['category'] == 'Grant']
    df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
    print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")

    # Kindred category
    df_kindred = df_prot[df_prot['category'] == 'Kindred']
    df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
    print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")

    # Mixed category
    df_mixed = df_prot[df_prot['category'] == 'Mixed']
    df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
    print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")

    # ConditionalSelf category
    df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
    df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
    print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")

    # Inherent category
    df_inherent = df_prot[df_prot['category'] == 'Inherent']
    df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
    print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")

    # Opponent category
    df_opponent = df_prot[df_prot['category'] == 'Opponent']
    df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
    print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")

    # Neither category
    df_neither = df_prot[df_prot['category'] == 'Neither']
    df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
    print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")

    # All cards
    df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
    print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")

    print(f"\nAll files saved to: {output_dir}")

    return df_prot, summary


if __name__ == '__main__':
    df_results, summary = audit_all_protection_cards()
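For reference, a minimal sketch of how the helpers imported above categorize a single card (the card name and oracle text are invented, and the exact label returned is an assumption about the detector's behavior):

# Illustrative single-card check using the same helpers the audit loop calls.
name = "Example Guardian"                       # hypothetical card
text = "Creatures you control have hexproof."   # invented oracle text
keywords = ""
card_type = "Creature — Human Cleric"

category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)
grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)
kindred = get_kindred_protection_tags(text)
# A broad grant like this would presumably land in the Grant/Mixed buckets that keep the main tag.
print(category, grants_broad, sorted(kindred))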
Deleted file (118 lines): code/scripts/check_random_theme_perf.py
@@ -1,118 +0,0 @@
"""Opt-in guard that compares multi-theme filter performance to a stored baseline.

Run inside the project virtual environment:

    python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json

The script executes the same profiling loop as `profile_multi_theme_filter` and fails
if the observed mean or p95 timings regress more than the allowed threshold.
"""
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, Tuple

PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"

if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from code.scripts.profile_multi_theme_filter import run_profile  # type: ignore  # noqa: E402


def _load_baseline(path: Path) -> Dict[str, Any]:
    if not path.exists():
        raise FileNotFoundError(f"Baseline file not found: {path}")
    data = json.loads(path.read_text(encoding="utf-8"))
    return data


def _extract(metric: Dict[str, Any], key: str) -> float:
    try:
        value = float(metric.get(key, 0.0))
    except Exception:
        value = 0.0
    return value


def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
    a_mean = _extract(actual, "mean_ms")
    b_mean = _extract(baseline, "mean_ms")
    a_p95 = _extract(actual, "p95_ms")
    b_p95 = _extract(baseline, "p95_ms")

    allowed_mean = b_mean * (1.0 + threshold)
    allowed_p95 = b_p95 * (1.0 + threshold)

    mean_ok = a_mean <= allowed_mean or b_mean == 0.0
    p95_ok = a_p95 <= allowed_p95 or b_p95 == 0.0

    status = mean_ok and p95_ok

    def _format_row(label: str, actual_val: float, baseline_val: float, allowed_val: float, ok: bool) -> str:
        trend = ((actual_val - baseline_val) / baseline_val * 100.0) if baseline_val else 0.0
        trend_str = f"{trend:+.1f}%" if baseline_val else "n/a"
        limit_str = f"≤ {allowed_val:.3f}ms" if baseline_val else "n/a"
        return f" {label:<6} actual={actual_val:.3f}ms baseline={baseline_val:.3f}ms ({trend_str}), limit {limit_str} -> {'OK' if ok else 'FAIL'}"

    rows = [f"Section: {name}"]
    rows.append(_format_row("mean", a_mean, b_mean, allowed_mean, mean_ok))
    rows.append(_format_row("p95", a_p95, b_p95, allowed_p95, p95_ok))
    return status, "\n".join(rows)


def main(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
    parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE, help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
    parser.add_argument("--iterations", type=int, default=400, help="Number of iterations to sample (default: 400)")
    parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
    parser.add_argument("--threshold", type=float, default=0.15, help="Allowed regression threshold as a fraction (default: 0.15 = 15%)")
    parser.add_argument("--update-baseline", action="store_true", help="Overwrite the baseline file with the newly collected metrics")
    args = parser.parse_args(argv)

    baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
    if args.update_baseline and not baseline_path.parent.exists():
        baseline_path.parent.mkdir(parents=True, exist_ok=True)

    if not args.update_baseline:
        baseline = _load_baseline(baseline_path)
    else:
        baseline = {}

    results = run_profile(args.iterations, args.seed)

    cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
    synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)

    print("Iterations:", results.get("iterations"))
    print("Seed:", results.get("seed"))
    print(cascade_report)
    print(synergy_report)

    overall_ok = cascade_status and synergy_status

    if args.update_baseline:
        payload = {
            "iterations": results.get("iterations"),
            "seed": results.get("seed"),
            "cascade": results.get("cascade"),
            "synergy": results.get("synergy"),
        }
        baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
        print(f"Baseline updated → {baseline_path}")
        return 0

    if not overall_ok:
        print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
        return 1

    print("PASS: performance within allowed threshold")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
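A quick numeric sketch of the threshold rule described in the docstring above (numbers invented for the example):

# Pass/fail rule with the default 15% threshold (values are made up).
threshold = 0.15
baseline_p95 = 40.0                               # ms, from the stored baseline JSON
observed_p95 = 44.2                               # ms, from the fresh profiling run
allowed_p95 = baseline_p95 * (1.0 + threshold)    # 46.0 ms
print("OK" if observed_p95 <= allowed_p95 else "FAIL")  # OK, since 44.2 <= 46.0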
New file (135 lines): code/scripts/enrich_themes.py
@@ -0,0 +1,135 @@
"""CLI wrapper for theme enrichment pipeline.

Runs the consolidated theme enrichment pipeline with command-line options.
For backward compatibility, individual scripts can still be run separately,
but this provides a faster single-pass alternative.

Usage:
    python code/scripts/enrich_themes.py --write
    python code/scripts/enrich_themes.py --dry-run --enforce-min
"""
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

# Add project root to path
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Import after adding to path
from code.tagging.theme_enrichment import run_enrichment_pipeline  # noqa: E402


def main() -> int:
    """Run theme enrichment pipeline from CLI."""
    parser = argparse.ArgumentParser(
        description='Consolidated theme metadata enrichment pipeline',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run (no changes written):
  python code/scripts/enrich_themes.py --dry-run

  # Write changes:
  python code/scripts/enrich_themes.py --write

  # Enforce minimum examples (errors if insufficient):
  python code/scripts/enrich_themes.py --write --enforce-min

  # Strict validation for cornerstone themes:
  python code/scripts/enrich_themes.py --write --strict

Note: This replaces running 7 separate scripts (autofill, pad, cleanup, purge,
augment, suggestions, lint) with a single 5-10x faster operation.
"""
    )

    parser.add_argument(
        '--write',
        action='store_true',
        help='Write changes to disk (default: dry run)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Dry run mode: show what would be changed without writing'
    )
    parser.add_argument(
        '--min',
        '--min-examples',
        type=int,
        default=None,
        metavar='N',
        help='Minimum number of example commanders (default: $EDITORIAL_MIN_EXAMPLES or 5)'
    )
    parser.add_argument(
        '--enforce-min',
        action='store_true',
        help='Treat minimum examples violations as errors'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Enable strict validation (cornerstone themes must have examples)'
    )

    args = parser.parse_args()

    # Determine write mode
    if args.dry_run:
        write = False
    elif args.write:
        write = True
    else:
        # Default to dry run if neither specified
        write = False
        print("Note: Running in dry-run mode (use --write to save changes)\n")

    # Get minimum examples threshold
    if args.min is not None:
        min_examples = args.min
    else:
        min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))

    print("Theme Enrichment Pipeline")
    print("========================")
    print(f"Mode: {'WRITE' if write else 'DRY RUN'}")
    print(f"Min examples: {min_examples}")
    print(f"Enforce min: {args.enforce_min}")
    print(f"Strict: {args.strict}")
    print()

    try:
        stats = run_enrichment_pipeline(
            root=ROOT,
            min_examples=min_examples,
            write=write,
            enforce_min=args.enforce_min,
            strict=args.strict,
            progress_callback=None,  # Use default print
        )

        # Return non-zero if there are lint errors
        if stats.lint_errors > 0:
            print(f"\n❌ Enrichment completed with {stats.lint_errors} error(s)")
            return 1

        print("\n✅ Enrichment completed successfully")
        return 0

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        return 130
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        if '--debug' in sys.argv:
            raise
        return 1


if __name__ == '__main__':
    raise SystemExit(main())
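The same pipeline can also be driven programmatically; a minimal sketch using only the arguments the wrapper above passes (the `Path('.')` root is an assumption and would normally be the repository root):

# Programmatic dry run, mirroring the CLI wrapper's defaults.
from pathlib import Path
from code.tagging.theme_enrichment import run_enrichment_pipeline

stats = run_enrichment_pipeline(
    root=Path('.').resolve(),    # assumed to be the repo root; the wrapper derives it from __file__
    min_examples=5,
    write=False,                 # dry run
    enforce_min=False,
    strict=False,
    progress_callback=None,      # default print-based progress
)
print(stats.lint_errors)         # non-zero signals lint problems, as in the wrapper above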
@@ -123,6 +123,9 @@ def main():
    enforced_set = set(enforced_synergies)
    inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]

    example_cards_value = entry.get('example_cards', [])
    example_commanders_value = entry.get('example_commanders', [])

    doc = {
        'id': slug,
        'display_name': theme_name,
@@ -132,13 +135,40 @@ def main():
        'inferred_synergies': inferred_synergies,
        'primary_color': entry.get('primary_color'),
        'secondary_color': entry.get('secondary_color'),
        'example_cards': example_cards_value,
        'example_commanders': example_commanders_value,
        'synergy_example_cards': entry.get('synergy_example_cards', []),
        'synergy_commanders': entry.get('synergy_commanders', []),
        'deck_archetype': entry.get('deck_archetype'),
        'popularity_hint': entry.get('popularity_hint'),
        'popularity_bucket': entry.get('popularity_bucket'),
        'editorial_quality': entry.get('editorial_quality'),
        'description': entry.get('description'),
        'notes': ''
    }
    # Drop None color keys for cleanliness
    # Drop None/empty keys for cleanliness
    if doc['primary_color'] is None:
        doc.pop('primary_color')
    if doc.get('secondary_color') is None:
        doc.pop('secondary_color')
    if not doc.get('example_cards'):
        doc.pop('example_cards')
    if not doc.get('example_commanders'):
        doc.pop('example_commanders')
    if not doc.get('synergy_example_cards'):
        doc.pop('synergy_example_cards')
    if not doc.get('synergy_commanders'):
        doc.pop('synergy_commanders')
    if doc.get('deck_archetype') is None:
        doc.pop('deck_archetype')
    if doc.get('popularity_hint') is None:
        doc.pop('popularity_hint')
    if doc.get('popularity_bucket') is None:
        doc.pop('popularity_bucket')
    if doc.get('editorial_quality') is None:
        doc.pop('editorial_quality')
    if doc.get('description') is None:
        doc.pop('description')
    with path.open('w', encoding='utf-8') as f:
        yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
    exported += 1
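The chain of conditional pops above could also be written as a short loop; a behaviorally equivalent sketch under the same assumptions about which keys are optional:

# Equivalent cleanup sketch: drop optional keys that are None, and list keys that are empty.
optional_none = ('primary_color', 'secondary_color', 'deck_archetype', 'popularity_hint',
                 'popularity_bucket', 'editorial_quality', 'description')
optional_empty = ('example_cards', 'example_commanders', 'synergy_example_cards', 'synergy_commanders')
for key in optional_none:
    if doc.get(key) is None:
        doc.pop(key, None)
for key in optional_empty:
    if not doc.get(key):
        doc.pop(key, None)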
@@ -19,6 +19,13 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence

try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    HAS_PANDAS = False
    pd = None  # type: ignore

ROOT = Path(__file__).resolve().parents[2]
CODE_ROOT = ROOT / "code"
if str(CODE_ROOT) not in sys.path:
@@ -29,6 +36,9 @@ try:
except Exception:  # pragma: no cover - fallback for adhoc execution
    DEFAULT_CSV_DIRECTORY = "csv_files"

# Parquet support requires pandas (imported at top of file, uses pyarrow under the hood)
HAS_PARQUET_SUPPORT = HAS_PANDAS

DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
HEADER_COMMENT_PREFIX = "# theme_catalog"
@@ -87,7 +97,68 @@ def parse_theme_tags(value: object) -> List[str]:
    return []


def _load_theme_counts_from_parquet(
    parquet_path: Path,
    theme_variants: Dict[str, set[str]]
) -> Counter[str]:
    """Load theme counts from a parquet file using pandas (which uses pyarrow).

    Args:
        parquet_path: Path to the parquet file (commander_cards.parquet or all_cards.parquet)
        theme_variants: Dict to accumulate theme name variants

    Returns:
        Counter of theme occurrences
    """
    if pd is None:
        return Counter()

    counts: Counter[str] = Counter()

    if not parquet_path.exists():
        return counts

    # Read only themeTags column for efficiency
    try:
        df = pd.read_parquet(parquet_path, columns=["themeTags"])
    except Exception:
        # If themeTags column doesn't exist, return empty
        return counts

    # Convert to list for fast iteration (faster than iterrows)
    theme_tags_list = df["themeTags"].tolist()

    for raw_value in theme_tags_list:
        if raw_value is None or (isinstance(raw_value, float) and pd.isna(raw_value)):
            continue
        tags = parse_theme_tags(raw_value)
        if not tags:
            continue
        seen_in_row: set[str] = set()
        for tag in tags:
            display = normalize_theme_display(tag)
            if not display:
                continue
            key = canonical_key(display)
            if key in seen_in_row:
                continue
            seen_in_row.add(key)
            counts[key] += 1
            theme_variants[key].add(display)

    return counts


def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
    """Load theme counts from CSV file (fallback method).

    Args:
        csv_path: Path to CSV file
        theme_variants: Dict to accumulate theme name variants

    Returns:
        Counter of theme occurrences
    """
    counts: Counter[str] = Counter()
    if not csv_path.exists():
        return counts
@@ -146,24 +217,67 @@ def build_theme_catalog(
    commander_filename: str = "commander_cards.csv",
    cards_filename: str = "cards.csv",
    logs_directory: Optional[Path] = None,
    use_parquet: bool = True,
) -> CatalogBuildResult:
    """Build theme catalog from card data.

    Args:
        csv_directory: Directory containing CSV files (fallback)
        output_path: Where to write the catalog CSV
        generated_at: Optional timestamp for generation
        commander_filename: Name of commander CSV file
        cards_filename: Name of cards CSV file
        logs_directory: Optional directory to copy output to
        use_parquet: If True, try to use all_cards.parquet first (default: True)

    Returns:
        CatalogBuildResult with generated rows and metadata
    """
    csv_directory = csv_directory.resolve()
    output_path = output_path.resolve()

    theme_variants: Dict[str, set[str]] = defaultdict(set)

    commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
    # Try to use parquet file first (much faster)
    used_parquet = False
    if use_parquet and HAS_PARQUET_SUPPORT:
        try:
            # Use dedicated parquet files (matches CSV structure exactly)
            parquet_dir = csv_directory.parent / "card_files"

            # Load commander counts directly from commander_cards.parquet
            commander_parquet = parquet_dir / "commander_cards.parquet"
            commander_counts = _load_theme_counts_from_parquet(
                commander_parquet, theme_variants=theme_variants
            )

            # CSV method doesn't load non-commander cards, so we don't either
            card_counts = Counter()

            used_parquet = True
            print("✓ Loaded theme data from parquet files")

        except Exception as e:
            print(f"⚠ Failed to load from parquet: {e}")
            print(" Falling back to CSV files...")
            used_parquet = False

    # Fallback to CSV files if parquet not available or failed
    if not used_parquet:
        commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)

        card_counts: Counter[str] = Counter()
        cards_path = csv_directory / cards_filename
        if cards_path.exists():
            card_counts = _load_theme_counts(cards_path, theme_variants)
        else:
            # Fallback: scan all *_cards.csv except commander
            for candidate in csv_directory.glob("*_cards.csv"):
                if candidate.name == commander_filename:
                    continue
                card_counts += _load_theme_counts(candidate, theme_variants)
    card_counts: Counter[str] = Counter()
    cards_path = csv_directory / cards_filename
    if cards_path.exists():
        card_counts = _load_theme_counts(cards_path, theme_variants)
    else:
        # Fallback: scan all *_cards.csv except commander
        for candidate in csv_directory.glob("*_cards.csv"):
            if candidate.name == commander_filename:
                continue
            card_counts += _load_theme_counts(candidate, theme_variants)

        print("✓ Loaded theme data from CSV files")

    keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
    generated_at_iso = _derive_generated_at(generated_at)
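As a small illustration of the per-row de-duplication in `_load_theme_counts_from_parquet` above (theme strings invented; the exact outputs of `normalize_theme_display` and `canonical_key` are assumptions):

# Per-row dedup sketch: a card listing the same theme twice still counts once.
from collections import Counter

counts: Counter[str] = Counter()
row_tags = ["Lifegain", "lifegain", "Tokens"]   # illustrative themeTags cell after parsing
seen_in_row: set[str] = set()
for tag in row_tags:
    display = normalize_theme_display(tag)      # assumed to yield e.g. "Lifegain"
    key = canonical_key(display)                # assumed to yield e.g. "lifegain"
    if key in seen_in_row:
        continue
    seen_in_row.add(key)
    counts[key] += 1
print(counts)                                   # Counter({'lifegain': 1, 'tokens': 1}) under these assumptions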
Deleted file (305 lines): code/scripts/preview_dfc_catalog_diff.py
@@ -1,305 +0,0 @@
"""Catalog diff helper for verifying multi-face merge output.

This utility regenerates the card CSV catalog (optionally writing compatibility
snapshots) and then compares the merged outputs against the baseline snapshots.
It is intended to support the MDFC rollout checklist by providing a concise summary
of how many rows were merged, which cards collapsed into a single record, and
whether any tag unions diverge from expectations.

Example usage (from repo root, inside virtualenv):

    python -m code.scripts.preview_dfc_catalog_diff --compat-snapshot --output logs/dfc_catalog_diff.json

The script prints a human readable summary to stdout and optionally writes a JSON
artifact for release/staging review.
"""
from __future__ import annotations

import argparse
import ast
import importlib
import json
import os
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Any, Dict, Iterable, List, Sequence

import pandas as pd

from settings import COLORS, CSV_DIRECTORY

DEFAULT_COMPAT_DIR = Path(os.getenv("DFC_COMPAT_DIR", "csv_files/compat_faces"))
CSV_ROOT = Path(CSV_DIRECTORY)


def _parse_list_cell(value: Any) -> List[str]:
    """Convert serialized list cells ("['A', 'B']") into Python lists."""
    if isinstance(value, list):
        return [str(item) for item in value]
    if value is None:
        return []
    if isinstance(value, float) and pd.isna(value):  # type: ignore[arg-type]
        return []
    text = str(value).strip()
    if not text:
        return []
    try:
        parsed = ast.literal_eval(text)
    except (SyntaxError, ValueError):
        return [text]
    if isinstance(parsed, list):
        return [str(item) for item in parsed]
    return [str(parsed)]


def _load_catalog(path: Path) -> pd.DataFrame:
    if not path.exists():
        raise FileNotFoundError(f"Catalog file missing: {path}")
    df = pd.read_csv(path)
    for column in ("themeTags", "keywords", "creatureTypes"):
        if column in df.columns:
            df[column] = df[column].apply(_parse_list_cell)
    return df


def _multi_face_names(df: pd.DataFrame) -> List[str]:
    counts = Counter(df.get("name", []))
    return [name for name, count in counts.items() if isinstance(name, str) and count > 1]


def _collect_tags(series: Iterable[List[str]]) -> List[str]:
    tags: List[str] = []
    for value in series:
        if isinstance(value, list):
            tags.extend(str(item) for item in value)
    return sorted(set(tags))


def _summarize_color(
    color: str,
    merged: pd.DataFrame,
    baseline: pd.DataFrame,
    sample_size: int,
) -> Dict[str, Any]:
    merged_names = set(merged.get("name", []))
    baseline_names = list(baseline.get("name", []))
    baseline_name_set = set(name for name in baseline_names if isinstance(name, str))

    multi_face = _multi_face_names(baseline)
    collapsed = []
    tag_mismatches: List[str] = []
    missing_after_merge: List[str] = []

    for name in multi_face:
        group = baseline[baseline["name"] == name]
        merged_row = merged[merged["name"] == name]
        if merged_row.empty:
            missing_after_merge.append(name)
            continue
        expected_tags = _collect_tags(group["themeTags"]) if "themeTags" in group else []
        merged_tags = _collect_tags(merged_row.iloc[[0]]["themeTags"]) if "themeTags" in merged_row else []
        if expected_tags != merged_tags:
            tag_mismatches.append(name)
        collapsed.append(name)

    removed_names = sorted(baseline_name_set - merged_names)
    added_names = sorted(merged_names - baseline_name_set)

    return {
        "rows_merged": len(merged),
        "rows_baseline": len(baseline),
        "row_delta": len(merged) - len(baseline),
        "multi_face_groups": len(multi_face),
        "collapsed_sample": collapsed[:sample_size],
        "tag_union_mismatches": tag_mismatches[:sample_size],
        "missing_after_merge": missing_after_merge[:sample_size],
        "removed_names": removed_names[:sample_size],
        "added_names": added_names[:sample_size],
    }


def _refresh_catalog(colors: Sequence[str], compat_snapshot: bool) -> None:
    os.environ.pop("ENABLE_DFC_MERGE", None)
    os.environ["DFC_COMPAT_SNAPSHOT"] = "1" if compat_snapshot else "0"
    importlib.invalidate_caches()
    # Reload tagger to pick up the new env var
    tagger = importlib.import_module("code.tagging.tagger")
    tagger = importlib.reload(tagger)  # type: ignore[assignment]

    for color in colors:
        tagger.load_dataframe(color)


def generate_diff(
    colors: Sequence[str],
    compat_dir: Path,
    sample_size: int,
) -> Dict[str, Any]:
    per_color: Dict[str, Any] = {}
    overall = {
        "total_rows_merged": 0,
        "total_rows_baseline": 0,
        "total_multi_face_groups": 0,
        "colors": len(colors),
        "tag_union_mismatches": 0,
        "missing_after_merge": 0,
    }

    for color in colors:
        merged_path = CSV_ROOT / f"{color}_cards.csv"
        baseline_path = compat_dir / f"{color}_cards_unmerged.csv"
        merged_df = _load_catalog(merged_path)
        baseline_df = _load_catalog(baseline_path)
        summary = _summarize_color(color, merged_df, baseline_df, sample_size)
        per_color[color] = summary
        overall["total_rows_merged"] += summary["rows_merged"]
        overall["total_rows_baseline"] += summary["rows_baseline"]
        overall["total_multi_face_groups"] += summary["multi_face_groups"]
        overall["tag_union_mismatches"] += len(summary["tag_union_mismatches"])
        overall["missing_after_merge"] += len(summary["missing_after_merge"])

    overall["row_delta_total"] = overall["total_rows_merged"] - overall["total_rows_baseline"]
    return {"overall": overall, "per_color": per_color}


def main(argv: List[str]) -> int:
    parser = argparse.ArgumentParser(description="Preview merged vs baseline DFC catalog diff")
    parser.add_argument(
        "--skip-refresh",
        action="store_true",
        help="Skip rebuilding the catalog in compatibility mode (requires existing compat snapshots)",
    )
    parser.add_argument(
        "--mode",
        default="",
        help="[Deprecated] Legacy ENABLE_DFC_MERGE value (compat|1|0 etc.)",
    )
    parser.add_argument(
        "--compat-snapshot",
        dest="compat_snapshot",
        action="store_true",
        help="Write compatibility snapshots before diffing (default: off unless legacy --mode compat)",
    )
    parser.add_argument(
        "--no-compat-snapshot",
        dest="compat_snapshot",
        action="store_false",
        help="Skip compatibility snapshots even if legacy --mode compat is supplied",
    )
    parser.set_defaults(compat_snapshot=None)
    parser.add_argument(
        "--colors",
        nargs="*",
        help="Optional subset of colors to diff (defaults to full COLORS list)",
    )
    parser.add_argument(
        "--compat-dir",
        type=Path,
        default=DEFAULT_COMPAT_DIR,
        help="Directory containing unmerged compatibility snapshots (default: %(default)s)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Optional JSON file to write with the diff summary",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=10,
        help="Number of sample entries to include per section (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    colors = tuple(args.colors) if args.colors else tuple(COLORS)
    compat_dir = args.compat_dir

    mode = str(args.mode or "").strip().lower()
    if mode and mode not in {"compat", "dual", "both", "1", "on", "true", "0", "off", "false", "disabled"}:
        print(
            f"ℹ Legacy --mode value '{mode}' detected; merge remains enabled. Use --compat-snapshot as needed.",
            flush=True,
        )

    if args.compat_snapshot is None:
        compat_snapshot = mode in {"compat", "dual", "both"}
    else:
        compat_snapshot = args.compat_snapshot
        if mode:
            print(
                "ℹ Ignoring deprecated --mode value because --compat-snapshot/--no-compat-snapshot was supplied.",
                flush=True,
            )

    if mode in {"0", "off", "false", "disabled"}:
        print(
            "⚠ ENABLE_DFC_MERGE=off is deprecated; the merge remains enabled regardless of the value.",
            flush=True,
        )

    if not args.skip_refresh:
        start = time.perf_counter()
        _refresh_catalog(colors, compat_snapshot)
        duration = time.perf_counter() - start
        snapshot_msg = "with compat snapshot" if compat_snapshot else "merged-only"
        print(f"✔ Refreshed catalog in {duration:.1f}s ({snapshot_msg})")
    else:
        print("ℹ Using existing catalog outputs (refresh skipped)")

    try:
        diff = generate_diff(colors, compat_dir, args.sample_size)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}")
        print("Run without --skip-refresh (or ensure compat snapshots exist).", file=sys.stderr)
        return 2

    overall = diff["overall"]
    print("\n=== DFC Catalog Diff Summary ===")
    print(
        f"Merged rows: {overall['total_rows_merged']:,} | Baseline rows: {overall['total_rows_baseline']:,} | "
        f"Δ rows: {overall['row_delta_total']:,}"
    )
    print(
        f"Multi-face groups: {overall['total_multi_face_groups']:,} | "
        f"Tag union mismatches: {overall['tag_union_mismatches']} | Missing after merge: {overall['missing_after_merge']}"
    )

    for color, summary in diff["per_color"].items():
        print(f"\n[{color}] baseline={summary['rows_baseline']} merged={summary['rows_merged']} Δ={summary['row_delta']}")
        if summary["multi_face_groups"]:
            print(f" multi-face groups: {summary['multi_face_groups']}")
        if summary["collapsed_sample"]:
            sample = ", ".join(summary["collapsed_sample"][:3])
            print(f" collapsed sample: {sample}")
        if summary["tag_union_mismatches"]:
            print(f" TAG MISMATCH sample: {', '.join(summary['tag_union_mismatches'])}")
        if summary["missing_after_merge"]:
            print(f" MISSING sample: {', '.join(summary['missing_after_merge'])}")
        if summary["removed_names"]:
            print(f" removed sample: {', '.join(summary['removed_names'])}")
        if summary["added_names"]:
            print(f" added sample: {', '.join(summary['added_names'])}")

    if args.output:
        payload = {
            "captured_at": int(time.time()),
            "mode": args.mode,
            "colors": colors,
            "compat_dir": str(compat_dir),
            "summary": diff,
        }
        try:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            args.output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
            print(f"\n📄 Wrote JSON summary to {args.output}")
        except Exception as exc:  # pragma: no cover
            print(f"Failed to write output file {args.output}: {exc}", file=sys.stderr)
            return 3

    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
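For reference, a small sketch of how the removed `_parse_list_cell` helper handled typical CSV cell values (inputs invented):

# Illustrative inputs and the values the helper returns for them.
print(_parse_list_cell("['Lifegain', 'Tokens']"))  # -> ['Lifegain', 'Tokens']
print(_parse_list_cell("Lifegain"))                # -> ['Lifegain'] (not a literal list, kept as one item)
print(_parse_list_cell(""))                        # -> []
print(_parse_list_cell(float("nan")))              # -> [] (NaN cells from pandas are treated as empty)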
Deleted file (105 lines): code/scripts/preview_metrics_snapshot.py
@@ -1,105 +0,0 @@
"""CLI utility: snapshot preview metrics and emit summary/top slow themes.

Usage (from repo root virtualenv):
    python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json

Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1) and writes a compact JSON plus
human-readable summary to stdout.
"""
from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict

import urllib.request
import urllib.error

DEFAULT_URL = "http://localhost:8000/themes/metrics"


def fetch_metrics(url: str) -> Dict[str, Any]:
    req = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(req, timeout=10) as resp:  # nosec B310 (local trusted)
        data = resp.read().decode("utf-8", "replace")
    try:
        return json.loads(data)  # type: ignore[return-value]
    except json.JSONDecodeError as e:  # pragma: no cover - unlikely if server OK
        raise SystemExit(f"Invalid JSON from metrics endpoint: {e}\nRaw: {data[:400]}")


def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
    preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
    per_theme = preview.get("per_theme") or {}
    # Compute top slow themes by avg_ms
    items = []
    for slug, info in per_theme.items():
        if not isinstance(info, dict):
            continue
        avg = info.get("avg_ms")
        if isinstance(avg, (int, float)):
            items.append((slug, float(avg), info))
    items.sort(key=lambda x: x[1], reverse=True)
    top = items[:top_n]
    return {
        "preview_requests": preview.get("preview_requests"),
        "preview_cache_hits": preview.get("preview_cache_hits"),
        "preview_avg_build_ms": preview.get("preview_avg_build_ms"),
        "preview_p95_build_ms": preview.get("preview_p95_build_ms"),
        "preview_ttl_seconds": preview.get("preview_ttl_seconds"),
        "editorial_curated_vs_sampled_pct": preview.get("editorial_curated_vs_sampled_pct"),
        "top_slowest": [
            {
                "slug": slug,
                "avg_ms": avg,
                "p95_ms": info.get("p95_ms"),
                "builds": info.get("builds"),
                "requests": info.get("requests"),
                "avg_curated_pct": info.get("avg_curated_pct"),
            }
            for slug, avg, info in top
        ],
    }


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description="Snapshot preview metrics")
    ap.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
    ap.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
    ap.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
    ap.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
    args = ap.parse_args(argv)

    try:
        raw = fetch_metrics(args.url)
    except urllib.error.URLError as e:
        print(f"ERROR: Failed fetching metrics endpoint: {e}", file=sys.stderr)
        return 2

    summary = summarize(raw, args.limit)
    snapshot = {
        "captured_at": int(time.time()),
        "source": args.url,
        "summary": summary,
    }

    if args.output:
        try:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            args.output.write_text(json.dumps(snapshot, indent=2, sort_keys=True), encoding="utf-8")
        except Exception as e:  # pragma: no cover
            print(f"ERROR: writing snapshot file failed: {e}", file=sys.stderr)
            return 3

    if not args.quiet:
        print("Preview Metrics Snapshot:")
        print(json.dumps(summary, indent=2))

    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
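A tiny sketch of the shape `summarize` produced, using an invented metrics payload:

# The per-theme avg_ms values drive the "top_slowest" ranking.
metrics = {"preview": {"preview_requests": 42, "per_theme": {
    "landfall": {"avg_ms": 18.4, "p95_ms": 30.1, "builds": 7, "requests": 20},
    "lifegain": {"avg_ms": 6.2, "p95_ms": 9.8, "builds": 5, "requests": 22},
}}}
print(summarize(metrics, top_n=1)["top_slowest"][0]["slug"])  # -> "landfall" (slowest by avg_ms)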
@ -1,349 +0,0 @@
|
|||
"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).
|
||||
|
||||
Runs warm-up plus measured request loops against several theme slugs and prints
|
||||
aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
|
||||
establish or validate that refactor did not introduce >5% p95 regression.
|
||||
|
||||
Usage (ensure server running locally – commonly :8080 in docker compose):
|
||||
python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
|
||||
--url http://localhost:8080 --warm 1 --limit 12
|
||||
|
||||
Theme slug discovery hierarchy (when --theme not provided):
|
||||
1. Try /themes/index.json (legacy / planned static index)
|
||||
2. Fallback to /themes/api/themes (current API) and take the first N ids
|
||||
The discovered slugs are sorted deterministically then truncated to N.
|
||||
|
||||
NOTE: This is intentionally minimal (no external deps). For stable comparisons
|
||||
run with identical parameters pre/post-change and commit the JSON output under
|
||||
logs/perf/.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import statistics
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _fetch_json(url: str) -> Dict[str, Any]:
|
||||
req = urllib.request.Request(url, headers={"Accept": "application/json"})
|
||||
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 local dev
|
||||
data = resp.read().decode("utf-8", "replace")
|
||||
return json.loads(data) # type: ignore[return-value]
|
||||
|
||||
|
||||
def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]:
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(1, attempts + 1):
|
||||
try:
|
||||
return _fetch_json(url)
|
||||
except Exception as exc: # pragma: no cover - network variability
|
||||
last_error = exc
|
||||
if attempt < attempts:
|
||||
print(json.dumps({ # noqa: T201
|
||||
"event": "preview_perf_fetch_retry",
|
||||
"url": url,
|
||||
"attempt": attempt,
|
||||
"max_attempts": attempts,
|
||||
"error": str(exc),
|
||||
}))
|
||||
time.sleep(delay * attempt)
|
||||
else:
|
||||
raise
|
||||
raise last_error # pragma: no cover - defensive; should be unreachable
|
||||
|
||||
|
||||
def select_theme_slugs(base_url: str, count: int) -> List[str]:
|
||||
"""Discover theme slugs for benchmarking.
|
||||
|
||||
Attempts legacy static index first, then falls back to live API listing.
|
||||
"""
|
||||
errors: List[str] = []
|
||||
slugs: List[str] = []
|
||||
# Attempt 1: legacy /themes/index.json
|
||||
try:
|
||||
idx = _fetch_json(f"{base_url.rstrip('/')}/themes/index.json")
|
||||
entries = idx.get("themes") or []
|
||||
for it in entries:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
slug = it.get("slug") or it.get("id") or it.get("theme_id")
|
||||
if isinstance(slug, str):
|
||||
slugs.append(slug)
|
||||
except Exception as e: # pragma: no cover - network variability
|
||||
errors.append(f"index.json failed: {e}")
|
||||
|
||||
if not slugs:
|
||||
# Attempt 2: live API listing
|
||||
try:
|
||||
listing = _fetch_json(f"{base_url.rstrip('/')}/themes/api/themes")
|
||||
items = listing.get("items") or []
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
tid = it.get("id") or it.get("slug") or it.get("theme_id")
|
||||
if isinstance(tid, str):
|
||||
slugs.append(tid)
|
||||
except Exception as e: # pragma: no cover - network variability
|
||||
errors.append(f"api/themes failed: {e}")
|
||||
|
||||
slugs = sorted(set(slugs))[:count]
|
||||
if not slugs:
|
||||
raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(errors) + ")")
|
||||
return slugs
|
||||
|
||||
|
||||
def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
|
||||
"""Fetch all theme slugs via paginated /themes/api/themes endpoint.
|
||||
|
||||
Uses maximum page size (200) and iterates using offset until no next page.
|
||||
Returns deterministic sorted unique list of slugs.
|
||||
"""
|
||||
slugs: List[str] = []
|
||||
offset = 0
|
||||
seen: set[str] = set()
|
||||
page_attempts = 5
|
||||
page_delay = 1.2
|
||||
while True:
|
||||
url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
|
||||
data: Dict[str, Any] | None = None
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(1, page_attempts + 1):
|
||||
try:
|
||||
data = _fetch_json_with_retry(url, attempts=4, delay=0.75)
|
||||
break
|
||||
except Exception as exc: # pragma: no cover - network variability
|
||||
last_error = exc
|
||||
if attempt < page_attempts:
|
||||
print(json.dumps({ # noqa: T201
|
||||
"event": "preview_perf_page_retry",
|
||||
"offset": offset,
|
||||
"attempt": attempt,
|
||||
"max_attempts": page_attempts,
|
||||
"error": str(exc),
|
||||
}))
|
||||
time.sleep(page_delay * attempt)
|
||||
else:
|
||||
raise SystemExit(f"Failed fetching themes page offset={offset}: {exc}")
|
||||
if data is None: # pragma: no cover - defensive
|
||||
raise SystemExit(f"Failed fetching themes page offset={offset}: {last_error}")
|
||||
items = data.get("items") or []
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
tid = it.get("id") or it.get("slug") or it.get("theme_id")
|
||||
if isinstance(tid, str) and tid not in seen:
|
||||
seen.add(tid)
|
||||
slugs.append(tid)
|
||||
next_offset = data.get("next_offset")
|
||||
if not next_offset or next_offset == offset:
|
||||
break
|
||||
offset = int(next_offset)
|
||||
return sorted(slugs)
|
||||
|
||||
|
||||
def percentile(values: List[float], pct: float) -> float:
|
||||
if not values:
|
||||
return 0.0
|
||||
sv = sorted(values)
|
||||
k = (len(sv) - 1) * pct
|
||||
f = int(k)
|
||||
c = min(f + 1, len(sv) - 1)
|
||||
if f == c:
|
||||
return sv[f]
|
||||
d0 = sv[f] * (c - k)
|
||||
d1 = sv[c] * (k - f)
|
||||
return d0 + d1
|
||||
|
||||
|
||||
def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
|
||||
latencies: List[float] = []
|
||||
per_slug_counts = {s: 0 for s in slugs}
|
||||
t_start = time.time()
|
||||
for i in range(loops):
|
||||
slug = slugs[i % len(slugs)]
|
||||
# path_template may contain {slug} and {limit}
|
||||
try:
|
||||
rel = path_template.format(slug=slug, limit=limit)
|
||||
except Exception:
|
||||
rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
|
||||
if not rel.startswith('/'):
|
||||
rel = '/' + rel
|
||||
url = f"{base_url.rstrip('/')}{rel}"
|
||||
t0 = time.time()
|
||||
try:
|
||||
_fetch_json(url)
|
||||
except Exception as e:
|
||||
print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)})) # noqa: T201
|
||||
continue
|
||||
ms = (time.time() - t0) * 1000.0
|
||||
latencies.append(ms)
|
||||
per_slug_counts[slug] += 1
|
||||
elapsed = time.time() - t_start
|
||||
return {
|
||||
"warm": warm,
|
||||
"loops": loops,
|
||||
"slugs": slugs,
|
||||
"per_slug_requests": per_slug_counts,
|
||||
"elapsed_s": round(elapsed, 3),
|
||||
"p50_ms": round(percentile(latencies, 0.50), 2),
|
||||
"p90_ms": round(percentile(latencies, 0.90), 2),
|
||||
"p95_ms": round(percentile(latencies, 0.95), 2),
|
||||
"avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
|
||||
"count": len(latencies),
|
||||
"_latencies": latencies, # internal (removed in final result unless explicitly retained)
|
||||
}
|
||||
|
||||
|
||||
def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
|
||||
if not latencies:
|
||||
return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
|
||||
return {
|
||||
"count": len(latencies),
|
||||
"p50_ms": round(percentile(latencies, 0.50), 2),
|
||||
"p90_ms": round(percentile(latencies, 0.90), 2),
|
||||
"p95_ms": round(percentile(latencies, 0.95), 2),
|
||||
"avg_ms": round(statistics.mean(latencies), 2),
|
||||
}
|
||||
|
||||
|
||||
def main(argv: List[str]) -> int:
|
||||
ap = argparse.ArgumentParser(description="Theme preview performance benchmark")
|
||||
ap.add_argument("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
|
||||
ap.add_argument("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
|
||||
ap.add_argument("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
|
||||
ap.add_argument("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
|
||||
ap.add_argument("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
|
||||
ap.add_argument("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
|
||||
ap.add_argument("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
|
||||
ap.add_argument("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
|
||||
ap.add_argument("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
|
||||
ap.add_argument("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
|
||||
# Hidden flag to detect if user explicitly set --loops (argparse has no direct support, so use sentinel technique)
|
||||
# We keep original --loops for backwards compatibility; when --all we recompute unless user passed --loops-explicit
|
||||
ap.add_argument("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
|
||||
ap.add_argument("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
|
||||
args = ap.parse_args(argv)
|
||||
|
||||
try:
|
||||
if args.explicit_theme:
|
||||
slugs = args.explicit_theme
|
||||
elif args.all:
|
||||
slugs = fetch_all_theme_slugs(args.url)
|
||||
else:
|
||||
slugs = select_theme_slugs(args.url, args.themes)
|
||||
except SystemExit as e: # pragma: no cover - dependency on live server
|
||||
print(str(e), file=sys.stderr)
|
||||
return 2
|
||||
|
||||
mode = "all" if args.all else "subset"
|
||||
total_slugs = len(slugs)
|
||||
if args.all and not args.loops_explicit:
|
||||
# Derive loops = passes * total_slugs
|
||||
args.loops = max(1, args.passes) * total_slugs
|
||||
|
||||
print(json.dumps({ # noqa: T201
|
||||
"event": "preview_perf_start",
|
||||
"mode": mode,
|
||||
"total_slugs": total_slugs,
|
||||
"planned_loops": args.loops,
|
||||
"passes": args.passes if args.all else None,
|
||||
}))
|
||||
|
||||
# Execution paths:
|
||||
# 1. Standard subset or single-pass all: warm cycles -> single measured run
|
||||
# 2. Multi-pass all mode (--all --passes >1): iterate passes capturing per-pass stats (no separate warm loops)
|
||||
if args.all and args.passes > 1:
|
||||
pass_results: List[Dict[str, Any]] = []
|
||||
combined_latencies: List[float] = []
|
||||
t0_all = time.time()
|
||||
for p in range(1, args.passes + 1):
|
||||
r = run_loop(args.url, slugs, len(slugs), args.limit, warm=(p == 1), path_template=args.path_template)
|
||||
lat = r.pop("_latencies", [])
|
||||
combined_latencies.extend(lat)
|
||||
pass_result = {
|
||||
"pass": p,
|
||||
"warm": r["warm"],
|
||||
"elapsed_s": r["elapsed_s"],
|
||||
"p50_ms": r["p50_ms"],
|
||||
"p90_ms": r["p90_ms"],
|
||||
"p95_ms": r["p95_ms"],
|
||||
"avg_ms": r["avg_ms"],
|
||||
"count": r["count"],
|
||||
}
|
||||
pass_results.append(pass_result)
|
||||
total_elapsed = round(time.time() - t0_all, 3)
|
||||
aggregate = _stats_from_latencies(combined_latencies)
|
||||
result = {
|
||||
"mode": mode,
|
||||
"total_slugs": total_slugs,
|
||||
"passes": args.passes,
|
||||
"slugs": slugs,
|
||||
"combined": {
|
||||
**aggregate,
|
||||
"elapsed_s": total_elapsed,
|
||||
},
|
||||
"passes_results": pass_results,
|
||||
"cold_pass_p95_ms": pass_results[0]["p95_ms"],
|
||||
"warm_pass_p95_ms": pass_results[-1]["p95_ms"],
|
||||
"cold_pass_p50_ms": pass_results[0]["p50_ms"],
|
||||
"warm_pass_p50_ms": pass_results[-1]["p50_ms"],
|
||||
}
|
||||
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
|
||||
# Optional warm baseline extraction (final pass only; represents warmed steady-state)
|
||||
if args.extract_warm_baseline:
|
||||
try:
|
||||
wb = pass_results[-1]
|
||||
warm_obj = {
|
||||
"event": "preview_perf_warm_baseline",
|
||||
"mode": mode,
|
||||
"total_slugs": total_slugs,
|
||||
"warm_baseline": True,
|
||||
"source_pass": wb["pass"],
|
||||
"p50_ms": wb["p50_ms"],
|
||||
"p90_ms": wb["p90_ms"],
|
||||
"p95_ms": wb["p95_ms"],
|
||||
"avg_ms": wb["avg_ms"],
|
||||
"count": wb["count"],
|
||||
"slugs": slugs,
|
||||
}
|
||||
args.extract_warm_baseline.parent.mkdir(parents=True, exist_ok=True)
|
||||
args.extract_warm_baseline.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
|
||||
print(json.dumps({ # noqa: T201
|
||||
"event": "preview_perf_warm_baseline_written",
|
||||
"path": str(args.extract_warm_baseline),
|
||||
"p95_ms": wb["p95_ms"],
|
||||
}))
|
||||
except Exception as e: # pragma: no cover
|
||||
print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)})) # noqa: T201
|
||||
else:
|
||||
# Warm-up loops first (if requested)
|
||||
for w in range(args.warm):
|
||||
run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
|
||||
result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
|
||||
result.pop("_latencies", None)
|
||||
result["slugs"] = slugs
|
||||
result["mode"] = mode
|
||||
result["total_slugs"] = total_slugs
|
||||
if args.all:
|
||||
result["passes"] = args.passes
|
||||
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
|
||||
|
||||
if args.output:
|
||||
try:
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Ensure we write the final result object (multi-pass already prepared above)
|
||||
args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
|
||||
except Exception as e: # pragma: no cover
|
||||
print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
|
||||
return 3
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
|
|
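For reference only, a minimal sketch (not part of the removed file) of how the multi-pass benchmark above could be driven from Python, mirroring the flags defined in its argparse block; the base URL and output paths here are placeholder assumptions.

# Illustrative sketch: drive the benchmark module the same way the CI helper below does.
# The module path comes from the CI script's bench_cmd; URLs and file paths are examples.
import subprocess
import sys

cmd = [
    sys.executable, "-m", "code.scripts.preview_perf_benchmark",
    "--url", "http://localhost:8080",              # assumed local service
    "--all", "--passes", "2",                      # multi-pass: pass 1 cold, final pass warm
    "--output", "logs/perf/theme_preview_run.json",
    "--extract-warm-baseline", "logs/perf/theme_preview_warm_baseline.json",
]
subprocess.run(cmd, check=False)  # result JSON and warm baseline are written by the script itself
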
@ -1,106 +0,0 @@
"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
|
||||
then compare against the committed warm baseline with threshold enforcement.
|
||||
|
||||
Intended usage (example):
|
||||
python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
|
||||
--baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5
|
||||
|
||||
Exit codes:
|
||||
0 success (within threshold)
|
||||
2 regression (p95 delta > threshold)
|
||||
3 setup / usage error
|
||||
|
||||
Notes:
|
||||
- Uses --all --passes 1 to create a fresh candidate snapshot that approximates
|
||||
a warmed steady-state (server should have background refresh / typical load).
|
||||
- If you prefer multi-pass then warm-only selection, adjust logic accordingly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
def _wait_for_service(base_url: str, attempts: int = 12, delay: float = 1.5) -> bool:
|
||||
health_url = base_url.rstrip("/") + "/healthz"
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(1, attempts + 1):
|
||||
try:
|
||||
with urllib.request.urlopen(health_url, timeout=5) as resp: # nosec B310 local CI
|
||||
if 200 <= resp.status < 300:
|
||||
return True
|
||||
except urllib.error.HTTPError as exc:
|
||||
last_error = exc
|
||||
if 400 <= exc.code < 500 and exc.code != 429:
|
||||
# Treat permanent client errors (other than rate limit) as fatal
|
||||
break
|
||||
except Exception as exc: # pragma: no cover - network variability
|
||||
last_error = exc
|
||||
time.sleep(delay * attempt)
|
||||
print(json.dumps({
|
||||
"event": "ci_perf_error",
|
||||
"stage": "startup",
|
||||
"message": "Service health check failed",
|
||||
"url": health_url,
|
||||
"attempts": attempts,
|
||||
"error": str(last_error) if last_error else None,
|
||||
}))
|
||||
return False
|
||||
|
||||
def run(cmd: list[str]) -> subprocess.CompletedProcess:
|
||||
return subprocess.run(cmd, capture_output=True, text=True, check=False)
|
||||
|
||||
def main(argv: list[str]) -> int:
|
||||
ap = argparse.ArgumentParser(description="Preview performance CI regression gate")
|
||||
ap.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
|
||||
ap.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
|
||||
ap.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
|
||||
ap.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
|
||||
ap.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
|
||||
args = ap.parse_args(argv)
|
||||
|
||||
if not args.baseline.exists():
|
||||
print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)}))
|
||||
return 3
|
||||
|
||||
if not _wait_for_service(args.url):
|
||||
return 3
|
||||
|
||||
# Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast)
|
||||
# If multi-pass requested, run two passes over all themes so second pass represents warmed steady-state.
|
||||
passes = "2" if args.multi_pass else "1"
|
||||
bench_cmd = [sys.executable, "-m", "code.scripts.preview_perf_benchmark", "--url", args.url, "--all", "--passes", passes, "--output", str(args.candidate_output)]
|
||||
bench_proc = run(bench_cmd)
|
||||
if bench_proc.returncode != 0:
|
||||
print(json.dumps({"event":"ci_perf_error","stage":"benchmark","code":bench_proc.returncode,"stderr":bench_proc.stderr}))
|
||||
return 3
|
||||
print(bench_proc.stdout)
|
||||
|
||||
if not args.candidate_output.exists():
|
||||
print(json.dumps({"event":"ci_perf_error","message":"Candidate output missing"}))
|
||||
return 3
|
||||
|
||||
compare_cmd = [
|
||||
sys.executable,
|
||||
"-m","code.scripts.preview_perf_compare",
|
||||
"--baseline", str(args.baseline),
|
||||
"--candidate", str(args.candidate_output),
|
||||
"--warm-only",
|
||||
"--p95-threshold", str(args.p95_threshold),
|
||||
]
|
||||
cmp_proc = run(compare_cmd)
|
||||
print(cmp_proc.stdout)
|
||||
if cmp_proc.returncode == 2:
|
||||
# Already printed JSON with failure status
|
||||
return 2
|
||||
if cmp_proc.returncode != 0:
|
||||
print(json.dumps({"event":"ci_perf_error","stage":"compare","code":cmp_proc.returncode,"stderr":cmp_proc.stderr}))
|
||||
return 3
|
||||
return 0
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
|
|
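As a rough illustration only, the gate above could also be invoked in-process rather than via the CLI shown in its docstring. The import path is an assumption inferred from the `python -m code.scripts.preview_perf_ci_check` usage, and the paths are examples; the exit-code meanings come from the docstring.

# Illustrative sketch, not part of the removed file.
from code.scripts.preview_perf_ci_check import main  # assumed import path

rc = main([
    "--url", "http://localhost:8080",
    "--baseline", "logs/perf/theme_preview_warm_baseline.json",
    "--p95-threshold", "5",
])
# rc == 0: within threshold, rc == 2: p95 regression, rc == 3: setup / usage error
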
@ -1,115 +0,0 @@
"""Compare two preview benchmark JSON result files and emit delta stats.
|
||||
|
||||
Usage:
|
||||
python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json
|
||||
|
||||
Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
|
||||
If multi-pass structures are present (combined & passes_results) those are included.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
def load(path: Path) -> Dict[str, Any]:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
# Multi-pass result may store stats under combined
|
||||
if "combined" in data:
|
||||
core = data["combined"].copy()
|
||||
# Inject representative fields for uniform comparison
|
||||
core["p50_ms"] = core.get("p50_ms") or data.get("p50_ms")
|
||||
core["p90_ms"] = core.get("p90_ms") or data.get("p90_ms")
|
||||
core["p95_ms"] = core.get("p95_ms") or data.get("p95_ms")
|
||||
core["avg_ms"] = core.get("avg_ms") or data.get("avg_ms")
|
||||
data["_core_stats"] = core
|
||||
else:
|
||||
data["_core_stats"] = {
|
||||
k: data.get(k) for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
|
||||
}
|
||||
return data
|
||||
|
||||
|
||||
def pct_delta(new: float, old: float) -> float:
|
||||
if old == 0:
|
||||
return 0.0
|
||||
return round(((new - old) / old) * 100.0, 2)
|
||||
|
||||
|
||||
def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
|
||||
b = baseline["_core_stats"]
|
||||
c = candidate["_core_stats"]
|
||||
result = {"baseline_count": b.get("count"), "candidate_count": c.get("count")}
|
||||
for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
|
||||
if b.get(k) is not None and c.get(k) is not None:
|
||||
result[k] = {
|
||||
"baseline": b[k],
|
||||
"candidate": c[k],
|
||||
"delta_pct": pct_delta(c[k], b[k]),
|
||||
}
|
||||
# If both have per-pass details include first and last pass p95/p50
|
||||
if "passes_results" in baseline and "passes_results" in candidate:
|
||||
result["passes"] = {
|
||||
"baseline": {
|
||||
"cold_p95": baseline.get("cold_pass_p95_ms"),
|
||||
"warm_p95": baseline.get("warm_pass_p95_ms"),
|
||||
"cold_p50": baseline.get("cold_pass_p50_ms"),
|
||||
"warm_p50": baseline.get("warm_pass_p50_ms"),
|
||||
},
|
||||
"candidate": {
|
||||
"cold_p95": candidate.get("cold_pass_p95_ms"),
|
||||
"warm_p95": candidate.get("warm_pass_p95_ms"),
|
||||
"cold_p50": candidate.get("cold_pass_p50_ms"),
|
||||
"warm_p50": candidate.get("warm_pass_p50_ms"),
|
||||
},
|
||||
}
|
||||
return result
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
|
||||
ap = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
|
||||
ap.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
|
||||
ap.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
|
||||
ap.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
|
||||
ap.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
|
||||
args = ap.parse_args(argv)
|
||||
if not args.baseline.exists():
|
||||
raise SystemExit(f"Baseline not found: {args.baseline}")
|
||||
if not args.candidate.exists():
|
||||
raise SystemExit(f"Candidate not found: {args.candidate}")
|
||||
baseline = load(args.baseline)
|
||||
candidate = load(args.candidate)
|
||||
# If warm-only requested and both have warm pass stats, override _core_stats before compare
|
||||
if args.warm_only and "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate:
|
||||
baseline["_core_stats"] = {
|
||||
"p50_ms": baseline.get("warm_pass_p50_ms"),
|
||||
"p90_ms": baseline.get("_core_stats", {}).get("p90_ms"), # p90 not tracked per-pass; retain combined
|
||||
"p95_ms": baseline.get("warm_pass_p95_ms"),
|
||||
"avg_ms": baseline.get("_core_stats", {}).get("avg_ms"),
|
||||
"count": baseline.get("_core_stats", {}).get("count"),
|
||||
}
|
||||
candidate["_core_stats"] = {
|
||||
"p50_ms": candidate.get("warm_pass_p50_ms"),
|
||||
"p90_ms": candidate.get("_core_stats", {}).get("p90_ms"),
|
||||
"p95_ms": candidate.get("warm_pass_p95_ms"),
|
||||
"avg_ms": candidate.get("_core_stats", {}).get("avg_ms"),
|
||||
"count": candidate.get("_core_stats", {}).get("count"),
|
||||
}
|
||||
cmp = compare(baseline, candidate)
|
||||
payload = {"event": "preview_perf_compare", **cmp}
|
||||
if args.p95_threshold is not None and "p95_ms" in cmp:
|
||||
delta = cmp["p95_ms"]["delta_pct"]
|
||||
payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
|
||||
if delta is not None and delta > args.p95_threshold:
|
||||
payload["result"] = "fail"
|
||||
print(json.dumps(payload, indent=2)) # noqa: T201
|
||||
return 2
|
||||
payload["result"] = "pass"
|
||||
print(json.dumps(payload, indent=2)) # noqa: T201
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main(__import__('sys').argv[1:]))
|
||||
|
|
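To make the threshold math above concrete, here is a small worked sketch of pct_delta and the fail condition; the millisecond values are assumed example numbers, not measurements from this repository.

# Illustrative sketch of the delta/threshold arithmetic used by preview_perf_compare.
def pct_delta(new: float, old: float) -> float:
    if old == 0:
        return 0.0
    return round(((new - old) / old) * 100.0, 2)

baseline_p95 = 40.0   # ms (assumed)
candidate_p95 = 43.0  # ms (assumed)
delta = pct_delta(candidate_p95, baseline_p95)  # 7.5 -> positive means slower (regression)
threshold = 5.0
result = "fail" if delta > threshold else "pass"  # "fail" corresponds to exit code 2
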
@ -1,91 +0,0 @@
"""Generate warm preview traffic to populate theme preview cache & metrics.
|
||||
|
||||
Usage:
|
||||
python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
|
||||
--base-url http://localhost:8000 --delay 0.05
|
||||
|
||||
Requirements:
|
||||
- FastAPI server running locally exposing /themes endpoints
|
||||
- WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible
|
||||
|
||||
Strategy:
|
||||
1. Fetch /themes/fragment/list?limit=COUNT to obtain HTML table.
|
||||
2. Extract theme slugs via regex on data-theme-id attributes.
|
||||
3. Issue REPEATS preview fragment requests per slug in order.
|
||||
4. Print simple timing / status summary.
|
||||
|
||||
This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from typing import List
|
||||
|
||||
LIST_PATH = "/themes/fragment/list"
|
||||
PREVIEW_PATH = "/themes/fragment/preview/{slug}"
|
||||
|
||||
|
||||
def fetch(url: str) -> str:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
|
||||
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 (local trusted)
|
||||
return resp.read().decode("utf-8", "replace")
|
||||
|
||||
|
||||
def extract_slugs(html: str, limit: int) -> List[str]:
|
||||
slugs = []
|
||||
for m in re.finditer(r'data-theme-id="([^"]+)"', html):
|
||||
s = m.group(1).strip()
|
||||
if s and s not in slugs:
|
||||
slugs.append(s)
|
||||
if len(slugs) >= limit:
|
||||
break
|
||||
return slugs
|
||||
|
||||
|
||||
def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
|
||||
list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
|
||||
print(f"[warm] Fetching list: {list_url}")
|
||||
try:
|
||||
html = fetch(list_url)
|
||||
except urllib.error.URLError as e: # pragma: no cover
|
||||
raise SystemExit(f"Failed fetching list: {e}")
|
||||
slugs = extract_slugs(html, count)
|
||||
if not slugs:
|
||||
raise SystemExit("No theme slugs extracted – cannot warm.")
|
||||
print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs)>8 else ''}")
|
||||
total_requests = 0
|
||||
start = time.time()
|
||||
for r in range(repeats):
|
||||
print(f"[warm] Pass {r+1}/{repeats}")
|
||||
for slug in slugs:
|
||||
url = f"{base_url}{PREVIEW_PATH.format(slug=slug)}"
|
||||
try:
|
||||
fetch(url)
|
||||
except Exception as e: # pragma: no cover
|
||||
print(f" [warn] Failed {slug}: {e}")
|
||||
else:
|
||||
total_requests += 1
|
||||
if delay:
|
||||
time.sleep(delay)
|
||||
dur = time.time() - start
|
||||
print(f"[warm] Completed {total_requests} preview requests in {dur:.2f}s ({total_requests/dur if dur>0 else 0:.1f} rps)")
|
||||
print("[warm] Done. Now run metrics snapshot to capture warm p95.")
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
|
||||
ap = argparse.ArgumentParser(description="Generate warm preview traffic")
|
||||
ap.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
|
||||
ap.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
|
||||
ap.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
|
||||
ap.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
|
||||
args = ap.parse_args(argv)
|
||||
warm(args.base_url.rstrip("/"), args.count, args.repeats, args.delay)
|
||||
return 0
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
import sys
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
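A tiny sketch of what extract_slugs above recovers from the list fragment, using a made-up HTML snippet and hypothetical theme slugs purely for illustration.

# Illustrative sketch, not part of the removed file.
import re

html = '<tr data-theme-id="aristocrats"></tr><tr data-theme-id="landfall"></tr>'
slugs = []
for m in re.finditer(r'data-theme-id="([^"]+)"', html):
    s = m.group(1).strip()
    if s and s not in slugs:
        slugs.append(s)
print(slugs)  # ['aristocrats', 'landfall'] -> these slugs then feed the preview warm loop
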