feat: theme catalog optimization with tag search and faster enrichment

matt 2025-10-15 17:17:46 -07:00
parent 952b151162
commit 9e6c68f559
26 changed files with 5906 additions and 5688 deletions

View file

@@ -1,203 +0,0 @@
"""
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).
Created: October 8, 2025
Purpose: Audit and validate Protection tag precision after implementing grant detection.
Can be re-run periodically to check tagging quality.
This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive
Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
"""
import sys
from pathlib import Path
import pandas as pd
import json
# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from code.tagging.protection_grant_detection import (
categorize_protection_card,
get_kindred_protection_tags,
is_granting_protection,
)
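# Illustrative call of the imported categorizer (card name and text are
# hypothetical; the actual category strings come from
# code.tagging.protection_grant_detection):
#   categorize_protection_card(
#       "Example Guardian",
#       "Other creatures you control have hexproof.",
#       "", "Creature", exclude_kindred=True,
#   )
#   # expected to land in the 'Grant' bucket, since it grants broadly to
#   # permanents you control rather than only to itself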
def load_all_cards():
"""Load all cards from color/identity CSV files."""
csv_dir = project_root / 'csv_files'
# Get all color/identity CSVs (not the raw cards.csv)
csv_files = list(csv_dir.glob('*_cards.csv'))
csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]
all_cards = []
for csv_file in csv_files:
try:
df = pd.read_csv(csv_file)
all_cards.append(df)
except Exception as e:
print(f"Warning: Could not load {csv_file.name}: {e}")
# Combine all DataFrames
combined = pd.concat(all_cards, ignore_index=True)
# Drop duplicates (cards appear in multiple color files)
combined = combined.drop_duplicates(subset=['name'], keep='first')
return combined
def audit_all_protection_cards():
"""Audit all Protection-tagged cards."""
print("Loading all cards...")
df = load_all_cards()
print(f"Total cards loaded: {len(df)}")
# Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()
print(f"Protection-tagged cards: {len(df_prot)}")
# Categorize each card
categories = []
grants_list = []
kindred_tags_list = []
for idx, row in df_prot.iterrows():
name = row['name']
text = str(row.get('text', '')).replace('\\n', '\n') # Convert escaped newlines to real newlines
keywords = str(row.get('keywords', ''))
card_type = str(row.get('type', ''))
# Categorize with kindred exclusion enabled
category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)
# Check if it grants broadly
grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)
# Get kindred tags
kindred_tags = get_kindred_protection_tags(text)
categories.append(category)
grants_list.append(grants_broad)
kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')
df_prot['category'] = categories
df_prot['grants_broad'] = grants_list
df_prot['kindred_tags'] = kindred_tags_list
# Generate summary (convert numpy types to native Python for JSON serialization)
summary = {
'total': int(len(df_prot)),
'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
'grants_broad_count': int(df_prot['grants_broad'].sum()),
'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
}
# Calculate keep vs remove
keep_categories = {'Grant', 'Mixed'}
kindred_only = df_prot[df_prot['category'] == 'Kindred']
keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])
summary['keep_main_tag'] = keep_count
summary['kindred_metadata'] = len(kindred_only)
summary['remove'] = remove_count
summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0
# Print summary
print(f"\n{'='*60}")
print("AUDIT SUMMARY")
print(f"{'='*60}")
print(f"Total Protection-tagged cards: {summary['total']}")
print(f"\nCategories:")
for cat, count in sorted(summary['categories'].items()):
pct = (count / summary['total']) * 100
print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")
print(f"\n{'='*60}")
print(f"Main Protection tag: {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
print(f"Remove: {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
print(f"{'='*60}")
print(f"Precision estimate: {summary['precision_estimate']}%")
print(f"{'='*60}\n")
# Export results
output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
output_dir.mkdir(parents=True, exist_ok=True)
# Export JSON summary
with open(output_dir / 'm2_audit_v2.json', 'w') as f:
json.dump({
'summary': summary,
'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
}, f, indent=2)
# Export CSVs by category
export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']
# Grant category
df_grant = df_prot[df_prot['category'] == 'Grant']
df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")
# Kindred category
df_kindred = df_prot[df_prot['category'] == 'Kindred']
df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")
# Mixed category
df_mixed = df_prot[df_prot['category'] == 'Mixed']
df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")
# ConditionalSelf category
df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")
# Inherent category
df_inherent = df_prot[df_prot['category'] == 'Inherent']
df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")
# Opponent category
df_opponent = df_prot[df_prot['category'] == 'Opponent']
df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")
# Neither category
df_neither = df_prot[df_prot['category'] == 'Neither']
df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")
# All cards
df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")
print(f"\nAll files saved to: {output_dir}")
return df_prot, summary
if __name__ == '__main__':
df_results, summary = audit_all_protection_cards()

View file

@@ -1,118 +0,0 @@
"""Opt-in guard that compares multi-theme filter performance to a stored baseline.
Run inside the project virtual environment:
python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json
The script executes the same profiling loop as `profile_multi_theme_filter` and fails
if the observed mean or p95 timings regress by more than the allowed threshold.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, Tuple
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"
if str(PROJECT_ROOT) not in sys.path:
sys.path.append(str(PROJECT_ROOT))
from code.scripts.profile_multi_theme_filter import run_profile # type: ignore # noqa: E402
def _load_baseline(path: Path) -> Dict[str, Any]:
if not path.exists():
raise FileNotFoundError(f"Baseline file not found: {path}")
data = json.loads(path.read_text(encoding="utf-8"))
return data
def _extract(metric: Dict[str, Any], key: str) -> float:
try:
value = float(metric.get(key, 0.0))
except Exception:
value = 0.0
return value
def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
a_mean = _extract(actual, "mean_ms")
b_mean = _extract(baseline, "mean_ms")
a_p95 = _extract(actual, "p95_ms")
b_p95 = _extract(baseline, "p95_ms")
allowed_mean = b_mean * (1.0 + threshold)
allowed_p95 = b_p95 * (1.0 + threshold)
mean_ok = a_mean <= allowed_mean or b_mean == 0.0
p95_ok = a_p95 <= allowed_p95 or b_p95 == 0.0
status = mean_ok and p95_ok
def _format_row(label: str, actual_val: float, baseline_val: float, allowed_val: float, ok: bool) -> str:
trend = ((actual_val - baseline_val) / baseline_val * 100.0) if baseline_val else 0.0
trend_str = f"{trend:+.1f}%" if baseline_val else "n/a"
limit_str = f"{allowed_val:.3f}ms" if baseline_val else "n/a"
return f" {label:<6} actual={actual_val:.3f}ms baseline={baseline_val:.3f}ms ({trend_str}), limit {limit_str} -> {'OK' if ok else 'FAIL'}"
rows = [f"Section: {name}"]
rows.append(_format_row("mean", a_mean, b_mean, allowed_mean, mean_ok))
rows.append(_format_row("p95", a_p95, b_p95, allowed_p95, p95_ok))
return status, "\n".join(rows)
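# Worked example of the threshold check (hypothetical timings, not a real
# baseline): with threshold=0.15 a baseline mean of 2.000ms allows up to 2.300ms,
# so an observed mean of 2.450ms (+22.5%) fails while 2.250ms (+12.5%) passes;
# a zero baseline value short-circuits that comparison to OK.
#   ok, report = _check_section(
#       "cascade",
#       {"mean_ms": 2.45, "p95_ms": 4.8},   # observed
#       {"mean_ms": 2.00, "p95_ms": 5.0},   # baseline
#       0.15,
#   )
#   # ok is False: 2.45 > 2.00 * 1.15, even though 4.8 <= 5.0 * 1.15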
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE, help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
parser.add_argument("--iterations", type=int, default=400, help="Number of iterations to sample (default: 400)")
parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
parser.add_argument("--threshold", type=float, default=0.15, help="Allowed regression threshold as a fraction (default: 0.15 = 15%)")
parser.add_argument("--update-baseline", action="store_true", help="Overwrite the baseline file with the newly collected metrics")
args = parser.parse_args(argv)
baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
if args.update_baseline and not baseline_path.parent.exists():
baseline_path.parent.mkdir(parents=True, exist_ok=True)
if not args.update_baseline:
baseline = _load_baseline(baseline_path)
else:
baseline = {}
results = run_profile(args.iterations, args.seed)
cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)
print("Iterations:", results.get("iterations"))
print("Seed:", results.get("seed"))
print(cascade_report)
print(synergy_report)
overall_ok = cascade_status and synergy_status
if args.update_baseline:
payload = {
"iterations": results.get("iterations"),
"seed": results.get("seed"),
"cascade": results.get("cascade"),
"synergy": results.get("synergy"),
}
baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
print(f"Baseline updated → {baseline_path}")
return 0
if not overall_ok:
print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
return 1
print("PASS: performance within allowed threshold")
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main())

View file

@@ -0,0 +1,135 @@
"""CLI wrapper for theme enrichment pipeline.
Runs the consolidated theme enrichment pipeline with command-line options.
For backward compatibility, individual scripts can still be run separately,
but this provides a faster single-pass alternative.
Usage:
python code/scripts/enrich_themes.py --write
python code/scripts/enrich_themes.py --dry-run --enforce-min
"""
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
# Add project root to path
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
# Import after adding to path
from code.tagging.theme_enrichment import run_enrichment_pipeline # noqa: E402
def main() -> int:
"""Run theme enrichment pipeline from CLI."""
parser = argparse.ArgumentParser(
description='Consolidated theme metadata enrichment pipeline',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Dry run (no changes written):
python code/scripts/enrich_themes.py --dry-run
# Write changes:
python code/scripts/enrich_themes.py --write
# Enforce minimum examples (errors if insufficient):
python code/scripts/enrich_themes.py --write --enforce-min
# Strict validation for cornerstone themes:
python code/scripts/enrich_themes.py --write --strict
Note: This replaces running 7 separate scripts (autofill, pad, cleanup, purge,
augment, suggestions, lint) with a single 5-10x faster operation.
"""
)
parser.add_argument(
'--write',
action='store_true',
help='Write changes to disk (default: dry run)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Dry run mode: show what would be changed without writing'
)
parser.add_argument(
'--min',
'--min-examples',
type=int,
default=None,
metavar='N',
help='Minimum number of example commanders (default: $EDITORIAL_MIN_EXAMPLES or 5)'
)
parser.add_argument(
'--enforce-min',
action='store_true',
help='Treat minimum examples violations as errors'
)
parser.add_argument(
'--strict',
action='store_true',
help='Enable strict validation (cornerstone themes must have examples)'
)
args = parser.parse_args()
# Determine write mode
if args.dry_run:
write = False
elif args.write:
write = True
else:
# Default to dry run if neither specified
write = False
print("Note: Running in dry-run mode (use --write to save changes)\n")
# Get minimum examples threshold
if args.min is not None:
min_examples = args.min
else:
min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
print("Theme Enrichment Pipeline")
print("========================")
print(f"Mode: {'WRITE' if write else 'DRY RUN'}")
print(f"Min examples: {min_examples}")
print(f"Enforce min: {args.enforce_min}")
print(f"Strict: {args.strict}")
print()
try:
stats = run_enrichment_pipeline(
root=ROOT,
min_examples=min_examples,
write=write,
enforce_min=args.enforce_min,
strict=args.strict,
progress_callback=None, # Use default print
)
# Return non-zero if there are lint errors
if stats.lint_errors > 0:
print(f"\n❌ Enrichment completed with {stats.lint_errors} error(s)")
return 1
print("\n✅ Enrichment completed successfully")
return 0
except KeyboardInterrupt:
print("\n\nInterrupted by user")
return 130
except Exception as e:
print(f"\n❌ Error: {e}", file=sys.stderr)
if '--debug' in sys.argv:
raise
return 1
if __name__ == '__main__':
raise SystemExit(main())

View file

@@ -123,6 +123,9 @@ def main():
enforced_set = set(enforced_synergies)
inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]
example_cards_value = entry.get('example_cards', [])
example_commanders_value = entry.get('example_commanders', [])
doc = {
'id': slug,
'display_name': theme_name,
@@ -132,13 +135,40 @@
'inferred_synergies': inferred_synergies,
'primary_color': entry.get('primary_color'),
'secondary_color': entry.get('secondary_color'),
'example_cards': example_cards_value,
'example_commanders': example_commanders_value,
'synergy_example_cards': entry.get('synergy_example_cards', []),
'synergy_commanders': entry.get('synergy_commanders', []),
'deck_archetype': entry.get('deck_archetype'),
'popularity_hint': entry.get('popularity_hint'),
'popularity_bucket': entry.get('popularity_bucket'),
'editorial_quality': entry.get('editorial_quality'),
'description': entry.get('description'),
'notes': ''
}
# Drop None color keys for cleanliness
# Drop None/empty keys for cleanliness
if doc['primary_color'] is None:
doc.pop('primary_color')
if doc.get('secondary_color') is None:
doc.pop('secondary_color')
if not doc.get('example_cards'):
doc.pop('example_cards')
if not doc.get('example_commanders'):
doc.pop('example_commanders')
if not doc.get('synergy_example_cards'):
doc.pop('synergy_example_cards')
if not doc.get('synergy_commanders'):
doc.pop('synergy_commanders')
if doc.get('deck_archetype') is None:
doc.pop('deck_archetype')
if doc.get('popularity_hint') is None:
doc.pop('popularity_hint')
if doc.get('popularity_bucket') is None:
doc.pop('popularity_bucket')
if doc.get('editorial_quality') is None:
doc.pop('editorial_quality')
if doc.get('description') is None:
doc.pop('description')
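# The pruning above could equally be expressed as two small loops (a sketch,
# not part of this change): list-valued keys drop when empty, scalar keys only
# when None.
#   for key in ('example_cards', 'example_commanders',
#               'synergy_example_cards', 'synergy_commanders'):
#       if not doc.get(key):
#           doc.pop(key, None)
#   for key in ('primary_color', 'secondary_color', 'deck_archetype',
#               'popularity_hint', 'popularity_bucket', 'editorial_quality',
#               'description'):
#       if doc.get(key) is None:
#           doc.pop(key, None)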
with path.open('w', encoding='utf-8') as f:
yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
exported += 1

View file

@@ -19,6 +19,13 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence
try:
import pandas as pd
HAS_PANDAS = True
except ImportError:
HAS_PANDAS = False
pd = None # type: ignore
ROOT = Path(__file__).resolve().parents[2]
CODE_ROOT = ROOT / "code"
if str(CODE_ROOT) not in sys.path:
@@ -29,6 +36,9 @@ try:
except Exception: # pragma: no cover - fallback for adhoc execution
DEFAULT_CSV_DIRECTORY = "csv_files"
# Parquet support requires pandas (imported at top of file, uses pyarrow under the hood)
HAS_PARQUET_SUPPORT = HAS_PANDAS
DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
HEADER_COMMENT_PREFIX = "# theme_catalog"
@@ -87,7 +97,68 @@ def parse_theme_tags(value: object) -> List[str]:
return []
def _load_theme_counts_from_parquet(
parquet_path: Path,
theme_variants: Dict[str, set[str]]
) -> Counter[str]:
"""Load theme counts from a parquet file using pandas (which uses pyarrow).
Args:
parquet_path: Path to the parquet file (commander_cards.parquet or all_cards.parquet)
theme_variants: Dict to accumulate theme name variants
Returns:
Counter of theme occurrences
"""
if pd is None:
return Counter()
counts: Counter[str] = Counter()
if not parquet_path.exists():
return counts
# Read only themeTags column for efficiency
try:
df = pd.read_parquet(parquet_path, columns=["themeTags"])
except Exception:
# If themeTags column doesn't exist, return empty
return counts
# Convert to list for fast iteration (faster than iterrows)
theme_tags_list = df["themeTags"].tolist()
for raw_value in theme_tags_list:
if raw_value is None or (isinstance(raw_value, float) and pd.isna(raw_value)):
continue
tags = parse_theme_tags(raw_value)
if not tags:
continue
seen_in_row: set[str] = set()
for tag in tags:
display = normalize_theme_display(tag)
if not display:
continue
key = canonical_key(display)
if key in seen_in_row:
continue
seen_in_row.add(key)
counts[key] += 1
theme_variants[key].add(display)
return counts
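# Accumulation sketch (tags are hypothetical; assumes canonical_key lower-cases
# the normalized display name): a row tagged ["Lifegain", "lifegain", "Tokens"]
# adds 1 to counts["lifegain"] and 1 to counts["tokens"], because per-row
# duplicates are skipped via seen_in_row, while theme_variants["lifegain"]
# collects both display spellings.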
def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
"""Load theme counts from CSV file (fallback method).
Args:
csv_path: Path to CSV file
theme_variants: Dict to accumulate theme name variants
Returns:
Counter of theme occurrences
"""
counts: Counter[str] = Counter()
if not csv_path.exists():
return counts
@@ -146,24 +217,67 @@
commander_filename: str = "commander_cards.csv",
cards_filename: str = "cards.csv",
logs_directory: Optional[Path] = None,
use_parquet: bool = True,
) -> CatalogBuildResult:
"""Build theme catalog from card data.
Args:
csv_directory: Directory containing CSV files (fallback)
output_path: Where to write the catalog CSV
generated_at: Optional timestamp for generation
commander_filename: Name of commander CSV file
cards_filename: Name of cards CSV file
logs_directory: Optional directory to copy output to
use_parquet: If True, try to use all_cards.parquet first (default: True)
Returns:
CatalogBuildResult with generated rows and metadata
"""
csv_directory = csv_directory.resolve()
output_path = output_path.resolve()
theme_variants: Dict[str, set[str]] = defaultdict(set)
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
# Try to use parquet file first (much faster)
used_parquet = False
if use_parquet and HAS_PARQUET_SUPPORT:
try:
# Use dedicated parquet files (matches CSV structure exactly)
parquet_dir = csv_directory.parent / "card_files"
# Load commander counts directly from commander_cards.parquet
commander_parquet = parquet_dir / "commander_cards.parquet"
commander_counts = _load_theme_counts_from_parquet(
commander_parquet, theme_variants=theme_variants
)
# CSV method doesn't load non-commander cards, so we don't either
card_counts = Counter()
used_parquet = True
print("✓ Loaded theme data from parquet files")
except Exception as e:
print(f"⚠ Failed to load from parquet: {e}")
print(" Falling back to CSV files...")
used_parquet = False
# Fallback to CSV files if parquet not available or failed
if not used_parquet:
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
card_counts: Counter[str] = Counter()
cards_path = csv_directory / cards_filename
if cards_path.exists():
card_counts = _load_theme_counts(cards_path, theme_variants)
else:
# Fallback: scan all *_cards.csv except commander
for candidate in csv_directory.glob("*_cards.csv"):
if candidate.name == commander_filename:
continue
card_counts += _load_theme_counts(candidate, theme_variants)
card_counts: Counter[str] = Counter()
cards_path = csv_directory / cards_filename
if cards_path.exists():
card_counts = _load_theme_counts(cards_path, theme_variants)
else:
# Fallback: scan all *_cards.csv except commander
for candidate in csv_directory.glob("*_cards.csv"):
if candidate.name == commander_filename:
continue
card_counts += _load_theme_counts(candidate, theme_variants)
print("✓ Loaded theme data from CSV files")
keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
generated_at_iso = _derive_generated_at(generated_at)
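# Usage sketch for the updated builder (paths illustrative; parquet is attempted
# first and the CSV scan above remains the fallback):
#   result = build_theme_catalog(
#       csv_directory=Path("csv_files"),
#       output_path=Path("config/themes/theme_catalog.csv"),
#       use_parquet=True,
#   )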

View file

@@ -1,305 +0,0 @@
"""Catalog diff helper for verifying multi-face merge output.
This utility regenerates the card CSV catalog (optionally writing compatibility
snapshots) and then compares the merged outputs against the baseline snapshots.
It is intended to support the MDFC rollout checklist by providing a concise summary
of how many rows were merged, which cards collapsed into a single record, and
whether any tag unions diverge from expectations.
Example usage (from repo root, inside virtualenv):
python -m code.scripts.preview_dfc_catalog_diff --compat-snapshot --output logs/dfc_catalog_diff.json
The script prints a human readable summary to stdout and optionally writes a JSON
artifact for release/staging review.
"""
from __future__ import annotations
import argparse
import ast
import importlib
import json
import os
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Any, Dict, Iterable, List, Sequence
import pandas as pd
from settings import COLORS, CSV_DIRECTORY
DEFAULT_COMPAT_DIR = Path(os.getenv("DFC_COMPAT_DIR", "csv_files/compat_faces"))
CSV_ROOT = Path(CSV_DIRECTORY)
def _parse_list_cell(value: Any) -> List[str]:
"""Convert serialized list cells ("['A', 'B']") into Python lists."""
if isinstance(value, list):
return [str(item) for item in value]
if value is None:
return []
if isinstance(value, float) and pd.isna(value): # type: ignore[arg-type]
return []
text = str(value).strip()
if not text:
return []
try:
parsed = ast.literal_eval(text)
except (SyntaxError, ValueError):
return [text]
if isinstance(parsed, list):
return [str(item) for item in parsed]
return [str(parsed)]
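# Illustrative coercions (inputs hypothetical):
#   _parse_list_cell("['Lifegain', 'Tokens']")  -> ['Lifegain', 'Tokens']
#   _parse_list_cell("Lifegain")                -> ['Lifegain']   (literal_eval fails, falls back to raw text)
#   _parse_list_cell(float('nan'))              -> []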
def _load_catalog(path: Path) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"Catalog file missing: {path}")
df = pd.read_csv(path)
for column in ("themeTags", "keywords", "creatureTypes"):
if column in df.columns:
df[column] = df[column].apply(_parse_list_cell)
return df
def _multi_face_names(df: pd.DataFrame) -> List[str]:
counts = Counter(df.get("name", []))
return [name for name, count in counts.items() if isinstance(name, str) and count > 1]
def _collect_tags(series: Iterable[List[str]]) -> List[str]:
tags: List[str] = []
for value in series:
if isinstance(value, list):
tags.extend(str(item) for item in value)
return sorted(set(tags))
def _summarize_color(
color: str,
merged: pd.DataFrame,
baseline: pd.DataFrame,
sample_size: int,
) -> Dict[str, Any]:
merged_names = set(merged.get("name", []))
baseline_names = list(baseline.get("name", []))
baseline_name_set = set(name for name in baseline_names if isinstance(name, str))
multi_face = _multi_face_names(baseline)
collapsed = []
tag_mismatches: List[str] = []
missing_after_merge: List[str] = []
for name in multi_face:
group = baseline[baseline["name"] == name]
merged_row = merged[merged["name"] == name]
if merged_row.empty:
missing_after_merge.append(name)
continue
expected_tags = _collect_tags(group["themeTags"]) if "themeTags" in group else []
merged_tags = _collect_tags(merged_row.iloc[[0]]["themeTags"]) if "themeTags" in merged_row else []
if expected_tags != merged_tags:
tag_mismatches.append(name)
collapsed.append(name)
removed_names = sorted(baseline_name_set - merged_names)
added_names = sorted(merged_names - baseline_name_set)
return {
"rows_merged": len(merged),
"rows_baseline": len(baseline),
"row_delta": len(merged) - len(baseline),
"multi_face_groups": len(multi_face),
"collapsed_sample": collapsed[:sample_size],
"tag_union_mismatches": tag_mismatches[:sample_size],
"missing_after_merge": missing_after_merge[:sample_size],
"removed_names": removed_names[:sample_size],
"added_names": added_names[:sample_size],
}
def _refresh_catalog(colors: Sequence[str], compat_snapshot: bool) -> None:
os.environ.pop("ENABLE_DFC_MERGE", None)
os.environ["DFC_COMPAT_SNAPSHOT"] = "1" if compat_snapshot else "0"
importlib.invalidate_caches()
# Reload tagger to pick up the new env var
tagger = importlib.import_module("code.tagging.tagger")
tagger = importlib.reload(tagger) # type: ignore[assignment]
for color in colors:
tagger.load_dataframe(color)
def generate_diff(
colors: Sequence[str],
compat_dir: Path,
sample_size: int,
) -> Dict[str, Any]:
per_color: Dict[str, Any] = {}
overall = {
"total_rows_merged": 0,
"total_rows_baseline": 0,
"total_multi_face_groups": 0,
"colors": len(colors),
"tag_union_mismatches": 0,
"missing_after_merge": 0,
}
for color in colors:
merged_path = CSV_ROOT / f"{color}_cards.csv"
baseline_path = compat_dir / f"{color}_cards_unmerged.csv"
merged_df = _load_catalog(merged_path)
baseline_df = _load_catalog(baseline_path)
summary = _summarize_color(color, merged_df, baseline_df, sample_size)
per_color[color] = summary
overall["total_rows_merged"] += summary["rows_merged"]
overall["total_rows_baseline"] += summary["rows_baseline"]
overall["total_multi_face_groups"] += summary["multi_face_groups"]
overall["tag_union_mismatches"] += len(summary["tag_union_mismatches"])
overall["missing_after_merge"] += len(summary["missing_after_merge"])
overall["row_delta_total"] = overall["total_rows_merged"] - overall["total_rows_baseline"]
return {"overall": overall, "per_color": per_color}
def main(argv: List[str]) -> int:
parser = argparse.ArgumentParser(description="Preview merged vs baseline DFC catalog diff")
parser.add_argument(
"--skip-refresh",
action="store_true",
help="Skip rebuilding the catalog in compatibility mode (requires existing compat snapshots)",
)
parser.add_argument(
"--mode",
default="",
help="[Deprecated] Legacy ENABLE_DFC_MERGE value (compat|1|0 etc.)",
)
parser.add_argument(
"--compat-snapshot",
dest="compat_snapshot",
action="store_true",
help="Write compatibility snapshots before diffing (default: off unless legacy --mode compat)",
)
parser.add_argument(
"--no-compat-snapshot",
dest="compat_snapshot",
action="store_false",
help="Skip compatibility snapshots even if legacy --mode compat is supplied",
)
parser.set_defaults(compat_snapshot=None)
parser.add_argument(
"--colors",
nargs="*",
help="Optional subset of colors to diff (defaults to full COLORS list)",
)
parser.add_argument(
"--compat-dir",
type=Path,
default=DEFAULT_COMPAT_DIR,
help="Directory containing unmerged compatibility snapshots (default: %(default)s)",
)
parser.add_argument(
"--output",
type=Path,
help="Optional JSON file to write with the diff summary",
)
parser.add_argument(
"--sample-size",
type=int,
default=10,
help="Number of sample entries to include per section (default: %(default)s)",
)
args = parser.parse_args(argv)
colors = tuple(args.colors) if args.colors else tuple(COLORS)
compat_dir = args.compat_dir
mode = str(args.mode or "").strip().lower()
if mode and mode not in {"compat", "dual", "both", "1", "on", "true", "0", "off", "false", "disabled"}:
print(
f" Legacy --mode value '{mode}' detected; merge remains enabled. Use --compat-snapshot as needed.",
flush=True,
)
if args.compat_snapshot is None:
compat_snapshot = mode in {"compat", "dual", "both"}
else:
compat_snapshot = args.compat_snapshot
if mode:
print(
" Ignoring deprecated --mode value because --compat-snapshot/--no-compat-snapshot was supplied.",
flush=True,
)
if mode in {"0", "off", "false", "disabled"}:
print(
"⚠ ENABLE_DFC_MERGE=off is deprecated; the merge remains enabled regardless of the value.",
flush=True,
)
if not args.skip_refresh:
start = time.perf_counter()
_refresh_catalog(colors, compat_snapshot)
duration = time.perf_counter() - start
snapshot_msg = "with compat snapshot" if compat_snapshot else "merged-only"
print(f"✔ Refreshed catalog in {duration:.1f}s ({snapshot_msg})")
else:
print(" Using existing catalog outputs (refresh skipped)")
try:
diff = generate_diff(colors, compat_dir, args.sample_size)
except FileNotFoundError as exc:
print(f"ERROR: {exc}")
print("Run without --skip-refresh (or ensure compat snapshots exist).", file=sys.stderr)
return 2
overall = diff["overall"]
print("\n=== DFC Catalog Diff Summary ===")
print(
f"Merged rows: {overall['total_rows_merged']:,} | Baseline rows: {overall['total_rows_baseline']:,} | "
f"Δ rows: {overall['row_delta_total']:,}"
)
print(
f"Multi-face groups: {overall['total_multi_face_groups']:,} | "
f"Tag union mismatches: {overall['tag_union_mismatches']} | Missing after merge: {overall['missing_after_merge']}"
)
for color, summary in diff["per_color"].items():
print(f"\n[{color}] baseline={summary['rows_baseline']} merged={summary['rows_merged']} Δ={summary['row_delta']}")
if summary["multi_face_groups"]:
print(f" multi-face groups: {summary['multi_face_groups']}")
if summary["collapsed_sample"]:
sample = ", ".join(summary["collapsed_sample"][:3])
print(f" collapsed sample: {sample}")
if summary["tag_union_mismatches"]:
print(f" TAG MISMATCH sample: {', '.join(summary['tag_union_mismatches'])}")
if summary["missing_after_merge"]:
print(f" MISSING sample: {', '.join(summary['missing_after_merge'])}")
if summary["removed_names"]:
print(f" removed sample: {', '.join(summary['removed_names'])}")
if summary["added_names"]:
print(f" added sample: {', '.join(summary['added_names'])}")
if args.output:
payload = {
"captured_at": int(time.time()),
"mode": args.mode,
"colors": colors,
"compat_dir": str(compat_dir),
"summary": diff,
}
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
print(f"\n📄 Wrote JSON summary to {args.output}")
except Exception as exc: # pragma: no cover
print(f"Failed to write output file {args.output}: {exc}", file=sys.stderr)
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,105 +0,0 @@
"""CLI utility: snapshot preview metrics and emit summary/top slow themes.
Usage (from repo root virtualenv):
python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json
Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1), prints a human-readable
summary to stdout, and optionally writes a compact JSON snapshot via --output.
"""
from __future__ import annotations
import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict
import urllib.request
import urllib.error
DEFAULT_URL = "http://localhost:8000/themes/metrics"
def fetch_metrics(url: str) -> Dict[str, Any]:
req = urllib.request.Request(url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=10) as resp: # nosec B310 (local trusted)
data = resp.read().decode("utf-8", "replace")
try:
return json.loads(data) # type: ignore[return-value]
except json.JSONDecodeError as e: # pragma: no cover - unlikely if server OK
raise SystemExit(f"Invalid JSON from metrics endpoint: {e}\nRaw: {data[:400]}")
def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
per_theme = preview.get("per_theme") or {}
# Compute top slow themes by avg_ms
items = []
for slug, info in per_theme.items():
if not isinstance(info, dict):
continue
avg = info.get("avg_ms")
if isinstance(avg, (int, float)):
items.append((slug, float(avg), info))
items.sort(key=lambda x: x[1], reverse=True)
top = items[:top_n]
return {
"preview_requests": preview.get("preview_requests"),
"preview_cache_hits": preview.get("preview_cache_hits"),
"preview_avg_build_ms": preview.get("preview_avg_build_ms"),
"preview_p95_build_ms": preview.get("preview_p95_build_ms"),
"preview_ttl_seconds": preview.get("preview_ttl_seconds"),
"editorial_curated_vs_sampled_pct": preview.get("editorial_curated_vs_sampled_pct"),
"top_slowest": [
{
"slug": slug,
"avg_ms": avg,
"p95_ms": info.get("p95_ms"),
"builds": info.get("builds"),
"requests": info.get("requests"),
"avg_curated_pct": info.get("avg_curated_pct"),
}
for slug, avg, info in top
],
}
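# Shape of the returned summary (all values illustrative):
#   {
#       "preview_requests": 120, "preview_cache_hits": 95,
#       "preview_avg_build_ms": 38.2, "preview_p95_build_ms": 91.0,
#       "preview_ttl_seconds": 600, "editorial_curated_vs_sampled_pct": 72.5,
#       "top_slowest": [{"slug": "example-theme", "avg_ms": 140.3, "p95_ms": 210.8,
#                        "builds": 4, "requests": 9, "avg_curated_pct": 66.7}],
#   }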
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Snapshot preview metrics")
ap.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
ap.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
ap.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
ap.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
args = ap.parse_args(argv)
try:
raw = fetch_metrics(args.url)
except urllib.error.URLError as e:
print(f"ERROR: Failed fetching metrics endpoint: {e}", file=sys.stderr)
return 2
summary = summarize(raw, args.limit)
snapshot = {
"captured_at": int(time.time()),
"source": args.url,
"summary": summary,
}
if args.output:
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(json.dumps(snapshot, indent=2, sort_keys=True), encoding="utf-8")
except Exception as e: # pragma: no cover
print(f"ERROR: writing snapshot file failed: {e}", file=sys.stderr)
return 3
if not args.quiet:
print("Preview Metrics Snapshot:")
print(json.dumps(summary, indent=2))
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,349 +0,0 @@
"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).
Runs warm-up plus measured request loops against several theme slugs and prints
aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
establish or validate that the refactor did not introduce a >5% p95 regression.
Usage (ensure the server is running locally, commonly :8080 in docker compose):
python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
--url http://localhost:8080 --warm 1 --limit 12
Theme slug discovery hierarchy (when --theme not provided):
1. Try /themes/index.json (legacy / planned static index)
2. Fall back to /themes/api/themes (current API) and take the first N ids
The discovered slugs are sorted deterministically then truncated to N.
NOTE: This is intentionally minimal (no external deps). For stable comparisons
run with identical parameters pre/post-change and commit the JSON output under
logs/perf/.
"""
from __future__ import annotations
import argparse
import json
import statistics
import time
from typing import Any, Dict, List
import urllib.request
import urllib.error
import sys
from pathlib import Path
def _fetch_json(url: str) -> Dict[str, Any]:
req = urllib.request.Request(url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 local dev
data = resp.read().decode("utf-8", "replace")
return json.loads(data) # type: ignore[return-value]
def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]:
last_error: Exception | None = None
for attempt in range(1, attempts + 1):
try:
return _fetch_json(url)
except Exception as exc: # pragma: no cover - network variability
last_error = exc
if attempt < attempts:
print(json.dumps({ # noqa: T201
"event": "preview_perf_fetch_retry",
"url": url,
"attempt": attempt,
"max_attempts": attempts,
"error": str(exc),
}))
time.sleep(delay * attempt)
else:
raise
raise last_error # pragma: no cover - defensive; should be unreachable
def select_theme_slugs(base_url: str, count: int) -> List[str]:
"""Discover theme slugs for benchmarking.
Attempts legacy static index first, then falls back to live API listing.
"""
errors: List[str] = []
slugs: List[str] = []
# Attempt 1: legacy /themes/index.json
try:
idx = _fetch_json(f"{base_url.rstrip('/')}/themes/index.json")
entries = idx.get("themes") or []
for it in entries:
if not isinstance(it, dict):
continue
slug = it.get("slug") or it.get("id") or it.get("theme_id")
if isinstance(slug, str):
slugs.append(slug)
except Exception as e: # pragma: no cover - network variability
errors.append(f"index.json failed: {e}")
if not slugs:
# Attempt 2: live API listing
try:
listing = _fetch_json(f"{base_url.rstrip('/')}/themes/api/themes")
items = listing.get("items") or []
for it in items:
if not isinstance(it, dict):
continue
tid = it.get("id") or it.get("slug") or it.get("theme_id")
if isinstance(tid, str):
slugs.append(tid)
except Exception as e: # pragma: no cover - network variability
errors.append(f"api/themes failed: {e}")
slugs = sorted(set(slugs))[:count]
if not slugs:
raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(errors) + ")")
return slugs
def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
"""Fetch all theme slugs via paginated /themes/api/themes endpoint.
Uses maximum page size (200) and iterates using offset until no next page.
Returns deterministic sorted unique list of slugs.
"""
slugs: List[str] = []
offset = 0
seen: set[str] = set()
page_attempts = 5
page_delay = 1.2
while True:
url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
data: Dict[str, Any] | None = None
last_error: Exception | None = None
for attempt in range(1, page_attempts + 1):
try:
data = _fetch_json_with_retry(url, attempts=4, delay=0.75)
break
except Exception as exc: # pragma: no cover - network variability
last_error = exc
if attempt < page_attempts:
print(json.dumps({ # noqa: T201
"event": "preview_perf_page_retry",
"offset": offset,
"attempt": attempt,
"max_attempts": page_attempts,
"error": str(exc),
}))
time.sleep(page_delay * attempt)
else:
raise SystemExit(f"Failed fetching themes page offset={offset}: {exc}")
if data is None: # pragma: no cover - defensive
raise SystemExit(f"Failed fetching themes page offset={offset}: {last_error}")
items = data.get("items") or []
for it in items:
if not isinstance(it, dict):
continue
tid = it.get("id") or it.get("slug") or it.get("theme_id")
if isinstance(tid, str) and tid not in seen:
seen.add(tid)
slugs.append(tid)
next_offset = data.get("next_offset")
if not next_offset or next_offset == offset:
break
offset = int(next_offset)
return sorted(slugs)
def percentile(values: List[float], pct: float) -> float:
if not values:
return 0.0
sv = sorted(values)
k = (len(sv) - 1) * pct
f = int(k)
c = min(f + 1, len(sv) - 1)
if f == c:
return sv[f]
d0 = sv[f] * (c - k)
d1 = sv[c] * (k - f)
return d0 + d1
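# Worked example of the linear interpolation (latencies in ms, hypothetical):
#   percentile([12.0, 18.0, 25.0, 40.0], 0.50) -> 21.5    (k=1.5, halfway between 18 and 25)
#   percentile([12.0, 18.0, 25.0, 40.0], 0.95) -> 37.75   (k=2.85, weighted between 25 and 40)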
def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
latencies: List[float] = []
per_slug_counts = {s: 0 for s in slugs}
t_start = time.time()
for i in range(loops):
slug = slugs[i % len(slugs)]
# path_template may contain {slug} and {limit}
try:
rel = path_template.format(slug=slug, limit=limit)
except Exception:
rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
if not rel.startswith('/'):
rel = '/' + rel
url = f"{base_url.rstrip('/')}{rel}"
t0 = time.time()
try:
_fetch_json(url)
except Exception as e:
print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)})) # noqa: T201
continue
ms = (time.time() - t0) * 1000.0
latencies.append(ms)
per_slug_counts[slug] += 1
elapsed = time.time() - t_start
return {
"warm": warm,
"loops": loops,
"slugs": slugs,
"per_slug_requests": per_slug_counts,
"elapsed_s": round(elapsed, 3),
"p50_ms": round(percentile(latencies, 0.50), 2),
"p90_ms": round(percentile(latencies, 0.90), 2),
"p95_ms": round(percentile(latencies, 0.95), 2),
"avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
"count": len(latencies),
"_latencies": latencies, # internal (removed in final result unless explicitly retained)
}
def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
if not latencies:
return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
return {
"count": len(latencies),
"p50_ms": round(percentile(latencies, 0.50), 2),
"p90_ms": round(percentile(latencies, 0.90), 2),
"p95_ms": round(percentile(latencies, 0.95), 2),
"avg_ms": round(statistics.mean(latencies), 2),
}
def main(argv: List[str]) -> int:
ap = argparse.ArgumentParser(description="Theme preview performance benchmark")
ap.add_argument("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
ap.add_argument("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
ap.add_argument("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
ap.add_argument("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
ap.add_argument("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
ap.add_argument("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
ap.add_argument("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
ap.add_argument("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
ap.add_argument("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
ap.add_argument("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
# Hidden flag to detect if user explicitly set --loops (argparse has no direct support, so use sentinel technique)
# We keep original --loops for backwards compatibility; when --all we recompute unless user passed --loops-explicit
ap.add_argument("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
ap.add_argument("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
args = ap.parse_args(argv)
try:
if args.explicit_theme:
slugs = args.explicit_theme
elif args.all:
slugs = fetch_all_theme_slugs(args.url)
else:
slugs = select_theme_slugs(args.url, args.themes)
except SystemExit as e: # pragma: no cover - dependency on live server
print(str(e), file=sys.stderr)
return 2
mode = "all" if args.all else "subset"
total_slugs = len(slugs)
if args.all and not args.loops_explicit:
# Derive loops = passes * total_slugs
args.loops = max(1, args.passes) * total_slugs
print(json.dumps({ # noqa: T201
"event": "preview_perf_start",
"mode": mode,
"total_slugs": total_slugs,
"planned_loops": args.loops,
"passes": args.passes if args.all else None,
}))
# Execution paths:
# 1. Standard subset or single-pass all: warm cycles -> single measured run
# 2. Multi-pass all mode (--all --passes >1): iterate passes capturing per-pass stats (no separate warm loops)
if args.all and args.passes > 1:
pass_results: List[Dict[str, Any]] = []
combined_latencies: List[float] = []
t0_all = time.time()
for p in range(1, args.passes + 1):
r = run_loop(args.url, slugs, len(slugs), args.limit, warm=(p == 1), path_template=args.path_template)
lat = r.pop("_latencies", [])
combined_latencies.extend(lat)
pass_result = {
"pass": p,
"warm": r["warm"],
"elapsed_s": r["elapsed_s"],
"p50_ms": r["p50_ms"],
"p90_ms": r["p90_ms"],
"p95_ms": r["p95_ms"],
"avg_ms": r["avg_ms"],
"count": r["count"],
}
pass_results.append(pass_result)
total_elapsed = round(time.time() - t0_all, 3)
aggregate = _stats_from_latencies(combined_latencies)
result = {
"mode": mode,
"total_slugs": total_slugs,
"passes": args.passes,
"slugs": slugs,
"combined": {
**aggregate,
"elapsed_s": total_elapsed,
},
"passes_results": pass_results,
"cold_pass_p95_ms": pass_results[0]["p95_ms"],
"warm_pass_p95_ms": pass_results[-1]["p95_ms"],
"cold_pass_p50_ms": pass_results[0]["p50_ms"],
"warm_pass_p50_ms": pass_results[-1]["p50_ms"],
}
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
# Optional warm baseline extraction (final pass only; represents warmed steady-state)
if args.extract_warm_baseline:
try:
wb = pass_results[-1]
warm_obj = {
"event": "preview_perf_warm_baseline",
"mode": mode,
"total_slugs": total_slugs,
"warm_baseline": True,
"source_pass": wb["pass"],
"p50_ms": wb["p50_ms"],
"p90_ms": wb["p90_ms"],
"p95_ms": wb["p95_ms"],
"avg_ms": wb["avg_ms"],
"count": wb["count"],
"slugs": slugs,
}
args.extract_warm_baseline.parent.mkdir(parents=True, exist_ok=True)
args.extract_warm_baseline.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
print(json.dumps({ # noqa: T201
"event": "preview_perf_warm_baseline_written",
"path": str(args.extract_warm_baseline),
"p95_ms": wb["p95_ms"],
}))
except Exception as e: # pragma: no cover
print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)})) # noqa: T201
else:
# Warm-up loops first (if requested)
for w in range(args.warm):
run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
result.pop("_latencies", None)
result["slugs"] = slugs
result["mode"] = mode
result["total_slugs"] = total_slugs
if args.all:
result["passes"] = args.passes
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
if args.output:
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
# Ensure we write the final result object (multi-pass already prepared above)
args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
except Exception as e: # pragma: no cover
print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,106 +0,0 @@
"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
then compare against the committed warm baseline with threshold enforcement.
Intended usage (example):
python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
--baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5
Exit codes:
0 success (within threshold)
2 regression (p95 delta > threshold)
3 setup / usage error
Notes:
- Uses --all --passes 1 to create a fresh candidate snapshot that approximates
a warmed steady-state (server should have background refresh / typical load).
- If you prefer a multi-pass run compared on the warm pass only, use --multi-pass or adjust the logic accordingly.
"""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
def _wait_for_service(base_url: str, attempts: int = 12, delay: float = 1.5) -> bool:
health_url = base_url.rstrip("/") + "/healthz"
last_error: Exception | None = None
for attempt in range(1, attempts + 1):
try:
with urllib.request.urlopen(health_url, timeout=5) as resp: # nosec B310 local CI
if 200 <= resp.status < 300:
return True
except urllib.error.HTTPError as exc:
last_error = exc
if 400 <= exc.code < 500 and exc.code != 429:
# Treat permanent client errors (other than rate limit) as fatal
break
except Exception as exc: # pragma: no cover - network variability
last_error = exc
time.sleep(delay * attempt)
print(json.dumps({
"event": "ci_perf_error",
"stage": "startup",
"message": "Service health check failed",
"url": health_url,
"attempts": attempts,
"error": str(last_error) if last_error else None,
}))
return False
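# Backoff illustration: with the defaults (attempts=12, delay=1.5) the sleeps
# grow linearly (1.5s, 3.0s, 4.5s, ...), so a service that never becomes healthy
# is given up on after roughly 1.5 * (1 + 2 + ... + 12) = 117s of waiting plus
# the per-request 5s timeouts.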
def run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Preview performance CI regression gate")
ap.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
ap.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
ap.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
ap.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
ap.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
args = ap.parse_args(argv)
if not args.baseline.exists():
print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)}))
return 3
if not _wait_for_service(args.url):
return 3
# Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast)
# If multi-pass requested, run two passes over all themes so second pass represents warmed steady-state.
passes = "2" if args.multi_pass else "1"
bench_cmd = [sys.executable, "-m", "code.scripts.preview_perf_benchmark", "--url", args.url, "--all", "--passes", passes, "--output", str(args.candidate_output)]
bench_proc = run(bench_cmd)
if bench_proc.returncode != 0:
print(json.dumps({"event":"ci_perf_error","stage":"benchmark","code":bench_proc.returncode,"stderr":bench_proc.stderr}))
return 3
print(bench_proc.stdout)
if not args.candidate_output.exists():
print(json.dumps({"event":"ci_perf_error","message":"Candidate output missing"}))
return 3
compare_cmd = [
sys.executable,
"-m","code.scripts.preview_perf_compare",
"--baseline", str(args.baseline),
"--candidate", str(args.candidate_output),
"--warm-only",
"--p95-threshold", str(args.p95_threshold),
]
cmp_proc = run(compare_cmd)
print(cmp_proc.stdout)
if cmp_proc.returncode == 2:
# Already printed JSON with failure status
return 2
if cmp_proc.returncode != 0:
print(json.dumps({"event":"ci_perf_error","stage":"compare","code":cmp_proc.returncode,"stderr":cmp_proc.stderr}))
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,115 +0,0 @@
"""Compare two preview benchmark JSON result files and emit delta stats.
Usage:
python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json
Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
If multi-pass structures are present (combined & passes_results) those are included.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any, Dict
def load(path: Path) -> Dict[str, Any]:
data = json.loads(path.read_text(encoding="utf-8"))
# Multi-pass result may store stats under combined
if "combined" in data:
core = data["combined"].copy()
# Inject representative fields for uniform comparison
core["p50_ms"] = core.get("p50_ms") or data.get("p50_ms")
core["p90_ms"] = core.get("p90_ms") or data.get("p90_ms")
core["p95_ms"] = core.get("p95_ms") or data.get("p95_ms")
core["avg_ms"] = core.get("avg_ms") or data.get("avg_ms")
data["_core_stats"] = core
else:
data["_core_stats"] = {
k: data.get(k) for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
}
return data
def pct_delta(new: float, old: float) -> float:
if old == 0:
return 0.0
return round(((new - old) / old) * 100.0, 2)
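# Sign convention examples: pct_delta(2.3, 2.0) -> 15.0 (candidate slower, a
# regression) and pct_delta(1.8, 2.0) -> -10.0 (faster); a zero baseline yields
# 0.0 instead of dividing by zero.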
def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
b = baseline["_core_stats"]
c = candidate["_core_stats"]
result = {"baseline_count": b.get("count"), "candidate_count": c.get("count")}
for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
if b.get(k) is not None and c.get(k) is not None:
result[k] = {
"baseline": b[k],
"candidate": c[k],
"delta_pct": pct_delta(c[k], b[k]),
}
# If both have per-pass details include first and last pass p95/p50
if "passes_results" in baseline and "passes_results" in candidate:
result["passes"] = {
"baseline": {
"cold_p95": baseline.get("cold_pass_p95_ms"),
"warm_p95": baseline.get("warm_pass_p95_ms"),
"cold_p50": baseline.get("cold_pass_p50_ms"),
"warm_p50": baseline.get("warm_pass_p50_ms"),
},
"candidate": {
"cold_p95": candidate.get("cold_pass_p95_ms"),
"warm_p95": candidate.get("warm_pass_p95_ms"),
"cold_p50": candidate.get("cold_pass_p50_ms"),
"warm_p50": candidate.get("warm_pass_p50_ms"),
},
}
return result
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
ap.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
ap.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
ap.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
ap.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
args = ap.parse_args(argv)
if not args.baseline.exists():
raise SystemExit(f"Baseline not found: {args.baseline}")
if not args.candidate.exists():
raise SystemExit(f"Candidate not found: {args.candidate}")
baseline = load(args.baseline)
candidate = load(args.candidate)
# If warm-only requested and both have warm pass stats, override _core_stats before compare
if args.warm_only and "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate:
baseline["_core_stats"] = {
"p50_ms": baseline.get("warm_pass_p50_ms"),
"p90_ms": baseline.get("_core_stats", {}).get("p90_ms"), # p90 not tracked per-pass; retain combined
"p95_ms": baseline.get("warm_pass_p95_ms"),
"avg_ms": baseline.get("_core_stats", {}).get("avg_ms"),
"count": baseline.get("_core_stats", {}).get("count"),
}
candidate["_core_stats"] = {
"p50_ms": candidate.get("warm_pass_p50_ms"),
"p90_ms": candidate.get("_core_stats", {}).get("p90_ms"),
"p95_ms": candidate.get("warm_pass_p95_ms"),
"avg_ms": candidate.get("_core_stats", {}).get("avg_ms"),
"count": candidate.get("_core_stats", {}).get("count"),
}
cmp = compare(baseline, candidate)
payload = {"event": "preview_perf_compare", **cmp}
if args.p95_threshold is not None and "p95_ms" in cmp:
delta = cmp["p95_ms"]["delta_pct"]
payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
if delta is not None and delta > args.p95_threshold:
payload["result"] = "fail"
print(json.dumps(payload, indent=2)) # noqa: T201
return 2
payload["result"] = "pass"
print(json.dumps(payload, indent=2)) # noqa: T201
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(__import__('sys').argv[1:]))

View file

@@ -1,91 +0,0 @@
"""Generate warm preview traffic to populate theme preview cache & metrics.
Usage:
python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
--base-url http://localhost:8000 --delay 0.05
Requirements:
- FastAPI server running locally exposing /themes endpoints
- WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible
Strategy:
1. Fetch /themes/fragment/list?limit=COUNT to obtain HTML table.
2. Extract theme slugs via regex on data-theme-id attributes.
3. Issue REPEATS preview fragment requests per slug in order.
4. Print simple timing / status summary.
This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
"""
from __future__ import annotations
import argparse
import re
import time
import urllib.request
import urllib.error
from typing import List
LIST_PATH = "/themes/fragment/list"
PREVIEW_PATH = "/themes/fragment/preview/{slug}"
def fetch(url: str) -> str:
req = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 (local trusted)
return resp.read().decode("utf-8", "replace")
def extract_slugs(html: str, limit: int) -> List[str]:
slugs = []
for m in re.finditer(r'data-theme-id="([^"]+)"', html):
s = m.group(1).strip()
if s and s not in slugs:
slugs.append(s)
if len(slugs) >= limit:
break
return slugs
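# Illustrative extraction (markup is hypothetical but mirrors the data-theme-id
# attribute the regex targets):
#   extract_slugs('<tr data-theme-id="lifegain"></tr><tr data-theme-id="tokens"></tr>', 5)
#   -> ['lifegain', 'tokens']   # order preserved, duplicates skipped, capped at limit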
def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
print(f"[warm] Fetching list: {list_url}")
try:
html = fetch(list_url)
except urllib.error.URLError as e: # pragma: no cover
raise SystemExit(f"Failed fetching list: {e}")
slugs = extract_slugs(html, count)
if not slugs:
raise SystemExit("No theme slugs extracted cannot warm.")
print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs)>8 else ''}")
total_requests = 0
start = time.time()
for r in range(repeats):
print(f"[warm] Pass {r+1}/{repeats}")
for slug in slugs:
url = f"{base_url}{PREVIEW_PATH.format(slug=slug)}"
try:
fetch(url)
except Exception as e: # pragma: no cover
print(f" [warn] Failed {slug}: {e}")
else:
total_requests += 1
if delay:
time.sleep(delay)
dur = time.time() - start
print(f"[warm] Completed {total_requests} preview requests in {dur:.2f}s ({total_requests/dur if dur>0 else 0:.1f} rps)")
print("[warm] Done. Now run metrics snapshot to capture warm p95.")
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Generate warm preview traffic")
ap.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
ap.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
ap.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
ap.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
args = ap.parse_args(argv)
warm(args.base_url.rstrip("/"), args.count, args.repeats, args.delay)
return 0
if __name__ == "__main__": # pragma: no cover
import sys
raise SystemExit(main(sys.argv[1:]))