mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-17 07:00:12 +01:00
378 lines
13 KiB
Python
378 lines
13 KiB
Python
|
|
"""
|
||
|
|
Check if language file keys are complete.

This script checks all language files in the app/appearance/langs/ directory
and finds:
- Missing keys: keys that exist in most files but not in the current file
- Extra keys: keys that don't exist in most files but exist in the current file
- Duplicate keys: keys that appear multiple times in the same file
using statistical methods.

Usage:
    python scripts/check-lang-keys.py
    python scripts/check-lang-keys.py -d app/appearance/langs
    python scripts/check-lang-keys.py --dir app/appearance/langs

Options:
    -d, --dir DIR    Language files directory path (default: app/appearance/langs)
    -h, --help       Show help message and exit

Exit codes:
    0    All language files have complete keys
    1    Some language files have missing, extra, or duplicate keys,
         or no language files could be loaded
|
||
|
|
"""
|
||
|
|
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
from pathlib import Path
|
||
|
|
from argparse import ArgumentParser
|
||
|
|
from collections import defaultdict, Counter
|
||
|
|
|
||
|
|
|
||
|
|
def find_duplicate_keys_recursive(data, prefix="", duplicates=None):
    """Walk a parsed JSON value and gather keys repeated within one object.

    Nested keys are reported in dot notation (``parent.child``). Objects held
    directly in a list get an index suffix on the list's key
    (``parent[0].child``). Lists nested inside lists are not descended into.

    Note: the repeat check relies on ``data.items()`` yielding the same key
    more than once. A plain ``dict`` never does that, so for ordinary parsed
    JSON the result is whatever ``duplicates`` already contained.

    Args:
        data: JSON data (dict, list, or primitive). Only dicts are walked.
        prefix: Dotted path of the enclosing key ("" at the top level).
        duplicates: Set that receives the findings; a new set is created
            when omitted.

    Returns:
        set: The ``duplicates`` set (the same object when one was passed in).
    """
    found = set() if duplicates is None else duplicates
    if not isinstance(data, dict):
        return found

    visited = set()
    for name, child in data.items():
        path = f"{prefix}.{name}" if prefix else name

        # A key already met at this level is a repeat.
        if name in visited:
            found.add(path)
        visited.add(name)

        # Descend into nested objects, and into objects stored in a list.
        if isinstance(child, dict):
            find_duplicate_keys_recursive(child, path, found)
        elif isinstance(child, list):
            for position, element in enumerate(child):
                if isinstance(element, dict):
                    find_duplicate_keys_recursive(element, f"{path}[{position}]", found)

    return found
|
||
|
|
|
||
|
|
|
||
|
|
class _JsonObjectPairs(list):
    """List of ``(key, value)`` pairs of one JSON object, in file order.

    Used as ``object_pairs_hook`` so repeated keys survive parsing; the
    subclass distinguishes a JSON object from a genuine JSON array.
    """


def _collect_duplicate_paths(pairs, prefix, duplicates):
    """Add the dotted path of every repeated key under ``pairs`` to ``duplicates``."""
    seen = set()
    for key, value in pairs:
        full_key = f"{prefix}.{key}" if prefix else key
        if key in seen:
            duplicates.add(full_key)
        seen.add(key)

        # _JsonObjectPairs is itself a list, so it must be tested first.
        if isinstance(value, _JsonObjectPairs):
            _collect_duplicate_paths(value, full_key, duplicates)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                if isinstance(item, _JsonObjectPairs):
                    _collect_duplicate_paths(item, f"{full_key}[{i}]", duplicates)


def find_duplicate_keys(file_path):
    """Find duplicate keys in a JSON file, including nested structures.

    The file is parsed with an ``object_pairs_hook`` that keeps every
    ``(key, value)`` pair. Parsing straight into a ``dict`` would silently
    keep only the last value of a repeated key, hiding the duplicate.

    Args:
        file_path (Path): Language file path

    Returns:
        list: Sorted list of duplicate keys found in the file (using dot
            notation for nested keys, ``key[i].child`` for objects inside a
            list). Empty when the top-level value is not an object or when
            the file cannot be read or parsed (an error line is printed in
            the latter case).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f, object_pairs_hook=_JsonObjectPairs)

        if not isinstance(data, _JsonObjectPairs):
            return []

        duplicates = set()
        _collect_duplicate_paths(data, "", duplicates)
        return sorted(duplicates)
    except Exception as e:
        # Best-effort check: report the problem and let the caller continue.
        print(f"Error: Failed to check duplicate keys in {file_path}: {e}")
        return []
|
||
|
|
|
||
|
|
|
||
|
|
def collect_all_keys(data, prefix=""):
    """Gather every key path found in a nested JSON structure.

    Each key of a dict is recorded under its dotted path. Dict values are
    descended into; for list values, only elements that are dicts are
    descended into, using ``key[index]`` as their prefix (the ``key[index]``
    path itself is not recorded).

    Args:
        data: JSON data (dict, list, or primitive). Anything that is not a
            dict contributes no keys.
        prefix: Dotted path of the enclosing key ("" at the top level).

    Returns:
        set: Set of all keys (using dot notation for nested keys)
    """
    found = set()
    # Explicit worklist of (node, path-so-far); the result is a set, so the
    # visiting order does not matter.
    pending = [(data, prefix)]

    while pending:
        node, path = pending.pop()
        if not isinstance(node, dict):
            continue

        for name, child in node.items():
            dotted = f"{path}.{name}" if path else name
            found.add(dotted)

            if isinstance(child, dict):
                pending.append((child, dotted))
            elif isinstance(child, list):
                pending.extend(
                    (element, f"{dotted}[{position}]")
                    for position, element in enumerate(child)
                    if isinstance(element, dict)
                )

    return found
|
||
|
|
|
||
|
|
|
||
|
|
def _iter_keys_in_file_order(data, prefix=""):
    """Yield the dotted key paths of a parsed JSON object in document order."""
    if not isinstance(data, dict):
        return
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else key
        yield full_key
        if isinstance(value, dict):
            yield from _iter_keys_in_file_order(value, full_key)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                if isinstance(item, dict):
                    yield from _iter_keys_in_file_order(item, f"{full_key}[{i}]")


def get_key_order(file_path):
    """Get the order of keys as they appear in the JSON file.

    The order is taken from the parsed document. ``json`` builds dicts in
    document order and dicts keep insertion order (Python 3.7+), so a
    depth-first walk visits keys in the order they are written. This avoids
    scanning the raw text, where braces, quotes, or colons inside string
    values could be mistaken for structure.

    Args:
        file_path (Path): Language file path

    Returns:
        dict: Dictionary mapping key name (including nested paths such as
            ``parent.child`` and ``parent[0].child``) to its position index,
            counted from 0. A path that occurs more than once keeps the
            position of its first occurrence. Empty when the file cannot be
            read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        key_order = {}
        for full_key in _iter_keys_in_file_order(data):
            # setdefault keeps the first position if a path repeats.
            key_order.setdefault(full_key, len(key_order))
        return key_order
    except Exception:
        # Ordering only affects how results are sorted for display, so a
        # failure here degrades to "no known order" rather than aborting.
        return {}
|
||
|
|
|
||
|
|
|
||
|
|
def load_lang_file(file_path):
    """Read one language file and gather everything the checker needs from it.

    Args:
        file_path (Path): Language file path

    Returns:
        tuple: ``(keys, data, duplicates, key_order)`` where ``keys`` is the
            set of all key paths (nested ones included), ``data`` is the
            parsed JSON, ``duplicates`` is the list of duplicate keys and
            ``key_order`` maps key paths to their position. On any failure an
            error line is printed and ``(None, None, [], {})`` is returned.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)

        # Evaluated left to right: key set, then duplicates, then ordering.
        return (
            collect_all_keys(parsed),
            parsed,
            find_duplicate_keys(file_path),
            get_key_order(file_path),
        )
    except json.JSONDecodeError as err:
        print(f"Error: Failed to parse file {file_path}: {err}")
    except Exception as err:
        print(f"Error: Failed to read file {file_path}: {err}")

    # Both failure paths end here with the same "nothing loaded" result.
    return None, None, [], {}
|
||
|
|
|
||
|
|
|
||
|
|
# Maximum number of extra/missing keys listed per file before truncating.
_MAX_LISTED_KEYS = 10


def _order_sort_key(order):
    """Build a sort key: position in ``order`` first (unknown keys last), then name."""
    def sort_key(key):
        return (order.get(key, float('inf')), key)
    return sort_key


def _print_counted_keys(title, ordered_keys, count_phrase, key_count, total_files):
    """Print a headed list of keys with per-key file counts, truncated to the limit."""
    key_word = "key" if len(ordered_keys) == 1 else "keys"
    print(f" {len(ordered_keys)} {title} {key_word}:")
    for key in ordered_keys[:_MAX_LISTED_KEYS]:
        print(f" - {key} ({count_phrase} {key_count[key]}/{total_files} files)")
    hidden = len(ordered_keys) - _MAX_LISTED_KEYS
    if hidden > 0:
        print(f" ... {hidden} more keys not shown")


def check_lang_keys(langs_dir):
    """Check if language file keys are complete.

    Uses a statistical method: count how many files contain each key, then
    treat a key as "expected" when it occurs in at least half of the files
    (rounded up). For each file this gives:
    - Missing keys: expected keys that the file lacks
    - Extra keys: keys in the file that are not expected
    - Duplicate keys: keys reported as repeated by ``load_lang_file``

    A summary and a per-file report are printed to stdout. Extra and missing
    lists are truncated to ``_MAX_LISTED_KEYS`` entries per file.

    Args:
        langs_dir (str): Language files directory path

    Returns:
        bool: True if no file has missing, extra, or duplicate keys. False
            otherwise, and also when the directory does not exist or no
            ``*.json`` file in it could be loaded.
    """
    langs_path = Path(langs_dir)
    if not langs_path.exists():
        print(f"Error: Directory does not exist: {langs_dir}")
        return False

    # Load all language files; files that fail to load are skipped.
    lang_keys = {}
    duplicate_keys_by_file = {}
    key_order_by_file = {}

    for lang_file in sorted(langs_path.glob("*.json")):
        keys, _data, duplicates, key_order = load_lang_file(lang_file)
        if keys is None:
            continue
        lang_keys[lang_file.name] = keys
        if duplicates:
            duplicate_keys_by_file[lang_file.name] = duplicates
        key_order_by_file[lang_file.name] = key_order

    if not lang_keys:
        print("Error: No language files found")
        return False

    total_files = len(lang_keys)

    # Count how many files contain each key (each file's keys are a set).
    key_count = Counter(key for keys in lang_keys.values() for key in keys)

    # A key present in at least this many files is expected everywhere.
    threshold = (total_files + 1) // 2  # Round up, e.g., 5 files need 3

    expected_keys = {key for key, count in key_count.items() if count >= threshold}
    unexpected_keys = set(key_count) - expected_keys

    # Reference file (file with most keys; first one wins ties) is used to
    # order missing keys, since the file itself has no position for them.
    reference_file = max(lang_keys, key=lambda name: len(lang_keys[name]))
    reference_key_order = key_order_by_file.get(reference_file, {})

    print(f"Checked {total_files} language files")
    print(f"Threshold: keys need to exist in at least {threshold} files to be considered expected")
    print(f"Expected keys: {len(expected_keys)}")
    print(f"Unexpected keys: {len(unexpected_keys)}\n")

    # Collect issues per file: {lang_name: {'missing': set, 'extra': set, 'duplicates': list}}
    file_issues = {}
    for lang_name, keys in lang_keys.items():
        missing = expected_keys - keys
        extra = keys & unexpected_keys
        duplicates = duplicate_keys_by_file.get(lang_name, [])
        if missing or extra or duplicates:
            file_issues[lang_name] = {'missing': missing, 'extra': extra, 'duplicates': duplicates}

    if not file_issues:
        print("All language files have complete keys!")
        return True

    # Report issues grouped by file
    print("Issues found:")
    print(" Missing keys: exist in most files but not in current file")
    print(" Extra keys: don't exist in most files but exist in current file")
    print(" Duplicate keys: keys that appear multiple times in the same file\n")

    by_reference_order = _order_sort_key(reference_key_order)

    for lang_name in sorted(file_issues):
        issues = file_issues[lang_name]
        # Extra and duplicate keys exist in this file, so use its own order.
        by_file_order = _order_sort_key(key_order_by_file.get(lang_name, {}))

        print(f" {lang_name}:")

        if issues['extra']:
            _print_counted_keys(
                "Extra",
                sorted(issues['extra'], key=by_file_order),
                "exists in only",
                key_count,
                total_files,
            )

        if issues['missing']:
            _print_counted_keys(
                "Missing",
                sorted(issues['missing'], key=by_reference_order),
                "exists in",
                key_count,
                total_files,
            )

        duplicates = issues['duplicates']
        if duplicates:
            key_word = "key" if len(duplicates) == 1 else "keys"
            print(f" {len(duplicates)} Duplicate {key_word}:")
            for key in sorted(duplicates, key=by_file_order):
                print(f" - {key}")

        print()

    return False
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Parse command-line options, run the check, and exit with its status.

    The ``--dir`` value is joined onto the project root, taken to be the
    parent of the directory containing this script. An absolute ``--dir``
    replaces the root, because joining a ``Path`` with an absolute path
    yields that absolute path.

    Raises:
        SystemExit: Always. Code 0 when ``check_lang_keys`` returns True,
            code 1 otherwise.
    """
    parser = ArgumentParser(
        description="Check if language file keys are complete"
    )
    parser.add_argument(
        "-d", "--dir",
        default="app/appearance/langs",
        help="Language files directory path (default: app/appearance/langs)"
    )
    args = parser.parse_args()

    # Get project root directory (parent of script directory). Resolve first:
    # if __file__ is a bare relative name, its .parent is "." and the parent
    # of "." is still ".", which would wrongly point at the script directory.
    script_dir = Path(__file__).resolve().parent
    project_root = script_dir.parent
    langs_dir = project_root / args.dir

    success = check_lang_keys(langs_dir)
    # Raise SystemExit directly: the exit() builtin is added by the site
    # module for interactive use and is absent when site is disabled.
    raise SystemExit(0 if success else 1)
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the checker on direct execution only, not on import.
if __name__ == "__main__":
    main()
|
||
|
|
|