Files
draugnorak/scripts/validate_i18n_catalog.py
2026-02-27 23:55:31 -05:00

153 lines
4.6 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Directory holding the translation catalogs (*.ini files).
LANG_DIR = ROOT / "lang"
# Reference catalog that every translation file is validated against.
BASE_FILE = LANG_DIR / "en.template.ini"
# Matches "{name}" placeholders; the single group captures the name, which may
# contain only ASCII letters, digits and underscores.
PLACEHOLDER_PATTERN = re.compile(r"\{([a-zA-Z0-9_]+)\}")
def parse_ini(path: Path) -> Dict[str, str]:
    """Parse an INI-style catalog into a flat ``{full_key: value}`` mapping.

    Blank lines, comment lines (starting with ``;`` or ``#``), lines without
    an ``=`` and entries whose key is empty are ignored.  Each value is passed
    through ``unescape_ini_value``.  If a key occurs more than once, the last
    occurrence wins.

    Key naming rules:
      * before any ``[section]`` header the key is used as-is;
      * inside ``[messages]`` a key that already contains a ``.`` is used
        as-is, otherwise it becomes ``messages.<key>``;
      * inside any other section the key is prefixed with ``<section>.``
        unless it already starts with that prefix.

    Args:
        path: Location of the catalog file to read.

    Returns:
        Mapping from fully qualified key to unescaped value.

    Raises:
        OSError: If the file cannot be read.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but additionally drops a
    # leading byte-order mark.  With plain "utf-8" the BOM stays glued to the
    # first line (str.strip() does not remove U+FEFF), so a first-line section
    # header or comment would not be recognised and keys would end up with
    # the wrong prefix.
    text = path.read_text(encoding="utf-8-sig", errors="replace")

    section = ""
    result: Dict[str, str] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith((";", "#")):
            continue
        if line.startswith("[") and line.endswith("]"):
            section = line[1:-1].strip()
            continue

        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, value = line.partition("=")
        key = key.strip()
        if not separator or not key:
            continue

        if not section:
            full_key = key
        elif section == "messages":
            full_key = key if "." in key else f"{section}.{key}"
        else:
            section_prefix = f"{section}."
            full_key = key if key.startswith(section_prefix) else f"{section_prefix}{key}"
        result[full_key] = unescape_ini_value(value.strip())
    return result
def unescape_ini_value(value: str) -> str:
    r"""Expand backslash escapes in a raw catalog value.

    ``\n``, ``\r`` and ``\t`` become newline, carriage return and tab.  Any
    other escaped character stands for itself (so ``\\`` yields a single
    backslash).  A lone backslash at the very end of the value is kept.
    """
    control_chars = {"n": "\n", "r": "\r", "t": "\t"}
    pieces: List[str] = []
    chars = iter(value)
    for ch in chars:
        if ch != "\\":
            pieces.append(ch)
            continue
        # Consume the character following the backslash, if there is one.
        escaped = next(chars, None)
        if escaped is None:
            # Trailing backslash with nothing to escape: keep it literally.
            pieces.append("\\")
            break
        pieces.append(control_chars.get(escaped, escaped))
    return "".join(pieces)
def placeholders(value: str) -> Set[str]:
    """Return the distinct placeholder names that PLACEHOLDER_PATTERN captures in *value*."""
    return {match.group(1) for match in PLACEHOLDER_PATTERN.finditer(value)}
def validate_language(base: Dict[str, str], target_path: Path) -> Tuple[List[str], List[str], List[str]]:
    """Compare the catalog at *target_path* with the parsed *base* catalog.

    Returns a ``(missing, extra, placeholder_issues)`` tuple:
      * ``missing`` - sorted keys present in *base* but not in the target;
      * ``extra`` - sorted keys present in the target but not in *base*;
      * ``placeholder_issues`` - one message per shared key (in sorted key
        order) whose set of placeholders differs between the two catalogs.
    """
    translated = parse_ini(target_path)
    base_keys = base.keys()
    translated_keys = translated.keys()

    missing = sorted(base_keys - translated_keys)
    extra = sorted(translated_keys - base_keys)

    placeholder_issues: List[str] = []
    for key in sorted(base_keys & translated_keys):
        expected = placeholders(base[key])
        found = placeholders(translated[key])
        if expected == found:
            continue
        placeholder_issues.append(
            f"{key}: expected {sorted(expected)}, found {sorted(found)}"
        )
    return missing, extra, placeholder_issues
def _print_capped(label: str, entries: List[str], limit: int = 20) -> None:
    """Print a headed list of *entries*, showing at most *limit* of them."""
    if not entries:
        return
    print(f" {label} ({len(entries)}):")
    for entry in entries[:limit]:
        print(f" - {entry}")
    overflow = len(entries) - limit
    if overflow > 0:
        print(f" ... and {overflow} more")


def main() -> int:
    """Validate every translation catalog in LANG_DIR against BASE_FILE.

    Each ``*.ini`` file other than ``en.ini`` and ``en.template.ini`` is
    checked and reported as OK, WARN or FAIL.  Missing keys and placeholder
    mismatches always fail; extra keys fail unless ``--warn-extra`` is given
    on the command line.

    Returns:
        2 if the base template does not exist, 1 if any file failed,
        0 otherwise (including when there are no translation files).
    """
    warn_extra_only = "--warn-extra" in sys.argv[1:]

    if not BASE_FILE.exists():
        print(f"Missing base template: {BASE_FILE}")
        return 2
    base = parse_ini(BASE_FILE)

    skipped_names = {"en.ini", "en.template.ini"}
    language_files = sorted(
        candidate for candidate in LANG_DIR.glob("*.ini") if candidate.name not in skipped_names
    )
    if not language_files:
        print("No translation files found (expected lang/<code>.ini).")
        return 0

    failed = False
    for path in language_files:
        missing, extra, placeholder_issues = validate_language(base, path)
        if not (missing or extra or placeholder_issues):
            print(f"{path.name}: OK")
            continue

        # Extra keys only count as blocking when --warn-extra was not given.
        hard_errors = bool(missing or placeholder_issues)
        blocking = hard_errors or (bool(extra) and not warn_extra_only)
        if blocking:
            failed = True
        status = "FAIL" if blocking else "WARN"
        print(f"{path.name}: {status}")

        _print_capped("Missing keys", missing)
        _print_capped("Extra keys", extra)
        if extra and warn_extra_only and not hard_errors:
            print(" Note: extra keys are warnings and do not fail validation.")
        _print_capped("Placeholder mismatches", placeholder_issues)

    return int(failed)
# Run the validator only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())