#!/usr/bin/env python3
"""Audit ``.nvgt`` sources for string literals that bypass the translation wrappers."""
from __future__ import annotations

import re
import sys
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set, Tuple

# Directory two levels up from this file; every reported path is relative to it.
ROOT = Path(__file__).resolve().parents[1]
# One approved finding key per line; see Finding.key() for the key format.
ALLOWLIST_PATH = ROOT / "scripts" / "i18n_audit_allowlist.txt"

# Any path with one of these directory names among its parts is not scanned.
SKIP_DIR_NAMES = {".git", "bloodshed", "docs", "skills", "nvgt-git", "libstorm-nvgt"}
SKIP_FILE_NAMES = {"crash.log"}

# `<receiver>.insert_last(...)` is only audited when the receiver name contains
# one of these substrings (case-insensitive).
INSERT_LAST_CONTEXT_HINTS = (
    "option",
    "label",
    "line",
    "prompt",
    "instruction",
    "intro",
    "reward",
    "message",
    "title",
    "menu",
)

# An expression that calls any of these is treated as already translated.
TRANSLATION_WRAPPERS = (
    "tr(",
    "trf(",
    "trn(",
    "i18n_translate_speech_message(",
    "i18n_lookup_key_with_fallback(",
    "speech_history_transform_message(",
    "get_barricade_option_text(",
    "i18n_text(",
)

# Function call checks for call arguments that must be translation-wrapped when they
# contain literals. Keep this conservative and focused on user-facing text paths.
# Maps function name -> zero-based argument indexes to audit.
ARG_CHECKS: Dict[str, List[int]] = {
    "screen_reader_speak": [0],
    "menu_run_simple": [0],
    "text_reader": [0, 1],
    "text_reader_lines": [1],
    "text_reader_file": [1],
    "file_viewer": [0, 1],
    "file_viewer_lines": [1],
    "file_viewer_file": [1],
}

# Variable names whose `name = <expr>;` assignments are audited.
ASSIGNMENT_LHS_CHECKS = (
    "intro_text",
)


class Finding:
    """One suspected untranslated literal at a specific source location."""

    def __init__(self, path: Path, line: int, context: str, expression: str):
        """Store the location and the offending expression.

        Args:
            path: File containing the expression; must be located under ROOT
                for key() to succeed.
            line: 1-based line number of the call or assignment.
            context: Short label for the kind of site, e.g. "text_reader[0]".
            expression: Source text of the expression; surrounding whitespace
                is stripped.
        """
        self.path = path
        self.line = line
        self.context = context
        self.expression = expression.strip()

    def key(self) -> str:
        """Return the "<posix path relative to ROOT>:<line>:<context>" identifier.

        Raises:
            ValueError: If the path is not located under ROOT.
        """
        return f"{self.path.relative_to(ROOT).as_posix()}:{self.line}:{self.context}"


def iter_nvgt_files() -> List[Path]:
    """Return the sorted, de-duplicated list of ``.nvgt`` files to audit.

    The explicit entry points are included when they exist. Every ``*.nvgt``
    file under the source roots is added unless one of its directory parts is
    in SKIP_DIR_NAMES or its file name is in SKIP_FILE_NAMES.
    """
    entrypoints = [ROOT / "draugnorak.nvgt", ROOT / "src" / "sound_settings.nvgt"]
    files: List[Path] = [entry for entry in entrypoints if entry.exists()]

    # NOTE(review): "libstorm-nvgt" is also listed in SKIP_DIR_NAMES, so every
    # file found under that root is filtered out below. Kept as a root so the
    # skip list remains the single switch that controls it.
    source_roots = [ROOT / "src", ROOT / "libstorm-nvgt"]
    for source_root in source_roots:
        if not source_root.exists():
            continue
        for path in source_root.rglob("*.nvgt"):
            rel = path.relative_to(ROOT)
            if any(part in SKIP_DIR_NAMES for part in rel.parts):
                continue
            if path.name in SKIP_FILE_NAMES:
                continue
            files.append(path)

    # Entry points may also be discovered by the recursive walk; drop duplicates.
    return sorted(set(files))
def load_allowlist() -> Set[str]:
    """Return the finding keys listed in ALLOWLIST_PATH.

    Blank lines and lines starting with "#" are ignored. A missing allowlist
    file yields an empty set.
    """
    if not ALLOWLIST_PATH.exists():
        return set()
    raw_lines = ALLOWLIST_PATH.read_text(encoding="utf-8", errors="replace").splitlines()
    stripped = (raw_line.strip() for raw_line in raw_lines)
    return {line for line in stripped if line and not line.startswith("#")}


def is_identifier_char(ch: str) -> bool:
    """Return True when *ch* is alphanumeric or an underscore."""
    return ch.isalnum() or ch == "_"


def read_identifier_backward(text: str, before_index: int) -> str:
    """Return the identifier that ends at or before *before_index*.

    Whitespace immediately left of *before_index* is skipped first. Returns ""
    when the first non-space character found is not an identifier character.
    """
    i = before_index
    while i >= 0 and text[i].isspace():
        i -= 1
    end = i
    while i >= 0 and is_identifier_char(text[i]):
        i -= 1
    start = i + 1
    if end < start:
        return ""
    return text[start : end + 1]


def find_matching_paren(text: str, open_index: int) -> int:
    """Return the index of the ")" that closes the "(" at *open_index*, or -1.

    Parentheses inside double-quoted string literals (with backslash escapes)
    are ignored.
    """
    depth = 0
    in_string = False
    escape = False
    for i in range(open_index, len(text)):
        ch = text[i]
        if in_string:
            if escape:
                escape = False
            elif ch == "\\":
                escape = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return i
    return -1


def split_top_level(expr: str, delimiter: str) -> List[str]:
    """Split *expr* on *delimiter*, ignoring delimiters nested in brackets or strings.

    Nesting is tracked for (), [] and {}; unmatched closers never push a depth
    below zero. The pieces are returned unstripped, and an expression without
    any top-level delimiter comes back as a single-element list.
    """
    openers = {"(": 0, "[": 0, "{": 0}  # current nesting depth per bracket kind
    closer_to_opener = {")": "(", "]": "[", "}": "{"}
    parts: List[str] = []
    in_string = False
    escape = False
    start = 0
    for i, ch in enumerate(expr):
        if in_string:
            if escape:
                escape = False
            elif ch == "\\":
                escape = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
            continue
        if ch in openers:
            openers[ch] += 1
            continue
        if ch in closer_to_opener:
            opener = closer_to_opener[ch]
            openers[opener] = max(0, openers[opener] - 1)
            continue
        if ch == delimiter and not any(openers.values()):
            parts.append(expr[start:i])
            start = i + 1
    parts.append(expr[start:])
    return parts


def line_number_for_index(text: str, index: int) -> int:
    """Return the 1-based line number of character offset *index* in *text*."""
    return text.count("\n", 0, index) + 1


def has_string_literal(expr: str) -> bool:
    """Return True when *expr* contains at least one terminated string literal."""
    return len(extract_string_literals(expr)) > 0


def extract_string_literals(expr: str) -> List[str]:
    """Return the contents of every terminated double-quoted literal in *expr*.

    A backslash escape contributes the escaped character without the
    backslash. A literal that is still open at the end of *expr* is dropped.
    """
    literals: List[str] = []
    in_string = False
    escape = False
    current: List[str] = []
    for ch in expr:
        if not in_string:
            if ch == '"':
                in_string = True
            continue
        if escape:
            current.append(ch)
            escape = False
        elif ch == "\\":
            escape = True
        elif ch == '"':
            in_string = False
            literals.append("".join(current))
            current = []
        else:
            current.append(ch)
    return literals


def has_meaningful_literal(expr: str) -> bool:
    """Return True when *expr* holds a literal that looks like user-facing copy.

    Empty literals, literals made only of whitespace/punctuation, and
    lowercase key-like literals are not considered meaningful.
    """
    for literal in extract_string_literals(expr):
        if literal == "":
            continue
        if re.fullmatch(r"[\s:,.!?;()\-\[\]/+]*", literal):
            continue
        if re.fullmatch(r"[a-z0-9_.-]+", literal):
            # Translation keys and identifiers are not user-facing copy.
            continue
        return True
    return False


def is_translated_expression(expr: str, wrappers: Optional[Iterable[str]] = None) -> bool:
    """Return True when *expr* calls one of the translation wrapper functions.

    Args:
        expr: Source text of the expression to inspect.
        wrappers: Wrapper names, each optionally ending in "(". Defaults to
            TRANSLATION_WRAPPERS.

    The wrapper name must start at an identifier boundary. A plain substring
    test would accept ``name.substr(0, 3)`` or ``to_str(x)`` as a call to
    ``tr(`` and hide untranslated literals.
    """
    if wrappers is None:
        wrappers = TRANSLATION_WRAPPERS
    for wrapper in wrappers:
        name = wrapper.rstrip("(").strip()
        if not name:
            continue
        # Whitespace between the name and "(" is allowed.
        if re.search(rf"(?<!\w){re.escape(name)}\s*\(", expr):
            return True
    return False


def should_check_insert_last(receiver: str) -> bool:
    """Return True when *receiver* contains one of INSERT_LAST_CONTEXT_HINTS."""
    receiver_lower = receiver.lower()
    if not receiver_lower:
        return False
    return any(hint in receiver_lower for hint in INSERT_LAST_CONTEXT_HINTS)


def add_finding(findings: List["Finding"], path: Path, line: int, context: str, expr: str) -> None:
    """Append a new Finding built from the given location and expression."""
    findings.append(Finding(path, line, context, expr))


def check_call_args(
    path: Path,
    line: int,
    function_name: str,
    receiver: str,
    args: List[str],
    findings: List["Finding"],
    translated_arrays: Set[str],
) -> None:
    """Record a finding for each audited argument that holds an unwrapped literal.

    Args:
        path: File being scanned.
        line: 1-based line of the call.
        function_name: Name of the called function or method.
        receiver: Identifier left of the "." for method calls, else "".
        args: Top-level argument expressions of the call.
        findings: Output list; new findings are appended to it.
        translated_arrays: Array names that the file translates in place;
            ``insert_last`` on these receivers is not reported.
    """
    if function_name == "insert_last":
        if not should_check_insert_last(receiver):
            return
        if receiver in translated_arrays:
            return
        if not args:
            return
        expr = args[0]
        if has_meaningful_literal(expr) and not is_translated_expression(expr):
            add_finding(findings, path, line, f"{receiver}.insert_last", expr)
        return

    target_indexes = ARG_CHECKS.get(function_name)
    if not target_indexes:
        return
    for arg_index in target_indexes:
        if arg_index >= len(args):
            continue
        expr = args[arg_index]
        if has_meaningful_literal(expr) and not is_translated_expression(expr):
            add_finding(findings, path, line, f"{function_name}[{arg_index}]", expr)


def iter_assignment_expressions(text: str, lhs: str) -> Iterable[Tuple[int, str]]:
    """Yield ``(offset, expression)`` for each ``lhs = expression;`` in *text*.

    The expression runs up to the first ";" on the same line. The "=" must
    not be followed by another "=", so equality comparisons such as
    ``lhs == "x"`` are not mistaken for assignments.
    """
    pattern = re.compile(rf"\b{re.escape(lhs)}\s*=(?!=)\s*(.+?);", re.MULTILINE)
    for match in pattern.finditer(text):
        yield match.start(), match.group(1)


def check_assignment_literals(path: Path, text: str, findings: List["Finding"]) -> None:
    """Record findings for unwrapped literals assigned to ASSIGNMENT_LHS_CHECKS names."""
    for lhs in ASSIGNMENT_LHS_CHECKS:
        for offset, expr in iter_assignment_expressions(text, lhs):
            if not has_meaningful_literal(expr):
                continue
            if is_translated_expression(expr):
                continue
            line = line_number_for_index(text, offset)
            add_finding(findings, path, line, f"assign:{lhs}", expr)


def skip_quoted(text: str, open_index: int) -> int:
    """Return the index just past the literal whose opening quote is at *open_index*.

    The closing quote is the next unescaped occurrence of the same quote
    character. Returns ``len(text)`` when the literal is never closed.
    """
    quote = text[open_index]
    length = len(text)
    i = open_index + 1
    while i < length:
        ch = text[i]
        if ch == "\\":
            i += 2  # skip the escaped character as well
            continue
        if ch == quote:
            return i + 1
        i += 1
    return length


def mask_comments(text: str) -> str:
    """Return *text* with ``//`` and ``/* */`` comments replaced by spaces.

    The result has the same length and the same newline positions as the
    input, so offsets and line numbers computed on it are valid for the
    original text. Comment markers inside quoted literals are left alone;
    both quote styles are tracked here only so that text such as a URL inside
    a literal is never mistaken for a comment (assumes the scanned language
    uses C-style comments -- confirm if other syntaxes appear).
    """
    chars = list(text)
    length = len(text)
    i = 0
    while i < length:
        ch = text[i]
        if ch == '"' or ch == "'":
            i = skip_quoted(text, i)
            continue
        if ch == "/" and i + 1 < length:
            following = text[i + 1]
            if following == "/":
                end = text.find("\n", i)
                if end < 0:
                    end = length
            elif following == "*":
                end = text.find("*/", i + 2)
                end = length if end < 0 else end + 2
            else:
                end = -1
            if end >= 0:
                for k in range(i, end):
                    if text[k] != "\n":
                        chars[k] = " "
                i = end
                continue
        i += 1
    return "".join(chars)


def iter_call_sites(code: str) -> Iterable[Tuple[int, str, str, List[str]]]:
    """Yield ``(offset, name, receiver, args)`` for every call site in *code*.

    A call site is an identifier followed by "(" whose closing ")" is not
    followed by "{" (that shape is treated as a declaration or a block
    header). *receiver* is the identifier left of a "." before the name, or
    "". *args* are the unstripped top-level argument expressions.

    Scanning resumes just inside the opening parenthesis, so calls nested in
    arguments or in ``if (...)`` / ``while (...)`` conditions are visited too.
    Double-quoted literals are skipped so their contents are never read as
    code, and an identifier whose "(" is never closed is skipped instead of
    ending the scan.
    """
    length = len(code)
    i = 0
    while i < length:
        ch = code[i]
        if ch == '"':
            i = skip_quoted(code, i)
            continue
        if not is_identifier_char(ch):
            i += 1
            continue

        start = i
        while i < length and is_identifier_char(code[i]):
            i += 1
        name = code[start:i]

        open_index = i
        while open_index < length and code[open_index].isspace():
            open_index += 1
        if open_index >= length or code[open_index] != "(":
            continue

        # Whatever this turns out to be, keep scanning inside the parentheses.
        i = open_index + 1
        close = find_matching_paren(code, open_index)
        if close < 0:
            continue

        tail = close + 1
        while tail < length and code[tail].isspace():
            tail += 1
        if tail < length and code[tail] == "{":
            # Function/method declaration or block header, not a call site.
            continue

        receiver = ""
        k = start - 1
        while k >= 0 and code[k].isspace():
            k -= 1
        if k >= 0 and code[k] == ".":
            receiver = read_identifier_backward(code, k - 1)

        yield start, name, receiver, split_top_level(code[open_index + 1 : close], ",")


def scan_file(path: Path) -> List["Finding"]:
    """Return every finding for the file at *path*.

    Comments are blanked out before scanning. Assignment findings come first,
    followed by call findings in source order.
    """
    findings: List["Finding"] = []
    text = path.read_text(encoding="utf-8", errors="replace")
    code = mask_comments(text)
    translated_arrays = set(
        re.findall(r"i18n_translate_string_array_in_place\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", code)
    )
    check_assignment_literals(path, code, findings)
    for offset, name, receiver, args in iter_call_sites(code):
        line = line_number_for_index(code, offset)
        check_call_args(path, line, name, receiver, args, findings, translated_arrays)
    return findings


def summarize_expression(expr: str) -> str:
    """Collapse whitespace runs in *expr* and truncate it to 120 characters."""
    collapsed = " ".join(expr.split())
    if len(collapsed) > 120:
        return collapsed[:117] + "..."
    return collapsed


def main() -> int:
    """Scan all files, print findings not in the allowlist, and return the exit code.

    Returns:
        0 when no violations remain after allowlist filtering, otherwise 1.
    """
    allowlist = load_allowlist()
    all_findings: List["Finding"] = []
    for nvgt_file in iter_nvgt_files():
        all_findings.extend(scan_file(nvgt_file))

    filtered = [f for f in all_findings if f.key() not in allowlist]
    filtered.sort(key=lambda item: (item.path.as_posix(), item.line, item.context))

    if not filtered:
        print("No untranslated-string violations found.")
        return 0

    print(f"Found {len(filtered)} untranslated-string violations:")
    for finding in filtered:
        rel = finding.path.relative_to(ROOT).as_posix()
        print(f"{rel}:{finding.line}: {finding.context}: {summarize_expression(finding.expression)}")
    print("\nAdd approved exceptions to scripts/i18n_audit_allowlist.txt if needed.")
    return 1


if __name__ == "__main__":
    sys.exit(main())