Files
draugnorak/scripts/audit_untranslated_strings.py
2026-02-27 23:55:31 -05:00

402 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set, Tuple
# Repository root: this script lives one directory below it.
ROOT = Path(__file__).resolve().parents[1]
# Optional file listing approved exceptions, one finding key per line.
ALLOWLIST_PATH = ROOT.joinpath("scripts", "i18n_audit_allowlist.txt")

# Any path containing one of these components is excluded from the scan.
SKIP_DIR_NAMES = {
    ".git",
    "bloodshed",
    "docs",
    "skills",
    "nvgt-git",
    "libstorm-nvgt",
}
# Individual file names that are never scanned.
SKIP_FILE_NAMES = {"crash.log"}

# `insert_last` calls are only audited when the receiver's name contains one of
# these fragments (compared case-insensitively).
INSERT_LAST_CONTEXT_HINTS = (
    "option",
    "label",
    "line",
    "prompt",
    "instruction",
    "intro",
    "reward",
    "message",
    "title",
    "menu",
)

# An expression containing a call to any of these is treated as translated.
TRANSLATION_WRAPPERS = (
    "tr(",
    "trf(",
    "trn(",
    "i18n_translate_speech_message(",
    "i18n_lookup_key_with_fallback(",
    "speech_history_transform_message(",
    "get_barricade_option_text(",
    "i18n_text(",
)

# Function name -> zero-based positions of the arguments that must be
# translation-wrapped when they contain literals. Keep this conservative and
# focused on user-facing text paths.
ARG_CHECKS: Dict[str, List[int]] = {
    "screen_reader_speak": [0],
    "menu_run_simple": [0],
    "text_reader": [0, 1],
    "text_reader_lines": [1],
    "text_reader_file": [1],
    "file_viewer": [0, 1],
    "file_viewer_lines": [1],
    "file_viewer_file": [1],
}

# Variables whose assigned right-hand side is audited for raw literals.
ASSIGNMENT_LHS_CHECKS = ("intro_text",)
class Finding:
    """One suspected untranslated string found in a source file."""

    def __init__(self, path: Path, line: int, context: str, expression: str):
        """Store the location and the offending expression.

        The expression is kept with surrounding whitespace stripped.
        """
        self.path = path
        self.line = line
        self.context = context
        self.expression = expression.strip()

    def key(self) -> str:
        """Return the ``<relative path>:<line>:<context>`` identifier.

        The path is made relative to ``ROOT`` and rendered with forward
        slashes.
        """
        relative = self.path.relative_to(ROOT).as_posix()
        return f"{relative}:{self.line}:{self.context}"
def iter_nvgt_files() -> List[Path]:
    """Return the sorted, de-duplicated list of ``.nvgt`` files to audit.

    The two fixed entry points are included when they exist. Every ``*.nvgt``
    file below the source roots is added unless its path (relative to
    ``ROOT``) contains a component listed in ``SKIP_DIR_NAMES`` or its file
    name is listed in ``SKIP_FILE_NAMES``.
    """
    collected: Set[Path] = set()

    for entry in (ROOT / "draugnorak.nvgt", ROOT / "src" / "sound_settings.nvgt"):
        if entry.exists():
            collected.add(entry)

    for source_root in (ROOT / "src", ROOT / "libstorm-nvgt"):
        if not source_root.exists():
            continue
        for candidate in source_root.rglob("*.nvgt"):
            relative_parts = candidate.relative_to(ROOT).parts
            if not SKIP_DIR_NAMES.isdisjoint(relative_parts):
                continue
            if candidate.name in SKIP_FILE_NAMES:
                continue
            collected.add(candidate)

    return sorted(collected)
def load_allowlist() -> Set[str]:
    """Read the approved finding keys from ``ALLOWLIST_PATH``.

    Returns an empty set when the file does not exist. Blank lines and lines
    starting with ``#`` (after stripping whitespace) are ignored; every other
    line is returned stripped.
    """
    if not ALLOWLIST_PATH.exists():
        return set()
    content = ALLOWLIST_PATH.read_text(encoding="utf-8", errors="replace")
    entries = (raw.strip() for raw in content.splitlines())
    return {entry for entry in entries if entry and not entry.startswith("#")}
def is_identifier_char(ch: str) -> bool:
    """Return True when *ch* is an underscore or is alphanumeric."""
    if ch == "_":
        return True
    return ch.isalnum()
def read_identifier_backward(text: str, before_index: int) -> str:
    """Return the identifier that ends at or before *before_index*.

    Whitespace at and before *before_index* is skipped first, then identifier
    characters are collected while walking towards the start of *text*.
    Returns an empty string when no identifier character is found there.
    """
    end = before_index
    while end >= 0 and text[end].isspace():
        end -= 1
    start = end
    while start >= 0 and is_identifier_char(text[start]):
        start -= 1
    # `start` now sits one position before the identifier (or equals `end`
    # when there is none, which makes the slice empty).
    return text[start + 1 : end + 1]
def find_matching_paren(text: str, open_index: int) -> int:
    """Return the index of the ``)`` that balances the ``(`` at *open_index*.

    Parentheses inside double-quoted string literals are ignored, and a
    backslash inside a literal escapes the following character. Returns -1
    when the end of *text* is reached without the nesting depth returning
    to zero.
    """
    depth = 0
    inside_literal = False
    escaped = False
    for pos in range(open_index, len(text)):
        current = text[pos]
        if inside_literal:
            if escaped:
                escaped = False
            elif current == "\\":
                escaped = True
            elif current == '"':
                inside_literal = False
        elif current == '"':
            inside_literal = True
        elif current == "(":
            depth += 1
        elif current == ")":
            depth -= 1
            if depth == 0:
                return pos
    return -1
def split_top_level(expr: str, delimiter: str) -> List[str]:
    """Split *expr* on *delimiter* wherever it is not nested.

    A delimiter is ignored inside double-quoted string literals (backslash
    escapes the next character) and inside ``()``, ``[]`` or ``{}``. An
    unbalanced closing bracket never drives a nesting depth below zero. The
    pieces are returned unstripped, and there is always at least one piece.
    """
    opener_for = {")": "(", "]": "[", "}": "{"}
    nesting = {"(": 0, "[": 0, "{": 0}
    pieces: List[str] = []
    inside_literal = False
    escaped = False
    piece_start = 0

    for pos, ch in enumerate(expr):
        if inside_literal:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_literal = False
        elif ch == '"':
            inside_literal = True
        elif ch in nesting:
            nesting[ch] += 1
        elif ch in opener_for:
            opener = opener_for[ch]
            nesting[opener] = max(0, nesting[opener] - 1)
        elif ch == delimiter and not any(nesting.values()):
            pieces.append(expr[piece_start:pos])
            piece_start = pos + 1

    pieces.append(expr[piece_start:])
    return pieces
def line_number_for_index(text: str, index: int) -> int:
    """Return the 1-based line number of the character at *index* in *text*."""
    newlines_before = text.count("\n", 0, index)
    return newlines_before + 1
def has_string_literal(expr: str) -> bool:
    """Return True when *expr* contains at least one closed string literal."""
    return bool(extract_string_literals(expr))
def extract_string_literals(expr: str) -> List[str]:
    """Return the contents of every closed double-quoted literal in *expr*.

    Inside a literal a backslash is dropped and the character after it is
    kept verbatim. A literal that is still open when *expr* ends is
    discarded.
    """
    found: List[str] = []
    buffer: Optional[List[str]] = None  # None while outside a literal
    escaped = False

    for ch in expr:
        if buffer is None:
            if ch == '"':
                buffer = []
            continue
        if escaped:
            buffer.append(ch)
            escaped = False
        elif ch == "\\":
            escaped = True
        elif ch == '"':
            found.append("".join(buffer))
            buffer = None
        else:
            buffer.append(ch)

    return found
def has_meaningful_literal(expr: str) -> bool:
    """Return True when *expr* holds a literal that looks like user-facing copy.

    A literal is ignored when it is empty, consists only of whitespace and
    punctuation, or is made up solely of lowercase letters, digits, ``_``,
    ``.`` and ``-`` (translation keys and identifiers are not user-facing
    copy).
    """
    # The first pattern also matches the empty string, which covers empty literals.
    punctuation_only = re.compile(r"[\s:,.!?;()\-\[\]/+]*")
    key_like = re.compile(r"[a-z0-9_.-]+")
    return any(
        not punctuation_only.fullmatch(literal) and not key_like.fullmatch(literal)
        for literal in extract_string_literals(expr)
    )
def is_translated_expression(expr: str, wrappers: Optional[Iterable[str]] = None) -> bool:
    """Return True when *expr* contains a call to a translation wrapper.

    Args:
        expr: Source expression to inspect.
        wrappers: Wrapper spellings to look for, each normally ending in
            ``(`` (for example ``"tr("``). Defaults to
            ``TRANSLATION_WRAPPERS``.

    A wrapper only counts when its name starts on an identifier boundary, so
    ``substr("...")`` or ``str(x)`` is not mistaken for a call to ``tr``.
    Whitespace between the wrapper name and its opening parenthesis is
    allowed.
    """
    candidates = TRANSLATION_WRAPPERS if wrappers is None else wrappers

    alternatives: List[str] = []
    for wrapper in candidates:
        spelling = wrapper.strip()
        if spelling.endswith("("):
            name = spelling[:-1].rstrip()
            if name:
                alternatives.append(re.escape(name) + r"\s*\(")
        elif spelling:
            alternatives.append(re.escape(spelling))
    if not alternatives:
        return False

    # The lookbehind rejects matches that are merely the tail of a longer
    # identifier.
    pattern = r"(?<!\w)(?:" + "|".join(alternatives) + ")"
    return re.search(pattern, expr) is not None
def should_check_insert_last(receiver: str) -> bool:
    """Return True when *receiver* looks like a container of user-facing text.

    The receiver name is lowercased and must contain at least one fragment
    from ``INSERT_LAST_CONTEXT_HINTS``. An empty receiver is never checked.
    """
    lowered = receiver.lower()
    if not lowered:
        return False
    for hint in INSERT_LAST_CONTEXT_HINTS:
        if hint in lowered:
            return True
    return False
def add_finding(findings: List[Finding], path: Path, line: int, context: str, expr: str) -> None:
    """Append a new ``Finding`` built from the given location and expression to *findings*."""
    finding = Finding(path, line, context, expr)
    findings.append(finding)
def check_call_args(path: Path, line: int, function_name: str, receiver: str, args: List[str], findings: List[Finding],
                    translated_arrays: Set[str]) -> None:
    """Record a finding for each audited argument that holds raw user-facing text.

    ``insert_last`` calls are audited on their first argument, but only when
    the receiver passes ``should_check_insert_last`` and is not listed in
    *translated_arrays*. Every other function is audited at the argument
    positions listed for it in ``ARG_CHECKS``; functions not listed there are
    ignored.
    """

    def is_violation(expression: str) -> bool:
        # A meaningful literal with no translation wrapper anywhere in the expression.
        return has_meaningful_literal(expression) and not is_translated_expression(expression)

    if function_name == "insert_last":
        eligible = (
            should_check_insert_last(receiver)
            and receiver not in translated_arrays
            and bool(args)
        )
        if eligible and is_violation(args[0]):
            add_finding(findings, path, line, f"{receiver}.insert_last", args[0])
        return

    for position in ARG_CHECKS.get(function_name) or ():
        if position < len(args) and is_violation(args[position]):
            add_finding(findings, path, line, f"{function_name}[{position}]", args[position])
def check_assignment_literals(path: Path, text: str, findings: List[Finding]) -> None:
    """Record assignments to audited variables whose value holds raw literals.

    For each name in ``ASSIGNMENT_LHS_CHECKS`` every ``name = <expr>;``
    statement in *text* is examined. A finding with context ``assign:<name>``
    is added when ``<expr>`` contains a meaningful literal and no translation
    wrapper.

    The right-hand side is read up to the first ``;`` that is outside a
    double-quoted literal, so a semicolon inside the text does not truncate
    the expression. ``==`` comparisons are not treated as assignments. The
    expression itself must not span a line break.
    """
    # Either a complete string literal (with backslash escapes) or any
    # character that is not a quote, a semicolon or a newline.
    rhs = r'((?:"(?:\\.|[^"\\\n])*"|[^;"\n])+)'
    for lhs in ASSIGNMENT_LHS_CHECKS:
        # `(?!=)` keeps `lhs == ...` from being read as an assignment.
        pattern = re.compile(rf"\b{re.escape(lhs)}\s*=(?!=)\s*" + rhs + ";")
        for match in pattern.finditer(text):
            expr = match.group(1)
            if not has_meaningful_literal(expr):
                continue
            if is_translated_expression(expr):
                continue
            line = line_number_for_index(text, match.start())
            add_finding(findings, path, line, f"assign:{lhs}", expr)
def _skip_string_or_comment(text: str, index: int) -> int:
    """Return the index just past a string literal or comment starting at *index*.

    Recognises double-quoted literals (backslash escapes the next character),
    ``//`` line comments and ``/* */`` block comments. Returns *index*
    unchanged when none of them starts there, and ``len(text)`` when the
    construct is never closed.
    """
    size = len(text)
    if text[index] == '"':
        pos = index + 1
        while pos < size:
            if text[pos] == "\\":
                pos += 2
                continue
            if text[pos] == '"':
                return pos + 1
            pos += 1
        return size
    if text.startswith("//", index):
        newline = text.find("\n", index)
        return size if newline < 0 else newline + 1
    if text.startswith("/*", index):
        end = text.find("*/", index + 2)
        return size if end < 0 else end + 2
    return index


def scan_file(path: Path) -> List[Finding]:
    """Scan one source file and return every untranslated-string finding.

    The file is read as UTF-8 with undecodable bytes replaced. Assignments
    are audited first via ``check_assignment_literals``; then the text is
    walked looking for ``name(...)`` call sites, which are handed to
    ``check_call_args`` together with the identifier before a preceding
    ``.`` (the receiver), if any. A parenthesised group directly followed by
    ``{`` is treated as a declaration or block header and is not audited
    itself.

    Scanning resumes just inside each opening parenthesis, so calls nested in
    another call's arguments or in a block header's condition are audited
    too. String literals and comments are skipped by the walker, so text
    inside them is never mistaken for a call. An opening parenthesis with no
    match is skipped instead of ending the scan.
    """
    findings: List[Finding] = []
    text = path.read_text(encoding="utf-8", errors="replace")
    # Arrays passed to the in-place translator are exempt from insert_last checks.
    translated_arrays = set(re.findall(r"i18n_translate_string_array_in_place\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", text))
    check_assignment_literals(path, text, findings)

    size = len(text)
    i = 0
    while i < size:
        after_skip = _skip_string_or_comment(text, i)
        if after_skip != i:
            i = after_skip
            continue
        if not is_identifier_char(text[i]):
            i += 1
            continue

        start = i
        while i < size and is_identifier_char(text[i]):
            i += 1
        name = text[start:i]

        j = i
        while j < size and text[j].isspace():
            j += 1
        if j >= size or text[j] != "(":
            continue

        # Continue just inside the parentheses so nested calls are visited.
        i = j + 1
        close = find_matching_paren(text, j)
        if close < 0:
            # Unbalanced group: nothing reliable to audit here, keep scanning.
            continue

        receiver = ""
        k = start - 1
        while k >= 0 and text[k].isspace():
            k -= 1
        if k >= 0 and text[k] == ".":
            receiver = read_identifier_backward(text, k - 1)

        tail = close + 1
        while tail < size and text[tail].isspace():
            tail += 1
        if tail < size and text[tail] == "{":
            # Function/method declaration or block header, not a call site.
            continue

        args = split_top_level(text[j + 1 : close], ",")
        line = line_number_for_index(text, start)
        check_call_args(path, line, name, receiver, args, findings, translated_arrays)
    return findings
def summarize_expression(expr: str) -> str:
    """Return *expr* on one line, shortened to at most 120 characters.

    Runs of whitespace are collapsed to single spaces. Longer results are cut
    to 117 characters followed by ``...``.
    """
    single_line = " ".join(expr.split())
    if len(single_line) <= 120:
        return single_line
    return single_line[:117] + "..."
def main() -> int:
    """Run the audit and print a report.

    Returns 0 when no finding remains after removing the allowlisted ones,
    otherwise 1.
    """
    approved = load_allowlist()

    collected: List[Finding] = []
    for source_file in iter_nvgt_files():
        collected.extend(scan_file(source_file))

    violations = sorted(
        (finding for finding in collected if finding.key() not in approved),
        key=lambda item: (item.path.as_posix(), item.line, item.context),
    )
    if not violations:
        print("No untranslated-string violations found.")
        return 0

    print(f"Found {len(violations)} untranslated-string violations:")
    for finding in violations:
        location = finding.path.relative_to(ROOT).as_posix()
        summary = summarize_expression(finding.expression)
        print(f"{location}:{finding.line}: {finding.context}: {summary}")
    print("\nAdd approved exceptions to scripts/i18n_audit_allowlist.txt if needed.")
    return 1
# Script entry point: the process exit status is the value returned by main().
if __name__ == "__main__":
    raise SystemExit(main())