402 lines
11 KiB
Python
Executable File
402 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|
|
|
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Optional file of finding keys (one per line) that are exempt from reporting.
ALLOWLIST_PATH = ROOT.joinpath("scripts", "i18n_audit_allowlist.txt")

# A file is excluded from the scan when any component of its ROOT-relative
# path is one of these names.
SKIP_DIR_NAMES = {
    ".git",
    "bloodshed",
    "docs",
    "skills",
    "nvgt-git",
    "libstorm-nvgt",
}
# Individual file names that are never scanned.
SKIP_FILE_NAMES = {"crash.log"}

# An ``insert_last`` call is only audited when the lowercased name of its
# receiver contains at least one of these substrings.
INSERT_LAST_CONTEXT_HINTS = (
    "option",
    "label",
    "line",
    "prompt",
    "instruction",
    "intro",
    "reward",
    "message",
    "title",
    "menu",
)

# Call prefixes whose presence in an expression marks it as already translated.
TRANSLATION_WRAPPERS = (
    "tr(",
    "trf(",
    "trn(",
    "i18n_translate_speech_message(",
    "i18n_lookup_key_with_fallback(",
    "speech_history_transform_message(",
    "get_barricade_option_text(",
    "i18n_text(",
)

# Function name -> zero-based argument positions that must be
# translation-wrapped when they contain literals. Keep this conservative and
# focused on user-facing text paths.
ARG_CHECKS: Dict[str, List[int]] = {
    "screen_reader_speak": [0],
    "menu_run_simple": [0],
    "text_reader": [0, 1],
    "text_reader_lines": [1],
    "text_reader_file": [1],
    "file_viewer": [0, 1],
    "file_viewer_lines": [1],
    "file_viewer_file": [1],
}

# Variable names whose assignments are audited for untranslated literals.
ASSIGNMENT_LHS_CHECKS = ("intro_text",)
|
|
|
|
|
|
class Finding:
    """One suspected untranslated string found in a scanned source file."""

    def __init__(self, path: Path, line: int, context: str, expression: str):
        self.path, self.line = path, line
        self.context = context
        # Surrounding whitespace is dropped once here so every consumer sees
        # the trimmed expression.
        self.expression = expression.strip()

    def key(self) -> str:
        """Return the ``relative/path:line:context`` identifier of this finding.

        The path part is relative to ``ROOT`` and uses forward slashes.
        """
        relative = self.path.relative_to(ROOT).as_posix()
        return f"{relative}:{self.line}:{self.context}"
|
|
|
|
|
|
def iter_nvgt_files() -> List[Path]:
    """Return the sorted, de-duplicated list of ``.nvgt`` files to audit.

    Two fixed entry-point files are included whenever they exist. Every
    ``*.nvgt`` file found recursively under the source roots is added unless
    its name is in ``SKIP_FILE_NAMES`` or any component of its ROOT-relative
    path is in ``SKIP_DIR_NAMES``.
    """
    collected: Set[Path] = set()

    # Entry points are taken as-is; the skip filters below do not apply to them.
    for entry in (ROOT / "draugnorak.nvgt", ROOT / "src" / "sound_settings.nvgt"):
        if entry.exists():
            collected.add(entry)

    # NOTE(review): "libstorm-nvgt" is walked here, but a SKIP_DIR_NAMES entry
    # of the same name filters out every file under it -- confirm the intent.
    for source_root in (ROOT / "src", ROOT / "libstorm-nvgt"):
        if not source_root.exists():
            continue
        for candidate in source_root.rglob("*.nvgt"):
            if candidate.name in SKIP_FILE_NAMES:
                continue
            if SKIP_DIR_NAMES.intersection(candidate.relative_to(ROOT).parts):
                continue
            collected.add(candidate)

    return sorted(collected)
|
|
|
|
|
|
def load_allowlist() -> Set[str]:
    """Read the allowlist file and return its entries.

    Each line is stripped; blank lines and lines starting with ``#`` are
    ignored. A missing allowlist file yields an empty set.
    """
    if not ALLOWLIST_PATH.exists():
        return set()

    content = ALLOWLIST_PATH.read_text(encoding="utf-8", errors="replace")
    stripped_lines = (raw.strip() for raw in content.splitlines())
    return {entry for entry in stripped_lines if entry and not entry.startswith("#")}
|
|
|
|
|
|
def is_identifier_char(ch: str) -> bool:
    """Return True when *ch* is an underscore or alphanumeric."""
    return ch == "_" or ch.isalnum()
|
|
|
|
|
|
def read_identifier_backward(text: str, before_index: int) -> str:
    """Return the identifier that ends at or before *before_index*.

    Scanning moves leftwards from *before_index*: whitespace is skipped first,
    then the longest run of identifier characters is collected. An empty
    string is returned when no identifier character is found there.
    """
    cursor = before_index

    # Step over whitespace separating the caller's position from the name.
    while cursor >= 0 and text[cursor].isspace():
        cursor -= 1
    last = cursor

    # Walk to the first character that is not part of the identifier.
    while cursor >= 0 and is_identifier_char(text[cursor]):
        cursor -= 1
    first = cursor + 1

    return text[first : last + 1] if first <= last else ""
|
|
|
|
|
|
def find_matching_paren(text: str, open_index: int) -> int:
    """Return the index of the ``)`` that balances the ``(`` at *open_index*.

    Parentheses inside double-quoted string literals are ignored, and a
    backslash inside a literal escapes the following character. Returns -1
    when the text ends before the nesting depth returns to zero.
    """
    nesting = 0
    inside_literal = False
    escaped = False

    for pos in range(open_index, len(text)):
        current = text[pos]

        if inside_literal:
            if escaped:
                # The escaped character is consumed whatever it is.
                escaped = False
            elif current == "\\":
                escaped = True
            elif current == '"':
                inside_literal = False
        elif current == '"':
            inside_literal = True
        elif current == "(":
            nesting += 1
        elif current == ")":
            nesting -= 1
            if nesting == 0:
                return pos

    return -1
|
|
|
|
|
|
def split_top_level(expr: str, delimiter: str) -> List[str]:
    """Split *expr* on *delimiter* wherever it appears outside any nesting.

    A delimiter is ignored while inside ``()``, ``[]`` or ``{}`` or inside a
    double-quoted string literal (a backslash escapes the next character).
    Unbalanced closers never push a depth below zero. The pieces are returned
    unstripped, and there is always at least one piece.
    """
    # Each bracket family keeps its own depth counter, indexed by slot.
    openers = {"(": 0, "[": 1, "{": 2}
    closers = {")": 0, "]": 1, "}": 2}
    depths = [0, 0, 0]

    pieces: List[str] = []
    segment_start = 0
    inside_literal = False
    escaped = False

    for pos, current in enumerate(expr):
        if inside_literal:
            if escaped:
                escaped = False
            elif current == "\\":
                escaped = True
            elif current == '"':
                inside_literal = False
            continue

        if current == '"':
            inside_literal = True
        elif current in openers:
            depths[openers[current]] += 1
        elif current in closers:
            slot = closers[current]
            depths[slot] = max(0, depths[slot] - 1)
        elif current == delimiter and not any(depths):
            pieces.append(expr[segment_start:pos])
            segment_start = pos + 1

    # Whatever follows the last delimiter (possibly empty) is the final piece.
    pieces.append(expr[segment_start:])
    return pieces
|
|
|
|
|
|
def line_number_for_index(text: str, index: int) -> int:
    """Return the 1-based line number of the character at *index* in *text*."""
    newlines_before = text.count("\n", 0, index)
    return 1 + newlines_before
|
|
|
|
|
|
def has_string_literal(expr: str) -> bool:
    """Return True when *expr* contains at least one closed string literal."""
    return bool(extract_string_literals(expr))
|
|
|
|
|
|
def extract_string_literals(expr: str) -> List[str]:
    """Return the contents of every closed double-quoted literal in *expr*.

    Inside a literal a backslash is dropped and the character after it is
    kept verbatim (so ``\\n`` contributes ``n``). A literal that is still
    open when *expr* ends is discarded.
    """
    found: List[str] = []
    buffer = None  # list of characters while inside a literal, else None
    escaped = False

    for symbol in expr:
        if buffer is None:
            # Outside a literal only an opening quote matters.
            if symbol == '"':
                buffer = []
        elif escaped:
            buffer.append(symbol)
            escaped = False
        elif symbol == "\\":
            escaped = True
        elif symbol == '"':
            found.append("".join(buffer))
            buffer = None
        else:
            buffer.append(symbol)

    return found
|
|
|
|
|
|
def has_meaningful_literal(expr: str) -> bool:
    """Return True when *expr* holds a literal that looks like user-facing copy.

    A literal is ignored when it is empty, consists only of whitespace and
    punctuation, or is made up solely of lowercase letters, digits, ``_``,
    ``.`` and ``-`` (translation keys and identifiers are not user-facing
    copy).
    """
    return any(
        literal != ""
        and not re.fullmatch(r"[\s:,.!?;()\-\[\]/+]*", literal)
        and not re.fullmatch(r"[a-z0-9_.-]+", literal)
        for literal in extract_string_literals(expr)
    )
|
|
|
|
|
|
def is_translated_expression(expr: str, wrappers: Optional[Iterable[str]] = None) -> bool:
    """Return True when *expr* contains a call to a translation wrapper.

    Args:
        expr: Source text of the expression to inspect.
        wrappers: Wrapper call prefixes such as ``"tr("``. Defaults to the
            module-level ``TRANSLATION_WRAPPERS``.

    Returns:
        True if any wrapper name appears as a call (name, optional
        whitespace, then ``(``) that is not the tail of a longer identifier.
    """
    if wrappers is None:
        wrappers = TRANSLATION_WRAPPERS

    # Entries are written as "name("; only the bare name goes into the pattern.
    names = [wrapper.rstrip("(").strip() for wrapper in wrappers]
    names = [name for name in names if name]
    if not names:
        return False

    # The look-behind rejects a wrapper name that is merely the suffix of
    # another identifier: a plain substring test would accept "substr(" or
    # "str(" as the "tr(" wrapper and hide a real violation.
    alternation = "|".join(re.escape(name) for name in names)
    return re.search(rf"(?<!\w)(?:{alternation})\s*\(", expr) is not None
|
|
|
|
|
|
def should_check_insert_last(receiver: str) -> bool:
    """Return True when *receiver* names a container worth auditing.

    The match is case-insensitive: the lowercased receiver must contain at
    least one of ``INSERT_LAST_CONTEXT_HINTS``. An empty receiver never
    qualifies.
    """
    lowered = receiver.lower()
    if lowered == "":
        return False

    for hint in INSERT_LAST_CONTEXT_HINTS:
        if hint in lowered:
            return True
    return False
|
|
|
|
|
|
def add_finding(findings: List[Finding], path: Path, line: int, context: str, expr: str) -> None:
    """Build a ``Finding`` from the given details and append it to *findings*."""
    record = Finding(path, line, context, expr)
    findings.append(record)
|
|
|
|
|
|
def check_call_args(
    path: Path,
    line: int,
    function_name: str,
    receiver: str,
    args: List[str],
    findings: List[Finding],
    translated_arrays: Set[str],
) -> None:
    """Record a finding for each audited argument holding an unwrapped literal.

    ``insert_last`` calls are audited on their first argument, and only when
    the receiver passes ``should_check_insert_last`` and is not listed in
    *translated_arrays*. Any other function is audited at the argument
    positions listed for it in ``ARG_CHECKS``; unlisted functions are ignored.
    """

    def needs_wrapping(candidate: str) -> bool:
        # Meaningful literal text with no translation wrapper around it.
        return has_meaningful_literal(candidate) and not is_translated_expression(candidate)

    if function_name == "insert_last":
        eligible = (
            should_check_insert_last(receiver)
            and receiver not in translated_arrays
            and bool(args)
        )
        if eligible and needs_wrapping(args[0]):
            add_finding(findings, path, line, f"{receiver}.insert_last", args[0])
        return

    for position in ARG_CHECKS.get(function_name) or ():
        # Calls with fewer arguments than the audited position are skipped.
        if position < len(args) and needs_wrapping(args[position]):
            add_finding(findings, path, line, f"{function_name}[{position}]", args[position])
|
|
|
|
|
|
def check_assignment_literals(path: Path, text: str, findings: List[Finding]) -> None:
    """Record assignments to audited variables whose value is an unwrapped literal.

    For every name in ``ASSIGNMENT_LHS_CHECKS``, each ``name = <expr>;``
    occurrence in *text* is inspected. A finding with context
    ``assign:<name>`` is appended to *findings* when ``<expr>`` contains a
    meaningful literal and no translation wrapper.

    Args:
        path: File the text was read from; stored on each finding.
        text: Full source text to search.
        findings: List that receives the new findings (mutated in place).
    """
    for lhs in ASSIGNMENT_LHS_CHECKS:
        # ``=(?!=)`` stops the comparison ``lhs == "Text"`` from being read as
        # an assignment, which previously produced false findings. ``.`` does
        # not cross newlines, so only expressions terminated by ``;`` on the
        # same line are captured.
        pattern = re.compile(rf"\b{re.escape(lhs)}\s*=(?!=)\s*(.+?);")
        for match in pattern.finditer(text):
            expr = match.group(1)
            if not has_meaningful_literal(expr) or is_translated_expression(expr):
                continue
            line = line_number_for_index(text, match.start())
            add_finding(findings, path, line, f"assign:{lhs}", expr)
|
|
|
|
|
|
def scan_file(path: Path) -> List[Finding]:
    """Scan one source file and return every finding it produces.

    The text is first checked for audited assignments, then walked character
    by character looking for ``name(...)`` shapes. Each one that is not
    followed by ``{`` is treated as a call site and handed to
    ``check_call_args`` together with its receiver (the identifier before a
    preceding ``.``, if any) and its top-level, comma-separated arguments.
    """
    findings: List[Finding] = []
    text = path.read_text(encoding="utf-8", errors="replace")

    # Arrays passed to the in-place translation helper anywhere in this file
    # are exempt from the insert_last check.
    translated_arrays = set(
        re.findall(r"i18n_translate_string_array_in_place\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", text)
    )

    check_assignment_literals(path, text, findings)

    length = len(text)
    cursor = 0
    while cursor < length:
        if not is_identifier_char(text[cursor]):
            cursor += 1
            continue

        # Consume one run of identifier characters.
        name_start = cursor
        while cursor < length and is_identifier_char(text[cursor]):
            cursor += 1
        name = text[name_start:cursor]

        # Only identifiers followed (after optional whitespace) by "(" matter.
        open_paren = cursor
        while open_paren < length and text[open_paren].isspace():
            open_paren += 1
        if open_paren >= length or text[open_paren] != "(":
            continue

        # A "." just before the name means this is a method call; remember
        # the identifier it is called on.
        receiver = ""
        before = name_start - 1
        while before >= 0 and text[before].isspace():
            before -= 1
        if before >= 0 and text[before] == ".":
            receiver = read_identifier_backward(text, before - 1)

        close_paren = find_matching_paren(text, open_paren)
        if close_paren < 0:
            # No balancing ")" in the rest of the text: stop scanning this file.
            break

        after = close_paren + 1
        while after < length and text[after].isspace():
            after += 1

        # "name(...) {" is a function/method declaration, not a call site.
        # (The same shape also covers statements such as "if (...) {".)
        is_declaration = after < length and text[after] == "{"
        if not is_declaration:
            args = split_top_level(text[open_paren + 1 : close_paren], ",")
            line = line_number_for_index(text, name_start)
            check_call_args(path, line, name, receiver, args, findings, translated_arrays)

        # Resume after the closing parenthesis; text inside it is not
        # rescanned for further call sites.
        cursor = close_paren + 1

    return findings
|
|
|
|
|
|
def summarize_expression(expr: str) -> str:
    """Return *expr* on a single line, shortened to at most 120 characters.

    Runs of whitespace collapse to single spaces. Longer results are cut to
    117 characters and suffixed with ``...``.
    """
    single_line = " ".join(expr.split())
    return single_line if len(single_line) <= 120 else single_line[:117] + "..."
|
|
|
|
|
|
def main() -> int:
    """Run the audit over every discovered file and print a report.

    Returns:
        0 when no finding remains after allowlist filtering, otherwise 1.
    """
    allowlist = load_allowlist()

    # Gather findings from all files, dropping those whose key is allowlisted.
    reportable: List[Finding] = [
        finding
        for nvgt_file in iter_nvgt_files()
        for finding in scan_file(nvgt_file)
        if finding.key() not in allowlist
    ]
    reportable.sort(key=lambda item: (item.path.as_posix(), item.line, item.context))

    if reportable:
        print(f"Found {len(reportable)} untranslated-string violations:")
        for finding in reportable:
            rel = finding.path.relative_to(ROOT).as_posix()
            print(f"{rel}:{finding.line}: {finding.context}: {summarize_expression(finding.expression)}")
        print("\nAdd approved exceptions to scripts/i18n_audit_allowlist.txt if needed.")
        return 1

    print("No untranslated-string violations found.")
    return 0
|
|
|
|
|
|
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|