daisy · Kostenkov-2021 · May 31, 2026 · May 31, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mathcat"
-version = "0.7.6-beta.4"
+version = "0.7.6-beta.5"
 authors = ["Neil Soiffer <soiffer@alum.mit.edu>"]
 license = "MIT"
 description = "MathCAT: Math Capable Assistive Technology ('Speech and braille from MathML')"

@@ -1,15 +1,15 @@
 # MathCAT Translation Audit Tool
 
-This tool compares English YAML rule files with translated versions to identify translation gaps and formatting issues. It assists translators in ensuring their translations are complete, consistent, and properly formatted.
+This tool compares YAML rule files from a source language with translated versions to identify translation gaps and formatting issues. It assists translators in ensuring their translations are complete, consistent, and properly formatted.
 
 ### 🔍 Detection Capabilities
 
 The tool analyzes rule files to detect the following issues:
 
-* **Missing Rules:** Rules present in the master English file but missing in the target translation.
-* **Extra Rules:** Rules present in the translation but absent in English (flagged as potentially intentional language-specific additions).
+* **Missing Rules:** Rules present in the source file but missing in the target translation.
+* **Extra Rules:** Rules present in the target translation but absent in the source (flagged as potentially intentional language-specific additions).
 * **Untranslated Text:** Detects text keys that still use **lowercase** formatting, indicating they haven't been verified or translated yet.
-* **Rule Differences:** Structural changes (match expressions, conditions, variables, or test/replace layout) between English and the translation.
+* **Rule Differences:** Structural changes (match expressions, conditions, variables, or test/replace layout) between the source and the target translation.
 
 Add `# audit-ignore` to a rule block to suppress auditing that rule.
 
@@ -57,20 +57,23 @@ The tool automatically adjusts its matching logic based on the file type:
 **Syntax:**
 ```bash
 uv run audit-translations <language> [--file <specific_file>]
+uv run audit-translations <language> --source <source-language>
 uv run audit-translations --list
 
 # If running from the repo root, point uv at the project:
 uv run --project PythonScripts audit-translations <language>
+uv run --project PythonScripts audit-translations <language> --source <source-language>
 uv run --project PythonScripts audit-translations --list
 ```
 
 **Convenience Features:**
 * `--list`: Displays all available languages.
   * Region variants are shown as `lang-region` (e.g., `zz-aa`) based on subdirectories under `Rules/Languages/<lang>`.
+* `--source`: Sets the source/reference language. Defaults to `en`.
 * `--file`: Audits a single specific file instead of the whole directory.
 * `--rules-dir`: Override the Rules/Languages directory path.
 * `--only`: Filter issue types (comma-separated): `missing`, `untranslated`, `extra`, `diffs`, `all`.
-* `--verbose`: Show detailed output including English/translated snippets for rule differences.
+* `--verbose`: Show detailed output including source/target snippets for rule differences.
 * **Summary Stats:** Provides a statistical summary after every run.
 
 **Examples:**
@@ -88,19 +91,23 @@ uv run audit-translations es
 # Audit German translations
 uv run audit-translations de
 
+# Compare Norwegian Bokmål against Swedish instead of English
+uv run audit-translations nb --source sv
+
 # Audit only a specific file
 uv run audit-translations es --file SharedRules/default.yaml
 
 # Audit a regional variant (merges Rules/Languages/de and Rules/Languages/de/CH)
 uv run audit-translations de-CH
 
-# Show detailed output with English/translated snippets for rule differences
+# Show detailed output with source/target snippets for rule differences
 uv run audit-translations es --verbose
 ```
 
 **Running from the repo root (without `cd PythonScripts`):**
 ```bash
 uv run --project PythonScripts audit-translations es
+uv run --project PythonScripts audit-translations nb --source sv
 uv run --project PythonScripts audit-translations --list
 ```
 

@@ -1,7 +1,7 @@
 """
 MathCAT Translation Audit Tool
 
-Compares English YAML rule files with translated versions to identify translation
+Compares source-language YAML rule files with translated versions to identify translation
 gaps and issues. This tool helps translators ensure their translations are complete
 and properly formatted.
 

@@ -1,7 +1,7 @@
 """
 Auditing and comparison logic.
 
-Contains functions for comparing English and translated files,
+Contains functions for comparing source and translated files,
 and for performing full language audits.
 """
 
@@ -60,7 +60,7 @@ def compare_files(
     translated_region_path: Path | None = None,
     english_region_path: Path | None = None,
 ) -> ComparisonResult:
-    """Compare English and translated YAML files"""
+    """Compare source and translated YAML files"""
 
     def load_rules(path: Path | None) -> list[RuleInfo]:
         if path and path.exists():
@@ -95,14 +95,14 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis
     include_extra = include_all or "extra" in issue_filter
     include_diffs = include_all or "diffs" in issue_filter
 
-    # Find missing rules (in English but not in translation)
+    # Find missing rules (in source but not in translation)
     missing_rules = []
     if include_missing:
         for key, rule in english_by_key.items():
             if key not in translated_by_key:
                 missing_rules.append(rule)
 
-    # Find extra rules (in translation but not in English)
+    # Find extra rules (in translation but not in source)
     extra_rules = []
     if include_extra:
         for key, rule in translated_by_key.items():
@@ -142,29 +142,35 @@ def audit_language(
     rules_dir: str | None = None,
     issue_filter: set[str] | None = None,
     verbose: bool = False,
+    source_language: str = "en",
 ) -> int:
     """Audit translations for a specific language. Returns total issue count."""
     rules_dir_path = get_rules_dir(rules_dir)
-    english_dir = rules_dir_path / "en"
 
-    base_language, region = split_language_into_base_and_region(language)
-    translated_dir = rules_dir_path / base_language
-    translated_region_dir = translated_dir / region if region else None
-    english_region_dir = english_dir / region if region else None
+    source_base_language, source_region = split_language_into_base_and_region(source_language)
+    source_dir = rules_dir_path / source_base_language
+    source_region_dir = source_dir / source_region if source_region else None
 
-    if not english_dir.exists():
-        raise AuditError(f"English rules directory not found: {english_dir}")
+    target_base_language, target_region = split_language_into_base_and_region(language)
+    translated_dir = rules_dir_path / target_base_language
+    translated_region_dir = translated_dir / target_region if target_region else None
+
+    if not source_dir.exists():
+        raise AuditError(f"Source rules directory not found: {source_dir}")
+
+    if source_region and not (source_region_dir and source_region_dir.exists()):
+        raise AuditError(f"Source region directory not found: {source_region_dir}")
 
     if not translated_dir.exists():
-        raise AuditError(f"Translation directory not found: {translated_dir}")
+        raise AuditError(f"Target rules directory not found: {translated_dir}")
 
-    if region and not (translated_region_dir and translated_region_dir.exists()):
-        raise AuditError(f"Region directory not found: {translated_region_dir}")
+    if target_region and not (translated_region_dir and translated_region_dir.exists()):
+        raise AuditError(f"Target region directory not found: {translated_region_dir}")
 
     # Get list of files to audit
-    files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir)
+    files = [specific_file] if specific_file else get_yaml_files(source_dir, source_region_dir)
 
-    print_audit_header(language, len(files))
+    print_audit_header(language, len(files), source_language)
 
     total_issues = 0
     total_missing = 0
@@ -175,13 +181,13 @@ def audit_language(
     files_ok = 0
 
     for file_name in files:
-        english_path = english_dir / file_name
+        english_path = source_dir / file_name
         translated_path = translated_dir / file_name
         translated_region_path = translated_region_dir / file_name if translated_region_dir else None
-        english_region_path = english_region_dir / file_name if english_region_dir else None
+        english_region_path = source_region_dir / file_name if source_region_dir else None
 
         if not english_path.exists():
-            console.print(f"\n[yellow]⚠ Warning:[/] English file not found: {english_path}")
+            console.print(f"\n[yellow]⚠ Warning:[/] Source file not found: {english_path}")
             continue
 
         result = compare_files(
@@ -193,7 +199,7 @@ def audit_language(
         )
 
         if result.has_issues:
-            issues = print_warnings(result, file_name, verbose, language)
+            issues = print_warnings(result, file_name, verbose, language, source_language)
             if issues > 0:
                 files_with_issues += 1
             total_issues += issues

@@ -17,17 +17,19 @@ def main() -> None:
     sys.stdout.reconfigure(encoding="utf-8")
 
     parser = argparse.ArgumentParser(
-        description="Audit MathCAT translation files against English originals",
+        description="Audit MathCAT translation files against a source language",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 Examples:
     uv run audit-translations es
+    uv run audit-translations nb --source sv
     uv run audit-translations de --file SharedRules/default.yaml
     uv run audit-translations --list
         """,
     )
 
     parser.add_argument("language", nargs="?", help="Language code to audit (e.g., 'es', 'de', 'fi')")
+    parser.add_argument("--source", default="en", help="Source/reference language code (default: 'en')")
     parser.add_argument("--file", dest="specific_file", help="Audit only a specific file (e.g., 'SharedRules/default.yaml')")
     parser.add_argument("--list", action="store_true", help="List available languages")
     parser.add_argument("--rules-dir", help="Override Rules/Languages directory path")
@@ -68,6 +70,7 @@ def main() -> None:
                 args.rules_dir,
                 issue_filter,
                 args.verbose,
+                args.source,
             )
         except AuditError as exc:
             console.print(f"\n[red]✗ Error:[/] {exc}")

@@ -1,7 +1,7 @@
 """
 Rule diffing logic.
 
-Compares English and translated rules to find fine-grained structural differences.
+Compares source and translated rules to find fine-grained structural differences.
 """
 
 from .extractors import (

@@ -25,7 +25,7 @@ class IssueType(StrEnum):
 class DiffType(StrEnum):
     """Rule-difference subcategories used for fine-grained diagnostics."""
 
-    MATCH = "match"  # `match` XPath differs between English and translation.
+    MATCH = "match"  # `match` XPath differs between source and translation.
     CONDITION = "condition"  # `if` / `test` condition expressions differ.
     VARIABLES = "variables"  # Variable names defined in `variables` differ.
     STRUCTURE = "structure"  # Control-flow block shape/order differs (if/then/else/with/replace).
@@ -91,7 +91,7 @@ def untranslated_keys(self) -> list[str]:
 
 @dataclass
 class RuleDifference:
-    """Fine-grained difference between English and translated rule"""
+    """Fine-grained difference between source and translated rule"""
 
     english_rule: RuleInfo
     translated_rule: RuleInfo
@@ -107,10 +107,10 @@ def __post_init__(self) -> None:
 
 @dataclass
 class ComparisonResult:
-    """Results from comparing English and translated files"""
+    """Results from comparing source and translated files"""
 
-    missing_rules: list[RuleInfo]  # Rules in English but not in translation
-    extra_rules: list[RuleInfo]  # Rules in translation but not in English
+    missing_rules: list[RuleInfo]  # Rules in source but not in translation
+    extra_rules: list[RuleInfo]  # Rules in translation but not in source
     untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]]
     english_rule_count: int
     translated_rule_count: int

@@ -44,16 +44,23 @@ def rule_label(rule: RuleInfo) -> str:
     return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]"
 
 
+def language_label(language: str) -> str:
+    """Normalize a language code for display."""
+    return language.lower().replace("_", "-")
+
+
 def print_warnings(
     result: ComparisonResult,
     file_name: str,
     verbose: bool = False,
     target_language: str = "tr",
+    source_language: str = "en",
 ) -> int:
     """Print warnings to console. Returns count of issues found."""
     issues = 0
     display_name = Path(file_name).as_posix()
-    target_label = target_language.lower().replace("_", "-")
+    source_label = language_label(source_language)
+    target_label = language_label(target_language)
 
     if not result.has_issues:
         return issues
@@ -68,7 +75,9 @@ def print_warnings(
     console.print()
     console.rule(style="cyan")
     console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]")
-    console.print(f"  [dim]English: {result.english_rule_count} rules  →  Translated: {result.translated_rule_count} rules[/]")
+    console.print(
+        f"  [dim]{source_label}: {result.english_rule_count} rules  →  {target_label}: {result.translated_rule_count} rules[/]"
+    )
     console.rule(style="cyan")
 
     grouped_issues: dict[str, dict[str, Any]] = {}
@@ -120,7 +129,7 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
                 console.print(f"          [dim]{ISSUE_GROUP_LABELS[group_key]} [{len(entries)}][/]")
                 for entry in entries:
                     if issue_type is IssueType.MISSING_RULE:
-                        console.print(f"              [dim]•[/] [dim](line {entry['line_en']} in English)[/]")
+                        console.print(f"              [dim]•[/] [dim](line {entry['line_en']} in {source_label})[/]")
                     elif issue_type is IssueType.EXTRA_RULE:
                         console.print(f"              [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]")
                     elif issue_type is IssueType.UNTRANSLATED_TEXT:
@@ -131,11 +140,12 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
                     else:
                         diff: RuleDifference = entry["diff"]
                         console.print(
-                            f"              [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} {target_label})[/]"
+                            f"              [dim]•[/] [dim](line {entry['line_en']} {source_label}, "
+                            f"{entry['line_tr']} {target_label})[/]"
                         )
                         console.print(f"                  [dim]{diff.description}[/]")
                         if verbose:
-                            console.print(f"                  [green]en:[/] {escape(diff.english_snippet)}")
+                            console.print(f"                  [green]{source_label}:[/] {escape(diff.english_snippet)}")
                             console.print(f"                  [red]{target_label}:[/] {escape(diff.translated_snippet)}")
                 issues += len(entries)
 
@@ -155,10 +165,10 @@ def file_count_color(file_count: int) -> str:
     return "red"
 
 
-def print_audit_header(language: str, file_count: int) -> None:
+def print_audit_header(language: str, file_count: int, source_language: str = "en") -> None:
     """Print the audit header panel."""
     console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan"))
-    console.print("\n  [dim]Comparing against English (en) reference files[/]")
+    console.print(f"\n  [dim]Comparing against {language_label(source_language)} reference files[/]")
     console.print(f"  [dim]Files to check: {file_count}[/]")
 
 
@@ -197,4 +207,4 @@ def print_language_list(languages: list[tuple[str, int]]) -> None:
         table.add_row(code, f"[{color}]{count}[/] files")
 
     console.print(table)
-    console.print("\n  [dim]Reference: en (English) - base translation[/]\n")
+    console.print("\n  [dim]Default reference: en; use --source to compare against another language[/]\n")
@@ -1,4 +1,38 @@
+"""
+Shared pytest configuration for audit translation tests.
+
+Rich can emit ANSI styling codes into captured test output when a terminal or
+environment variable forces color output. That made string and golden-output
+assertions fail on some machines even though the visible CLI output was correct.
+These helpers normalize captured renderer/CLI output so tests compare the text
+users see, not terminal control bytes.
+"""
+
+import re
 import sys
 
+import pytest
+
+from audit_translations.renderer import console
+
 # needed for running tests on Windows
 sys.stdout.reconfigure(encoding="utf-8")
+
+ANSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
+
+
+def strip_ansi(text: str) -> str:
+    """Remove ANSI escape sequences from Rich output captured in tests."""
+    return ANSI_RE.sub("", text)
+
+
+@pytest.fixture(autouse=True)
+def deterministic_rich_output():
+    """Keep Rich output assertions stable when the shell forces ANSI colors."""
+    old_no_color = console.no_color
+    old_force_terminal = console._force_terminal
+    console.no_color = True
+    console._force_terminal = False
+    yield
+    console.no_color = old_no_color
+    console._force_terminal = old_force_terminal