From 255c068b37651a7ec60758c1cb822744d9b3291f Mon Sep 17 00:00:00 2001 From: Matthew Beech Date: Fri, 12 Jun 2026 14:39:45 -0500 Subject: [PATCH 1/4] Add Normalization Form and Language to ParatextSettingsParser --- machine/corpora/paratext_project_settings.py | 2 ++ machine/corpora/paratext_project_settings_parser_base.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py index b932b214..03279379 100644 --- a/machine/corpora/paratext_project_settings.py +++ b/machine/corpora/paratext_project_settings.py @@ -23,6 +23,8 @@ class ParatextProjectSettings: language_code: Optional[str] translation_type: str visibility: Optional[str] = None + normalization_form: str + language: str parent_guid: Optional[str] = None parent_name: Optional[str] = None _parent: Optional["ParatextProjectSettings"] = None diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index ca09971d..8ff9affe 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -100,6 +100,9 @@ def parse(self) -> ParatextProjectSettings: parent_guid = translation_info_setting_parts[2] if translation_info_setting_parts[2] != "" else None visibility: Optional[str] = settings_tree.getroot().findtext("Visibility") + normalization_form: str = settings_tree.getroot().findtext("NormalizationForm", "Off") + + language: str = settings_tree.getroot().findtext("Language", "en") settings = ParatextProjectSettings( guid, @@ -117,6 +120,8 @@ def parse(self) -> ParatextProjectSettings: language_code, translation_type, visibility, + normalization_form, + language, parent_guid, parent_name, ) From 0b725b539c2aaa094499540e884f67943db6e420 Mon Sep 17 00:00:00 2001 From: Matthew Beech Date: Fri, 12 Jun 2026 14:40:53 -0500 Subject: [PATCH 2/4] Change language default --- machine/corpora/paratext_project_settings_parser_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 8ff9affe..729bdd6e 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -102,7 +102,7 @@ def parse(self) -> ParatextProjectSettings: visibility: Optional[str] = settings_tree.getroot().findtext("Visibility") normalization_form: str = settings_tree.getroot().findtext("NormalizationForm", "Off") - language: str = settings_tree.getroot().findtext("Language", "en") + language: str = settings_tree.getroot().findtext("Language", "") settings = ParatextProjectSettings( guid, From f6bad13bd8f95410aeedd7cfb68f6522adc120f8 Mon Sep 17 00:00:00 2001 From: Matthew Beech Date: Tue, 16 Jun 2026 13:40:47 -0500 Subject: [PATCH 3/4] Add tests for normalization_form and change default to "Undefined" --- machine/corpora/paratext_project_settings.py | 2 +- machine/corpora/paratext_project_settings_parser_base.py | 4 ++-- tests/corpora/test_paratext_project_settings.py | 2 ++ tests/corpora/test_paratext_project_settings_parser.py | 5 +++++ tests/testutils/memory_paratext_project_file_handler.py | 4 ++++ 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py index 03279379..04820cf5 100644 --- a/machine/corpora/paratext_project_settings.py +++ b/machine/corpora/paratext_project_settings.py @@ -22,9 +22,9 @@ class ParatextProjectSettings: biblical_terms_file_name: str language_code: Optional[str] translation_type: str - visibility: Optional[str] = None normalization_form: str language: str + visibility: Optional[str] = None parent_guid: Optional[str] = None parent_name: Optional[str] = None _parent: Optional["ParatextProjectSettings"] = None diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 729bdd6e..73d51142 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -100,7 +100,7 @@ def parse(self) -> ParatextProjectSettings: parent_guid = translation_info_setting_parts[2] if translation_info_setting_parts[2] != "" else None visibility: Optional[str] = settings_tree.getroot().findtext("Visibility") - normalization_form: str = settings_tree.getroot().findtext("NormalizationForm", "Off") + normalization_form: str = settings_tree.getroot().findtext("NormalizationForm", "Undefined") language: str = settings_tree.getroot().findtext("Language", "") @@ -119,9 +119,9 @@ def parse(self) -> ParatextProjectSettings: parts[2], language_code, translation_type, - visibility, normalization_form, language, + visibility, parent_guid, parent_name, ) diff --git a/tests/corpora/test_paratext_project_settings.py b/tests/corpora/test_paratext_project_settings.py index fc607410..d3fe5812 100644 --- a/tests/corpora/test_paratext_project_settings.py +++ b/tests/corpora/test_paratext_project_settings.py @@ -129,5 +129,7 @@ def _create_settings(file_name_form: str) -> ParatextProjectSettings: "BiblicalTerms.xml", "en", "Standard", + "Undefined", + "English", "Public", ) diff --git a/tests/corpora/test_paratext_project_settings_parser.py b/tests/corpora/test_paratext_project_settings_parser.py index 141f826b..d2a477ce 100644 --- a/tests/corpora/test_paratext_project_settings_parser.py +++ b/tests/corpora/test_paratext_project_settings_parser.py @@ -24,6 +24,11 @@ def test_translation_info_specified() -> None: assert settings.parent_name == "DEF" assert settings.parent_guid == "22222222222222222222222222222222" +def test_normalization_form_default() -> None: + settings = _create_settings() + + assert settings.normalization_form == "Undefined" + def _create_settings(additional_settings_xml: str = ""): files = { diff --git a/tests/testutils/memory_paratext_project_file_handler.py b/tests/testutils/memory_paratext_project_file_handler.py index 8916e6cd..be6692a5 100644 --- a/tests/testutils/memory_paratext_project_file_handler.py +++ b/tests/testutils/memory_paratext_project_file_handler.py @@ -50,6 +50,8 @@ def __init__( biblical_terms_file_name: str = "ProjectBiblicalTerms.xml", language_code: str = "en", translation_type: str = "Standard", + normalization_form: str = "Undefined", + language: str = "", parent_guid: Optional[str] = None, parent_name: Optional[str] = None, ): @@ -69,6 +71,8 @@ def __init__( biblical_terms_file_name, language_code, translation_type, + normalization_form, + language, parent_guid, parent_name, ) From 96da6779fc330e61e1502357861f2cd276a27f39 Mon Sep 17 00:00:00 2001 From: Matthew Beech Date: Tue, 16 Jun 2026 14:22:04 -0500 Subject: [PATCH 4/4] Fix formatting --- tests/corpora/test_paratext_project_settings_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/corpora/test_paratext_project_settings_parser.py b/tests/corpora/test_paratext_project_settings_parser.py index d2a477ce..62d66b57 100644 --- a/tests/corpora/test_paratext_project_settings_parser.py +++ b/tests/corpora/test_paratext_project_settings_parser.py @@ -24,6 +24,7 @@ def test_translation_info_specified() -> None: assert settings.parent_name == "DEF" assert settings.parent_guid == "22222222222222222222222222222222" + def test_normalization_form_default() -> None: settings = _create_settings()