Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion machine/punctuation_analysis/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
@dataclass(frozen=True)
class Chapter:
verses: list[Verse]
chatper_num: int = 0
chapter_num: int = 0
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def find_best_chapter_strategies(self) -> List[Tuple[int, QuotationMarkUpdateStr
best_actions_by_chapter: List[Tuple[int, QuotationMarkUpdateStrategy]] = []

for chapter in self.get_chapters():
best_actions_by_chapter.append((chapter.chatper_num, self._find_best_strategy_for_chapter(chapter)))
best_actions_by_chapter.append((chapter.chapter_num, self._find_best_strategy_for_chapter(chapter)))

return best_actions_by_chapter

Expand Down
16 changes: 8 additions & 8 deletions machine/punctuation_analysis/usfm_structure_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ def get_chapters(self, include_chapters: Optional[Dict[int, List[int]]] = None)
current_chapter_verses: list[Verse] = []
current_verse_segments: list[TextSegment] = []
for text_segment in self._text_segments:
if text_segment.marker_is_in_preceding_context(UsfmMarkerType.VERSE):
if len(current_verse_segments) > 0:
current_chapter_verses.append(Verse(current_verse_segments))
current_verse_segments = []
if text_segment.marker_is_in_preceding_context(UsfmMarkerType.CHAPTER):
if len(current_chapter_verses) > 0:
chapters.append(Chapter(current_chapter_verses, current_chapter))
current_chapter_verses = []
if text_segment.book is not None:
current_book = book_id_to_number(text_segment.book)
if text_segment.chapter is not None:
Expand All @@ -104,14 +112,6 @@ def get_chapters(self, include_chapters: Optional[Dict[int, List[int]]] = None)
and current_chapter not in include_chapters[current_book]
):
continue
if text_segment.marker_is_in_preceding_context(UsfmMarkerType.VERSE):
if len(current_verse_segments) > 0:
current_chapter_verses.append(Verse(current_verse_segments))
current_verse_segments = []
if text_segment.marker_is_in_preceding_context(UsfmMarkerType.CHAPTER):
if len(current_chapter_verses) > 0:
chapters.append(Chapter(current_chapter_verses, current_chapter))
current_chapter_verses = []
current_verse_segments.append(text_segment)
if len(current_verse_segments) > 0:
current_chapter_verses.append(Verse(current_verse_segments))
Expand Down
16 changes: 13 additions & 3 deletions tests/punctuation_analysis/test_quotation_denormalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
def test_full_quotation_denormalization_pipeline() -> None:
normalized_usfm = """
\\id GEN
\\c 1
\\c 2
\\v 1 Thus the heavens and the earth were completed in all their vast array.
\\v 2 And by the seventh day God had finished the work He had been doing;
so on that day He rested from all His work.
\\v 3 Then God blessed the seventh day and sanctified it,
because on that day He rested from all the work of creation that He had accomplished.
\\c 3
\\v 1 Now the serpent was more subtle than any animal
of the field which Yahweh God had made.
He said to the woman, "Has God really said,
Expand All @@ -24,7 +30,11 @@ def test_full_quotation_denormalization_pipeline() -> None:
"""

expected_denormalized_usfm = """\\id GEN
\\c 1
\\c 2
\\v 1 Thus the heavens and the earth were completed in all their vast array.
\\v 2 And by the seventh day God had finished the work He had been doing; so on that day He rested from all His work.
\\v 3 Then God blessed the seventh day and sanctified it, because on that day He rested from all the work of creation that He had accomplished.
\\c 3
\\v 1 Now the serpent was more subtle than any animal of the field which Yahweh God had made. He said to the woman, “Has God really said, ‘You shall not eat of any tree of the garden’?”
\\v 2 The woman said to the serpent, “We may eat fruit from the trees of the garden,
\\v 3 but not the fruit of the tree which is in the middle of the garden. God has said, ‘You shall not eat of it. You shall not touch it, lest you die.’”
Expand All @@ -38,7 +48,7 @@ def test_full_quotation_denormalization_pipeline() -> None:
parse_usfm(normalized_usfm, quotation_mark_denormalization_first_pass)
best_chapter_strategies = quotation_mark_denormalization_first_pass.find_best_chapter_strategies()

assert [chapter for chapter, _ in best_chapter_strategies] == [1]
assert [chapter for chapter, _ in best_chapter_strategies] == [2, 3]

quotation_mark_denormalizer = QuotationMarkDenormalizationUsfmUpdateBlockHandler(
standard_english_quote_convention,
Expand Down
56 changes: 42 additions & 14 deletions tests/punctuation_analysis/test_usfm_structure_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def test_get_chapters_filter_by_chapter():
.build()
]
)
]
],
2,
)
]

Expand All @@ -58,6 +59,7 @@ def test_get_chapters_filter_by_chapter():

def test_chapter_and_verse_markers():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test")
Expand All @@ -74,7 +76,8 @@ def test_chapter_and_verse_markers():
.build()
]
)
]
],
1,
)
]

Expand All @@ -86,6 +89,7 @@ def test_chapter_and_verse_markers():

def test_start_paragraph_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.start_para(verse_text_parser_state, "p", False, None)
Expand All @@ -104,7 +108,8 @@ def test_start_paragraph_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -116,6 +121,7 @@ def test_start_paragraph_marker():

def test_start_character_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.start_char(verse_text_parser_state, "k", False, None)
Expand All @@ -134,7 +140,8 @@ def test_start_character_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -146,6 +153,7 @@ def test_start_character_marker():

def test_end_character_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.end_char(verse_text_parser_state, "k", None, False)
Expand All @@ -164,7 +172,8 @@ def test_end_character_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -176,6 +185,7 @@ def test_end_character_marker():

def test_end_note_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.end_note(verse_text_parser_state, "f", False)
Expand All @@ -194,7 +204,8 @@ def test_end_note_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -206,6 +217,7 @@ def test_end_note_marker():

def test_end_table_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.end_note(verse_text_parser_state, "tr", False)
Expand All @@ -224,7 +236,8 @@ def test_end_table_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -236,6 +249,7 @@ def test_end_table_marker():

def test_ref_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.end_note(verse_text_parser_state, "x", False)
Expand All @@ -254,7 +268,8 @@ def test_ref_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -266,6 +281,7 @@ def test_ref_marker():

def test_sidebar_marker():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.end_note(verse_text_parser_state, "esb", False)
Expand All @@ -284,7 +300,8 @@ def test_sidebar_marker():
.build()
]
)
]
],
1,
)
]

Expand All @@ -296,6 +313,7 @@ def test_sidebar_marker():

def test_multiple_verses():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test")
Expand Down Expand Up @@ -323,7 +341,8 @@ def test_multiple_verses():
.build()
]
),
]
],
1,
)
]

Expand All @@ -337,9 +356,11 @@ def test_multiple_verses():

def test_multiple_chapters():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test")
verse_text_parser_state.verse_ref.chapter_num = 2
usfm_structure_extractor.chapter(verse_text_parser_state, "2", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test2")
Expand All @@ -356,7 +377,8 @@ def test_multiple_chapters():
.build()
]
),
]
],
1,
),
Chapter(
[
Expand All @@ -369,7 +391,8 @@ def test_multiple_chapters():
.build()
]
),
]
],
2,
),
]

Expand All @@ -383,6 +406,7 @@ def test_multiple_chapters():

def test_character_marker_in_text():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test")
Expand All @@ -407,7 +431,8 @@ def test_character_marker_in_text():
.build(),
]
),
]
],
1,
)
]

Expand All @@ -424,6 +449,7 @@ def test_character_marker_in_text():

def test_empty_text():
usfm_structure_extractor = UsfmStructureExtractor()
verse_text_parser_state.verse_ref.chapter_num = 1
usfm_structure_extractor.chapter(verse_text_parser_state, "1", "c", None, None)
usfm_structure_extractor.verse(verse_text_parser_state, "1", "v", None, None)
usfm_structure_extractor.text(verse_text_parser_state, "test")
Expand All @@ -450,7 +476,8 @@ def test_empty_text():
.build(),
]
),
]
],
1,
)
]

Expand All @@ -468,6 +495,7 @@ def test_empty_text():
def assert_chapter_equal(expected_chapters: List[Chapter], actual_chapters: List[Chapter]):
assert len(expected_chapters) == len(actual_chapters)
for expected_chapter, actual_chapter in zip(expected_chapters, actual_chapters):
assert expected_chapter.chapter_num == actual_chapter.chapter_num
assert len(expected_chapter.verses) == len(actual_chapter.verses)
for expected_verse, actual_verse in zip(expected_chapter.verses, actual_chapter.verses):
assert len(expected_verse._text_segments) == len(actual_verse._text_segments)
Expand Down
Loading