From 1d230603532c43646b4f9497a487486abda30bcf Mon Sep 17 00:00:00 2001 From: Storm Dragon Date: Tue, 6 Jan 2026 16:40:26 -0500 Subject: [PATCH] More ports from orca. --- src/cthulhu/ax_text.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cthulhu/ax_text.py b/src/cthulhu/ax_text.py index 9cafdbf..11f429c 100644 --- a/src/cthulhu/ax_text.py +++ b/src/cthulhu/ax_text.py @@ -585,8 +585,9 @@ class AXText: pattern = r"[.!?]+(?=\s|\ufffc|$)" for match in re.finditer(pattern, text): end_pos = match.end() - # Skip whitespace and embedded objects to find start of next sentence. - while end_pos < len(text) and (text[end_pos].isspace() or text[end_pos] == "\ufffc"): + # Skip whitespace to find start of next sentence. Do not skip embedded object + # characters since they represent child objects that must be traversed. + while end_pos < len(text) and text[end_pos].isspace(): end_pos += 1 # Only add boundary if we haven't reached the end and it's not a duplicate. if end_pos < len(text) and end_pos not in boundaries: