diff --git a/src/epub_parser.py b/src/epub_parser.py
index 0c9335d..a599b4e 100644
--- a/src/epub_parser.py
+++ b/src/epub_parser.py
@@ -10,8 +10,10 @@ EPUB files are ZIP archives containing XHTML/HTML content.
 import zipfile
 import tempfile
 import shutil
+import re
 from pathlib import Path
-from bs4 import BeautifulSoup
+from urllib.parse import unquote
+from bs4 import BeautifulSoup, Tag, NavigableString, CData
 from src.book import Book, Chapter
 
 
@@ -63,10 +65,10 @@ class EpubParser:
             # Parse content files in spine order (authoritative reading sequence)
             for itemId in spine:
                 if itemId in manifest:
-                    contentPath = opfDir / manifest[itemId]
+                    contentPath = (opfDir / unquote(manifest[itemId])).resolve()
                     if contentPath.exists():
-                        chapters = self._parse_content_file(contentPath, tocMap)
-                        for chapter in chapters:
+                        chapter = self._parse_content_file(contentPath, tocMap)
+                        if chapter:
                             book.add_chapter(chapter)
 
             return book
@@ -143,7 +145,8 @@ class EpubParser:
         if spineTag:
             for itemref in spineTag.find_all('itemref'):
                 idref = itemref.get('idref')
-                if idref:
+                linear = itemref.get('linear', 'yes').lower()
+                if idref and linear != 'no':
                     spine.append(idref)
 
         return metadata, spine, manifest
@@ -215,10 +218,9 @@ class EpubParser:
                 if not chapterTitle or not href:
                     continue
 
-                # Strip anchor from href
-                hrefFile = href.split('#')[0]
-                if hrefFile:
-                    tocMap[hrefFile] = chapterTitle
+                contentKey = self._normalize_content_key(navPath.parent, href)
+                if contentKey:
+                    tocMap[contentKey] = chapterTitle
 
             return tocMap if tocMap else None
 
@@ -280,10 +282,9 @@ class EpubParser:
                     continue
 
                 href = content.get('src')
-                # Strip anchor from href
-                hrefFile = href.split('#')[0]
-                if hrefFile:
-                    tocMap[hrefFile] = chapterTitle
+                contentKey = self._normalize_content_key(ncxPath.parent, href)
+                if contentKey:
+                    tocMap[contentKey] = chapterTitle
 
             return tocMap if tocMap else None
 
@@ -300,7 +301,7 @@ class EpubParser:
-            tocMap: Optional dict mapping filename to TOC title
+            tocMap: Optional dict mapping resolved absolute content path to TOC title
 
         Returns:
-            List of Chapter objects
+            Chapter object, or None if the file has no readable text
         """
         if tocMap is None:
             tocMap = {}
@@ -310,75 +311,164 @@ class EpubParser:
                 soup = BeautifulSoup(f.read(), 'html.parser')
         except Exception as e:
             print(f"Error reading content file {contentPath}: {e}")
-            return []
+            return None
 
-        chapters = []
+        bodyTag = soup.find('body') or soup
+        self._remove_non_readable_elements(bodyTag)
 
-        # Check if this file has a TOC title
-        tocTitle = tocMap.get(contentPath.name)
+        paragraphs = self._extract_paragraphs(bodyTag)
+        if not paragraphs:
+            return None
 
-        # Look for main content sections
-        # Try h1, h2, section elements
-        sections = soup.find_all(['section', 'div'], class_=lambda x: x and 'section' in x.lower() if x else False)
+        title = self._resolve_chapter_title(contentPath, bodyTag, tocMap)
 
-        if not sections:
-            # Fallback: treat entire file as one chapter
-            sections = [soup.find('body') or soup]
+        chapter = Chapter(title)
+        chapter.paragraphs = paragraphs
+        return chapter
 
-        for sectionIndex, section in enumerate(sections):
-            # Find chapter title
-            title = None
+    def _normalize_content_key(self, baseDir, href):
+        """Return the resolved absolute path string for href relative to baseDir, or None if href names no file"""
+        hrefFile = unquote((href or '').split('#')[0].strip())  # a missing href yields no key instead of raising
+        if not hrefFile:
+            return None
 
-            # Priority 1: Use TOC title for the first section if available
-            if sectionIndex == 0 and tocTitle:
-                title = tocTitle
-            else:
-                # Priority 2: Look for heading in content
-                for hTag in ['h1', 'h2', 'h3']:
-                    heading = section.find(hTag)
-                    if heading:
-                        title = heading.get_text(strip=True)
-                        break
+        return str((baseDir / hrefFile).resolve())
 
-            # Priority 3: Fallback to filename
-            if not title:
-                if tocTitle:
-                    title = tocTitle
-                else:
-                    title = contentPath.stem
+    def _resolve_chapter_title(self, contentPath, bodyTag, tocMap):
+        """Resolve the best title for a spine item: TOC entry, else first non-empty h1/h2/h3 text, else the file stem"""
+        tocTitle = tocMap.get(str(contentPath.resolve()))  # same key form that _normalize_content_key produces
+        if tocTitle:
+            return tocTitle
 
-            # Extract paragraphs (including headings)
-            paragraphs = []
+        for headingName in ['h1', 'h2', 'h3']:  # heading levels are tried in this order
+            heading = bodyTag.find(headingName)
+            if heading:
+                headingText = self._extract_tag_text(heading)
+                if headingText:  # a heading with no text falls through to the next level
+                    return headingText
 
-            # Include all headings and paragraphs in reading order
-            for element in section.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']):
-                text = element.get_text(strip=True)
+        return contentPath.stem
+
+    def _remove_non_readable_elements(self, rootTag):
+        """Drop script, style and noscript subtrees in place so their contents are never read aloud"""
+        for hiddenTag in rootTag(['script', 'style', 'noscript']):  # calling a tag is shorthand for find_all()
+            hiddenTag.decompose()
+
+    def _extract_paragraphs(self, rootTag):
+        """Extract readable text blocks from XHTML in document order and return them as a list of strings"""
+        paragraphs = []
+        pendingInlineText = []  # loose inline fragments waiting to be flushed into one paragraph
+        blockTags = {  # each of these tags is emitted as a single paragraph
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+            'p', 'li', 'blockquote', 'pre', 'figcaption',
+            'caption', 'td', 'th', 'dd', 'dt', 'address'
+        }
+        containerTags = {  # wrappers that flush pending inline text once their children are processed
+            'body', 'section', 'article', 'main', 'div',
+            'aside', 'nav', 'header', 'footer'
+        }
+        structuredTags = blockTags | containerTags | {  # a child with one of these names makes its parent recurse
+            'ul', 'ol', 'dl', 'table', 'thead', 'tbody', 'tfoot', 'tr'
+        }
+
+        for child in rootTag.children:
+            self._collect_paragraphs(
+                child,
+                paragraphs,
+                pendingInlineText,
+                blockTags,
+                containerTags,
+                structuredTags
+            )
+
+        self._flush_inline_text(paragraphs, pendingInlineText)  # emit any trailing inline text
+        return paragraphs
+
+    def _collect_paragraphs(self, node, paragraphs, pendingInlineText, blockTags, containerTags, structuredTags):
+        """Walk the DOM depth-first, appending one string per readable block to paragraphs without duplicating nested content"""
+        if isinstance(node, NavigableString):
+            text = self._normalize_text(str(node))
+            if text and type(node) in (NavigableString, CData):  # Comment/Doctype/etc. are markup, not prose
+                pendingInlineText.append(text)
+            return
+
+        if not isinstance(node, Tag):
+            return
+
+        if node.name == 'br':  # a line break ends the current run of inline text
+            self._flush_inline_text(paragraphs, pendingInlineText)
+            return
+
+        if node.name in blockTags:  # block tags are emitted whole and never recursed into
+            self._flush_inline_text(paragraphs, pendingInlineText)
+            text = self._extract_tag_text(node)
+            if text:
+                paragraphs.append(text)
+            return
+
+        if node.name in structuredTags:  # block tags returned above, so only containers and list/table wrappers reach here
+            if not self._has_structured_children(node, structuredTags):
+                self._flush_inline_text(paragraphs, pendingInlineText)
+                text = self._extract_tag_text(node)
                 if text:
                     paragraphs.append(text)
+                return
 
-            # Only add chapter if it has content
-            if paragraphs:
-                chapter = Chapter(title)
-                chapter.paragraphs = paragraphs
-                chapters.append(chapter)
+            for child in node.children:
+                self._collect_paragraphs(
+                    child,
+                    paragraphs,
+                    pendingInlineText,
+                    blockTags,
+                    containerTags,
+                    structuredTags
+                )
 
-        # If no chapters found, extract all content as one chapter
-        if not chapters:
-            title = tocTitle if tocTitle else contentPath.stem
-            paragraphs = []
+            if node.name in containerTags:  # inline text must not run past the end of its container
+                self._flush_inline_text(paragraphs, pendingInlineText)
+            return
 
-            # Include all headings and paragraphs in reading order
-            for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']):
-                text = element.get_text(strip=True)
-                if text:
-                    paragraphs.append(text)
+        for child in node.children:  # any other tag (inline markup) contributes its text via recursion
+            self._collect_paragraphs(
+                child,
+                paragraphs,
+                pendingInlineText,
+                blockTags,
+                containerTags,
+                structuredTags
+            )
 
-            if paragraphs:
-                chapter = Chapter(title)
-                chapter.paragraphs = paragraphs
-                chapters.append(chapter)
+    def _has_structured_children(self, node, structuredTags):
+        """Tell whether any direct child of node is a tag whose name is in structuredTags"""
+        return any(
+            isinstance(kid, Tag) and kid.name in structuredTags
+            for kid in node.children
+        )
 
-        return chapters
+    def _extract_tag_text(self, tag):
+        """Join the tag's stripped text fragments with single spaces, then normalize the spacing"""
+        return self._normalize_text(tag.get_text(separator=' ', strip=True))
+
+    def _flush_inline_text(self, paragraphs, pendingInlineText):
+        """Emit the buffered inline fragments as one paragraph and empty the buffer"""
+        if pendingInlineText:
+            merged = self._normalize_text(' '.join(pendingInlineText))
+            pendingInlineText.clear()
+
+            # Nothing is appended when the buffer normalizes to an empty string
+            if merged:
+                paragraphs.append(merged)
+
+    def _normalize_text(self, text):
+        """Squeeze whitespace runs to one space and pull punctuation and brackets tight against words"""
+        collapsed = re.sub(r'\s+', ' ', text).strip()
+        if collapsed == '':
+            return ''
+
+        # Each pattern keeps the captured punctuation or bracket and drops the adjacent whitespace
+        for pattern in (r'\s+([,.;:!?])', r'([\(\[\{])\s+', r'\s+([\)\]\}])'):
+            collapsed = re.sub(pattern, r'\1', collapsed)
+        return collapsed
 
     def cleanup(self):
         """Clean up temporary files and memory"""
diff --git a/test_epub_parser.py b/test_epub_parser.py
new file mode 100644
index 0000000..517955d
--- /dev/null
+++ b/test_epub_parser.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Regression tests for EPUB parsing behavior.
+"""
+
+from pathlib import Path
+from tempfile import TemporaryDirectory
+import zipfile
+
+from src.epub_parser import EpubParser
+
+
+def write_text_file(filePath, content):
+    """Write content to filePath as UTF-8 text, creating any missing parent directories first"""
+    filePath.parent.mkdir(parents=True, exist_ok=True)  # fixture files live in nested folders
+    filePath.write_text(content, encoding='utf-8')
+
+
+def build_test_epub(epubPath):
+    """Build a minimal EPUB fixture at epubPath: nav TOC, two linear chapters and one linear="no" notes file"""
+    with TemporaryDirectory() as tempDir:
+        tempPath = Path(tempDir)  # staging directory; its contents are zipped into epubPath below
+
+        write_text_file(
+            tempPath / 'mimetype',
+            'application/epub+zip'
+        )
+        write_text_file(
+            tempPath / 'META-INF' / 'container.xml',
+            '''<?xml version="1.0" encoding="UTF-8"?>
+<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
+  <rootfiles>
+    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
+  </rootfiles>
+</container>
+'''
+        )
+        write_text_file(
+            tempPath / 'OEBPS' / 'content.opf',
+            '''<?xml version="1.0" encoding="utf-8"?>
+<package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId">
+  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <dc:title>Parser Regression Book</dc:title>
+    <dc:creator>BookStorm Test</dc:creator>
+  </metadata>
+  <manifest>
+    <item id="navdoc" href="nav/toc.xhtml" media-type="application/xhtml+xml" properties="nav"/>
+    <item id="chap1" href="text/chapter1.xhtml" media-type="application/xhtml+xml"/>
+    <item id="chap2" href="text/chapter2.xhtml" media-type="application/xhtml+xml"/>
+    <item id="notes" href="text/notes.xhtml" media-type="application/xhtml+xml"/>
+  </manifest>
+  <spine>
+    <itemref idref="chap1"/>
+    <itemref idref="notes" linear="no"/>
+    <itemref idref="chap2"/>
+  </spine>
+</package>
+'''
+        )
+        write_text_file(
+            tempPath / 'OEBPS' / 'nav' / 'toc.xhtml',
+            '''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+  <body>
+    <nav epub:type="toc">
+      <ol>
+        <li><a href="../text/chapter1.xhtml">Chapter One</a></li>
+        <li><a href="../text/chapter2.xhtml">Chapter Two</a></li>
+      </ol>
+    </nav>
+  </body>
+</html>
+'''
+        )
+        write_text_file(
+            tempPath / 'OEBPS' / 'text' / 'chapter1.xhtml',
+            '''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <body>
+    <section>
+      <h1>Visible Heading</h1>
+      <p>This is <em>very</em> bad.</p>
+      <ul>
+        <li>First item</li>
+        <li>Second item</li>
+      </ul>
+    </section>
+  </body>
+</html>
+'''
+        )
+        write_text_file(
+            tempPath / 'OEBPS' / 'text' / 'chapter2.xhtml',
+            '''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <body>
+    <div>Lead in <span>text</span>.</div>
+    <div class="section"><p>More text.</p></div>
+    <p>Tail text.</p>
+  </body>
+</html>
+'''
+        )
+        write_text_file(
+            tempPath / 'OEBPS' / 'text' / 'notes.xhtml',
+            '''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <body>
+    <p>This non-linear note should not appear.</p>
+  </body>
+</html>
+'''
+        )
+
+        with zipfile.ZipFile(epubPath, 'w') as zipRef:
+            mimetypePath = tempPath / 'mimetype'
+            zipRef.write(mimetypePath, 'mimetype', compress_type=zipfile.ZIP_STORED)  # mimetype is written first and uncompressed
+
+            for filePath in sorted(tempPath.rglob('*')):  # sorted for a deterministic archive order
+                if filePath.is_file() and filePath != mimetypePath:  # mimetype was already added above
+                    archivePath = filePath.relative_to(tempPath).as_posix()  # forward-slash member names
+                    zipRef.write(filePath, archivePath, compress_type=zipfile.ZIP_DEFLATED)
+
+
+def test_epub_parser():
+    """Verify EPUB parsing follows linear spine order, takes titles from the nav TOC and preserves readable text"""
+    with TemporaryDirectory() as tempDir:
+        epubPath = Path(tempDir) / 'fixture.epub'
+        build_test_epub(epubPath)
+
+        parser = EpubParser()
+        try:
+            book = parser.parse(epubPath)
+        finally:
+            parser.cleanup()  # runs even when parse() raises
+
+    print("Testing spine-based chapter extraction...")
+    assert book.title == 'Parser Regression Book'
+    assert book.author == 'BookStorm Test'
+    assert book.get_total_chapters() == 2  # the linear="no" notes item must be skipped
+    assert [chapter.title for chapter in book.chapters] == ['Chapter One', 'Chapter Two']  # nav titles, not the in-file <h1>
+    print("Chapter extraction tests passed")
+
+    print("\nTesting readable text extraction...")
+    firstChapter = book.get_chapter(0)
+    secondChapter = book.get_chapter(1)
+
+    assert firstChapter is not None
+    assert secondChapter is not None
+
+    assert firstChapter.paragraphs == [  # heading, paragraph with inline <em>, then one entry per list item
+        'Visible Heading',
+        'This is very bad.',
+        'First item',
+        'Second item'
+    ]
+    assert secondChapter.paragraphs == [  # a bare <div>, a <div> wrapping a <p>, and a top-level <p>
+        'Lead in text.',
+        'More text.',
+        'Tail text.'
+    ]
+    assert all('non-linear note' not in paragraph.lower() for chapter in book.chapters for paragraph in chapter.paragraphs)
+    print("Readable text tests passed")
+
+    print("\nAll EPUB parser tests passed!")
+
+
+if __name__ == "__main__":  # lets the file be run directly as a script
+    test_epub_parser()