EPUB parsing now uses spine order for book navigation, and headings are included in paragraph navigation.
This commit is contained in:
+3
-106
@@ -50,6 +50,7 @@ from src.audiobookshelf_client import AudiobookshelfClient
|
|||||||
from src.audiobookshelf_menu import AudiobookshelfMenu
|
from src.audiobookshelf_menu import AudiobookshelfMenu
|
||||||
from src.server_link_manager import ServerLinkManager
|
from src.server_link_manager import ServerLinkManager
|
||||||
from src.bookmarks_menu import BookmarksMenu
|
from src.bookmarks_menu import BookmarksMenu
|
||||||
|
from src.wav_exporter import WavExporter
|
||||||
|
|
||||||
|
|
||||||
class BookReader:
|
class BookReader:
|
||||||
@@ -2595,7 +2596,8 @@ def main():
|
|||||||
|
|
||||||
# Handle export mode
|
# Handle export mode
|
||||||
if args.wav:
|
if args.wav:
|
||||||
return export_to_wav(bookPath, config, args.outputDir)
|
exporter = WavExporter(config)
|
||||||
|
return exporter.export(bookPath, args.outputDir)
|
||||||
|
|
||||||
# Interactive reading mode
|
# Interactive reading mode
|
||||||
config.set_last_book(bookPath)
|
config.set_last_book(bookPath)
|
||||||
@@ -2614,110 +2616,5 @@ def main():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def export_to_wav(bookPath, config, outputDir=None):
    """
    Export book to WAV files split by chapter

    Args:
        bookPath: Path to book file
        config: ConfigManager instance
        outputDir: Output directory (optional, defaults to ./{bookname}_audio)

    Returns:
        Exit code (0 on completion, 1 on unsupported format, parse failure
        or when the configured reader engine cannot produce WAV data)
    """
    from src.daisy_parser import DaisyParser
    from src.epub_parser import EpubParser
    from src.pdf_parser import PdfParser
    from src.txt_parser import TxtParser
    from src.tts_engine import TtsEngine

    print(f"Exporting book to WAV: {bookPath}")

    # Parse book using appropriate parser
    bookPath = Path(bookPath)
    suffix = bookPath.suffix.lower()

    if suffix == '.epub':
        parser = EpubParser()
    elif suffix == '.zip':
        parser = DaisyParser()
    elif suffix == '.pdf':
        parser = PdfParser()
    elif suffix == '.txt':
        parser = TxtParser()
    else:
        print(f"Error: Unsupported book format: {suffix}")
        return 1

    try:
        book = parser.parse(bookPath)
    except Exception as e:
        print(f"Error parsing book: {e}")
        return 1

    # After a successful parse, parser.cleanup() must run on every exit path.
    # Previously the early return for the speechd engine (and any unexpected
    # exception below) skipped it.
    try:
        # Determine output directory
        if outputDir is None:
            outputDir = Path(f"./{bookPath.stem}_audio")
        else:
            outputDir = Path(outputDir)

        outputDir.mkdir(parents=True, exist_ok=True)
        print(f"Output directory: {outputDir}")

        # Initialize TTS engine
        readerEngine = config.get_reader_engine()
        if readerEngine == 'speechd':
            print("Error: WAV export requires piper-tts. Set reader_engine=piper in config.")
            return 1

        voiceModel = config.get_voice_model()
        tts = TtsEngine(voiceModel)

        totalChapters = book.get_total_chapters()
        print(f"Using voice: {voiceModel}")
        print(f"Chapters: {totalChapters}")
        print()

        # Export each chapter
        for chapterIdx in range(totalChapters):
            chapter = book.get_chapter(chapterIdx)
            if not chapter:
                continue

            chapterNum = chapterIdx + 1
            print(f"Exporting Chapter {chapterNum}/{totalChapters}: {chapter.title}")

            # Combine all paragraphs in chapter
            chapterText = "\n\n".join(chapter.paragraphs)

            # Generate audio; a failure in one chapter does not stop the export
            try:
                wavData = tts.text_to_wav_data(chapterText)
                if not wavData:
                    print(f"  Warning: No audio generated for chapter {chapterNum}")
                    continue

                # Save to file; keep only characters that are safe in a filename
                sanitizedTitle = "".join(
                    c for c in chapter.title if c.isalnum() or c in (' ', '-', '_')
                ).strip()
                if not sanitizedTitle:
                    sanitizedTitle = f"Chapter_{chapterNum}"

                outputFile = outputDir / f"{chapterNum:03d}_{sanitizedTitle}.wav"
                with open(outputFile, 'wb') as f:
                    f.write(wavData)

                print(f"  Saved: {outputFile.name}")

            except Exception as e:
                print(f"  Error generating audio for chapter {chapterNum}: {e}")
                continue
    finally:
        parser.cleanup()

    print(f"\nExport complete! Files saved to: {outputDir}")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
+77
-114
@@ -56,23 +56,18 @@ class EpubParser:
|
|||||||
book.title = metadata.get('title', epubPath.stem)
|
book.title = metadata.get('title', epubPath.stem)
|
||||||
book.author = metadata.get('creator', 'Unknown')
|
book.author = metadata.get('creator', 'Unknown')
|
||||||
|
|
||||||
# Try to use TOC structure first
|
# Parse TOC for chapter titles
|
||||||
opfDir = opfPath.parent
|
opfDir = opfPath.parent
|
||||||
tocChapters = self._parse_toc_structure(tempPath, opfDir, manifest)
|
tocMap = self._build_toc_map(tempPath, opfDir, manifest)
|
||||||
|
|
||||||
if tocChapters:
|
# Parse content files in spine order (authoritative reading sequence)
|
||||||
# Successfully parsed using TOC
|
for itemId in spine:
|
||||||
for chapter in tocChapters:
|
if itemId in manifest:
|
||||||
book.add_chapter(chapter)
|
contentPath = opfDir / manifest[itemId]
|
||||||
else:
|
if contentPath.exists():
|
||||||
# Fallback: Parse content files in spine order
|
chapters = self._parse_content_file(contentPath, tocMap)
|
||||||
for itemId in spine:
|
for chapter in chapters:
|
||||||
if itemId in manifest:
|
book.add_chapter(chapter)
|
||||||
contentPath = opfDir / manifest[itemId]
|
|
||||||
if contentPath.exists():
|
|
||||||
chapters = self._parse_content_file(contentPath)
|
|
||||||
for chapter in chapters:
|
|
||||||
book.add_chapter(chapter)
|
|
||||||
|
|
||||||
return book
|
return book
|
||||||
|
|
||||||
@@ -153,9 +148,9 @@ class EpubParser:
|
|||||||
|
|
||||||
return metadata, spine, manifest
|
return metadata, spine, manifest
|
||||||
|
|
||||||
def _parse_toc_structure(self, epubDir, opfDir, manifest):
|
def _build_toc_map(self, epubDir, opfDir, manifest):
|
||||||
"""
|
"""
|
||||||
Parse TOC structure (NCX or nav.xhtml) to get chapters
|
Build a map of href -> chapter title from TOC
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
epubDir: Root EPUB directory
|
epubDir: Root EPUB directory
|
||||||
@@ -163,23 +158,23 @@ class EpubParser:
|
|||||||
manifest: Manifest dict from OPF
|
manifest: Manifest dict from OPF
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Chapter objects or None if TOC not found
|
Dict mapping href (without anchor) to chapter title
|
||||||
"""
|
"""
|
||||||
# Try EPUB 3 nav.xhtml first
|
# Try EPUB 3 nav.xhtml first
|
||||||
navChapters = self._parse_nav_xhtml(epubDir, opfDir, manifest)
|
tocMap = self._parse_nav_xhtml_map(epubDir, opfDir, manifest)
|
||||||
if navChapters:
|
if tocMap:
|
||||||
return navChapters
|
return tocMap
|
||||||
|
|
||||||
# Try EPUB 2 NCX
|
# Try EPUB 2 NCX
|
||||||
ncxChapters = self._parse_ncx(epubDir, opfDir, manifest)
|
tocMap = self._parse_ncx_map(epubDir, opfDir, manifest)
|
||||||
if ncxChapters:
|
if tocMap:
|
||||||
return ncxChapters
|
return tocMap
|
||||||
|
|
||||||
return None
|
return {}
|
||||||
|
|
||||||
def _parse_nav_xhtml(self, epubDir, opfDir, manifest):
|
def _parse_nav_xhtml_map(self, epubDir, opfDir, manifest):
|
||||||
"""
|
"""
|
||||||
Parse EPUB 3 nav.xhtml for TOC structure
|
Parse EPUB 3 nav.xhtml to build href -> title map
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
epubDir: Root EPUB directory
|
epubDir: Root EPUB directory
|
||||||
@@ -187,7 +182,7 @@ class EpubParser:
|
|||||||
manifest: Manifest dict from OPF
|
manifest: Manifest dict from OPF
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Chapter objects or None
|
Dict mapping href to chapter title, or None
|
||||||
"""
|
"""
|
||||||
# Find nav document in manifest
|
# Find nav document in manifest
|
||||||
navPath = None
|
navPath = None
|
||||||
@@ -211,8 +206,8 @@ class EpubParser:
|
|||||||
if not tocNav:
|
if not tocNav:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Extract chapters from nav list
|
# Extract href -> title mappings
|
||||||
chapters = []
|
tocMap = {}
|
||||||
for link in tocNav.find_all('a', href=True):
|
for link in tocNav.find_all('a', href=True):
|
||||||
chapterTitle = link.get_text(strip=True)
|
chapterTitle = link.get_text(strip=True)
|
||||||
href = link.get('href')
|
href = link.get('href')
|
||||||
@@ -220,22 +215,20 @@ class EpubParser:
|
|||||||
if not chapterTitle or not href:
|
if not chapterTitle or not href:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract content from href location
|
# Strip anchor from href
|
||||||
paragraphs = self._extract_content_from_href(opfDir, href)
|
hrefFile = href.split('#')[0]
|
||||||
if paragraphs:
|
if hrefFile:
|
||||||
chapter = Chapter(chapterTitle)
|
tocMap[hrefFile] = chapterTitle
|
||||||
chapter.paragraphs = paragraphs
|
|
||||||
chapters.append(chapter)
|
|
||||||
|
|
||||||
return chapters if chapters else None
|
return tocMap if tocMap else None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error parsing nav.xhtml: {e}")
|
print(f"Error parsing nav.xhtml: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _parse_ncx(self, epubDir, opfDir, manifest):
|
def _parse_ncx_map(self, epubDir, opfDir, manifest):
|
||||||
"""
|
"""
|
||||||
Parse EPUB 2 NCX file for TOC structure
|
Parse EPUB 2 NCX file to build href -> title map
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
epubDir: Root EPUB directory
|
epubDir: Root EPUB directory
|
||||||
@@ -243,7 +236,7 @@ class EpubParser:
|
|||||||
manifest: Manifest dict from OPF
|
manifest: Manifest dict from OPF
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Chapter objects or None
|
Dict mapping href to chapter title, or None
|
||||||
"""
|
"""
|
||||||
# Find NCX file in manifest
|
# Find NCX file in manifest
|
||||||
ncxPath = None
|
ncxPath = None
|
||||||
@@ -266,13 +259,13 @@ class EpubParser:
|
|||||||
with open(ncxPath, 'r', encoding='utf-8', errors='ignore') as f:
|
with open(ncxPath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
soup = BeautifulSoup(f.read(), features='xml')
|
soup = BeautifulSoup(f.read(), features='xml')
|
||||||
|
|
||||||
# Find all navPoints (top-level only)
|
# Find all navPoints (including nested)
|
||||||
navMap = soup.find('navMap')
|
navMap = soup.find('navMap')
|
||||||
if not navMap:
|
if not navMap:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
chapters = []
|
tocMap = {}
|
||||||
for navPoint in navMap.find_all('navPoint', recursive=False):
|
for navPoint in navMap.find_all('navPoint'):
|
||||||
# Get chapter title
|
# Get chapter title
|
||||||
navLabel = navPoint.find('navLabel')
|
navLabel = navPoint.find('navLabel')
|
||||||
if navLabel:
|
if navLabel:
|
||||||
@@ -287,78 +280,31 @@ class EpubParser:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
href = content.get('src')
|
href = content.get('src')
|
||||||
|
# Strip anchor from href
|
||||||
|
hrefFile = href.split('#')[0]
|
||||||
|
if hrefFile:
|
||||||
|
tocMap[hrefFile] = chapterTitle
|
||||||
|
|
||||||
# Extract content from href location
|
return tocMap if tocMap else None
|
||||||
paragraphs = self._extract_content_from_href(opfDir, href)
|
|
||||||
if paragraphs:
|
|
||||||
chapter = Chapter(chapterTitle)
|
|
||||||
chapter.paragraphs = paragraphs
|
|
||||||
chapters.append(chapter)
|
|
||||||
|
|
||||||
return chapters if chapters else None
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error parsing NCX: {e}")
|
print(f"Error parsing NCX: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _extract_content_from_href(self, opfDir, href):
|
def _parse_content_file(self, contentPath, tocMap=None):
|
||||||
"""
|
|
||||||
Extract paragraphs from a specific href location
|
|
||||||
|
|
||||||
Args:
|
|
||||||
opfDir: Directory containing OPF file
|
|
||||||
href: Content href (may include #anchor)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of paragraph strings
|
|
||||||
"""
|
|
||||||
# Split href into file and anchor
|
|
||||||
parts = href.split('#')
|
|
||||||
filePath = opfDir / parts[0]
|
|
||||||
anchor = parts[1] if len(parts) > 1 else None
|
|
||||||
|
|
||||||
if not filePath.exists():
|
|
||||||
return []
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(filePath, 'r', encoding='utf-8', errors='ignore') as f:
|
|
||||||
soup = BeautifulSoup(f.read(), 'html.parser')
|
|
||||||
|
|
||||||
# If anchor specified, find that element
|
|
||||||
if anchor:
|
|
||||||
section = soup.find(id=anchor)
|
|
||||||
if not section:
|
|
||||||
# Try to find by name attribute
|
|
||||||
section = soup.find(attrs={'name': anchor})
|
|
||||||
if not section:
|
|
||||||
# Fallback to entire body
|
|
||||||
section = soup.find('body') or soup
|
|
||||||
else:
|
|
||||||
section = soup.find('body') or soup
|
|
||||||
|
|
||||||
# Extract paragraphs from section
|
|
||||||
paragraphs = []
|
|
||||||
for p in section.find_all('p'):
|
|
||||||
text = p.get_text(strip=True)
|
|
||||||
if text:
|
|
||||||
paragraphs.append(text)
|
|
||||||
|
|
||||||
return paragraphs
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error extracting content from {href}: {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _parse_content_file(self, contentPath):
|
|
||||||
"""
|
"""
|
||||||
Parse XHTML/HTML content file
|
Parse XHTML/HTML content file
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
contentPath: Path to content file
|
contentPath: Path to content file
|
||||||
|
tocMap: Optional dict mapping filename to TOC title
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Chapter objects
|
List of Chapter objects
|
||||||
"""
|
"""
|
||||||
|
if tocMap is None:
|
||||||
|
tocMap = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(contentPath, 'r', encoding='utf-8', errors='ignore') as f:
|
with open(contentPath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
soup = BeautifulSoup(f.read(), 'html.parser')
|
soup = BeautifulSoup(f.read(), 'html.parser')
|
||||||
@@ -368,6 +314,9 @@ class EpubParser:
|
|||||||
|
|
||||||
chapters = []
|
chapters = []
|
||||||
|
|
||||||
|
# Check if this file has a TOC title
|
||||||
|
tocTitle = tocMap.get(contentPath.name)
|
||||||
|
|
||||||
# Look for main content sections
|
# Look for main content sections
|
||||||
# Try h1, h2, section elements
|
# Try h1, h2, section elements
|
||||||
sections = soup.find_all(['section', 'div'], class_=lambda x: x and 'section' in x.lower() if x else False)
|
sections = soup.find_all(['section', 'div'], class_=lambda x: x and 'section' in x.lower() if x else False)
|
||||||
@@ -376,22 +325,34 @@ class EpubParser:
|
|||||||
# Fallback: treat entire file as one chapter
|
# Fallback: treat entire file as one chapter
|
||||||
sections = [soup.find('body') or soup]
|
sections = [soup.find('body') or soup]
|
||||||
|
|
||||||
for section in sections:
|
for sectionIndex, section in enumerate(sections):
|
||||||
# Find chapter title
|
# Find chapter title
|
||||||
title = None
|
title = None
|
||||||
for hTag in ['h1', 'h2', 'h3']:
|
|
||||||
heading = section.find(hTag)
|
|
||||||
if heading:
|
|
||||||
title = heading.get_text(strip=True)
|
|
||||||
break
|
|
||||||
|
|
||||||
|
# Priority 1: Use TOC title for the first section if available
|
||||||
|
if sectionIndex == 0 and tocTitle:
|
||||||
|
title = tocTitle
|
||||||
|
else:
|
||||||
|
# Priority 2: Look for heading in content
|
||||||
|
for hTag in ['h1', 'h2', 'h3']:
|
||||||
|
heading = section.find(hTag)
|
||||||
|
if heading:
|
||||||
|
title = heading.get_text(strip=True)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Priority 3: Fallback to filename
|
||||||
if not title:
|
if not title:
|
||||||
title = contentPath.stem
|
if tocTitle:
|
||||||
|
title = tocTitle
|
||||||
|
else:
|
||||||
|
title = contentPath.stem
|
||||||
|
|
||||||
# Extract paragraphs
|
# Extract paragraphs (including headings)
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
for p in section.find_all('p'):
|
|
||||||
text = p.get_text(strip=True)
|
# Include all headings and paragraphs in reading order
|
||||||
|
for element in section.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']):
|
||||||
|
text = element.get_text(strip=True)
|
||||||
if text:
|
if text:
|
||||||
paragraphs.append(text)
|
paragraphs.append(text)
|
||||||
|
|
||||||
@@ -401,12 +362,14 @@ class EpubParser:
|
|||||||
chapter.paragraphs = paragraphs
|
chapter.paragraphs = paragraphs
|
||||||
chapters.append(chapter)
|
chapters.append(chapter)
|
||||||
|
|
||||||
# If no chapters found, extract all paragraphs as one chapter
|
# If no chapters found, extract all content as one chapter
|
||||||
if not chapters:
|
if not chapters:
|
||||||
title = contentPath.stem
|
title = tocTitle if tocTitle else contentPath.stem
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
for p in soup.find_all('p'):
|
|
||||||
text = p.get_text(strip=True)
|
# Include all headings and paragraphs in reading order
|
||||||
|
for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']):
|
||||||
|
text = element.get_text(strip=True)
|
||||||
if text:
|
if text:
|
||||||
paragraphs.append(text)
|
paragraphs.append(text)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,192 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
WAV Exporter - Export text books to WAV audio files
|
||||||
|
|
||||||
|
Converts text books (DAISY, EPUB, PDF, TXT) to WAV files split by chapter
|
||||||
|
using piper-tts for speech synthesis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from src.daisy_parser import DaisyParser
|
||||||
|
from src.epub_parser import EpubParser
|
||||||
|
from src.pdf_parser import PdfParser
|
||||||
|
from src.txt_parser import TxtParser
|
||||||
|
from src.tts_engine import TtsEngine
|
||||||
|
|
||||||
|
|
||||||
|
class WavExporter:
    """Export text books to WAV audio files, one file per chapter"""

    def __init__(self, config):
        """
        Initialize WAV exporter

        Args:
            config: ConfigManager instance
        """
        self.config = config

    def export(self, bookPath, outputDir=None):
        """
        Export book to WAV files split by chapter

        Args:
            bookPath: Path to book file
            outputDir: Output directory (optional, defaults to ./{bookname}_audio)

        Returns:
            Exit code (0 if at least one chapter was written, 1 otherwise)
        """
        print(f"Exporting book to WAV: {bookPath}")

        # Parse book using appropriate parser
        bookPath = Path(bookPath)
        parser = self._create_parser(bookPath)
        if not parser:
            return 1

        try:
            book = parser.parse(bookPath)
        except Exception as e:
            print(f"Error parsing book: {e}")
            return 1

        # After a successful parse, parser.cleanup() must run on every exit
        # path. Previously an unavailable TTS engine (or an error creating the
        # output directory) returned/raised without calling it.
        successCount = 0
        try:
            # Determine output directory
            if outputDir is None:
                outputDir = Path(f"./{bookPath.stem}_audio")
            else:
                outputDir = Path(outputDir)

            outputDir.mkdir(parents=True, exist_ok=True)
            print(f"Output directory: {outputDir}")

            # Initialize TTS engine
            tts = self._create_tts_engine()
            if not tts:
                return 1

            totalChapters = book.get_total_chapters()
            print(f"Using voice: {self.config.get_voice_model()}")
            print(f"Chapters: {totalChapters}")
            print()

            # Export each chapter
            for chapterIdx in range(totalChapters):
                chapter = book.get_chapter(chapterIdx)
                if not chapter:
                    continue

                if self._export_chapter(chapter, chapterIdx, totalChapters, tts, outputDir):
                    successCount += 1
        finally:
            parser.cleanup()

        if successCount > 0:
            print(f"\nExport complete! {successCount} chapters saved to: {outputDir}")
            return 0

        print("\nExport failed! No chapters were successfully exported.")
        return 1

    def _create_parser(self, bookPath):
        """
        Create appropriate parser for book format

        Args:
            bookPath: Path to book file

        Returns:
            Parser instance or None if unsupported format
        """
        suffix = bookPath.suffix.lower()

        if suffix == '.epub':
            return EpubParser()
        if suffix == '.zip':
            return DaisyParser()
        if suffix == '.pdf':
            return PdfParser()
        if suffix == '.txt':
            return TxtParser()

        print(f"Error: Unsupported book format: {suffix}")
        print("Supported formats: .epub, .zip (DAISY), .pdf, .txt")
        return None

    def _create_tts_engine(self):
        """
        Create TTS engine for export

        Returns:
            TtsEngine instance or None if not available
        """
        readerEngine = self.config.get_reader_engine()
        if readerEngine == 'speechd':
            print("Error: WAV export requires piper-tts. Set reader_engine=piper in config.")
            return None

        voiceModel = self.config.get_voice_model()
        try:
            return TtsEngine(voiceModel)
        except Exception as e:
            print(f"Error initializing TTS engine: {e}")
            return None

    def _export_chapter(self, chapter, chapterIdx, totalChapters, tts, outputDir):
        """
        Export a single chapter to WAV file

        Args:
            chapter: Chapter object
            chapterIdx: Chapter index (0-based)
            totalChapters: Total number of chapters
            tts: TtsEngine instance
            outputDir: Output directory path

        Returns:
            True if successful, False otherwise
        """
        chapterNum = chapterIdx + 1
        print(f"Exporting Chapter {chapterNum}/{totalChapters}: {chapter.title}")

        # Combine all paragraphs in chapter
        chapterText = "\n\n".join(chapter.paragraphs)

        # Generate audio; any failure is reported and counted as a failed chapter
        try:
            wavData = tts.text_to_wav_data(chapterText)
            if not wavData:
                print(f"  Warning: No audio generated for chapter {chapterNum}")
                return False

            # Save to file, falling back to a generic name when the title
            # contains no filename-safe characters
            sanitizedTitle = self._sanitize_filename(chapter.title)
            if not sanitizedTitle:
                sanitizedTitle = f"Chapter_{chapterNum}"

            outputFile = outputDir / f"{chapterNum:03d}_{sanitizedTitle}.wav"
            with open(outputFile, 'wb') as f:
                f.write(wavData)

            print(f"  Saved: {outputFile.name}")
            return True

        except Exception as e:
            print(f"  Error generating audio for chapter {chapterNum}: {e}")
            return False

    def _sanitize_filename(self, title):
        """
        Sanitize chapter title for use as filename

        Keeps alphanumeric characters, spaces, hyphens and underscores and
        strips surrounding whitespace.

        Args:
            title: Chapter title (None or empty yields an empty string)

        Returns:
            Sanitized filename string
        """
        if not title:
            # A missing title must not raise; the caller substitutes a default name
            return ""
        return "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
|
||||||
Reference in New Issue
Block a user