Conversion to mpv for playback mostly complete.

This commit is contained in:
Storm Dragon
2025-10-08 19:33:29 -04:00
parent 4387a5cb56
commit d19c90e69a
8 changed files with 744 additions and 608 deletions
+381 -84
View File
@@ -13,6 +13,7 @@ import threading
import gc
import os
from pathlib import Path
import subprocess
try:
from setproctitle import setproctitle
@@ -39,7 +40,7 @@ from src.tts_engine import TtsEngine
from src.config_manager import ConfigManager
from src.voice_selector import VoiceSelector
from src.book_selector import BookSelector
from src.pygame_player import PygamePlayer
from src.mpv_player import MpvPlayer
from src.speech_engine import SpeechEngine
from src.options_menu import OptionsMenu
from src.sleep_timer_menu import SleepTimerMenu
@@ -71,7 +72,8 @@ class BookReader:
self.parser = None # Will be set based on file type
self.bookmarkManager = BookmarkManager()
self.speechEngine = SpeechEngine() # UI feedback
self.audioPlayer = PygamePlayer()
self.audioPlayer = MpvPlayer()
self.ttsMpvProcess = None # For direct mpv subprocess for TTS
# Configure speech engine from saved settings
speechRate = self.config.get_speech_rate()
@@ -187,7 +189,9 @@ class BookReader:
# If it's an audio book, load it into the player
if hasattr(self.book, 'isAudioBook') and self.book.isAudioBook:
if not self.audioPlayer.load_audio_file(self.book.audioPath):
# Get saved playback speed from config
playbackSpeed = self.config.get_playback_speed()
if not self.audioPlayer.load_audio_file(self.book.audioPath, playbackSpeed=playbackSpeed):
raise Exception("Failed to load audio file")
# Inform user about navigation capabilities
@@ -199,23 +203,61 @@ class BookReader:
print(f"\nChapter navigation: Enabled ({self.book.get_total_chapters()} chapters)")
self.speechEngine.speak(f"Audio book loaded with {self.book.get_total_chapters()} chapters. Chapter navigation enabled.")
# Load bookmark if exists (but don't announce it)
bookmark = self.bookmarkManager.get_bookmark(self.bookPath)
if bookmark:
self.currentChapter = bookmark['chapterIndex']
self.currentParagraph = bookmark['paragraphIndex']
self.savedAudioPosition = bookmark.get('audioPosition', 0.0)
# Check if this book is linked to Audiobookshelf server
# If so, prioritize server progress over local bookmark
serverLink = self.serverLinkManager.get_link(str(self.bookPath))
serverProgressLoaded = False
# For audio books, show resume position
if hasattr(self.book, 'isAudioBook') and self.book.isAudioBook and self.savedAudioPosition > 0:
minutes = int(self.savedAudioPosition // 60)
seconds = int(self.savedAudioPosition % 60)
print(f"Resuming from chapter {self.currentChapter + 1} at {minutes}m {seconds}s")
if serverLink and self.absClient and self.absClient.is_authenticated():
serverId = serverLink.get('server_id')
if serverId:
try:
serverProgress = self.absClient.get_progress(serverId)
if serverProgress:
progressTime = serverProgress.get('currentTime', 0.0)
if progressTime > 0:
minutes = int(progressTime // 60)
seconds = int(progressTime % 60)
print(f"Resuming from server progress: {minutes}m {seconds}s")
# For audio books, save exact position
if hasattr(self.book, 'isAudioBook') and self.book.isAudioBook:
self.savedAudioPosition = progressTime
# Find chapter that contains this time
for i, chap in enumerate(self.book.chapters):
if hasattr(chap, 'startTime'):
chapterEnd = chap.startTime + chap.duration
if chap.startTime <= progressTime < chapterEnd:
self.currentChapter = i
break
else:
# Text book - use chapter/paragraph from server if available
# (Audiobookshelf doesn't track paragraph, so we'd need to enhance this)
pass
serverProgressLoaded = True
except Exception as e:
print(f"Could not load server progress: {e}")
# Fall back to local bookmark if no server progress
if not serverProgressLoaded:
bookmark = self.bookmarkManager.get_bookmark(self.bookPath)
if bookmark:
self.currentChapter = bookmark['chapterIndex']
self.currentParagraph = bookmark['paragraphIndex']
self.savedAudioPosition = bookmark.get('audioPosition', 0.0)
# For audio books, show resume position
if hasattr(self.book, 'isAudioBook') and self.book.isAudioBook and self.savedAudioPosition > 0:
minutes = int(self.savedAudioPosition // 60)
seconds = int(self.savedAudioPosition % 60)
print(f"Resuming from local bookmark: chapter {self.currentChapter + 1} at {minutes}m {seconds}s")
else:
print(f"Resuming from chapter {self.currentChapter + 1}, paragraph {self.currentParagraph + 1}")
else:
print(f"Resuming from chapter {self.currentChapter + 1}, paragraph {self.currentParagraph + 1}")
else:
print("Starting from beginning")
self.savedAudioPosition = 0.0
print("Starting from beginning")
self.savedAudioPosition = 0.0
def read_current_paragraph(self):
"""Read the current paragraph aloud"""
@@ -370,11 +412,24 @@ class BookReader:
# Upload progress to server
success = self.absClient.update_progress(serverId, currentTime, duration, progress)
if success:
print(f"Progress synced to server: {int(progress * 100)}%")
print(f"Progress synced to server: {progress * 100:.1f}%")
# Also sync session if active
if self.sessionId:
self.absClient.sync_session(self.sessionId, currentTime, duration, progress)
syncSuccess = self.absClient.sync_session(self.sessionId, currentTime, duration, progress)
if syncSuccess:
# Update session in server link to persist it
if self.bookPath:
self.serverLinkManager.update_session(str(self.bookPath), self.sessionId)
else:
# Session sync failed - might be expired, create new one
print(f"Session sync failed, creating new session...")
newSessionId = self.absClient.create_session(serverId)
if newSessionId:
self.sessionId = newSessionId
if self.bookPath:
self.serverLinkManager.update_session(str(self.bookPath), self.sessionId)
print(f"Created new session: {self.sessionId}")
def reload_tts_engine(self):
"""Reload TTS engine with current config settings"""
@@ -597,8 +652,16 @@ class BookReader:
# Start next chapter
self._start_paragraph_playback()
elif readerEngine == 'piper':
# Check piper-tts / pygame player state
playbackFinished = not self.audioPlayer.is_playing() and not self.audioPlayer.is_paused()
# Check piper-tts subprocess state
# The TTS mpv process runs independently, so we need to check if it's still running
playbackFinished = False
if self.ttsMpvProcess:
# Check if the mpv subprocess has finished
if self.ttsMpvProcess.poll() is not None:
playbackFinished = True
else:
# No process exists, consider playback finished
playbackFinished = True
if playbackFinished:
# Current paragraph finished, advance
@@ -633,6 +696,7 @@ class BookReader:
# Debug: Print memory usage every 10 seconds
try:
import resource
# pylint: disable=no-member
memUsage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 # MB
print(f"DEBUG: Memory usage: {memUsage:.1f} MB")
@@ -651,6 +715,9 @@ class BookReader:
except KeyboardInterrupt:
print("\n\nInterrupted")
finally:
# Save bookmark BEFORE stopping (so we can get current position)
self.save_bookmark(speakFeedback=False)
# Stop playback
readerEngine = self.config.get_reader_engine()
if readerEngine == 'speechd':
@@ -658,9 +725,6 @@ class BookReader:
else:
self.audioPlayer.stop()
# Save bookmark
self.save_bookmark(speakFeedback=False)
# Close Audiobookshelf session if active
if self.sessionId and self.absClient:
try:
@@ -891,7 +955,7 @@ class BookReader:
elif event.key == pygame.K_h:
# Help
self.speechEngine.speak("SPACE: play pause. n: next paragraph. p: previous paragraph. Shift N: next chapter. Shift P: previous chapter. s: save bookmark. k: bookmarks menu. r: recent books. b: browse books. a: audiobookshelf. o: options menu. i: current info. Page Up Down: adjust speech rate. t: time remaining. h: help. q: quit or sleep timer")
self.speechEngine.speak("SPACE: play pause. n: next paragraph. p: previous paragraph. Shift N: next chapter. Shift P: previous chapter. s: save bookmark. k: bookmarks menu. r: recent books. b: browse books. a: audiobookshelf. o: options menu. i: current info. Page Up Down: adjust speech rate. Right bracket: increase playback speed. Left bracket: decrease playback speed. Backspace: reset playback speed. t: time remaining. h: help. q: quit or sleep timer")
elif event.key == pygame.K_i:
if not self.book:
@@ -921,6 +985,39 @@ class BookReader:
# Open sleep timer menu
self.sleepTimerMenu.enter_menu()
elif event.key == pygame.K_RIGHTBRACKET:
# Increase playback speed (works for all books!)
if self.book:
currentSpeed = self.config.get_playback_speed()
newSpeed = min(2.0, currentSpeed + 0.1)
newSpeed = round(newSpeed, 1) # Round to 1 decimal place
self._change_playback_speed(newSpeed)
else:
self.speechEngine.speak("No book loaded")
elif event.key == pygame.K_LEFTBRACKET:
# Decrease playback speed (works for all books!)
if self.book:
currentSpeed = self.config.get_playback_speed()
newSpeed = max(0.5, currentSpeed - 0.1)
newSpeed = round(newSpeed, 1) # Round to 1 decimal place
self._change_playback_speed(newSpeed)
else:
self.speechEngine.speak("No book loaded")
elif event.key == pygame.K_BACKSPACE:
# Reset playback speed to 1.0
# Only if not in a menu (menus handle backspace themselves)
if not (self.optionsMenu.is_in_menu() or
self.bookSelector.is_in_browser() or
self.sleepTimerMenu.is_in_menu() or
self.recentBooksMenu.is_in_menu() or
(self.absMenu and self.absMenu.is_in_menu())):
if self.book:
self._change_playback_speed(1.0)
else:
self.speechEngine.speak("No book loaded")
def _handle_recent_books_key(self, event):
"""Handle key events when in recent books menu"""
if event.key == pygame.K_UP:
@@ -1327,16 +1424,39 @@ class BookReader:
self.config.set_last_book(f"abs://{serverId}")
print(f"DEBUG: Saved last_book as: abs://{serverId}")
# Create listening session
self.sessionId = self.absClient.create_session(serverId)
if self.sessionId:
print(f"Created listening session: {self.sessionId}")
# Create listening session (only if we don't already have one from resume)
if not self.sessionId:
self.sessionId = self.absClient.create_session(serverId)
if self.sessionId:
print(f"Created listening session: {self.sessionId}")
else:
print(f"Using existing session ID: {self.sessionId}")
# Save session and server metadata to server link manager
# This allows resuming the stream with the same session
self.serverLinkManager.create_link(
bookPath=streamUrl,
serverUrl=self.absClient.serverUrl,
serverId=serverId,
libraryId=serverBook.get('libraryId', ''),
title=title,
author=author,
duration=duration,
chapters=len(book.chapters),
sessionId=self.sessionId,
serverBook=serverBook
)
# Try to load progress from server
serverProgress = self.absClient.get_progress(serverId)
if serverProgress:
progressTime = serverProgress.get('currentTime', 0.0)
print(f"Resuming from server progress: {int(progressTime)}s")
minutes = int(progressTime // 60)
seconds = int(progressTime % 60)
print(f"Resuming from server progress: {minutes}m {seconds}s ({progressTime:.1f}s)")
# Save the exact position for playback resume
self.savedAudioPosition = progressTime
# Find chapter that contains this time
for i, chap in enumerate(book.chapters):
@@ -1348,20 +1468,23 @@ class BookReader:
else:
# No server progress, start from beginning
self.currentChapter = 0
self.savedAudioPosition = 0.0
# Initialize position
self.currentParagraph = 0
# Load stream URL directly - pygame_player will handle caching via ffmpeg
# Load stream URL directly - mpv will handle streaming natively
print(f"Loading stream: {streamUrl[:80]}...")
self.speechEngine.speak("Loading stream. This may take a moment.")
# Load the stream URL - pygame_player will cache it using ffmpeg
# Pass auth token so ffmpeg can authenticate
if not self.audioPlayer.load_audio_file(streamUrl, authToken=self.absClient.authToken):
# Load the stream URL - mpv handles streaming with auth headers
# Pass auth token for authentication
# Use saved playback speed from config
playbackSpeed = self.config.get_playback_speed()
if not self.audioPlayer.load_audio_file(streamUrl, authToken=self.absClient.authToken, playbackSpeed=playbackSpeed):
self.speechEngine.speak("Failed to load stream. Check terminal for errors.")
print("\nERROR: Failed to load stream from server")
print("Make sure ffmpeg is installed: sudo pacman -S ffmpeg")
print("Make sure mpv is installed: sudo pacman -S mpv")
return
# Success! Start playing
@@ -1373,15 +1496,23 @@ class BookReader:
if self.absMenu:
self.absMenu.exit_menu()
# Update UI if enabled
if self.config.get_show_text():
# Update UI if enabled (only if screen is initialized)
if self.config.get_show_text() and hasattr(self, 'screen') and self.screen:
self._render_screen()
# Start playback
self.audioPlayer.play_audio_file()
# Start playback from saved position (if any)
startPos = self.savedAudioPosition if self.savedAudioPosition > 0 else 0.0
if startPos > 0:
minutes = int(startPos // 60)
seconds = int(startPos % 60)
print(f"Seeking to {minutes}m {seconds}s...")
self.audioPlayer.play_audio_file(startPosition=startPos)
self.isPlaying = True
self.isAudioBook = True
# Clear saved position after using it
self.savedAudioPosition = 0.0
def _download_audiobook(self, serverBook):
"""
Download audiobook from Audiobookshelf server
@@ -1496,6 +1627,39 @@ class BookReader:
self.absMenu.exit_menu()
self._load_new_book(str(outputPath))
def _change_playback_speed(self, newSpeed):
    """
    Change playback speed, persist it, and announce the new value.

    Audio books get the new speed applied to the running MpvPlayer.
    For piper-tts the speed is a launch argument of the TTS mpv
    subprocess, so the current paragraph is restarted when playing.

    Args:
        newSpeed: New playback speed (clamped to 0.5 - 2.0)
    """
    # Clamp speed to valid range
    newSpeed = max(0.5, min(2.0, float(newSpeed)))

    # Save to config so the speed survives restarts and is picked up
    # by the next paragraph / file that is loaded
    self.config.set_playback_speed(newSpeed)

    # Apply speed change based on reader engine and book type
    readerEngine = self.config.get_reader_engine()
    isAudioBook = self.book and hasattr(self.book, 'isAudioBook') and self.book.isAudioBook

    if isAudioBook:
        # Audio books: speed change via MpvPlayer
        self.audioPlayer.set_speed(newSpeed)
    elif readerEngine == 'piper' and self.isPlaying:
        # Piper-TTS: restart current paragraph with new speed
        ttsProcess = self.ttsMpvProcess
        if ttsProcess and ttsProcess.poll() is None:
            ttsProcess.terminate()
            try:
                ttsProcess.wait(timeout=0.5)
            except subprocess.TimeoutExpired:
                # mpv ignored SIGTERM within the grace period; force it
                # down instead of letting the exception abort the key
                # handler with the old process still playing
                ttsProcess.kill()
                ttsProcess.wait()
        # Restart playback of current paragraph
        self._start_paragraph_playback()

    # Speak feedback. round() rather than int(): e.g. 1.15 * 100 is
    # 114.99999999999999 and would otherwise be announced as 114.
    speedPercent = round(newSpeed * 100)
    self.speechEngine.speak(f"Speed {speedPercent} percent")
def _load_new_book(self, bookPath):
"""
Load a new book from file path
@@ -1548,6 +1712,10 @@ class BookReader:
else:
# Stop piper-tts playback and cancel buffering
self._cancel_buffer()
# Terminate the TTS mpv subprocess if it's running
if self.ttsMpvProcess and self.ttsMpvProcess.poll() is None:
self.ttsMpvProcess.terminate()
self.ttsMpvProcess.wait(timeout=1)
self.audioPlayer.stop()
def _start_paragraph_playback(self):
@@ -1603,11 +1771,65 @@ class BookReader:
wavData = self.ttsEngine.text_to_wav_data(paragraph)
if wavData:
self.audioPlayer.play_wav_data(wavData)
# Stop any existing TTS mpv process
if self.ttsMpvProcess and self.ttsMpvProcess.poll() is None:
self.ttsMpvProcess.terminate()
self.ttsMpvProcess.wait()
# Get audio parameters from TTS engine
audioParams = self.ttsEngine.get_audio_params()
sampleRate = audioParams['sampleRate']
sampleWidth = audioParams['sampleWidth']
channels = audioParams['channels']
# Determine mpv audio format string
# piper-tts outputs 16-bit signed PCM
mpvAudioFormat = 's16' # 16-bit signed integer
if channels == 2: # Stereo
mpvAudioFormat += 'le' # Little-endian (default for WAV)
# Launch mpv subprocess to read from stdin
# Get current playback speed from config
playbackSpeed = self.config.get_playback_speed()
mpvCmd = [
'mpv',
'--no-terminal',
f'--speed={playbackSpeed}',
'--', '-'
]
self.ttsMpvProcess = subprocess.Popen(
mpvCmd,
stdin=subprocess.PIPE
)
# Write WAV data to mpv's stdin in a separate thread
def write_mpv_stdin(process, data):
try:
process.stdin.write(data)
process.stdin.flush()
process.stdin.close()
# Explicitly delete data to free memory immediately
del data
except Exception as e:
print(f"Error writing WAV data to mpv stdin: {e}")
finally:
# Wait for mpv to finish and clean up the process
try:
process.wait()
except:
pass
threading.Thread(
target=write_mpv_stdin,
args=(self.ttsMpvProcess, wavData),
daemon=True
).start()
# Drop this function's reference to wavData once playback has started.
# (The writer thread holds its own reference until the data is written to mpv.)
del wavData
wavData = None
# Start buffering next paragraph in background
self._buffer_next_paragraph()
else:
@@ -1618,9 +1840,8 @@ class BookReader:
self.isPlaying = False
raise
finally:
# Ensure wavData is freed even on error
if wavData is not None:
del wavData
# The variable is already set to None in all relevant paths
pass
def _start_audio_chapter_playback(self, chapter):
"""Start playing audio book chapter"""
@@ -1694,37 +1915,42 @@ class BookReader:
def buffer_thread():
"""Background thread to generate audio"""
wavData = None
_wavData_to_cleanup = None
try:
# Generate audio
wavData = self.ttsEngine.text_to_wav_data(paragraph)
wavData_generated = self.ttsEngine.text_to_wav_data(paragraph)
_wavData_to_cleanup = wavData_generated
# Check if cancelled
if self.cancelBuffer:
# Clean up if cancelled
if wavData:
del wavData
del _wavData_to_cleanup
_wavData_to_cleanup = None
return
# Store buffered audio
with self.bufferLock:
if not self.cancelBuffer:
self.bufferedAudio = wavData
wavData = None # Transfer ownership, don't delete
self.bufferedAudio = _wavData_to_cleanup
_wavData_to_cleanup = None # Transfer ownership
except Exception as e:
print(f"Error buffering paragraph: {e}")
# Clear buffer state on error to prevent stalls
with self.bufferLock:
self.bufferedAudio = None
if _wavData_to_cleanup is not None:
del _wavData_to_cleanup
_wavData_to_cleanup = None
finally:
# Clean up wavData if not transferred to bufferedAudio
if wavData is not None:
del wavData
# The variable is already set to None in all relevant paths
pass
# Clear any cancelled buffer state
with self.bufferLock:
self.cancelBuffer = False
# Clean up previous buffer thread reference before starting new one
if self.bufferThread and not self.bufferThread.is_alive():
self.bufferThread = None
# Start new buffer thread
self.bufferThread = threading.Thread(target=buffer_thread, daemon=True)
self.bufferThread.start()
@@ -1755,9 +1981,16 @@ class BookReader:
if self.sessionId and self.absClient:
print(f"Closing listening session: {self.sessionId}")
self.absClient.close_session(self.sessionId)
# Clear session from link metadata
if self.bookPath:
self.serverLinkManager.clear_session(str(self.bookPath))
self.sessionId = None
self._cancel_buffer()
# Terminate the TTS mpv subprocess if it's running
if self.ttsMpvProcess and self.ttsMpvProcess.poll() is None:
self.ttsMpvProcess.terminate()
self.ttsMpvProcess.wait(timeout=1)
self.audioPlayer.cleanup()
self.speechEngine.cleanup()
if self.readingEngine:
@@ -1814,53 +2047,117 @@ def main():
serverId = lastBook[6:] # Remove 'abs://' prefix
print(f"Resuming Audiobookshelf book: {serverId}")
# Try to reconnect and stream
print(f"Last book was an Audiobookshelf stream")
# Try to restore from cached server link
from src.server_link_manager import ServerLinkManager
serverLinkManager = ServerLinkManager()
# Start BookStorm even if stream fails - user can browse/select
bookPathFallback = None # Will trigger interactive mode if stream fails
if config.is_abs_configured():
# Try to find cached server book metadata by server ID
# The bookPath in the link will be the stream URL from last session
cachedLink = None
for sidecarPath in (Path.home() / ".bookstorm" / "server_links").glob("*.json"):
try:
# Initialize Audiobookshelf client
from src.audiobookshelf_client import AudiobookshelfClient
serverUrl = config.get_abs_server_url()
absClient = AudiobookshelfClient(serverUrl, config)
import json
with open(sidecarPath, 'r') as f:
linkData = json.load(f)
if linkData.get('server_id') == serverId:
cachedLink = linkData
break
except:
continue
if absClient.is_authenticated() and absClient.test_connection():
# Get book details
print(f"Fetching book details from server...")
bookDetails = absClient.get_library_item_details(serverId)
if cachedLink and cachedLink.get('server_book'):
# We have cached metadata - try to resume using it
print(f"Found cached server book metadata, resuming stream...")
if config.is_abs_configured():
try:
# Initialize Audiobookshelf client
from src.audiobookshelf_client import AudiobookshelfClient
serverUrl = config.get_abs_server_url()
absClient = AudiobookshelfClient(serverUrl, config)
if absClient.is_authenticated() and absClient.test_connection():
# Use cached server book metadata instead of re-fetching
serverBook = cachedLink['server_book']
print(f"Restoring from cached metadata...")
if bookDetails:
# Successfully got book details - try to stream
print(f"Found book on server, preparing to stream...")
try:
from src.speech_engine import SpeechEngine
speechEngine = SpeechEngine()
reader = BookReader(None, config)
reader.absClient = absClient
reader._stream_audiobook(bookDetails)
# Restore session ID if it was saved
savedSessionId = cachedLink.get('session_id')
if savedSessionId:
print(f"Restoring listening session: {savedSessionId}")
reader.sessionId = savedSessionId
reader._stream_audiobook(serverBook)
reader.run_interactive()
return 0
except Exception as e:
print(f"Error resuming stream: {e}")
import traceback
traceback.print_exc()
print("Opening BookStorm anyway - use 'a' to browse server or 'b' for local books")
else:
print(f"Book not found on server (may have been deleted)")
print("Opening BookStorm anyway - use 'a' to browse server or 'b' for local books")
else:
print("Cannot connect to Audiobookshelf server or session expired")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
print("Cannot connect to Audiobookshelf server or session expired")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
except Exception as e:
print(f"Error connecting to Audiobookshelf: {e}")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
except Exception as e:
print(f"Error connecting to Audiobookshelf: {e}")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
else:
print("Audiobookshelf not configured")
print("Opening BookStorm anyway - use 'o' to configure server or 'b' for local books")
else:
print("Audiobookshelf not configured")
print("Opening BookStorm anyway - use 'o' to configure server or 'b' for local books")
# No cached metadata - try fetching from server
print(f"No cached metadata, fetching from server...")
if config.is_abs_configured():
try:
# Initialize Audiobookshelf client
from src.audiobookshelf_client import AudiobookshelfClient
serverUrl = config.get_abs_server_url()
absClient = AudiobookshelfClient(serverUrl, config)
if absClient.is_authenticated() and absClient.test_connection():
# Get book details
print(f"Fetching book details from server...")
bookDetails = absClient.get_library_item_details(serverId)
if bookDetails:
# Successfully got book details - try to stream
print(f"Found book on server, preparing to stream...")
try:
from src.speech_engine import SpeechEngine
speechEngine = SpeechEngine()
reader = BookReader(None, config)
reader.absClient = absClient
reader._stream_audiobook(bookDetails)
reader.run_interactive()
return 0
except Exception as e:
print(f"Error resuming stream: {e}")
print("Opening BookStorm anyway - use 'a' to browse server or 'b' for local books")
else:
print(f"Book not found on server (may have been deleted)")
print("Opening BookStorm anyway - use 'a' to browse server or 'b' for local books")
else:
print("Cannot connect to Audiobookshelf server or session expired")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
except Exception as e:
print(f"Error connecting to Audiobookshelf: {e}")
print("Opening BookStorm anyway - use 'a' to reconnect or 'b' for local books")
else:
print("Audiobookshelf not configured")
print("Opening BookStorm anyway - use 'o' to configure server or 'b' for local books")
# Fall through to open BookStorm in interactive mode
# Clear last_book so we don't loop on this error
+20
View File
@@ -56,6 +56,10 @@ class ConfigManager:
'show_text': 'true'
}
self.config['Audio'] = {
'playback_speed': '1.0'
}
self.config['Paths'] = {
'last_book': '',
'books_directory': str(Path.home()),
@@ -306,3 +310,19 @@ class ConfigManager:
serverUrl = self.get_abs_server_url()
username = self.get_abs_username()
return bool(serverUrl and username)
def get_playback_speed(self):
    """
    Get audio playback speed.

    Reads 'playback_speed' from the 'Audio' section.

    Returns:
        Speed as a float clamped to 0.5 - 2.0, or 1.0 when the stored
        value is missing, not numeric, or NaN.
    """
    import math

    try:
        speed = float(self.get('Audio', 'playback_speed', '1.0'))
    except (TypeError, ValueError):
        # TypeError covers a None value; ValueError covers junk text
        return 1.0

    # float() accepts "nan"; max()/min() would silently turn it into 2.0
    if math.isnan(speed):
        return 1.0

    # Clamp to valid range
    return max(0.5, min(2.0, speed))
def set_playback_speed(self, speed):
    """
    Set audio playback speed and save the configuration.

    Args:
        speed: Requested speed; converted with float() and clamped to
            0.5 - 2.0. NaN falls back to 1.0.

    Raises:
        ValueError, TypeError: if speed cannot be converted to float.
    """
    speed = float(speed)

    # NaN compares unequal to itself; without this guard max()/min()
    # would silently store 2.0 for it
    if speed != speed:
        speed = 1.0

    # Clamp to valid range
    speed = max(0.5, min(2.0, speed))

    self.set('Audio', 'playback_speed', str(speed))
    self.save()
+294
View File
@@ -0,0 +1,294 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MPV Audio Player
Audio book playback using python-mpv (TTS audio is played by BookReader through a separate mpv subprocess).
Supports real-time speed control without re-encoding.
"""
import os
from pathlib import Path
import threading
try:
import mpv
HAS_MPV = True
except ImportError:
HAS_MPV = False
class MpvPlayer:
    """
    Audio player built on python-mpv.

    Handles audio book files and authenticated stream URLs. TTS WAV data
    is no longer played through this class (see play_wav_data).

    Every method is a safe no-op (or returns False / 0.0) when mpv could
    not be initialized, so callers do not need to check availability
    before each call.
    """

    def __init__(self):
        """Initialize mpv audio player"""
        self.isInitialized = False
        self.player = None
        self.isPaused = False
        self.audioFileLoaded = False  # Track if audio file is loaded
        self.playbackSpeed = 1.0  # Current playback speed
        self.endFileCallback = None  # Callback for when file finishes

        if not HAS_MPV:
            print("Warning: python-mpv not installed. Audio playback will not work.")
            print("Install with: pip install python-mpv")
            return

        try:
            # Initialize mpv player
            # pylint: disable=no-member
            self.player = mpv.MPV(
                input_default_bindings=False,  # Disable default key bindings
                input_vo_keyboard=False,  # Disable keyboard input
                video=False,  # Audio only
                ytdl=False,  # Don't use youtube-dl
                quiet=True,  # Minimal console output
            )

            # Forward mpv's 'end-file' event to the user callback, if any
            @self.player.event_callback('end-file')
            def on_end_file(event):
                """Called by mpv on an 'end-file' event"""
                if self.endFileCallback:
                    self.endFileCallback()

            self.isInitialized = True
        except Exception as e:
            print(f"Warning: Could not initialize mpv: {e}")
            self.isInitialized = False

    def play_wav_data(self, wavData):
        """
        This method is no longer used for TTS playback.
        TTS playback is now handled directly by BookReader using subprocess.

        Returns:
            Always False
        """
        print("Warning: MpvPlayer.play_wav_data is deprecated and should not be called.")
        return False

    def pause(self):
        """Pause playback"""
        if self.isInitialized and self.player:
            self.player.pause = True
            self.isPaused = True

    def resume(self):
        """Resume playback"""
        if self.isInitialized and self.player:
            self.player.pause = False
            self.isPaused = False

    def stop(self):
        """Stop playback"""
        if self.isInitialized and self.player:
            self.player.stop()
            self.isPaused = False

    def is_playing(self):
        """Check if audio is currently playing"""
        if not self.isInitialized or not self.player:
            return False
        try:
            # mpv is playing if not paused and not idle
            # pylint: disable=no-member
            return not self.player.pause and not self.player.idle_active
        except Exception:
            # Property access failed; report "not playing"
            return False

    def is_paused(self):
        """Check if audio is paused (as tracked by this wrapper)"""
        return self.isPaused

    def set_speed(self, speed):
        """
        Set playback speed

        Args:
            speed: Playback speed (clamped to 0.5 - 2.0)
        """
        if not self.isInitialized or not self.player:
            return

        # Clamp speed to valid range
        speed = max(0.5, min(2.0, float(speed)))
        self.playbackSpeed = speed
        self.player.speed = speed

    def get_speed(self):
        """Get current playback speed"""
        return self.playbackSpeed

    def cleanup(self):
        """Cleanup resources and reset state; safe to call more than once"""
        if self.isInitialized and self.player:
            # Best-effort shutdown: a failure in stop() must not prevent
            # terminate(), and neither should raise into the caller
            try:
                self.player.stop()
            except Exception:
                pass
            try:
                self.player.terminate()
            except Exception:
                pass
        self.isInitialized = False
        # Nothing is loaded or paused once the player is gone, and the
        # terminated handle must not be reused
        self.audioFileLoaded = False
        self.isPaused = False
        self.player = None

    def is_available(self):
        """Check if mpv is available"""
        return self.isInitialized

    # Audio file playback methods (for audiobooks)

    def load_audio_file(self, audioPath, authToken=None, playbackSpeed=1.0):
        """
        Load an audio file for streaming playback

        The file is left paused; call play_audio_file() to start it.

        Args:
            audioPath: Path to audio file or URL
            authToken: Optional Bearer token for authenticated URLs
            playbackSpeed: Playback speed (0.5 to 2.0, default 1.0)

        Returns:
            True if loaded successfully
        """
        if not self.isInitialized:
            return False

        try:
            audioPath = str(audioPath)
            self.playbackSpeed = max(0.5, min(2.0, float(playbackSpeed)))

            # Check if this is a URL (for streaming from Audiobookshelf)
            isUrl = audioPath.startswith(('http://', 'https://'))

            if isUrl and authToken:
                # Load with authentication header
                # pylint: disable=no-member
                self.player.http_header_fields = [f'Authorization: Bearer {authToken}']
            else:
                # Clear any previous headers so a token is not reused
                # pylint: disable=no-member
                self.player.http_header_fields = []

            # Load the file
            self.player.loadfile(audioPath, 'replace')
            self.player.pause = True  # Keep paused until play_audio_file() is called
            self.player.speed = self.playbackSpeed
            self.audioFileLoaded = True
            return True
        except Exception as e:
            print(f"Error loading audio file: {e}")
            self.audioFileLoaded = False
            return False

    def play_audio_file(self, startPosition=0.0):
        """
        Play loaded audio file from a specific position

        Args:
            startPosition: Start time in seconds

        Returns:
            True if playback started successfully
        """
        if not self.isInitialized or not self.audioFileLoaded:
            return False

        try:
            # Seek to start position
            # NOTE(review): loadfile() may return before mpv has opened
            # the file; if so this seek can fail and playback will not
            # start - confirm against python-mpv behaviour.
            if startPosition > 0:
                self.player.seek(startPosition, reference='absolute')

            # Start playback
            self.player.pause = False
            self.isPaused = False
            return True
        except Exception as e:
            print(f"Error playing audio file: {e}")
            return False

    def pause_audio_file(self):
        """Pause audio file playback"""
        if self.isInitialized and self.audioFileLoaded:
            self.player.pause = True
            self.isPaused = True

    def resume_audio_file(self):
        """Resume audio file playback"""
        if self.isInitialized and self.audioFileLoaded:
            self.player.pause = False
            self.isPaused = False

    def stop_audio_file(self):
        """Stop audio file playback"""
        if self.isInitialized and self.audioFileLoaded:
            try:
                self.player.stop()
            except Exception:
                # Best effort: stopping must not raise into the caller
                pass
            self.isPaused = False

    def is_audio_file_playing(self):
        """Check if audio file is currently playing"""
        if not self.isInitialized or not self.audioFileLoaded:
            return False
        return self.is_playing()

    def get_audio_position(self):
        """
        Get current playback position in seconds

        Returns:
            Position in seconds, or 0.0 if no position is available
        """
        if not self.isInitialized or not self.audioFileLoaded:
            return 0.0

        try:
            # pylint: disable=no-member
            pos = self.player.time_pos
            return pos if pos is not None else 0.0
        except Exception:
            return 0.0

    def seek_audio(self, position):
        """
        Seek to a specific position in the audio file

        Args:
            position: Position in seconds

        Returns:
            True if seek successful
        """
        if not self.isInitialized or not self.audioFileLoaded:
            return False

        try:
            self.player.seek(position, reference='absolute')
            return True
        except Exception as e:
            print(f"Error seeking audio: {e}")
            return False

    def unload_audio_file(self):
        """Unload the current audio file"""
        if self.audioFileLoaded:
            self.stop_audio_file()
            self.audioFileLoaded = False

    def is_audio_file_loaded(self):
        """Check if audio file is loaded"""
        return self.audioFileLoaded

    def set_end_file_callback(self, callback):
        """
        Set callback to be called when file finishes playing

        Args:
            callback: Function to call (no arguments)
        """
        self.endFileCallback = callback
+1 -1
View File
@@ -22,7 +22,7 @@ class OptionsMenu:
config: ConfigManager instance
speechEngine: SpeechEngine instance
voiceSelector: VoiceSelector instance
audioPlayer: PygamePlayer instance
audioPlayer: MpvPlayer instance
ttsReloadCallback: Optional callback to reload TTS engine
"""
self.config = config
-519
View File
@@ -1,519 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Pygame Audio Player
Audio playback using pygame.mixer with integrated event handling.
Simpler and more reliable than PyAudio approach.
"""
import io
import pygame
class PygamePlayer:
"""Audio player using pygame.mixer"""
def __init__(self):
    """Set up player state and try to bring up the pygame mixer"""
    # Playback state
    self.isInitialized = False
    self.isPaused = False
    self.currentSound = None  # Last Sound started, kept so it can be released

    # Audio file (audiobook) state
    self.audioFileLoaded = False  # True once a file is loaded for playback
    self.audioFilePath = None  # Path or URL of the loaded file
    self.tempAudioFile = None  # Transcoded copy in the cache, if any

    try:
        # Only the mixer is initialized, not the whole of pygame.
        # A single channel is used so playback is strictly sequential and
        # an old sound is released as soon as the next one starts.
        pygame.mixer.init(frequency=22050, size=-16, channels=1, buffer=512)
        pygame.mixer.set_num_channels(1)
    except Exception as initError:
        print(f"Warning: Could not initialize pygame mixer: {initError}")
        self.isInitialized = False
    else:
        self.isInitialized = True
def play_wav_data(self, wavData):
    """
    Start playback of an in-memory WAV clip

    Any sound started by a previous call is stopped and released first.

    Args:
        wavData: Bytes containing WAV audio data

    Returns:
        True if playback started successfully; False if the mixer is not
        initialized or any step raised
    """
    if not self.isInitialized:
        return False

    try:
        # Release the previous clip before creating the next one so the
        # mixer's buffers for it do not pile up (memory leak otherwise).
        if self.currentSound:
            self.currentSound.stop()
            self.currentSound = None

        # The byte stream is closed as soon as the Sound exists; the
        # original author notes that pygame.mixer.Sound copies the data.
        with io.BytesIO(wavData) as wavStream:
            freshSound = pygame.mixer.Sound(wavStream)

        freshSound.play()
        # Keep a reference so the next call (or stop/cleanup) can release it
        self.currentSound = freshSound
        self.isPaused = False
        return True
    except Exception as playError:
        print(f"Error playing audio: {playError}")
        return False
def pause(self):
    """Pause mixer playback; does nothing if the mixer is not initialized"""
    if not self.isInitialized:
        return
    pygame.mixer.pause()
    self.isPaused = True
def resume(self):
    """Resume paused mixer playback; does nothing if the mixer is not initialized"""
    if not self.isInitialized:
        return
    pygame.mixer.unpause()
    self.isPaused = False
def stop(self):
    """
    Stop mixer playback and release the current sound

    Does nothing if the mixer is not initialized.
    """
    if not self.isInitialized:
        return

    pygame.mixer.stop()
    self.isPaused = False

    # Drop our reference so the stopped sound can be freed
    if self.currentSound:
        self.currentSound = None
def is_playing(self):
    """
    Report whether the mixer is busy playing sound

    Returns:
        False if the mixer is not initialized, otherwise the value of
        pygame.mixer.get_busy()
    """
    if self.isInitialized:
        return pygame.mixer.get_busy()
    return False
def is_paused(self):
    """
    Report the paused flag tracked by this player

    Returns:
        The current value of self.isPaused
    """
    pausedFlag = self.isPaused
    return pausedFlag
def cleanup(self):
    """
    Release player resources

    Clears the audio-file state, then (if the mixer was initialized) stops
    and drops the current sound and shuts the mixer down.
    """
    if self.audioFileLoaded:
        # Cached transcodes stay on disk for future use; only the
        # in-memory bookkeeping is cleared here.
        self.audioFilePath = None
        self.tempAudioFile = None
        self.audioFileLoaded = False

    if not self.isInitialized:
        return

    if self.currentSound:
        try:
            self.currentSound.stop()
        except Exception:
            pass  # Mixer may be shutting down
        self.currentSound = None

    pygame.mixer.quit()
    self.isInitialized = False
def is_available(self):
    """
    Report whether the pygame mixer was initialized successfully

    Returns:
        The current value of self.isInitialized
    """
    mixerReady = self.isInitialized
    return mixerReady
# Audio file playback methods (for audiobooks)
def load_audio_file(self, audioPath, authToken=None):
    """
    Load an audio file (local path or http/https URL) for playback

    URLs are handed to _load_url_with_ffmpeg. Local files are loaded
    directly with pygame.mixer.music; if that fails with an
    "unrecognized format" style error the file is transcoded through
    _load_with_ffmpeg_transcode instead.

    Args:
        audioPath: Path to audio file or URL
        authToken: Optional Bearer token for authenticated URLs

    Returns:
        True if loaded successfully
    """
    if not self.isInitialized:
        return False

    from pathlib import Path

    audioPath = str(audioPath)  # Callers may pass a Path object

    # Remote source (streaming from Audiobookshelf)
    if audioPath.startswith(('http://', 'https://')):
        print("DEBUG: Loading URL for streaming")
        return self._load_url_with_ffmpeg(audioPath, authToken=authToken)

    # Local source
    fileSuffix = Path(audioPath).suffix.lower()
    try:
        # Stop whatever is currently playing before swapping files
        self.stop_audio_file()
        pygame.mixer.music.load(audioPath)
    except Exception as loadError:
        print(f"Direct load failed: {loadError}")
        errorText = str(loadError)

        # These messages mean pygame could not decode the format, so a
        # transcode through ffmpeg is worth trying.
        if "ModPlug_Load failed" in errorText or "Unrecognized" in errorText:
            print(f"Attempting to transcode {fileSuffix} with ffmpeg...")
            return self._load_with_ffmpeg_transcode(audioPath)

        # Any other failure is reported and treated as "nothing loaded"
        print(f"Error loading audio file: {loadError}")
        self.audioFileLoaded = False
        return False

    self.audioFileLoaded = True
    self.audioFilePath = audioPath
    return True
def _load_url_with_ffmpeg(self, streamUrl, authToken=None):
    """
    Stream from URL using ffmpeg to transcode to cache

    The stream is transcoded to Ogg Vorbis in
    ~/.cache/bookstorm/audiobookshelf and then loaded with
    pygame.mixer.music. A previously cached transcode of the same URL
    (query string ignored) is reused when it loads cleanly.

    ffmpeg writes to a ".part.ogg" file that is renamed onto the cache
    name only after ffmpeg exits successfully, so an interrupted or killed
    run can never leave a truncated file that a later call would mistake
    for a valid cache entry.

    Args:
        streamUrl: URL to stream from (e.g., Audiobookshelf URL)
        authToken: Optional Bearer token for authentication

    Returns:
        True if successful
    """
    import subprocess
    import shutil
    import hashlib
    from pathlib import Path

    # Check if ffmpeg is available
    if not shutil.which('ffmpeg'):
        print("\nffmpeg not found. Falling back to direct download...")
        print("Install ffmpeg for better streaming: sudo pacman -S ffmpeg")
        return False

    # Set up cache directory
    cacheDir = Path.home() / '.cache' / 'bookstorm' / 'audiobookshelf'
    cacheDir.mkdir(parents=True, exist_ok=True)

    # The cache key is derived from the URL without its query string so a
    # changing token parameter still maps to the same cache entry.
    baseUrl = streamUrl.split('?')[0]
    urlHash = hashlib.sha256(baseUrl.encode()).hexdigest()[:16]
    cachedPath = cacheDir / f"{urlHash}.ogg"
    # Keeps the .ogg suffix because ffmpeg picks the container from it
    partialPath = cacheDir / f"{urlHash}.part.ogg"

    def load_cached_into_mixer():
        """Load cachedPath into the mixer and record it as the current file."""
        pygame.mixer.music.load(str(cachedPath))
        self.audioFileLoaded = True
        self.audioFilePath = streamUrl  # Keep URL for reference
        self.tempAudioFile = str(cachedPath)

    # Check if cached version exists
    if cachedPath.exists():
        print("\nUsing cached stream")
        try:
            load_cached_into_mixer()
            print("Cached file loaded! Starting playback...")
            return True
        except Exception as e:
            print(f"Cached file corrupted, re-downloading: {e}")
            cachedPath.unlink(missing_ok=True)

    # No cache available, stream and transcode
    try:
        print("\nStreaming from server...")
        print("Transcoding to cache. This will take a moment.")
        print(f"(Cached for future use in {cacheDir})\n")

        ffmpegCmd = ['ffmpeg']

        # Add authentication header for Audiobookshelf
        if authToken:
            # ffmpeg expects each custom header terminated by CRLF
            authHeader = f"Authorization: Bearer {authToken}\r\n"
            ffmpegCmd.extend(['-headers', authHeader])
            print("DEBUG: Using Bearer token authentication")

        ffmpegCmd.extend([
            '-i', streamUrl,
            '-vn',  # No video
            '-c:a', 'libvorbis',
            '-q:a', '4',  # Medium quality
            '-threads', '0',  # Use all CPU cores
            '-y',
            str(partialPath)
        ])

        # Never echo the command itself: it contains the Bearer token and
        # possibly a token query parameter. Show only the token-free URL.
        print(f"DEBUG: ffmpeg input: {baseUrl}")

        # Run ffmpeg with progress output
        result = subprocess.run(
            ffmpegCmd,
            capture_output=False,  # Show progress to user
            text=True,
            timeout=1800  # 30 minute timeout for large audiobooks
        )

        if result.returncode != 0:
            print(f"\nStreaming/transcoding failed (exit code {result.returncode})")
            return False

        # Publish the finished transcode under its cache name in one step
        partialPath.replace(cachedPath)

        # Try to load the transcoded file
        try:
            load_cached_into_mixer()
            print("\nStream cached successfully!")
            print("Starting playback...")
            return True
        except Exception as e:
            print(f"Error loading transcoded stream: {e}")
            cachedPath.unlink(missing_ok=True)
            return False

    except subprocess.TimeoutExpired:
        print("\nStreaming timed out (file too large or connection too slow)")
        return False
    except KeyboardInterrupt:
        print("\nStreaming cancelled by user")
        return False
    except Exception as e:
        print(f"Error during streaming: {e}")
        return False
    finally:
        # Whatever happened, do not leave a partial transcode behind.
        # After a successful rename this is a no-op.
        partialPath.unlink(missing_ok=True)
def _load_with_ffmpeg_transcode(self, audioPath, fastMode=False):
    """
    Transcode audio file using ffmpeg and load the result

    The file is transcoded to Ogg Vorbis in ~/.cache/bookstorm/audio and
    then loaded with pygame.mixer.music. A previously cached transcode for
    the same resolved path is reused when it loads cleanly.

    ffmpeg writes to a ".part.ogg" file that is renamed onto the cache
    name only after ffmpeg exits successfully, so an interrupted or killed
    run can never leave a truncated file that a later call would mistake
    for a valid cache entry.

    NOTE(review): the cache key is the resolved path only, so a source file
    replaced in place keeps using the old transcode. Confirm whether that
    is acceptable.

    Args:
        audioPath: Path to original audio file
        fastMode: If True, use faster/lower quality settings

    Returns:
        True if successful
    """
    import subprocess
    import shutil
    import hashlib
    from pathlib import Path

    # Check if ffmpeg is available
    if not shutil.which('ffmpeg'):
        print("\nffmpeg not found. Please install ffmpeg or convert the file manually:")
        print(f" ffmpeg -i '{audioPath}' -c:a libmp3lame -q:a 2 output.mp3")
        return False

    # Set up persistent cache directory
    cacheDir = Path.home() / '.cache' / 'bookstorm' / 'audio'
    cacheDir.mkdir(parents=True, exist_ok=True)

    # Generate cache filename from hash of original file path
    pathHash = hashlib.sha256(str(Path(audioPath).resolve()).encode()).hexdigest()[:16]
    cachedPath = cacheDir / f"{pathHash}.ogg"
    # Keeps the .ogg suffix because ffmpeg picks the container from it
    partialPath = cacheDir / f"{pathHash}.part.ogg"

    def load_cached_into_mixer():
        """Load cachedPath into the mixer and record it as the current file."""
        pygame.mixer.music.load(str(cachedPath))
        self.audioFileLoaded = True
        self.audioFilePath = audioPath  # Keep original path for reference
        self.tempAudioFile = str(cachedPath)

    # Check if cached version exists
    if cachedPath.exists():
        print(f"\nUsing cached transcoded file for {Path(audioPath).name}")
        try:
            load_cached_into_mixer()
            print("Cached file loaded! Starting playback...")
            return True
        except Exception as e:
            print(f"Cached file corrupted, re-transcoding: {e}")
            cachedPath.unlink(missing_ok=True)

    # No cache available, transcode the file
    try:
        print(f"\nTranscoding {Path(audioPath).name}...")
        print("This will take a moment. Press Ctrl+C to cancel.")
        print(f"(Cached for future use in {cacheDir})\n")

        # libvorbis -q:a runs from -1 to 10 and HIGHER means better
        # quality; fast mode trades quality for encoding speed.
        vorbisQuality = '1' if fastMode else '4'
        ffmpegCmd = [
            'ffmpeg',
            '-i', audioPath,
            '-vn',  # No video
            '-c:a', 'libvorbis',
            '-q:a', vorbisQuality,
            '-threads', '0',  # Use all CPU cores
            '-y',
            str(partialPath)
        ]

        # Run ffmpeg with progress output
        result = subprocess.run(
            ffmpegCmd,
            capture_output=False,  # Show progress to user
            text=True,
            timeout=600  # 10 minute timeout
        )

        if result.returncode != 0:
            print(f"\nTranscoding failed (exit code {result.returncode})")
            return False

        # Publish the finished transcode under its cache name in one step
        partialPath.replace(cachedPath)

        # Try to load the transcoded file
        try:
            load_cached_into_mixer()
            print("\nTranscoding complete! Cached for future use.")
            print("Starting playback...")
            return True
        except Exception as e:
            print(f"Error loading transcoded file: {e}")
            cachedPath.unlink(missing_ok=True)
            return False

    except subprocess.TimeoutExpired:
        print("\nTranscoding timed out (file too large or system too slow)")
        return False
    except KeyboardInterrupt:
        print("\nTranscoding cancelled by user")
        return False
    except Exception as e:
        print(f"Error during transcoding: {e}")
        return False
    finally:
        # Whatever happened, do not leave a partial transcode behind.
        # After a successful rename this is a no-op.
        partialPath.unlink(missing_ok=True)
def play_audio_file(self, startPosition=0.0):
    """
    Begin playing the loaded audio file at the given offset

    Args:
        startPosition: Start time in seconds

    Returns:
        True if playback started successfully; False when the mixer is not
        initialized, no file is loaded, or starting playback raised
    """
    if not (self.isInitialized and self.audioFileLoaded):
        return False

    try:
        pygame.mixer.music.play(start=startPosition)
        self.isPaused = False
    except Exception as playError:
        print(f"Error playing audio file: {playError}")
        return False
    return True
def pause_audio_file(self):
    """
    Pause playback of the loaded audio file

    Does nothing unless the mixer is initialized and a file is loaded.
    """
    if not (self.isInitialized and self.audioFileLoaded):
        return
    pygame.mixer.music.pause()
    self.isPaused = True
def resume_audio_file(self):
    """
    Resume playback of the loaded audio file

    Does nothing unless the mixer is initialized and a file is loaded.
    """
    if not (self.isInitialized and self.audioFileLoaded):
        return
    pygame.mixer.music.unpause()
    self.isPaused = False
def stop_audio_file(self):
    """
    Stop playback of the loaded audio file

    Does nothing unless the mixer is initialized and a file is loaded.
    The cached transcode referenced by tempAudioFile is a persistent cache
    entry (kept under ~/.cache/bookstorm/audio/) and is not deleted here.
    """
    if not (self.isInitialized and self.audioFileLoaded):
        return

    try:
        pygame.mixer.music.stop()
    except Exception:
        pass  # Mixer may already be shut down
    self.isPaused = False
def is_audio_file_playing(self):
    """
    Report whether the loaded audio file is playing right now

    Returns:
        False when the mixer is not initialized or no file is loaded;
        otherwise the value of pygame.mixer.music.get_busy()
    """
    if self.isInitialized and self.audioFileLoaded:
        return pygame.mixer.music.get_busy()
    return False
def get_audio_position(self):
    """
    Get current playback position in seconds

    Returns:
        The value of pygame.mixer.music.get_pos() converted from
        milliseconds to seconds, or 0.0 if the mixer is not initialized,
        no file is loaded, or playback has not started yet

    NOTE(review): pygame documents get_pos() as the time the music has
    been playing, not counting a start offset given to play(). Callers
    that start or seek to a non-zero position presumably need to add that
    offset themselves. Confirm against the call sites.
    """
    if not self.isInitialized or not self.audioFileLoaded:
        return 0.0

    # get_pos() is in milliseconds and is -1 before playback has started;
    # clamp so callers never see a negative position.
    elapsedMs = pygame.mixer.music.get_pos()
    return max(0.0, elapsedMs / 1000.0)
def seek_audio(self, position):
    """
    Move playback of the loaded audio file to a new position

    pygame.mixer.music has no direct seek, so playback is stopped and
    started again from the requested position. Playback is therefore
    running (not paused) after a successful call.

    Args:
        position: Position in seconds

    Returns:
        True if playback was restarted at the position; False when the
        mixer is not initialized, no file is loaded, or the restart raised
    """
    if not (self.isInitialized and self.audioFileLoaded):
        return False

    try:
        pygame.mixer.music.stop()
        pygame.mixer.music.play(start=position)
        self.isPaused = False
    except Exception as seekError:
        print(f"Error seeking audio: {seekError}")
        return False
    return True
def unload_audio_file(self):
    """
    Unload the current audio file

    Stops playback if a file is loaded, then clears all audio-file
    bookkeeping (loaded flag, source path and cached-transcode path) so no
    stale reference to the previous file remains, matching what cleanup()
    resets. The cached transcode itself stays on disk for future use.
    """
    if self.audioFileLoaded:
        self.stop_audio_file()  # Only stops playback; cache files are kept

    self.audioFileLoaded = False
    self.audioFilePath = None
    self.tempAudioFile = None
+45 -2
View File
@@ -31,7 +31,8 @@ class ServerLinkManager:
def create_link(self, bookPath: str, serverUrl: str, serverId: str, libraryId: str,
title: str = "", author: str = "", duration: float = 0.0,
chapters: int = 0, manualOverride: bool = False):
chapters: int = 0, manualOverride: bool = False, sessionId: str = None,
serverBook: Dict = None):
"""
Create server link for a local book
@@ -45,6 +46,8 @@ class ServerLinkManager:
duration: Audio duration in seconds
chapters: Number of chapters
manualOverride: True if user manually linked despite mismatch
sessionId: Active listening session ID (for streaming)
serverBook: Full server book metadata (for streaming)
"""
bookHash = self._get_book_hash(bookPath)
sidecarPath = self.sidecarDir / f"{bookHash}.json"
@@ -61,7 +64,9 @@ class ServerLinkManager:
'title': title,
'author': author
},
'manual_override': manualOverride
'manual_override': manualOverride,
'session_id': sessionId,
'server_book': serverBook
}
with open(sidecarPath, 'w') as f:
@@ -129,3 +134,41 @@ class ServerLinkManager:
if sidecarPath.exists():
sidecarPath.unlink()
print(f"Deleted server link: {sidecarPath}")
def update_session(self, bookPath: str, sessionId: str):
    """
    Store a new session ID in an existing server link

    Looks the link up with get_link(); when one exists, its 'session_id'
    is replaced and the whole link is written back to the book's sidecar
    JSON file. Nothing is written when no link exists.

    Args:
        bookPath: Path to book file (or stream URL)
        sessionId: New session ID
    """
    linkData = self.get_link(bookPath)
    if linkData:
        linkData['session_id'] = sessionId

        sidecarName = f"{self._get_book_hash(bookPath)}.json"
        with open(self.sidecarDir / sidecarName, 'w') as sidecarFile:
            json.dump(linkData, sidecarFile, indent=2)
def clear_session(self, bookPath: str):
    """
    Remove the session ID from an existing server link (session closed)

    Looks the link up with get_link(); when one exists, its 'session_id'
    is set to None and the whole link is written back to the book's
    sidecar JSON file. Nothing is written when no link exists.

    Args:
        bookPath: Path to book file (or stream URL)
    """
    linkData = self.get_link(bookPath)
    if linkData:
        linkData['session_id'] = None

        sidecarName = f"{self._get_book_hash(bookPath)}.json"
        with open(self.sidecarDir / sidecarName, 'w') as sidecarFile:
            json.dump(linkData, sidecarFile, indent=2)
+1
View File
@@ -147,6 +147,7 @@ class SpeechEngine:
self.readingCallback('INTERRUPTED')
# Speak with callback (event_types is speechd API parameter)
# pylint: disable=no-member
self.client.speak(
textStr,
callback=speech_callback,
+2 -2
View File
@@ -9,7 +9,7 @@ Allows browsing, testing, and selecting voice models.
from pathlib import Path
from src.tts_engine import TtsEngine
from src.pygame_player import PygamePlayer
from src.mpv_player import MpvPlayer
class VoiceSelector:
@@ -161,7 +161,7 @@ class VoiceSelector:
try:
tts = TtsEngine(voice['path'])
player = PygamePlayer()
player = MpvPlayer()
print("Generating speech...")
wavData = tts.text_to_wav_data(testText)