Initial commit.

2025-10-04 02:55:01 -04:00
commit 1d19ed377c
16 changed files with 4401 additions and 0 deletions
--- a/src/tts_engine.py
+++ b/src/tts_engine.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+TTS Engine Wrapper
+
+Wrapper for piper-tts to generate speech from text.
+Handles streaming audio generation for real-time playback.
+"""
+
+import subprocess
+import wave
+import io
+import struct
+
+
+class TtsEngine:
+    """
+    Text-to-speech engine using piper-tts
+
+    Each synthesis call spawns the piper-tts command-line tool as a subprocess.
+    """
+
+    def __init__(self, modelPath="/usr/share/piper-voices/en/en_US/hfc_male/medium/en_US-hfc_male-medium.onnx"):
+        """
+        Initialize TTS engine
+
+        Args:
+            modelPath: Path to piper-tts voice model (stored as-is; not
+                validated here)
+        """
+        self.modelPath = modelPath
+        # PCM layout written into the WAV header by _raw_to_wav.
+        # NOTE(review): hard-coded; confirm it matches the selected voice model.
+        self.sampleRate = 22050
+        self.sampleWidth = 2  # 16-bit
+        self.channels = 1
+
+    def text_to_wav_data(self, text):
+        """
+        Convert text to WAV audio data
+
+        Runs piper-tts once per call, sending the text on stdin and wrapping
+        the raw PCM read from stdout in a WAV container.
+
+        Args:
+            text: Text to convert to speech. Text longer than 10,000
+                characters is truncated and a warning is printed.
+
+        Returns:
+            Bytes containing WAV audio data, or None if text is None, empty
+            or whitespace-only
+
+        Raises:
+            RuntimeError: If piper-tts is not installed, cannot be started,
+                times out (>60s), exits non-zero, or conversion fails
+        """
+        # Nothing to speak: also guards against None, which has no .strip()
+        if not text or not text.strip():
+            return None
+
+        # Safety: Limit text size to prevent excessive memory usage
+        # ~10,000 chars = ~10-15 minutes of audio at normal reading speed
+        MAX_TEXT_LENGTH = 10000
+        if len(text) > MAX_TEXT_LENGTH:
+            print(f"Warning: Paragraph too long ({len(text)} chars), truncating to {MAX_TEXT_LENGTH}")
+            text = text[:MAX_TEXT_LENGTH] + "..."
+
+        # Bound before the try so the error handler can inspect it even when
+        # Popen itself raises.
+        process = None
+        try:
+            # Run piper-tts with raw output
+            process = subprocess.Popen(
+                [
+                    'piper-tts',
+                    '--model', self.modelPath,
+                    '--output-raw'
+                ],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE
+            )
+
+            # Send text and get raw audio with timeout (60 seconds max)
+            # This prevents hanging if piper-tts gets stuck
+            try:
+                rawAudio, stderr = process.communicate(
+                    input=text.encode('utf-8'),
+                    timeout=60
+                )
+            except subprocess.TimeoutExpired:
+                process.kill()
+                process.communicate()  # Reap the child and drain its pipes
+                raise RuntimeError("piper-tts timed out (>60s)")
+
+            # A non-zero exit status means synthesis failed; surface stderr
+            if process.returncode != 0:
+                # stderr may not be valid UTF-8; drop undecodable bytes
+                errorMsg = stderr.decode('utf-8', errors='ignore')
+                raise RuntimeError(f"piper-tts failed: {errorMsg}")
+
+            # Release the stderr buffer before building the WAV copy
+            # (can be large if piper-tts is verbose)
+            del stderr
+
+            # Convert raw PCM to WAV format. communicate() has already waited
+            # for the process to exit, so no further cleanup is needed.
+            return self._raw_to_wav(rawAudio)
+
+        except FileNotFoundError:
+            raise RuntimeError("piper-tts not found. Please install piper-tts.")
+        except RuntimeError:
+            # Timeout / non-zero exit errors raised above already carry a
+            # specific message; do not wrap them a second time.
+            raise
+        except Exception as e:
+            # Ensure subprocess is terminated if something goes wrong
+            if process is not None and process.poll() is None:
+                process.kill()
+                try:
+                    process.wait(timeout=2)
+                except subprocess.TimeoutExpired:
+                    pass  # Process is truly stuck, nothing we can do
+            # Keep the original exception as __cause__ for debugging
+            raise RuntimeError(f"TTS generation failed: {str(e)}") from e
+
+    def _raw_to_wav(self, rawData):
+        """
+        Convert raw PCM data to WAV format
+
+        The header is built from self.channels, self.sampleWidth and
+        self.sampleRate.
+
+        Args:
+            rawData: Raw PCM audio bytes
+
+        Returns:
+            WAV formatted bytes
+        """
+        # The with-block closes the in-memory buffer even if writing fails
+        with io.BytesIO() as wavBuffer:
+            # Closing the wave writer finalizes the header sizes; it leaves
+            # wavBuffer open because the buffer was passed in, not opened by it
+            with wave.open(wavBuffer, 'wb') as wavFile:
+                wavFile.setnchannels(self.channels)
+                wavFile.setsampwidth(self.sampleWidth)
+                wavFile.setframerate(self.sampleRate)
+                wavFile.writeframes(rawData)
+
+            return wavBuffer.getvalue()
+
+    def get_audio_params(self):
+        """
+        Get audio parameters for playback
+
+        The values are the instance's current settings, i.e. the ones
+        _raw_to_wav writes into the WAV header.
+
+        Returns:
+            Dictionary with sampleRate, sampleWidth, channels
+        """
+        return {
+            'sampleRate': self.sampleRate,
+            'sampleWidth': self.sampleWidth,
+            'channels': self.channels
+        }