199 lines
5.6 KiB
Python
199 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Voice Selector
|
|
|
|
Interactive voice selection interface for piper-tts voices.
|
|
Allows browsing, testing, and selecting voice models.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from src.tts_engine import TtsEngine
|
|
from src.mpv_player import MpvPlayer
|
|
|
|
|
|
class VoiceSelector:
    """
    Voice selection interface

    Scans a directory tree for ``.onnx`` voice model files and lets the
    user list, audition and pick one from the terminal.

    Each entry in ``self.voices`` is a dict with the keys:
        name:    display name derived from the model filename
        path:    full path to the ``.onnx`` file (as a string)
        relPath: path relative to ``voiceDir`` (as a string)
    """

    def __init__(self, voiceDir=None):
        """
        Initialize voice selector

        Args:
            voiceDir: Directory containing voice models. Defaults to
                "/usr/share/piper-voices/en/en_US" when None.
        """
        if voiceDir is None:
            voiceDir = "/usr/share/piper-voices/en/en_US"

        self.voiceDir = Path(voiceDir)
        self.voices = []
        self._scan_voices()

    def _scan_voices(self):
        """
        Scan for available voice models

        Rebuilds ``self.voices`` from scratch. If the voice directory is
        missing (or is not a directory) the list is simply left empty.
        """
        self.voices = []

        # is_dir() rather than exists(): a plain file cannot contain models.
        if not self.voiceDir.is_dir():
            return

        # Find all .onnx files recursively
        for onnxFile in self.voiceDir.rglob("*.onnx"):
            self.voices.append({
                'name': self._extract_voice_name(onnxFile),
                'path': str(onnxFile),
                'relPath': str(onnxFile.relative_to(self.voiceDir)),
            })

        # Sort by name; relPath breaks ties so that menu numbers do not
        # depend on the order in which the filesystem lists the files.
        self.voices.sort(key=lambda v: (v['name'], v['relPath']))

    def _extract_voice_name(self, voicePath):
        """
        Extract readable voice name from path

        The filename is expected to look like ``en_US-voicename-quality.onnx``.
        The second dash-separated field becomes the name (underscores turned
        into spaces, title-cased) and the third, if present, is appended in
        parentheses.

        Args:
            voicePath: Path to voice model file

        Returns:
            Human-readable voice name, or the bare file stem when the
            filename contains no dash.
        """
        stem = Path(voicePath).stem  # Remove .onnx
        nameParts = stem.split('-')

        if len(nameParts) < 2:
            # Fallback to filename
            return stem

        # nameParts[1] is usually the voice name
        voiceName = nameParts[1].replace('_', ' ').title()
        quality = nameParts[2] if len(nameParts) > 2 else ''

        if quality:
            return f"{voiceName} ({quality})"
        return voiceName

    def get_voices(self):
        """
        Get list of available voices

        Returns:
            List of voice dictionaries (the selector's own list, not a copy)
        """
        return self.voices

    def _voice_by_number(self, numberText):
        """
        Resolve a 1-based menu number typed by the user

        Args:
            numberText: Text that should contain an integer

        Returns:
            The matching voice dictionary, or None (after printing a hint)
            when the number is outside the menu range.

        Raises:
            ValueError: If numberText is not an integer.
        """
        voiceNum = int(numberText)
        if 1 <= voiceNum <= len(self.voices):
            return self.voices[voiceNum - 1]

        print(f"Invalid voice number. Choose 1-{len(self.voices)}")
        return None

    def select_voice_interactive(self):
        """
        Interactive voice selection

        Prints the numbered voice list and then reads commands from stdin
        until a voice is chosen or the user cancels ('q', EOF or Ctrl-C).

        Returns:
            Selected voice path or None if cancelled
        """
        if not self.voices:
            print("No voices found in", self.voiceDir)
            return None

        print("\nAvailable Voices:")
        print("-" * 60)

        for idx, voice in enumerate(self.voices, start=1):
            print(f"{idx}. {voice['name']}")

        print("-" * 60)
        print("\nCommands:")
        print("  <number>     - Select voice")
        print("  t <number>   - Test voice")
        print("  q            - Cancel")
        print()

        while True:
            # The try wraps the whole iteration so that Ctrl-C during a
            # voice test is treated as a cancel, same as at the prompt.
            try:
                choice = input("Select voice> ").strip().lower()

                if choice == 'q':
                    return None

                # Test voice
                if choice.startswith('t '):
                    try:
                        voice = self._voice_by_number(choice[2:])
                    except ValueError:
                        print("Invalid input. Use: t <number>")
                    else:
                        if voice is not None:
                            self._test_voice(voice)
                    continue

                # Select voice
                try:
                    selectedVoice = self._voice_by_number(choice)
                except ValueError:
                    print("Invalid input. Enter a number, 't <number>' to test, or 'q' to cancel")
                    continue

                if selectedVoice is not None:
                    print(f"Selected: {selectedVoice['name']}")
                    return selectedVoice['path']

            except (EOFError, KeyboardInterrupt):
                print("\nCancelled")
                return None

    def _test_voice(self, voice):
        """
        Test a voice by playing sample text

        Errors raised while synthesizing or playing are reported on stdout
        and not propagated. Once a player has been created, its cleanup()
        is always called, even when an error or Ctrl-C interrupts playback.

        Args:
            voice: Voice dictionary
        """
        import time

        print(f"\nTesting voice: {voice['name']}")

        voiceName = voice['name'].split('(')[0].strip()  # Remove quality suffix
        testText = f"Hi, my name is {voiceName}, and I am a piper text to speech voice. Do you like the way I sound?"

        try:
            tts = TtsEngine(voice['path'])
            player = MpvPlayer()

            try:
                print("Generating speech...")
                wavData = tts.text_to_wav_data(testText)

                if wavData:
                    print("Playing...")
                    player.play_wav_data(wavData)

                    # Wait for playback to finish
                    while player.is_playing():
                        time.sleep(0.1)
            finally:
                # Release the player no matter how the block above exits.
                player.cleanup()

        except Exception as e:
            print(f"Error testing voice: {e}")

    def find_voice_by_name(self, name):
        """
        Find voice by name (case-insensitive partial match)

        Voices are checked in list order and the first one whose display
        name contains the search text wins.

        Args:
            name: Voice name to search for

        Returns:
            Voice path or None if not found
        """
        needle = name.lower()

        for voice in self.voices:
            if needle in voice['name'].lower():
                return voice['path']

        return None
|