audiogame-manager/speech/set-voice.sh

#!/usr/bin/env bash
# Set Speech
# Configure SAPI voices and speech rate for wine32 bottle

# License header
#    The contents of this file are subject to the Common Public Attribution
#    License Version 1.0 (the "License"); you may not use this file except in
#    compliance with the License. You may obtain a copy of the License at
#    https://opensource.org/licenses/CPAL-1.0. The License is based on the Mozilla Public License Version
#    1.1 but Sections 14 and 15 have been added to cover use of software over a
#    computer network and provide for limited attribution for the Original
#    Developer. In addition, Exhibit A has been modified to be consistent with
#    Exhibit B.
#
#    Software distributed under the License is distributed on an "AS IS" basis,
#    WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
#    for the specific language governing rights and limitations under the
#    License.
#
#    The Original Code is audiogame manager.
#
#    The Original Developer is not the Initial Developer and is . If
#    left blank, the Original Developer is the Initial Developer.
#
#    The Initial Developer of the Original Code is Billy "Storm Dragon" Wolfe. All portions of
#    the code written by Billy Wolfe are Copyright (c) 2020. All Rights
#    Reserved.
#
#    Contributor Michael Taboada.
#
#    Attribution Copyright Notice: Audiogame manager copyright 2020 Storm Dragon. All rights reserved.
#
#    Attribution Phrase (not exceeding 10 words): A Stormux project
#
#    Attribution URL: https://stormgames.wolfe.casa
#
#    Graphic Image as provided in the Covered Code, if any.
#
#    Display of Attribution Information is required in Larger
#    Works which are defined in the CPAL as a work which combines Covered Code
#    or portions thereof with code not governed by the terms of the CPAL.

# Detect dialog interface type BEFORE potentially setting DISPLAY:
# an empty DISPLAY selects the "dialog" front end, anything else selects "yad".
if [[ -z "$DISPLAY" ]]; then
    dialogType="dialog"
else
    dialogType="yad"
fi

# Source dialog interface wrapper (presumably the provider of the agm_*
# helpers used below — they are not defined in this file).
# dirname is used instead of "${0%/*}" because that expansion leaves $0
# unchanged when it contains no slash (e.g. "bash set-voice.sh"), which
# produced the bogus path "set-voice.sh/../.includes/dialog-interface.sh".
source "$(dirname -- "$0")/../.includes/dialog-interface.sh"

# Settings to improve accessibility of dialog
export DIALOGOPTS='--insecure --no-lines --visit-items'
# Turn off debug messages
export WINEDEBUG="-all"
# Set DISPLAY if needed (done after dialogType was chosen above)
if [[ -z "$DISPLAY" ]]; then
    export DISPLAY=:0
fi

# Set wine prefix to wine32 - this is the only supported bottle for SAPI
export WINEPREFIX="$HOME/.local/wine32"
export bottle="$HOME/.local/wine32"

help() {
    # Print the usage summary (script name, license notice, options) to
    # stdout and terminate the script with status 0.
    cat << EOF
${0##*/}
Released under the terms of the Common Public Attribution License Version 1.0
This is a Stormux project: https://stormux.org

Usage:

With no arguments, open the voice configuration menu.
-h: Show this help screen.
-r <rate>: Set voice rate (0-10, where 10 is fastest).
-v <voice>: Set voice by name.
EOF
    exit 0
}

msgbox() {
    # Show a message box via agm_msgbox. All arguments are joined into one
    # message string; both title arguments are the fixed label "Set Speech".
    local -r boxLabel="Set Speech"
    agm_msgbox "$boxLabel" "$boxLabel" "$*"
}

infobox() {
    # Show a transient notice via agm_infobox, then pause for up to three
    # seconds; a single key press on stdin ends the pause early.
    # The local keeps its original name "timeout" on purpose: bash locals are
    # visible to called functions, so renaming it could change what
    # agm_infobox sees.
    local timeout=3
    agm_infobox "Set Speech" "Set Speech" "$*"
    # Wait for one character or until the timeout elapses.
    read -r -n 1 -t "$timeout"
    # Briefly read again to swallow whatever is left of a typed line.
    read -r -t 0.01
}

yesno() {
    # Ask a yes/no question through agm_yesno and translate its exit status
    # into the word "Yes" (status 0) or "No" (any other status) on stdout.
    agm_yesno "Set Speech" "Set Speech" "$*"
    case $? in
        0) echo "Yes" ;;
        *) echo "No" ;;
    esac
}

voice_menu() {
    # Present the given voice names as a menu via agm_menu.
    # Arguments: one voice name per argument.
    # Each name is passed twice (as the menu tag and as its description).
    # Output/return status are whatever agm_menu produces.
    local -a menuList=()
    # Loop variable is local so the caller's global "i" is not clobbered.
    local voiceEntry
    for voiceEntry in "$@"; do
        menuList+=("$voiceEntry" "$voiceEntry")
    done
    agm_menu "Set Speech" "Set Speech" "Please select a voice:" "${menuList[@]}"
}

rate_menu() {
    # Present speech rates 0 through 10 as a menu via agm_menu.
    # Rate 0 is labelled "(Slowest)", 5 "(Default)" and 10 "(Fastest)".
    # Output/return status are whatever agm_menu produces.
    local -a rateList=()
    # Both loop helpers are local so the caller's global "i" is not clobbered.
    local rateValue desc
    for rateValue in {0..10}; do
        case "$rateValue" in
            0)  desc="Rate $rateValue (Slowest)" ;;
            5)  desc="Rate $rateValue (Default)" ;;
            10) desc="Rate $rateValue (Fastest)" ;;
            *)  desc="Rate $rateValue" ;;
        esac
        rateList+=("$rateValue" "$desc")
    done
    agm_menu "Set Speech" "Set Speech" "Please select speech rate:" "${rateList[@]}"
}

get_voices() {
    # Build the list of installed voices by parsing "wine reg query" output.
    #
    # Globals read:    wine, WINEPREFIX
    # Globals written: voiceList    - display names
    #                  voiceKeyList - registry token names, or "LH_<guid>"
    #                                 for L&H voices; index-aligned with
    #                                 voiceList
    # Returns: 0 when at least one voice was found; 1 otherwise, in which
    #          case voiceList/voiceKeyList are not assigned.
    local -a voices=()
    local -a voiceKeys=()
    local line

    # First, get Microsoft SAPI voices
    local allOutput
    allOutput=$(WINEPREFIX="$WINEPREFIX" "$wine" reg query "HKLM\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens" /s 2>/dev/null)

    # Parser state: the token currently being read, its display name, and
    # whether we are still directly inside the token key (not a subkey).
    local currentKey=""
    local currentName=""
    local inMainVoiceToken=false

    while IFS= read -r line; do
        # Drop carriage returns and trailing whitespace. Done with parameter
        # expansion because this runs once per registry line and forking
        # echo/tr/sed each time is needlessly slow.
        line="${line//$'\r'/}"
        line="${line%"${line##*[![:space:]]}"}"

        # Check if this is a main voice token registry key line (exactly one level deep)
        if [[ "$line" =~ ^HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\([^\\]+)$ ]]; then
            # Flush the previous token if it had both a key and a name
            if [[ -n "$currentKey" && -n "$currentName" ]]; then
                voices+=("$currentName")
                voiceKeys+=("$currentKey")
            fi
            # Start a new token
            currentKey="${BASH_REMATCH[1]}"
            currentName=""
            inMainVoiceToken=true
        # Check if this is a subkey (contains additional parts after token name)
        elif [[ "$line" =~ ^HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\[^\\]+\\ ]]; then
            # Values that follow belong to the subkey, not to the token itself
            inMainVoiceToken=false
        # Check if this is a default value line with voice name and we're in main token
        elif [[ "$line" =~ ^[[:space:]]*\(Default\)[[:space:]]+REG_SZ[[:space:]]+(.+)$ ]] && [[ "$inMainVoiceToken" == true ]]; then
            # Only the first (Default) value of a token is used as its name.
            # The capture comes from the already-trimmed line, so it needs no
            # further cleanup.
            if [[ -z "$currentName" ]]; then
                currentName="${BASH_REMATCH[1]}"
            fi
        fi
    done <<< "$allOutput"

    # Don't forget the last voice
    if [[ -n "$currentKey" && -n "$currentName" ]]; then
        voices+=("$currentName")
        voiceKeys+=("$currentKey")
    fi

    # Now search for L&H TTS voices
    local lhOutput
    lhOutput=$(WINEPREFIX="$WINEPREFIX" "$wine" reg query "HKLM\\SOFTWARE\\L&H\\TTS\\V6.0\\Voice" /s 2>/dev/null)

    if [[ -n "$lhOutput" ]]; then
        while IFS= read -r line; do
            # Same cleanup as above
            line="${line//$'\r'/}"
            line="${line%"${line##*[![:space:]]}"}"

            # Look for voice name entries like "Carol    REG_SZ    {227A0E40-A92A-11d1-B17B-0020AFED142E}"
            if [[ "$line" =~ ^[[:space:]]*([A-Za-z]+)[[:space:]]+REG_SZ[[:space:]]+\{([^}]+)\}$ ]]; then
                local lhVoiceName="${BASH_REMATCH[1]}"
                local lhVoiceGuid="${BASH_REMATCH[2]}"

                # Add L&H prefix to distinguish from SAPI voices (escape ampersand for YAD)
                # NOTE(review): the "&amp;" escape is applied regardless of
                # dialogType — confirm how it renders with the dialog front end.
                voices+=("L&amp;H $lhVoiceName")
                voiceKeys+=("LH_$lhVoiceGuid")
            fi
        done <<< "$lhOutput"
    fi

    if [[ ${#voices[@]} -eq 0 ]]; then
        return 1
    fi

    # Publish the results as global arrays for the other functions.
    # (bash cannot pass arrays to child processes; "export" here only serves
    # to assign them outside this function's local scope.)
    export voiceList=("${voices[@]}")
    export voiceKeyList=("${voiceKeys[@]}")
    return 0
}

get_current_voice() {
    # Print the registry token name of the current default SAPI voice.
    # Reads DefaultTokenId from HKCU\SOFTWARE\Microsoft\Speech\Voices through
    # "$wine reg query". Prints nothing when the value is missing or does not
    # end in "Tokens\<name>". Always returns 0.
    local currentVoice
    currentVoice=$(WINEPREFIX="$WINEPREFIX" "$wine" reg query "HKCU\\SOFTWARE\\Microsoft\\Speech\\Voices" /v "DefaultTokenId" 2>/dev/null | grep "REG_SZ" | sed 's/.*REG_SZ[[:space:]]*//')

    # Nothing configured: stay silent.
    [[ -n "$currentVoice" ]] || return 0

    # The value is a full path such as
    # HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\MSMike;
    # only the final component is wanted.
    [[ "$currentVoice" =~ Tokens\\([^\\]+)$ ]] || return 0

    local voiceKey="${BASH_REMATCH[1]}"
    # Remove carriage returns / newlines, then trailing whitespace.
    voiceKey="${voiceKey//$'\r'/}"
    voiceKey="${voiceKey//$'\n'/}"
    voiceKey="${voiceKey%"${voiceKey##*[![:space:]]}"}"
    echo "$voiceKey"
}

get_current_rate() {
    # Print the current SAPI speech rate as a decimal number.
    # Reads the DefaultTTSRate DWORD from HKCU\SOFTWARE\Microsoft\Speech\Voices
    # through "$wine reg query"; prints "5" when the value is missing or is
    # not a plain hexadecimal number.
    local currentRate
    currentRate=$(WINEPREFIX="$WINEPREFIX" "$wine" reg query "HKCU\\SOFTWARE\\Microsoft\\Speech\\Voices" /v "DefaultTTSRate" 2>/dev/null | grep "REG_DWORD" | sed -e 's/.*REG_DWORD[[:space:]]*//' -e 's/^0x//')

    # Strip carriage returns/newlines and trailing whitespace, as the other
    # registry parsers in this file do. Without this, CRLF-terminated output
    # leaves e.g. "a\r", which fails the hex check below and made the
    # function always report the default.
    currentRate="${currentRate//$'\r'/}"
    currentRate="${currentRate//$'\n'/}"
    currentRate="${currentRate%"${currentRate##*[![:space:]]}"}"

    if [[ "$currentRate" =~ ^[0-9a-fA-F]+$ ]]; then
        # Convert hex to decimal
        echo $((16#$currentRate))
    else
        echo "5"  # Default rate
    fi
}

set_voice() {
    # Make the named voice the default SAPI voice of the prefix.
    # Arguments: $1 - display name exactly as stored in voiceList
    # Globals read: voiceList, voiceKeyList, wine, wineserver, WINEPREFIX
    # Returns: 0 after the registry value was written; 1 when the name is
    #          unknown, the voice is an L&H voice, or "reg add" failed.
    local voiceName="$1"
    local voiceKey=""
    local idx

    # Translate the display name into its registry token name.
    for idx in "${!voiceList[@]}"; do
        [[ "${voiceList[idx]}" == "$voiceName" ]] || continue
        voiceKey="${voiceKeyList[idx]}"
        break
    done

    case "$voiceKey" in
        "")
            msgbox "Error: Voice '$voiceName' not found."
            return 1
            ;;
        LH_*)
            # L&H entries are listed for information only.
            msgbox "L&H voices cannot be set as the system default SAPI voice. They use a proprietary API and must be selected by individual applications that support L&H TTS."
            return 1
            ;;
    esac

    # Stop the wine server first so the registry change takes effect.
    WINEPREFIX="$WINEPREFIX" "$wineserver" -k 2>/dev/null

    # DefaultTokenId holds the full HKLM path of the chosen token.
    local fullVoicePath="HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\$voiceKey"
    if ! WINEPREFIX="$WINEPREFIX" "$wine" reg add "HKCU\\SOFTWARE\\Microsoft\\Speech\\Voices" /v "DefaultTokenId" /t REG_SZ /d "$fullVoicePath" /f 2>/dev/null; then
        msgbox "Error: Failed to set voice."
        return 1
    fi

    infobox "Voice set to: $voiceName"
    return 0
}

set_rate() {
    # Store the speech rate in the prefix registry.
    # Arguments: $1 - rate, an integer from 0 to 10
    # Globals read: wine, wineserver, WINEPREFIX
    # Returns: 0 after the registry value was written; 1 when the rate is
    #          out of range or "reg add" failed.
    local rate="$1"

    # Accept only a single digit or the literal 10.
    [[ "$rate" =~ ^[0-9]$|^10$ ]] || {
        msgbox "Error: Rate must be between 0 and 10."
        return 1
    }

    # Stop the wine server first so the registry change takes effect.
    WINEPREFIX="$WINEPREFIX" "$wineserver" -k 2>/dev/null

    # The DWORD is passed to reg.exe in hexadecimal form.
    local hexRate
    printf -v hexRate '%x' "$rate"
    if ! WINEPREFIX="$WINEPREFIX" "$wine" reg add "HKCU\\SOFTWARE\\Microsoft\\Speech\\Voices" /v "DefaultTTSRate" /t REG_DWORD /d "0x$hexRate" /f 2>/dev/null; then
        msgbox "Error: Failed to set speech rate."
        return 1
    fi

    infobox "Speech rate set to: $rate"
    return 0
}

test_voice() {
    # Speak a sample sentence with the named voice.
    # Arguments: $1 - display name, passed straight to set_voice
    # Note: the voice is applied through set_voice, i.e. it is written to the
    # registry as the default voice; nothing here restores the previous one.
    # Returns: 1 when set_voice fails, otherwise the status of the cscript run.
    local voiceName="$1"
    local scriptDir="${WINEPREFIX}/drive_c/windows/temp"

    set_voice "$voiceName" || return 1

    # Write a small VBScript that speaks the sample text through SAPI.
    mkdir -p "$scriptDir"
    printf '%s\n' \
        'dim speechObject' \
        'set speechObject = createObject("sapi.spvoice")' \
        "speechObject.speak \"This is a test of your chosen voice. It contains multiple sentences and punctuation, and is designed to give a full representation of this voice's qualities.\"" \
        > "${scriptDir}/speak.vbs"

    # Run it inside the prefix.
    WINEPREFIX="$WINEPREFIX" "$wine" cscript "c:\\windows\\temp\\speak.vbs" 2>/dev/null
}

initialize_sapi() {
    # Create a SAPI voice object once inside the prefix by running a tiny
    # VBScript that speaks an empty string.
    # Globals read: wine, WINEPREFIX
    # Returns: the status of the cscript run.
    local scriptDir="${WINEPREFIX}/drive_c/windows/temp"

    mkdir -p "$scriptDir"
    printf '%s\n' \
        'dim speechObject' \
        'set speechObject = createObject("sapi.spvoice")' \
        'speechObject.speak ""' \
        > "${scriptDir}/init.vbs"

    WINEPREFIX="$WINEPREFIX" "$wine" cscript "c:\\windows\\temp\\init.vbs" 2>/dev/null
}

# Parse command line options:
#   -h         print usage and exit
#   -r <rate>  remember the requested rate (validated here: 0-10)
#   -v <voice> remember the requested voice name
# Anything unrecognised is answered with the usage text as well.
while getopts "hr:v:" option; do
    case "$option" in
        r)
            [[ "$OPTARG" =~ ^[0-9]$|^10$ ]] || {
                echo "Error: Rate must be between 0 and 10."
                exit 1
            }
            requestedRate="$OPTARG"
            ;;
        v)
            requestedVoice="$OPTARG"
            ;;
        h | *)
            help
            ;;
    esac
done

# The wine32 bottle must already exist; this script does not create it.
[[ -d "$WINEPREFIX" ]] || {
    msgbox "Error: Wine32 bottle not found at $WINEPREFIX. Please create it first."
    exit 1
}

# Pick up the bottle's own configuration when it is readable; WINE and
# WINESERVER are exported afterwards (presumably agm.conf assigns them —
# verify against the file's contents).
if [[ -r "${WINEPREFIX}/agm.conf" ]]; then
    source "${WINEPREFIX}/agm.conf"
    export WINE WINESERVER
fi
# Fall back to the bundled wine32 binaries when WINE/WINESERVER are empty.
wine="${WINE:-$HOME/.local/share/audiogame-manager/wine32/bin/wine}"
wineserver="${WINESERVER:-$HOME/.local/share/audiogame-manager/wine32/bin/wineserver}"

# Without an executable wine nothing below can work.
[[ -x "$wine" ]] || {
    msgbox "Error: Wine executable not found at $wine"
    exit 1
}

# Initialize SAPI
initialize_sapi

# Collect the installed voices; give up when there are none.
get_voices || {
    msgbox "No SAPI voices found in wine32 bottle. Please install SAPI voices first."
    exit 1
}

# Apply options given on the command line.
# cliStatus records whether any requested change failed so that the exit
# status below reflects it (previously the script always exited 0 here, even
# when set_voice had reported an error).
# NOTE(review): -r without -v still falls through to the interactive menu
# below, although help() says the menu opens "with no arguments" — confirm
# which behaviour is intended.
cliStatus=0
if [[ -n "$requestedRate" ]]; then
    set_rate "$requestedRate" || cliStatus=1
fi

if [[ -n "$requestedVoice" ]]; then
    set_voice "$requestedVoice" || cliStatus=1
    exit "$cliStatus"
fi

# Show interactive menu if no voice specified
currentVoice=$(get_current_voice)
currentRate=$(get_current_rate)

# Verify arrays are properly set
if [[ ${#voiceList[@]} -eq 0 ]]; then
    msgbox "Error: No voices loaded."
    exit 1
fi


# Interactive main menu: repeat until the user picks "quit" or the menu
# returns an empty choice.
while true; do
    # Resolve the display name that belongs to the current voice key.
    currentVoiceName="None"
    if [[ -n "$currentVoice" ]]; then
        for i in "${!voiceKeyList[@]}"; do
            # Compare against a copy of the key without carriage returns,
            # newlines or trailing whitespace.
            cleanKey="${voiceKeyList[i]}"
            cleanKey="${cleanKey//$'\r'/}"
            cleanKey="${cleanKey//$'\n'/}"
            cleanKey="${cleanKey%"${cleanKey##*[![:space:]]}"}"
            [[ "$cleanKey" == "$currentVoice" ]] || continue
            currentVoiceName="${voiceList[i]}"
            break
        done
    fi

    # Menu entries as tag/description pairs, showing the current settings.
    declare -a menuOptions=(
        "voice" "Select Voice (Current: $currentVoiceName)"
        "rate" "Set Rate (Current: $currentRate)"
        "test" "Test Current Voice"
        "quit" "Exit"
    )

    choice=$(agm_menu "Set Speech" "SAPI Voice Configuration" "Choose an option:" "${menuOptions[@]}")

    case "$choice" in
        voice)
            # Re-read the current voice only after a successful change.
            selectedVoice=$(voice_menu "${voiceList[@]}") \
                && [[ -n "$selectedVoice" ]] \
                && set_voice "$selectedVoice" \
                && currentVoice=$(get_current_voice)
            ;;
        rate)
            # Re-read the current rate only after a successful change.
            newRate=$(rate_menu) \
                && [[ -n "$newRate" ]] \
                && set_rate "$newRate" \
                && currentRate=$(get_current_rate)
            ;;
        test)
            if [[ -z "$currentVoice" || "$currentVoiceName" == "None" ]]; then
                msgbox "No voice selected. Please select a voice first."
            else
                infobox "Testing voice: $currentVoiceName"
                test_voice "$currentVoiceName"
            fi
            ;;
        quit | "")
            break
            ;;
    esac
done

exit 0