From 63be4fc9e706fa6ff2df9633f5f6dc98a1fef819 Mon Sep 17 00:00:00 2001 From: Storm Dragon Date: Mon, 1 Dec 2025 02:24:20 -0500 Subject: [PATCH] tightened up Sway support. There shouldn't be any bugs, but with something like this you never know. I3 users should certainly not encounter any bugs from these changes, please yell if something weird happens. Oh, and finally pushed the ai.py script I have been using for some time without pushing. I guess several months of testing should be fine. lol --- README.md | 27 +- i38.sh | 54 +- scripts/ai.py | 1600 +++++++++++++++++++++++++++++++++ scripts/bind_to_scratchpad.sh | 44 +- scripts/keyboard.sh | 68 +- scripts/reminder.sh | 14 +- scripts/screen_controller.sh | 53 +- 7 files changed, 1805 insertions(+), 55 deletions(-) create mode 100755 scripts/ai.py diff --git a/README.md b/README.md index bee7882..0374b7b 100644 --- a/README.md +++ b/README.md @@ -42,10 +42,35 @@ scrot: For OCR - x11bell: [optional] Bell support if you do not have a PC speaker. Available from https://github.com/jovanlanik/x11bell - xbacklight: [optional] for screen brightness adjustment - xclip: Clipboard support -- xfce4-notifyd: For sending notifications. Replaces notification-daemon (Sway users will need to install the customized variant at ) +- xfce4-notifyd: For sending notifications. Replaces notification-daemon - xorg-setxkbmap: [optional] for multiple keyboard layouts - yad: For screen reader accessible dialogs +### Sway/Wayland Specific Dependencies + +When using I38 with Sway instead of i3, the following Wayland-native alternatives are recommended for optimal compatibility. 
I38 will automatically detect and use these when running with the `-s` flag: + +- **Recommended (Wayland-native)**: + - mako: Wayland-native notification daemon (preferred over xfce4-notifyd) + - wl-clipboard: Wayland clipboard utilities (provides wl-copy and wl-paste) + - clipman: Wayland clipboard manager (works with wl-clipboard) + - wlr-randr: [optional] Wayland display configuration tool for screen brightness control + - brightnessctl: [optional] Alternative brightness control tool for Sway + +- **Not needed on Sway/Wayland** (these are X11-only): + - xbrlapi: X11 braille helper - not needed on Wayland, BRLTTY works directly + - x11bell: X11 bell support - Wayland has native alternatives + - clipster: X11 clipboard manager - use clipman/wl-clipboard instead + - xdotool: X11 window manipulation - Sway uses native IPC instead + - xprop: X11 window properties - Sway uses native IPC instead + - xrandr: X11 display configuration - use wlr-randr or brightnessctl instead + - setxkbmap: X11 keyboard layout - Sway uses native input configuration + +- **Works on both** (via XWayland fallback if Wayland-native not available): + - xfce4-notifyd: Works via XWayland, but mako is preferred for native Wayland support + +**Note**: On Sway, keyboard layouts must be configured in the Sway config file (`~/.config/sway/config`) using the `input` block. The keyboard.sh script will cycle through configured layouts but cannot set layouts directly like on X11. + ### AI Assistant (Optional) - python-requests: For Ollama integration - claude: [optional] Claude Code CLI for Claude AI integration. 
Install from https://claude.ai/code diff --git a/i38.sh b/i38.sh index 314c03f..1eac8fb 100755 --- a/i38.sh +++ b/i38.sh @@ -883,20 +883,36 @@ $(if [[ $sounds -eq 0 ]]; then echo "exec_always --no-startup-id ${i3Path}/scripts/sound.py" fi fi -if [[ $brlapi -eq 0 ]]; then +# xbrlapi is X11-only, skip on Sway/Wayland +if [[ $brlapi -eq 0 ]] && [[ $usingSway -ne 0 ]]; then echo 'exec --no-startup-id xbrlapi --quiet' fi if [[ $udiskie -eq 0 ]]; then echo 'exec --no-startup-id udiskie' fi -if [[ -x "/usr/lib/xfce4/notifyd/xfce4-notifyd" ]]; then - echo 'exec_always --no-startup-id /usr/lib/xfce4/notifyd/xfce4-notifyd' -elif [[ -x "/usr/lib/notification-daemon-1.0/notification-daemon" ]]; then - echo 'exec_always --no-startup-id /usr/lib/notification-daemon-1.0/notification-daemon -r' -fi -# Work around for weird Void Linux stuff. -if [[ -x "/usr/libexec/notification-daemon" ]]; then - echo 'exec_always --no-startup-id /usr/libexec/notification-daemon -r' +# Notification daemon +if [[ $usingSway -eq 0 ]]; then + # Sway: prefer Wayland-native notification daemons + if command -v mako &> /dev/null; then + echo 'exec_always --no-startup-id mako' + elif [[ -x "/usr/lib/xfce4/notifyd/xfce4-notifyd" ]]; then + # Fallback to X11 variant via XWayland + echo 'exec_always --no-startup-id /usr/lib/xfce4/notifyd/xfce4-notifyd' + elif [[ -x "/usr/lib/notification-daemon-1.0/notification-daemon" ]]; then + echo 'exec_always --no-startup-id /usr/lib/notification-daemon-1.0/notification-daemon -r' + elif [[ -x "/usr/libexec/notification-daemon" ]]; then + echo 'exec_always --no-startup-id /usr/libexec/notification-daemon -r' + fi +else + # i3: use X11 notification daemons + if [[ -x "/usr/lib/xfce4/notifyd/xfce4-notifyd" ]]; then + echo 'exec_always --no-startup-id /usr/lib/xfce4/notifyd/xfce4-notifyd' + elif [[ -x "/usr/lib/notification-daemon-1.0/notification-daemon" ]]; then + echo 'exec_always --no-startup-id /usr/lib/notification-daemon-1.0/notification-daemon -r' + elif [[ -x 
"/usr/libexec/notification-daemon" ]]; then + # Work around for weird Void Linux stuff + echo 'exec_always --no-startup-id /usr/libexec/notification-daemon -r' + fi fi if command -v remind &> /dev/null && command -v notify-send &> /dev/null ; then echo "exec_always --no-startup-id ${i3Path}/scripts/launch_remind.sh" @@ -908,13 +924,27 @@ fi if [[ $dex -eq 0 ]]; then echo '# Start XDG autostart .desktop files using dex. See also' echo '# https://wiki.archlinux.org/index.php/XDG_Autostart' - echo 'exec --no-startup-id dex --autostart --environment i3' + if [[ $usingSway -eq 0 ]]; then + echo 'exec --no-startup-id dex --autostart --environment sway' + else + echo 'exec --no-startup-id dex --autostart --environment i3' + fi else echo '# Startup applications' - if command -v x11bell &> /dev/null ; then + # x11bell is X11-only, skip on Sway/Wayland + if command -v x11bell &> /dev/null && [[ $usingSway -ne 0 ]]; then echo 'exec --no-startup-id x11bell play -nqV0 synth .1 sq norm -12' fi - echo 'exec --no-startup-id clipster -d' + # Clipboard manager + if [[ $usingSway -eq 0 ]]; then + # Sway: use Wayland clipboard manager + if command -v wl-paste &> /dev/null && command -v clipman &> /dev/null; then + echo 'exec wl-paste -t text --watch clipman store' + fi + else + # i3: use X11 clipboard manager + echo 'exec --no-startup-id clipster -d' + fi echo "exec $screenReader" echo "exec_always --no-startup-id ${i3Path}/scripts/desktop.sh" fi) diff --git a/scripts/ai.py b/scripts/ai.py new file mode 100755 index 0000000..941c590 --- /dev/null +++ b/scripts/ai.py @@ -0,0 +1,1600 @@ +#!/usr/bin/env python3 +""" +AI Assistant Interface +Provides accessibility-focused AI interaction with multiple providers +""" + +import gi +gi.require_version('Gtk', '3.0') +from gi.repository import Gtk, GLib, Gdk +import json +import os +import subprocess +import tempfile +import configparser +from pathlib import Path +import i3ipc +import threading +import requests +import time +import pyaudio 
+import wave + +class VoiceRecognition: + """Voice recognition system for AI assistant""" + + def __init__(self, config): + self.config = config + self.is_recording = False + self.audio = None + self.stream = None + + # Audio settings + self.sample_rate = 16000 + self.chunk_size = 1024 + self.audio_format = pyaudio.paInt16 + self.channels = 1 + + try: + import speech_recognition as sr + self.recognizer = sr.Recognizer() + self.microphone = sr.Microphone() + self.sr_available = True + + # Adjust for ambient noise + with self.microphone as source: + self.recognizer.adjust_for_ambient_noise(source) + except ImportError: + self.sr_available = False + self.recognizer = None + self.microphone = None + + def is_available(self): + """Check if voice recognition is available""" + return self.sr_available + + def start_recording(self): + """Start recording audio""" + if not self.sr_available: + return False + + try: + self.audio = pyaudio.PyAudio() + self.is_recording = True + return True + except Exception as e: + print(f"Error starting recording: {e}") + return False + + def stop_recording(self): + """Stop recording audio""" + self.is_recording = False + if self.stream: + self.stream.stop_stream() + self.stream.close() + self.stream = None + if self.audio: + self.audio.terminate() + self.audio = None + + def recognize_speech(self, timeout=5, phrase_timeout=1): + """Recognize speech from microphone""" + if not self.sr_available: + return "Error: Speech recognition not available. Install python-speech-recognition." 
+ + try: + import speech_recognition as sr + + with self.microphone as source: + # Listen for audio with timeout + audio = self.recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_timeout) + + # Try to recognize speech using Google Speech Recognition + try: + text = self.recognizer.recognize_google(audio) + return text + except sr.RequestError: + # Try offline recognition as fallback + try: + text = self.recognizer.recognize_sphinx(audio) + return text + except sr.RequestError: + return "Error: Speech recognition service unavailable" + except sr.UnknownValueError: + return "Sorry, I couldn't understand that. Please try again." + + except sr.WaitTimeoutError: + return "No speech detected. Please try again." + except Exception as e: + return f"Error during speech recognition: {str(e)}" + + def recognize_speech_continuous(self, callback, stop_event): + """Continuous speech recognition for wake word detection""" + if not self.sr_available: + return + + try: + import speech_recognition as sr + + with self.microphone as source: + while not stop_event.is_set(): + try: + # Listen for 1 second chunks + audio = self.recognizer.listen(source, timeout=1, phrase_time_limit=1) + + try: + text = self.recognizer.recognize_google(audio) + callback(text.lower()) + except (sr.UnknownValueError, sr.RequestError): + # Ignore recognition errors in continuous mode + pass + except sr.WaitTimeoutError: + # Normal timeout, continue listening + continue + except Exception as e: + print(f"Error in continuous recognition: {e}") + break + except ImportError: + print("Speech recognition not available") + return + +class AiConfig: + """Configuration manager for AI settings with XDG directory support""" + + def __init__(self): + self.configDir = Path(os.environ.get('XDG_CONFIG_HOME', + os.path.expanduser('~/.config'))) / 'stormux' / 'I38' + self.configFile = self.configDir / 'ai.conf' + self.configDir.mkdir(parents=True, exist_ok=True) + self.load_config() + + def 
load_config(self): + """Load configuration from file""" + self.config = configparser.ConfigParser() + self.config.read(self.configFile) + + # Set defaults if sections don't exist + if 'ai' not in self.config: + self.config.add_section('ai') + + # Default values + defaults = { + 'provider': 'claude-code', + 'ollama_model': 'llama2', + 'ollama_vision_model': 'llava', + 'ollama_host': 'http://localhost:11434', + 'confirm_actions': 'true', + 'voice_enabled': 'false', + 'voice_output': 'true', + 'wake_word': 'hey assistant', + 'voice_timeout': '5', + 'continuous_listening': 'false' + } + + for key, value in defaults.items(): + if key not in self.config['ai']: + self.config.set('ai', key, value) + + def save_config(self): + """Save configuration to file""" + with open(self.configFile, 'w') as f: + self.config.write(f) + + def get(self, key, fallback=None): + """Get configuration value""" + return self.config.get('ai', key, fallback=fallback) + + def set(self, key, value): + """Set configuration value""" + self.config.set('ai', key, str(value)) + self.save_config() + +class OllamaInterface: + """Interface for Ollama AI provider""" + + def __init__(self, host='http://localhost:11434'): + self.host = host + + def get_models(self): + """Get list of available Ollama models""" + try: + response = requests.get(f'{self.host}/api/tags', timeout=5) + if response.status_code == 200: + models = response.json().get('models', []) + return [model['name'] for model in models] + except Exception as e: + print(f"Error getting Ollama models: {e}") + return [] + + def get_vision_models(self): + """Get list of models that can handle images""" + all_models = self.get_models() + # Common vision model patterns + vision_patterns = ['llava', 'llama3.2-vision', 'minicpm-v', 'bakllava', 'moondream'] + vision_models = [] + + for model in all_models: + model_lower = model.lower() + if any(pattern in model_lower for pattern in vision_patterns): + vision_models.append(model) + + return vision_models + 
+ def is_vision_model(self, model_name): + """Check if a model can handle images""" + if not model_name: + return False + model_lower = model_name.lower() + vision_patterns = ['llava', 'llama3.2-vision', 'minicpm-v', 'bakllava', 'moondream'] + return any(pattern in model_lower for pattern in vision_patterns) + + def is_available(self): + """Check if Ollama is running and available""" + try: + response = requests.get(f'{self.host}/api/tags', timeout=3) + return response.status_code == 200 + except: + return False + + def send_message(self, message, model, context=None, image_path=None): + """Send message to Ollama""" + try: + data = { + 'model': model, + 'prompt': message, + 'stream': False + } + + if context and not context.startswith("You are a helpful AI assistant"): + data['system'] = context + + # Handle image if provided + if image_path and os.path.exists(image_path): + import base64 + + # Check if the model can handle images + if not self.is_vision_model(model): + return f"Error: Model '{model}' cannot process images. Please select a vision model like llava or llama3.2-vision in settings." 
+ + # Encode image to base64 + try: + with open(image_path, 'rb') as image_file: + image_data = base64.b64encode(image_file.read()).decode('utf-8') + data['images'] = [image_data] + except Exception as e: + return f"Error reading image: {str(e)}" + + response = requests.post(f'{self.host}/api/generate', + json=data, timeout=60) # Longer timeout for image processing + if response.status_code == 200: + return response.json().get('response', 'No response received') + else: + return f"Error: HTTP {response.status_code}" + except Exception as e: + return f"Error communicating with Ollama: {str(e)}" + +class ClaudeCodeInterface: + """Interface for Claude Code AI provider""" + + def is_available(self): + """Check if Claude Code is available""" + try: + result = subprocess.run(['claude', '--version'], + capture_output=True, text=True, timeout=5) + return result.returncode == 0 + except: + return False + + def send_message(self, message, context=None, image_path=None): + """Send message to Claude Code""" + try: + cmd = ['claude'] + + # Add context if provided + if context and not context.startswith("You are a helpful AI assistant"): + message = f"Context: {context}\n\n{message}" + + # Add image if provided + if image_path and os.path.exists(image_path): + cmd.extend(['--image', image_path]) + + # Send the message + cmd.append(message) + + # Run from home directory to avoid picking up project context + result = subprocess.run(cmd, capture_output=True, text=True, timeout=60, cwd=os.path.expanduser('~')) + + if result.returncode == 0: + return result.stdout.strip() + else: + return f"Error: {result.stderr.strip()}" + except subprocess.TimeoutExpired: + return "Error: Request timed out" + except Exception as e: + return f"Error communicating with Claude Code: {str(e)}" + +class WindowContext: + """Get context information from focused window""" + + def __init__(self): + try: + self.i3 = i3ipc.Connection() + except: + self.i3 = None + + def get_focused_window_info(self): + """Get 
information about the currently focused window""" + if not self.i3: + return "Unable to connect to i3" + + try: + tree = self.i3.get_tree() + focused = tree.find_focused() + + if not focused: + return "No focused window found" + + info = { + 'name': focused.name or 'Unknown', + 'class': getattr(focused, 'window_class', 'Unknown'), + 'title': getattr(focused, 'window_title', 'Unknown'), + 'workspace': focused.workspace().name if focused.workspace() else 'Unknown' + } + + return f"Current application: {info['name']}\nWindow type: {info['class']}" + except Exception as e: + return f"Error getting window info: {str(e)}" + +class AiAssistant(Gtk.Window): + """Main AI Assistant window with accessibility features""" + + def __init__(self): + super().__init__(title="AI Assistant") + + # Initialize components + self.config = AiConfig() + self.claudeInterface = ClaudeCodeInterface() + self.ollamaInterface = OllamaInterface(self.config.get('ollama_host')) + self.windowContext = WindowContext() + self.voiceRecognition = VoiceRecognition(self.config) + + # Voice mode state + self.continuousListening = False + self.listeningThread = None + self.stopListening = threading.Event() + + # Window setup + self.set_default_size(600, 500) + self.set_position(Gtk.WindowPosition.CENTER) + self.connect("destroy", Gtk.main_quit) + self.connect("key-press-event", self.on_key_press) + + # Enable accessibility + self.set_can_focus(True) + self.set_focus_on_map(True) + + # Create notebook for tabs + self.notebook = Gtk.Notebook() + self.notebook.set_tab_pos(Gtk.PositionType.TOP) + self.notebook.set_can_focus(True) + self.notebook.set_scrollable(True) + self.notebook.connect("switch-page", self.on_tab_switched) + self.add(self.notebook) + + # Create tabs + self.create_interaction_tab() + self.create_settings_tab() + + # Set focus to interaction tab and initial focus + self.notebook.set_current_page(0) + + # Set initial focus to question text after window is shown + 
GLib.idle_add(self.set_initial_focus) + + # Update button labels with current AI provider + GLib.idle_add(self.update_button_labels) + + + def create_interaction_tab(self): + """Create the main interaction tab""" + # Create main container + vbox = Gtk.VBox(spacing=10) + vbox.set_border_width(10) + + # Add question section + questionLabel = Gtk.Label("_Ask AI a question:") + questionLabel.set_use_underline(True) + questionLabel.set_alignment(0, 0.5) + vbox.pack_start(questionLabel, False, False, 0) + + # Question text view with scrolling + scrollWindow = Gtk.ScrolledWindow() + scrollWindow.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC) + scrollWindow.set_size_request(-1, 100) + scrollWindow.set_can_focus(True) + + self.questionText = Gtk.TextView() + self.questionText.set_wrap_mode(Gtk.WrapMode.WORD) + self.questionText.set_can_focus(True) + self.questionText.set_accepts_tab(False) # Allow Tab to move focus instead of inserting tab + + # Set accessibility properties + atk_obj = self.questionText.get_accessible() + atk_obj.set_name("Question input") + atk_obj.set_description("Enter your question for the AI assistant here") + + # Link label to text view for screen readers + questionLabel.set_mnemonic_widget(self.questionText) + + # Connect key press event for additional navigation + self.questionText.connect("key-press-event", self.on_textview_key_press) + + scrollWindow.add(self.questionText) + vbox.pack_start(scrollWindow, False, False, 0) + + # Action buttons row + buttonBox = Gtk.HBox(spacing=10) + + self.askButton = Gtk.Button("Ask _Question") + self.askButton.set_use_underline(True) + self.askButton.connect("clicked", self.on_ask_question) + self.askButton.set_can_focus(True) + self.askButton.get_accessible().set_description("Send your question to the AI assistant") + buttonBox.pack_start(self.askButton, True, True, 0) + + self.contextButton = Gtk.Button("Ask About _Window") + self.contextButton.set_use_underline(True) + 
self.contextButton.connect("clicked", self.on_ask_with_context) + self.contextButton.set_can_focus(True) + self.contextButton.get_accessible().set_description("Ask about the currently focused window") + buttonBox.pack_start(self.contextButton, True, True, 0) + + self.actionButton = Gtk.Button("Request _Action") + self.actionButton.set_use_underline(True) + self.actionButton.connect("clicked", self.on_request_action) + self.actionButton.set_can_focus(True) + self.actionButton.get_accessible().set_description("Request step-by-step instructions from AI") + buttonBox.pack_start(self.actionButton, True, True, 0) + + vbox.pack_start(buttonBox, False, False, 0) + + # Voice input section + voiceFrame = Gtk.Frame(label="Voice Input") + voiceBox = Gtk.HBox(spacing=10) + voiceBox.set_border_width(10) + + self.voiceButton = Gtk.Button("🎤 _Voice Question") + self.voiceButton.set_use_underline(True) + self.voiceButton.connect("clicked", self.on_voice_question) + self.voiceButton.set_can_focus(True) + self.voiceButton.set_sensitive(self.voiceRecognition.is_available()) + self.voiceButton.get_accessible().set_description("Record your question using voice input") + voiceBox.pack_start(self.voiceButton, True, True, 0) + + self.listenToggle = Gtk.ToggleButton("👂 _Continuous Listen") + self.listenToggle.set_use_underline(True) + self.listenToggle.connect("toggled", self.on_toggle_continuous_listening) + self.listenToggle.set_can_focus(True) + self.listenToggle.set_sensitive(self.voiceRecognition.is_available()) + self.listenToggle.get_accessible().set_description("Toggle continuous listening for wake word") + voiceBox.pack_start(self.listenToggle, True, True, 0) + + # Voice status label + self.voiceStatus = Gtk.Label("") + voiceBox.pack_start(self.voiceStatus, False, False, 0) + + voiceFrame.add(voiceBox) + vbox.pack_start(voiceFrame, False, False, 0) + + # File sharing section + fileLabel = Gtk.Label("Share _file with AI:") + fileLabel.set_use_underline(True) + 
fileLabel.set_alignment(0, 0.5) + vbox.pack_start(fileLabel, False, False, 0) + + fileBox = Gtk.HBox(spacing=10) + + self.fileEntry = Gtk.Entry() + self.fileEntry.set_placeholder_text("Select a file to share...") + self.fileEntry.set_can_focus(True) + self.fileEntry.get_accessible().set_name("File path") + self.fileEntry.get_accessible().set_description("Path to file to share with AI") + fileLabel.set_mnemonic_widget(self.fileEntry) + fileBox.pack_start(self.fileEntry, True, True, 0) + + self.browseButton = Gtk.Button("_Browse") + self.browseButton.set_use_underline(True) + self.browseButton.connect("clicked", self.on_browse_file) + self.browseButton.set_can_focus(True) + self.browseButton.get_accessible().set_description("Browse for file to share") + fileBox.pack_start(self.browseButton, False, False, 0) + + self.shareButton = Gtk.Button("Ask About _File") + self.shareButton.set_use_underline(True) + self.shareButton.connect("clicked", self.on_ask_about_file) + self.shareButton.set_can_focus(True) + self.shareButton.get_accessible().set_description("Ask AI about the selected file") + fileBox.pack_start(self.shareButton, False, False, 0) + + vbox.pack_start(fileBox, False, False, 0) + + # Image description section + self.imageButton = Gtk.Button("Describe _Screenshot") + self.imageButton.set_use_underline(True) + self.imageButton.connect("clicked", self.on_describe_image) + self.imageButton.set_can_focus(True) + self.imageButton.get_accessible().set_description("Take screenshot and get AI description") + vbox.pack_start(self.imageButton, False, False, 0) + + # Selected text section + self.selectedButton = Gtk.Button("Analyze _Selected Content") + self.selectedButton.set_use_underline(True) + self.selectedButton.connect("clicked", self.on_analyze_selected) + self.selectedButton.set_can_focus(True) + self.selectedButton.get_accessible().set_description("Analyze selected text or screen content using OCR") + vbox.pack_start(self.selectedButton, False, False, 0) + + # 
Response section + self.responseLabel = Gtk.Label("AI _Response:") + self.responseLabel.set_use_underline(True) + self.responseLabel.set_alignment(0, 0.5) + vbox.pack_start(self.responseLabel, False, False, 0) + + # Response text view with scrolling + responseScrollWindow = Gtk.ScrolledWindow() + responseScrollWindow.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC) + responseScrollWindow.set_can_focus(True) + + self.responseText = Gtk.TextView() + self.responseText.set_wrap_mode(Gtk.WrapMode.WORD) + self.responseText.set_editable(False) + self.responseText.set_can_focus(True) + self.responseText.set_accepts_tab(False) + + # Set accessibility properties for response + response_atk = self.responseText.get_accessible() + response_atk.set_name("AI Response") + response_atk.set_description("AI assistant's response to your question") + + # Link response label to text view + self.responseLabel.set_mnemonic_widget(self.responseText) + + responseScrollWindow.add(self.responseText) + vbox.pack_start(responseScrollWindow, True, True, 0) + + # Add tab to notebook + tabLabel = Gtk.Label("Interaction") + self.notebook.append_page(vbox, tabLabel) + + # Set initial focus + self.questionText.grab_focus() + + def create_settings_tab(self): + """Create the settings tab""" + # Create main container + vbox = Gtk.VBox(spacing=15) + vbox.set_border_width(15) + vbox.set_can_focus(False) # Container shouldn't steal focus + + # AI Provider section + providerFrame = Gtk.Frame(label="AI Provider") + providerBox = Gtk.VBox(spacing=10) + providerBox.set_border_width(10) + # Make sure the box itself doesn't interfere with focus + providerBox.set_can_focus(False) + + # Claude Code option + self.claudeRadio = Gtk.RadioButton.new_with_mnemonic(None, "_Claude Code") + self.claudeRadio.connect("toggled", self.on_provider_changed) + self.claudeRadio.set_can_focus(True) + self.claudeRadio.get_accessible().set_description("Use Claude Code CLI as AI provider") + 
providerBox.pack_start(self.claudeRadio, False, False, 0) + + # Ollama option + self.ollamaRadio = Gtk.RadioButton.new_with_mnemonic_from_widget(self.claudeRadio, "_Ollama") + self.ollamaRadio.connect("toggled", self.on_provider_changed) + self.ollamaRadio.set_can_focus(True) + self.ollamaRadio.get_accessible().set_description("Use local Ollama service as AI provider") + providerBox.pack_start(self.ollamaRadio, False, False, 0) + + providerFrame.add(providerBox) + vbox.pack_start(providerFrame, False, False, 0) + + # Ollama settings + self.ollamaFrame = Gtk.Frame(label="Ollama Settings") + ollamaBox = Gtk.VBox(spacing=10) + ollamaBox.set_border_width(10) + + # Text Models section + modelLabel = Gtk.Label("Text Models:") + modelLabel.set_alignment(0, 0.5) + ollamaBox.pack_start(modelLabel, False, False, 0) + + # Container for text model radio buttons + self.textModelBox = Gtk.VBox(spacing=5) + self.textModelBox.set_border_width(10) + ollamaBox.pack_start(self.textModelBox, False, False, 0) + + # Will be populated with radio buttons in refresh_ollama_models() + self.textModelRadios = [] + self.textModelGroup = None + + # Vision Models section + visionModelLabel = Gtk.Label("Vision Models:") + visionModelLabel.set_alignment(0, 0.5) + ollamaBox.pack_start(visionModelLabel, False, False, 0) + + # Container for vision model radio buttons + self.visionModelBox = Gtk.VBox(spacing=5) + self.visionModelBox.set_border_width(10) + ollamaBox.pack_start(self.visionModelBox, False, False, 0) + + # Will be populated with radio buttons in refresh_ollama_models() + self.visionModelRadios = [] + self.visionModelGroup = None + + # Refresh models button + self.refreshButton = Gtk.Button("_Refresh Models") + self.refreshButton.set_use_underline(True) + self.refreshButton.connect("clicked", self.on_refresh_models) + self.refreshButton.set_can_focus(True) + self.refreshButton.get_accessible().set_description("Refresh the list of available Ollama models") + 
ollamaBox.pack_start(self.refreshButton, False, False, 0) + + # Host entry + hostLabel = Gtk.Label("Ollama _Host:") + hostLabel.set_use_underline(True) + hostLabel.set_alignment(0, 0.5) + ollamaBox.pack_start(hostLabel, False, False, 0) + + self.hostEntry = Gtk.Entry() + self.hostEntry.set_text(self.config.get('ollama_host')) + self.hostEntry.set_can_focus(True) + self.hostEntry.get_accessible().set_name("Ollama host URL") + self.hostEntry.get_accessible().set_description("URL of the Ollama service") + hostLabel.set_mnemonic_widget(self.hostEntry) + ollamaBox.pack_start(self.hostEntry, False, False, 0) + + self.ollamaFrame.add(ollamaBox) + vbox.pack_start(self.ollamaFrame, False, False, 0) + + # Voice settings + self.voiceFrame = Gtk.Frame(label="Voice Settings") + voiceSettingsBox = Gtk.VBox(spacing=10) + voiceSettingsBox.set_border_width(10) + + self.voiceEnabledCheck = Gtk.CheckButton("Enable _voice input") + self.voiceEnabledCheck.set_use_underline(True) + self.voiceEnabledCheck.set_active(self.config.get('voice_enabled') == 'true') + self.voiceEnabledCheck.set_sensitive(self.voiceRecognition.is_available()) + self.voiceEnabledCheck.set_can_focus(True) + self.voiceEnabledCheck.get_accessible().set_description("Enable voice input for asking questions") + voiceSettingsBox.pack_start(self.voiceEnabledCheck, False, False, 0) + + self.voiceOutputCheck = Gtk.CheckButton("Enable voice _output (speak responses)") + self.voiceOutputCheck.set_use_underline(True) + self.voiceOutputCheck.set_active(self.config.get('voice_output') == 'true') + self.voiceOutputCheck.set_can_focus(True) + self.voiceOutputCheck.get_accessible().set_description("Speak AI responses aloud using text-to-speech") + voiceSettingsBox.pack_start(self.voiceOutputCheck, False, False, 0) + + # Wake word entry + wakeWordLabel = Gtk.Label("_Wake word phrase:") + wakeWordLabel.set_use_underline(True) + wakeWordLabel.set_alignment(0, 0.5) + voiceSettingsBox.pack_start(wakeWordLabel, False, False, 0) + + 
self.wakeWordEntry = Gtk.Entry() + self.wakeWordEntry.set_text(self.config.get('wake_word')) + self.wakeWordEntry.set_placeholder_text("e.g., 'hey assistant'") + self.wakeWordEntry.set_can_focus(True) + self.wakeWordEntry.get_accessible().set_name("Wake word phrase") + self.wakeWordEntry.get_accessible().set_description("Phrase to activate voice listening") + wakeWordLabel.set_mnemonic_widget(self.wakeWordEntry) + voiceSettingsBox.pack_start(self.wakeWordEntry, False, False, 0) + + # Voice timeout + timeoutLabel = Gtk.Label("Voice recognition _timeout (seconds):") + timeoutLabel.set_use_underline(True) + timeoutLabel.set_alignment(0, 0.5) + voiceSettingsBox.pack_start(timeoutLabel, False, False, 0) + + self.timeoutSpin = Gtk.SpinButton.new_with_range(1, 30, 1) + self.timeoutSpin.set_value(int(self.config.get('voice_timeout', '5'))) + self.timeoutSpin.set_can_focus(True) + self.timeoutSpin.get_accessible().set_name("Voice timeout") + self.timeoutSpin.get_accessible().set_description("How long to listen for speech in seconds") + timeoutLabel.set_mnemonic_widget(self.timeoutSpin) + voiceSettingsBox.pack_start(self.timeoutSpin, False, False, 0) + + # Voice status + voiceStatusLabel = Gtk.Label("") + if not self.voiceRecognition.is_available(): + voiceStatusLabel.set_text("Voice recognition unavailable - install python-speech-recognition and python-pyaudio") + voiceStatusLabel.set_line_wrap(True) + else: + voiceStatusLabel.set_text("Voice recognition available") + voiceSettingsBox.pack_start(voiceStatusLabel, False, False, 0) + + self.voiceFrame.add(voiceSettingsBox) + vbox.pack_start(self.voiceFrame, False, False, 0) + + # General settings + generalFrame = Gtk.Frame(label="General Settings") + generalBox = Gtk.VBox(spacing=10) + generalBox.set_border_width(10) + + self.confirmCheck = Gtk.CheckButton("_Confirm AI actions before execution") + self.confirmCheck.set_use_underline(True) + self.confirmCheck.set_active(self.config.get('confirm_actions') == 'true') + 
self.confirmCheck.set_can_focus(True) + self.confirmCheck.get_accessible().set_description("Show confirmation dialog before executing AI suggested actions") + generalBox.pack_start(self.confirmCheck, False, False, 0) + + generalFrame.add(generalBox) + vbox.pack_start(generalFrame, False, False, 0) + + # Save button + self.saveButton = Gtk.Button("_Save Settings") + self.saveButton.set_use_underline(True) + self.saveButton.connect("clicked", self.on_save_settings) + self.saveButton.set_can_focus(True) + self.saveButton.get_accessible().set_description("Save all configuration changes") + vbox.pack_start(self.saveButton, False, False, 0) + + # Status label + self.statusLabel = Gtk.Label("") + vbox.pack_start(self.statusLabel, False, False, 0) + + # Add tab to notebook + tabLabel = Gtk.Label("Settings") + self.notebook.append_page(vbox, tabLabel) + + # Don't set focus chain - let GTK handle it naturally + + # Load current settings + self.load_current_settings() + + def on_radio_key_press(self, widget, event): + """Handle key press events for radio buttons""" + keyval = event.keyval + + # Arrow keys and space to change radio button selection + if keyval in [Gdk.KEY_Up, Gdk.KEY_Down, Gdk.KEY_Left, Gdk.KEY_Right, Gdk.KEY_space]: + if widget == self.claudeRadio: + self.ollamaRadio.set_active(True) + self.ollamaRadio.grab_focus() + else: + self.claudeRadio.set_active(True) + self.claudeRadio.grab_focus() + return True + + return False + + def on_combo_key_press(self, widget, event): + """Handle key press events for combo boxes to allow Tab navigation""" + keyval = event.keyval + state = event.state & Gdk.ModifierType.CONTROL_MASK + + # Allow Tab and Shift+Tab to move focus away from combo box + if keyval == Gdk.KEY_Tab: + # Close combo box popup if open + widget.popdown() + + # Let the normal tab handling take over + if event.state & Gdk.ModifierType.SHIFT_MASK: + # Shift+Tab - move to previous widget + widget.get_toplevel().child_focus(Gtk.DirectionType.TAB_BACKWARD) + 
else: + # Tab - move to next widget + widget.get_toplevel().child_focus(Gtk.DirectionType.TAB_FORWARD) + return True + + return False + + def setup_settings_focus_chain(self, container): + """Set up explicit focus chain for settings tab - disabled for now""" + # Commenting out focus chain to let GTK handle it naturally + # GTK accessibility with explicit focus chains is problematic + pass + + def load_current_settings(self): + """Load current settings into UI""" + provider = self.config.get('provider') + if provider == 'claude-code': + self.claudeRadio.set_active(True) + else: + self.ollamaRadio.set_active(True) + + self.on_provider_changed(None) + self.refresh_ollama_models() + + # Set saved models after radio buttons are created + self.set_saved_model_selections() + + def set_saved_model_selections(self): + """Set the saved model selections on radio buttons""" + saved_model = self.config.get('ollama_model') + saved_vision_model = self.config.get('ollama_vision_model') + + # Set text model selection + for radio in self.textModelRadios: + if radio.get_label() == saved_model: + radio.set_active(True) + break + + # Set vision model selection + for radio in self.visionModelRadios: + if radio.get_label() == saved_vision_model: + radio.set_active(True) + break + + def on_provider_changed(self, widget): + """Handle provider radio button change""" + if self.claudeRadio.get_active(): + self.ollamaFrame.set_sensitive(False) + self.update_status("Claude Code selected") + else: + self.ollamaFrame.set_sensitive(True) + self.update_status("Ollama selected") + + def refresh_ollama_models(self): + """Refresh the list of available Ollama models using radio buttons""" + # Clear existing radio buttons + for radio in self.textModelRadios: + self.textModelBox.remove(radio) + for radio in self.visionModelRadios: + self.visionModelBox.remove(radio) + + self.textModelRadios = [] + self.visionModelRadios = [] + self.textModelGroup = None + self.visionModelGroup = None + + if 
self.ollamaInterface.is_available(): + all_models = self.ollamaInterface.get_models() + vision_models = self.ollamaInterface.get_vision_models() + + # Create radio buttons for text models (all models) + for i, model in enumerate(all_models): + if i == 0: + # First radio button in group + radio = Gtk.RadioButton.new_with_label(None, model) + self.textModelGroup = radio + else: + # Additional radio buttons in group + radio = Gtk.RadioButton.new_with_label_from_widget(self.textModelGroup, model) + + radio.set_can_focus(True) + radio.get_accessible().set_description(f"Use {model} for text questions") + radio.connect("toggled", self.on_text_model_changed) + self.textModelRadios.append(radio) + self.textModelBox.pack_start(radio, False, False, 0) + + # Create radio buttons for vision models + for i, model in enumerate(vision_models): + if i == 0: + # First radio button in vision group + radio = Gtk.RadioButton.new_with_label(None, model) + self.visionModelGroup = radio + else: + # Additional radio buttons in vision group + radio = Gtk.RadioButton.new_with_label_from_widget(self.visionModelGroup, model) + + radio.set_can_focus(True) + radio.get_accessible().set_description(f"Use {model} for image analysis") + radio.connect("toggled", self.on_vision_model_changed) + self.visionModelRadios.append(radio) + self.visionModelBox.pack_start(radio, False, False, 0) + + # Add "None" option for vision models + if vision_models: + radio = Gtk.RadioButton.new_with_label_from_widget(self.visionModelGroup, "(No vision model)") + radio.set_can_focus(True) + radio.get_accessible().set_description("Don't use vision models") + radio.connect("toggled", self.on_vision_model_changed) + self.visionModelRadios.append(radio) + self.visionModelBox.pack_start(radio, False, False, 0) + + # Show the new radio buttons + self.textModelBox.show_all() + self.visionModelBox.show_all() + + if all_models: + # Select first text model by default + if self.textModelRadios: + 
self.textModelRadios[0].set_active(True) + # Select first vision model by default + if self.visionModelRadios: + self.visionModelRadios[0].set_active(True) + + status = f"Found {len(all_models)} total models" + if vision_models: + status += f", {len(vision_models)} vision models" + self.update_status(status) + else: + self.update_status("Ollama running but no models found") + else: + self.update_status("Ollama not available") + + def on_text_model_changed(self, widget): + """Handle text model radio button change""" + if widget.get_active(): + model = widget.get_label() + self.config.set('ollama_model', model) + + def on_vision_model_changed(self, widget): + """Handle vision model radio button change""" + if widget.get_active(): + model = widget.get_label() + if model != "(No vision model)": + self.config.set('ollama_vision_model', model) + + def on_refresh_models(self, widget): + """Handle refresh models button click""" + # Update host if changed + new_host = self.hostEntry.get_text() + self.ollamaInterface = OllamaInterface(new_host) + self.refresh_ollama_models() + + def on_save_settings(self, widget): + """Save settings to configuration""" + if self.claudeRadio.get_active(): + self.config.set('provider', 'claude-code') + else: + self.config.set('provider', 'ollama') + + self.config.set('ollama_host', self.hostEntry.get_text()) + + # Save selected text model + for radio in self.textModelRadios: + if radio.get_active(): + self.config.set('ollama_model', radio.get_label()) + break + + # Save selected vision model + for radio in self.visionModelRadios: + if radio.get_active(): + model = radio.get_label() + if model != "(No vision model)": + self.config.set('ollama_vision_model', model) + break + + self.config.set('confirm_actions', 'true' if self.confirmCheck.get_active() else 'false') + + # Save voice settings + self.config.set('voice_enabled', 'true' if self.voiceEnabledCheck.get_active() else 'false') + self.config.set('voice_output', 'true' if 
self.voiceOutputCheck.get_active() else 'false') + self.config.set('wake_word', self.wakeWordEntry.get_text()) + self.config.set('voice_timeout', str(int(self.timeoutSpin.get_value()))) + + self.update_status("Settings saved successfully!") + + # Update button labels with new AI provider + self.update_button_labels() + + def update_status(self, message): + """Update status label""" + self.statusLabel.set_text(message) + GLib.timeout_add_seconds(5, lambda: self.statusLabel.set_text("")) + + def get_current_ai_name(self): + """Get the name of the currently selected AI provider""" + provider = self.config.get('provider') + if provider == 'claude-code': + return "Claude" + elif provider == 'ollama': + model = self.config.get('ollama_model', 'llama2') + return f"Ollama ({model})" if model != 'llama2' else "Ollama" + else: + return "AI" + + def update_button_labels(self): + """Update button labels with current AI provider name""" + ai_name = self.get_current_ai_name() + self.askButton.set_label(f"Ask _{ai_name}") + self.contextButton.set_label(f"Ask {ai_name} About _Window") + self.actionButton.set_label(f"Request {ai_name} _Action") + if hasattr(self, 'shareButton'): + self.shareButton.set_label(f"Ask {ai_name} About _File") + + # Update response label + self.responseLabel.set_label(f"{ai_name} _Response:") + + # Update accessible descriptions + response_atk = self.responseText.get_accessible() + response_atk.set_name(f"{ai_name} Response") + response_atk.set_description(f"{ai_name}'s response to your question") + + # Update button descriptions + self.askButton.get_accessible().set_description(f"Send your question to {ai_name}") + self.contextButton.get_accessible().set_description(f"Ask {ai_name} about the currently focused window") + self.actionButton.get_accessible().set_description(f"Request step-by-step instructions from {ai_name}") + if hasattr(self, 'shareButton'): + self.shareButton.get_accessible().set_description(f"Ask {ai_name} about the selected file") + + 
def get_question_text(self): + """Get text from question text view""" + buffer = self.questionText.get_buffer() + start_iter = buffer.get_start_iter() + end_iter = buffer.get_end_iter() + return buffer.get_text(start_iter, end_iter, False) + + def set_response_text(self, text): + """Set text in response text view""" + buffer = self.responseText.get_buffer() + buffer.set_text(text) + + def append_response_text(self, text): + """Append text to response text view""" + buffer = self.responseText.get_buffer() + end_iter = buffer.get_end_iter() + buffer.insert(end_iter, "\n\n" + text) + + def show_processing(self, provider_type): + """Show processing message""" + # Show specific model name if available + if hasattr(self, 'current_processing_model') and self.current_processing_model: + if provider_type == 'ollama': + ai_name = f"Ollama ({self.current_processing_model})" + else: + ai_name = self.get_current_ai_name() + else: + ai_name = self.get_current_ai_name() + + self.set_response_text(f"{ai_name} is processing your request...") + self.askButton.set_sensitive(False) + self.contextButton.set_sensitive(False) + self.actionButton.set_sensitive(False) + + # Play processing sound + subprocess.run(['play', '-qnG', 'synth', '0.1', 'sin', '800'], + capture_output=True) + + def hide_processing(self): + """Hide processing message and re-enable buttons""" + self.askButton.set_sensitive(True) + self.contextButton.set_sensitive(True) + self.actionButton.set_sensitive(True) + + # Play completion sound + subprocess.run(['play', '-qnG', 'synth', '0.05', 'sin', '1200'], + capture_output=True) + + def send_ai_request(self, message, context=None, image_path=None): + """Send request to selected AI provider""" + provider = self.config.get('provider') + + # Add neutral system context to avoid AI making assumptions + if context is None and not image_path: + system_context = "You are a helpful AI assistant. 
Please provide a direct and helpful response to the user's question without making assumptions about their specific use case or technical setup." + else: + system_context = context + + # Store which model is being used for status display + self.current_processing_model = None + + if provider == 'claude-code': + if not self.claudeInterface.is_available(): + return "Error: Claude Code is not available. Please install or configure Claude Code." + + self.show_processing("claude-code") + try: + response = self.claudeInterface.send_message(message, system_context, image_path) + return response + finally: + self.hide_processing() + + elif provider == 'ollama': + if not self.ollamaInterface.is_available(): + return "Error: Ollama is not available. Please start Ollama service." + + # Choose model based on whether we have an image + if image_path: + model = self.config.get('ollama_vision_model', 'llava') + # Verify the vision model is available + available_models = self.ollamaInterface.get_models() + if model not in available_models: + vision_models = self.ollamaInterface.get_vision_models() + if vision_models: + model = vision_models[0] # Use first available vision model + else: + return "Error: No vision models available for image processing. Please install a vision model like llava." 
+ else: + model = self.config.get('ollama_model') + + # Store the actual model being used for status display + self.current_processing_model = model + self.show_processing("ollama") + try: + response = self.ollamaInterface.send_message(message, model, system_context, image_path) + return response + finally: + self.hide_processing() + + return "Error: No AI provider configured" + + def on_ask_question(self, widget): + """Handle ask question button click""" + question = self.get_question_text().strip() + if not question: + self.set_response_text("Please enter a question first.") + return + + def ask_in_thread(): + response = self.send_ai_request(question) + GLib.idle_add(self.set_response_text, response) + + threading.Thread(target=ask_in_thread, daemon=True).start() + + def on_ask_with_context(self, widget): + """Handle ask with context button click""" + question = self.get_question_text().strip() + if not question: + self.set_response_text("Please enter a question first.") + return + + def ask_with_context_in_thread(): + context = self.windowContext.get_focused_window_info() + response = self.send_ai_request(question, context) + GLib.idle_add(self.set_response_text, response) + + threading.Thread(target=ask_with_context_in_thread, daemon=True).start() + + def on_request_action(self, widget): + """Handle request action button click""" + question = self.get_question_text().strip() + if not question: + self.set_response_text("Please enter an action request first.") + return + + # Add action context to the request + action_prompt = f"Please provide step-by-step instructions for: {question}\n\nFormat your response as a numbered list of specific actions I should take." 
+ + def request_action_in_thread(): + response = self.send_ai_request(action_prompt) + + # Show confirmation dialog if enabled + if self.config.get('confirm_actions') == 'true': + GLib.idle_add(self.show_action_confirmation, response) + else: + GLib.idle_add(self.set_response_text, response) + + threading.Thread(target=request_action_in_thread, daemon=True).start() + + def show_action_confirmation(self, response): + """Show confirmation dialog for AI actions""" + dialog = Gtk.MessageDialog( + transient_for=self, + flags=0, + message_type=Gtk.MessageType.QUESTION, + buttons=Gtk.ButtonsType.YES_NO, + text="AI Action Confirmation" + ) + + dialog.format_secondary_text( + f"The AI has provided the following action plan:\n\n{response}\n\n" + "Do you want to proceed with these actions?" + ) + + response_id = dialog.run() + dialog.destroy() + + if response_id == Gtk.ResponseType.YES: + self.set_response_text(f"Action plan approved:\n\n{response}") + else: + self.set_response_text("Action cancelled by user.") + + def on_browse_file(self, widget): + """Handle browse file button click""" + dialog = Gtk.FileChooserDialog( + title="Select file to share", + parent=self, + action=Gtk.FileChooserAction.OPEN + ) + + dialog.add_buttons( + Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, + Gtk.STOCK_OPEN, Gtk.ResponseType.OK + ) + + response = dialog.run() + if response == Gtk.ResponseType.OK: + filename = dialog.get_filename() + self.fileEntry.set_text(filename) + + dialog.destroy() + + def on_ask_about_file(self, widget): + """Handle ask about file button click""" + file_path = self.fileEntry.get_text().strip() + if not file_path or not os.path.exists(file_path): + self.set_response_text("Please select a valid file first.") + return + + question = self.get_question_text().strip() + if not question: + question = "Please analyze this file and tell me what it does." 
+ + def ask_about_file_in_thread(): + # For text files, read content and add to message + if file_path.lower().endswith(('.txt', '.py', '.sh', '.conf', '.md', '.json', '.xml', '.html')): + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + file_content = f.read() + + file_question = f"{question}\n\nFile: {file_path}\nContent:\n{file_content}" + response = self.send_ai_request(file_question) + except Exception as e: + response = f"Error reading file: {str(e)}" + else: + # For other files (including images), use Claude Code's file handling + response = self.send_ai_request(question, image_path=file_path) + + GLib.idle_add(self.set_response_text, response) + + threading.Thread(target=ask_about_file_in_thread, daemon=True).start() + + def on_describe_image(self, widget): + """Handle describe screenshot button click""" + def describe_image_in_thread(): + # Take screenshot + temp_dir = tempfile.mkdtemp() + screenshot_path = os.path.join(temp_dir, 'screenshot.png') + + try: + # Use scrot to take screenshot + result = subprocess.run(['scrot', screenshot_path], + capture_output=True, text=True, timeout=10) + + if result.returncode != 0: + GLib.idle_add(self.set_response_text, "Error: Could not take screenshot") + return + + # Send to AI for description + response = self.send_ai_request( + "Please describe what you see in this screenshot in detail. 
" + "Focus on any text, interface elements, and visual content.", + image_path=screenshot_path + ) + + GLib.idle_add(self.set_response_text, response) + + except Exception as e: + GLib.idle_add(self.set_response_text, f"Error taking screenshot: {str(e)}") + finally: + # Clean up temp file + try: + os.unlink(screenshot_path) + os.rmdir(temp_dir) + except: + pass + + threading.Thread(target=describe_image_in_thread, daemon=True).start() + + def on_analyze_selected(self, widget): + """Handle analyze selected text/screen content button click""" + def analyze_selected_in_thread(): + try: + # First, try to get clipboard content (selected text) + # Use wl-paste on Wayland, xclip on X11 + if os.environ.get('WAYLAND_DISPLAY'): + clipboard_result = subprocess.run(['wl-paste', '-p'], + capture_output=True, text=True, timeout=5) + else: + clipboard_result = subprocess.run(['xclip', '-o', '-selection', 'primary'], + capture_output=True, text=True, timeout=5) + + selected_text = clipboard_result.stdout.strip() if clipboard_result.returncode == 0 else "" + + if selected_text: + # We have selected text, analyze it + question = self.get_question_text().strip() + if not question: + question = "Please analyze this selected text and tell me what it means or what I should know about it." 
+ + full_question = f"{question}\n\nSelected text: {selected_text}" + response = self.send_ai_request(full_question) + + else: + # No selected text, fallback to OCR of current screen + # Take screenshot first + temp_dir = tempfile.mkdtemp() + screenshot_path = os.path.join(temp_dir, 'screen_analysis.png') + + try: + # Take screenshot + scrot_result = subprocess.run(['scrot', screenshot_path], + capture_output=True, text=True, timeout=10) + + if scrot_result.returncode != 0: + GLib.idle_add(self.set_response_text, "Error: Could not capture screen content") + return + + # Try OCR first to get text content + try: + from PIL import Image + import pytesseract + + image = Image.open(screenshot_path) + ocr_text = pytesseract.image_to_string(image).strip() + + if ocr_text: + # We found text via OCR + question = self.get_question_text().strip() + if not question: + question = "Please analyze this text content and tell me what's important or what I should know about it." + + full_question = f"{question}\n\nScreen text content: {ocr_text}" + response = self.send_ai_request(full_question) + + else: + # No text found, do visual analysis + question = self.get_question_text().strip() + if not question: + question = "Please analyze this screen content and tell me what you see." + + response = self.send_ai_request(question, image_path=screenshot_path) + + except ImportError: + # Fallback to AI image analysis without OCR + question = self.get_question_text().strip() + if not question: + question = "Please analyze this screen content and tell me what you see." 
+ + response = self.send_ai_request(question, image_path=screenshot_path) + + finally: + # Clean up temp file + try: + os.unlink(screenshot_path) + os.rmdir(temp_dir) + except: + pass + + GLib.idle_add(self.set_response_text, response) + + except Exception as e: + GLib.idle_add(self.set_response_text, f"Error analyzing content: {str(e)}") + + threading.Thread(target=analyze_selected_in_thread, daemon=True).start() + + def speak_text(self, text): + """Use spd-say to speak text if voice output is enabled""" + if self.config.get('voice_output') == 'true': + try: + subprocess.run(['spd-say', '-P', 'important', text], + capture_output=True, timeout=30) + except Exception as e: + print(f"Error speaking text: {e}") + + def update_voice_status(self, message): + """Update voice status label""" + GLib.idle_add(self.voiceStatus.set_text, message) + + def on_voice_question(self, widget): + """Handle voice question button click""" + if not self.voiceRecognition.is_available(): + self.set_response_text("Voice recognition not available. 
Please install python-speech-recognition and python-pyaudio.") + return + + def voice_question_thread(): + try: + self.update_voice_status("🎤 Listening...") + + # Play recording start sound + subprocess.run(['play', '-qnG', 'synth', '0.1', 'sin', '1000', 'vol', '0.3'], + capture_output=True) + + timeout = int(self.config.get('voice_timeout', '5')) + recognized_text = self.voiceRecognition.recognize_speech(timeout=timeout) + + # Play recording end sound + subprocess.run(['play', '-qnG', 'synth', '0.05', 'sin', '1200', 'vol', '0.3'], + capture_output=True) + + if recognized_text.startswith("Error:") or recognized_text.startswith("Sorry,"): + self.update_voice_status(recognized_text) + self.speak_text(recognized_text) + return + + # Set the recognized text in the question field + GLib.idle_add(self.set_question_text, recognized_text) + self.update_voice_status(f"Recognized: {recognized_text}") + + # Automatically send the question to AI + response = self.send_ai_request(recognized_text) + GLib.idle_add(self.set_response_text, response) + + # Speak the response if enabled + self.speak_text(response) + + except Exception as e: + error_msg = f"Voice recognition error: {str(e)}" + self.update_voice_status(error_msg) + GLib.idle_add(self.set_response_text, error_msg) + finally: + self.update_voice_status("") + + threading.Thread(target=voice_question_thread, daemon=True).start() + + def on_toggle_continuous_listening(self, widget): + """Handle continuous listening toggle""" + if not self.voiceRecognition.is_available(): + widget.set_active(False) + self.set_response_text("Voice recognition not available.") + return + + if widget.get_active(): + self.start_continuous_listening() + else: + self.stop_continuous_listening() + + def start_continuous_listening(self): + """Start continuous listening for wake word""" + if self.continuousListening: + return + + self.continuousListening = True + self.stopListening.clear() + self.update_voice_status("👂 Listening for wake word...") + 
+ wake_word = self.config.get('wake_word', 'hey assistant').lower() + + def wake_word_callback(text): + if wake_word in text: + GLib.idle_add(self.on_wake_word_detected) + + def continuous_listening_thread(): + self.voiceRecognition.recognize_speech_continuous(wake_word_callback, self.stopListening) + + self.listeningThread = threading.Thread(target=continuous_listening_thread, daemon=True) + self.listeningThread.start() + + def stop_continuous_listening(self): + """Stop continuous listening""" + if not self.continuousListening: + return + + self.continuousListening = False + self.stopListening.set() + self.update_voice_status("") + + if self.listeningThread: + self.listeningThread.join(timeout=2) + + def on_wake_word_detected(self): + """Handle wake word detection""" + ai_name = self.get_current_ai_name() + self.speak_text("Yes, what can I help you with?") + self.update_voice_status(f"🎤 Wake word detected, listening for {ai_name}...") + + # Play wake word detection sound + subprocess.run(['play', '-qnG', 'synth', '0.1', 'sin', '800', 'vol', '0.4'], + capture_output=True) + + def wake_response_thread(): + try: + timeout = int(self.config.get('voice_timeout', '5')) + recognized_text = self.voiceRecognition.recognize_speech(timeout=timeout) + + if recognized_text.startswith("Error:") or recognized_text.startswith("Sorry,"): + self.update_voice_status("") + self.speak_text("I didn't catch that. 
Please try again.") + return + + # Process the question + GLib.idle_add(self.set_question_text, recognized_text) + response = self.send_ai_request(recognized_text) + GLib.idle_add(self.set_response_text, response) + + # Speak the response + self.speak_text(response) + + except Exception as e: + self.speak_text("Sorry, there was an error processing your question.") + finally: + self.update_voice_status("👂 Listening for wake word...") + + threading.Thread(target=wake_response_thread, daemon=True).start() + + def set_question_text(self, text): + """Set text in question text view""" + buffer = self.questionText.get_buffer() + buffer.set_text(text) + + def on_key_press(self, widget, event): + """Handle keyboard shortcuts""" + keyval = event.keyval + state = event.state & (Gdk.ModifierType.CONTROL_MASK | Gdk.ModifierType.MOD1_MASK) + + # Ctrl+Tab to switch tabs + if (state & Gdk.ModifierType.CONTROL_MASK) and keyval == Gdk.KEY_Tab: + current = self.notebook.get_current_page() + next_page = (current + 1) % self.notebook.get_n_pages() + self.notebook.set_current_page(next_page) + return True + + # Ctrl+Shift+Tab to switch tabs backwards + if (state & Gdk.ModifierType.CONTROL_MASK) and keyval == Gdk.KEY_ISO_Left_Tab: + current = self.notebook.get_current_page() + prev_page = (current - 1) % self.notebook.get_n_pages() + self.notebook.set_current_page(prev_page) + return True + + # F4 for voice input (accessibility shortcut) + if keyval == Gdk.KEY_F4: + if self.voiceRecognition.is_available(): + self.on_voice_question(None) + return True + + # F5 to toggle continuous listening + if keyval == Gdk.KEY_F5: + if self.voiceRecognition.is_available(): + self.listenToggle.set_active(not self.listenToggle.get_active()) + return True + + # Ctrl+S to save settings (when on settings tab) + if (state & Gdk.ModifierType.CONTROL_MASK) and keyval == Gdk.KEY_s: + if self.notebook.get_current_page() == 1: # Settings tab + self.on_save_settings(None) + return True + + # Escape to close 
program + if keyval == Gdk.KEY_Escape: + self.cleanup() + Gtk.main_quit() + return True + + return False + + def on_textview_key_press(self, widget, event): + """Handle key press events in text views for better navigation""" + keyval = event.keyval + state = event.state & Gdk.ModifierType.CONTROL_MASK + + # Ctrl+Enter to submit question + if state and keyval == Gdk.KEY_Return: + self.on_ask_question(None) + return True + + return False + + def set_initial_focus(self): + """Set initial focus to the question text input""" + self.questionText.grab_focus() + return False # Don't repeat this idle callback + + def on_tab_switched(self, notebook, page, page_num): + """Handle tab switching to set proper focus""" + if page_num == 0: # Interaction tab + GLib.idle_add(lambda: self.questionText.grab_focus()) + elif page_num == 1: # Settings tab + # Focus the first radio button in settings + GLib.idle_add(lambda: self.claudeRadio.grab_focus()) + + def cleanup(self): + """Cleanup voice resources on exit""" + self.stop_continuous_listening() + if self.voiceRecognition: + self.voiceRecognition.stop_recording() + +def main(): + """Main entry point""" + app = AiAssistant() + app.show_all() + + # Play startup sound + subprocess.run(['play', '-qnG', 'synth', '0.1', 'sin', '1000'], + capture_output=True) + + # Connect cleanup on destroy + app.connect("destroy", lambda w: app.cleanup()) + + try: + Gtk.main() + except KeyboardInterrupt: + app.cleanup() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/bind_to_scratchpad.sh b/scripts/bind_to_scratchpad.sh index 9e7eb95..1d51104 100755 --- a/scripts/bind_to_scratchpad.sh +++ b/scripts/bind_to_scratchpad.sh @@ -1,21 +1,31 @@ #!/usr/bin/env bash -# Find out if we're using i3 +# Find out if we're using i3 or sway if ! 
[[ -n "${WAYLAND_DISPLAY}" ]]; then cmd="i3-msg" scratchConfig="${XDG_CONFIG_HOME:-$HOME/.config}/i3" + usingWayland=false else cmd="swaymsg" scratchConfig="${XDG_CONFIG_HOME:-$HOME/.config}/sway" + usingWayland=true fi scratchConfig+="/scratchpad" touch "${scratchConfig}" -# Get the focused window ID -windowId=$(xdotool getactivewindow) - -# Get the class name of the window -class=$(xprop -id "$windowId" WM_CLASS | awk -F '"' '{print $4}') +# Get the class name of the focused window +if [[ "$usingWayland" == "true" ]]; then + # Wayland/Sway: use swaymsg to get focused window info + class=$($cmd -t get_tree | jq -r '.. | select(.focused? == true) | .app_id // .window_properties.class' | head -n 1) +else + # X11/i3: use xdotool and xprop + if ! command -v xdotool &> /dev/null || ! command -v xprop &> /dev/null; then + notify-send "Error: xdotool and xprop required for i3" + exit 1 + fi + windowId=$(xdotool getactivewindow) + class=$(xprop -id "$windowId" WM_CLASS | awk -F '"' '{print $4}') +fi if [[ -z "$class" ]]; then notify-send "Unable to move to scratchpad." @@ -23,10 +33,20 @@ if [[ -z "$class" ]]; then fi # Check if it's already in the config -if ! grep -q "class=\"$class\"" "$scratchConfig"; then - echo "for_window [class=\"$class\"] move to scratchpad" >> "$scratchConfig" - notify-send "Added window class $class to scratchpad" +if [[ "$usingWayland" == "true" ]]; then + # Sway uses app_id for Wayland-native apps, class for XWayland apps + if ! grep -q "app_id=\"$class\"" "$scratchConfig" && ! grep -q "class=\"$class\"" "$scratchConfig"; then + echo "for_window [app_id=\"$class\"] move to scratchpad" >> "$scratchConfig" + notify-send "Added window app_id $class to scratchpad" + fi + # Move the window to scratchpad now (try both app_id and class) + $cmd "[app_id=\"$class\"] move to scratchpad" 2>/dev/null || $cmd "[class=\"$class\"] move to scratchpad" +else + # i3 uses class + if ! 
grep -q "class=\"$class\"" "$scratchConfig"; then + echo "for_window [class=\"$class\"] move to scratchpad" >> "$scratchConfig" + notify-send "Added window class $class to scratchpad" + fi + # Move the window to scratchpad now + $cmd "[class=\"$class\"] move to scratchpad" fi - -# Move the window to scratchpad now -$cmd "[class=\"$class\"] move to scratchpad" diff --git a/scripts/keyboard.sh b/scripts/keyboard.sh index 7f29530..c5e577e 100755 --- a/scripts/keyboard.sh +++ b/scripts/keyboard.sh @@ -1,16 +1,16 @@ #!/usr/bin/env bash # This file is part of I38. - + # I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, # either version 3 of the License, or (at your option) any later version. - + # I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR # PURPOSE. See the GNU General Public License for more details. - + # You should have received a copy of the GNU General Public License along with I38. If not, see . - - + + # This script is a modified version of i3-keyboard-layout. # Originally Copyright (c) 2018 Sergio Gil. 
# https://github.com/porras/i3-keyboard-layout
@@ -19,26 +19,66 @@

 set -e

+# Detect if we're on Wayland or X11
+if [[ -n "${WAYLAND_DISPLAY}" ]]; then
+    usingWayland=true
+else
+    usingWayland=false
+fi
+
 get_kbdlayout() {
-  layout=$(setxkbmap -query | grep -oP 'layout:\s*\K([\w,]+)')
-  variant=$(setxkbmap -query | grep -oP 'variant:\s*\K(\w+)')
-  echo "$layout" "$variant"
+    if [[ "$usingWayland" == "true" ]]; then
+        # Sway: Get keyboard layout from input devices
+        # This gets the xkb_active_layout_name from the first keyboard
+        layout=$(swaymsg -t get_inputs | jq -r '.[] | select(.type=="keyboard") | .xkb_active_layout_name' | head -n 1)
+        echo "$layout"
+    else
+        # i3: Use setxkbmap
+        layout=$(setxkbmap -query | grep -oP 'layout:\s*\K([\w,]+)')
+        variant=$(setxkbmap -query | grep -oP 'variant:\s*\K(\w+)')
+        # Add the separating space only when a variant exists; the "get"
+        # subcommand now quotes this output, so a trailing blank would leak out.
+        echo "$layout${variant:+ $variant}"
+    fi
 }

 set_kbdlayout() {
   eval "array=($1)"
-  setxkbmap "${array[@]}" &&
-  spd-say -P important -Cw "${array[@]}"
+
+    if [[ "$usingWayland" == "true" ]]; then
+        # Sway: Switch to next keyboard layout
+        # Sway cycles through layouts configured in the config, so we just trigger next
+        swaymsg input type:keyboard xkb_switch_layout next &&
+            spd-say -P important -Cw "${array[@]}"
+    else
+        # i3: Use setxkbmap
+        setxkbmap "${array[@]}" &&
+            spd-say -P important -Cw "${array[@]}"
+    fi
 }

 cycle() {
-  current_layout=$(get_kbdlayout | xargs)
-  layouts=("$@" "$1") # add the first one at the end so that it cycles
-  index=0
-  while [ "${layouts[$index]}" != "$current_layout" ] && [ $index -lt "${#layouts[@]}" ]; do index=$[index +1]; done
-  next_index=$[index +1]
-  next_layout=${layouts[$next_index]}
-  set_kbdlayout "$next_layout"
+    if [[ "$usingWayland" == "true" ]]; then
+        # Sway: Just switch to next layout (Sway handles cycling internally)
+        swaymsg input type:keyboard xkb_switch_layout next
+        currentLayout=$(get_kbdlayout)
+        spd-say -P important -Cw "$currentLayout"
+    else
+        # i3: Cycle through provided layouts
+        currentLayout=$(get_kbdlayout | xargs)
+        layouts=("$@")
+        # Start from the first layout so that a current layout missing from
+        # the list still advances instead of selecting an empty entry.
+        nextLayout=${layouts[0]}
+        for index in "${!layouts[@]}"; do
+            if [[ "${layouts[$index]}" == "$currentLayout" ]]; then
+                nextIndex=$(((index + 1) % ${#layouts[@]}))
+                nextLayout=${layouts[$nextIndex]}
+                break
+            fi
+        done
+        set_kbdlayout "$nextLayout"
+    fi
 }


@@ -47,7 +87,7 @@ shift || exit 1

 case $subcommand in
   "get")
-    echo -n $(get_kbdlayout)
+    echo -n "$(get_kbdlayout)"
     ;;
   "cycle")
     cycle "$@"
diff --git a/scripts/reminder.sh b/scripts/reminder.sh
index c984c46..77b09e9 100755
--- a/scripts/reminder.sh
+++ b/scripts/reminder.sh
@@ -329,9 +329,18 @@ if [[ $# -ne 0 ]]; then
         notification="${notification/${url}[[:space:]]/}"
         url="${url%[[:space:].?!]}"
     fi
-    if command -v xclip &> /dev/null && [[ "${#url}" -gt 3 ]]; then
-        echo "${url}" | xclip -selection clipboard
-        notify-send --hint=int:transient:1 -t 500 -r 38 "${notification} The URL has been copied to the clipboard."
+    if [[ "${#url}" -gt 3 ]]; then
+        # Copy URL to clipboard using appropriate tool
+        # (printf, unlike echo, does not append a newline to the copied URL)
+        if [[ -n "${WAYLAND_DISPLAY}" ]] && command -v wl-copy &> /dev/null; then
+            printf '%s' "${url}" | wl-copy
+            notify-send --hint=int:transient:1 -t 500 -r 38 "${notification} The URL has been copied to the clipboard."
+        elif command -v xclip &> /dev/null; then
+            printf '%s' "${url}" | xclip -selection clipboard
+            notify-send --hint=int:transient:1 -t 500 -r 38 "${notification} The URL has been copied to the clipboard."
+        else
+            notify-send --hint=int:transient:1 -t 500 -r 38 "${*}"
+        fi
     else
         notify-send --hint=int:transient:1 -t 500 -r 38 "${*}"
     fi
diff --git a/scripts/screen_controller.sh b/scripts/screen_controller.sh
index b966582..1a7eca1 100755
--- a/scripts/screen_controller.sh
+++ b/scripts/screen_controller.sh
@@ -1,19 +1,33 @@
 #!/usr/bin/env bash

 # This file is part of I38.
-
+
 # I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
 # either version 3 of the License, or (at your option) any later version.
-
+
 # I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 # PURPOSE. See the GNU General Public License for more details.
-
+
 # You should have received a copy of the GNU General Public License along with I38. If not, see .
-
-
+
+
 # Not for multiple screens.
-# Get the name of the screen.
-screenName="$(xrandr --query | grep "connected" | cut -d ' ' -f1 | head -n 1)"
+# Detect if we're on Wayland or X11
+if [[ -n "${WAYLAND_DISPLAY}" ]]; then
+    # Sway/Wayland: Get the name of the first output ("// empty" stops jq printing the literal "null")
+    screenName="$(swaymsg -t get_outputs | jq -r '.[0].name // empty')"
+    if [[ -z "$screenName" ]]; then
+        notify-send "Error: Could not detect output"
+        exit 1
+    fi
+else
+    # i3/X11: Get the name of the screen (leading space keeps "disconnected" outputs from matching)
+    if ! command -v xrandr &> /dev/null; then
+        notify-send "Error: xrandr not found"
+        exit 1
+    fi
+    screenName="$(xrandr --query | grep " connected" | cut -d ' ' -f1 | head -n 1)"
+fi

 menuOptions=(
     "1.0" "Maximum Brightness"
@@ -29,7 +43,29 @@ if [[ ${#brightness} -lt 1 ]]; then
     exit 0
 fi

-xrandr --output ${screenName} --brightness ${brightness%%|*} &&
-    spd-say -P important -Cw "Screen set to ${brightness#*|}."
+# Apply brightness setting
+if [[ -n "${WAYLAND_DISPLAY}" ]]; then
+    # Sway has no native brightness control, so try the external helpers in turn.
+    brightnessApplied=false
+    if command -v brightnessctl &> /dev/null; then
+        # Convert the selected factor into a whole percentage (awk avoids needing bc).
+        brightnessValue=$(awk -v factor="${brightness%%|*}" 'BEGIN { printf "%.0f", factor * 100 }')
+        brightnessctl set "${brightnessValue}%" && brightnessApplied=true
+    fi
+    # NOTE(review): wlr-randr(1) does not appear to document --brightness; kept
+    # only as a second attempt when brightnessctl is missing or fails - confirm.
+    if [[ "$brightnessApplied" == "false" ]] && command -v wlr-randr &> /dev/null; then
+        wlr-randr --output "${screenName}" --brightness "${brightness%%|*}" && brightnessApplied=true
+    fi
+    if [[ "$brightnessApplied" == "false" ]]; then
+        notify-send "Error: wlr-randr or brightnessctl required for Sway brightness control"
+        exit 1
+    fi
+    spd-say -P important -Cw "Screen set to ${brightness#*|}."
+else
+    # i3: Use xrandr
+    xrandr --output "${screenName}" --brightness "${brightness%%|*}" &&
+        spd-say -P important -Cw "Screen set to ${brightness#*|}."
+fi

 exit 0