From 37bd89ab874b75736ad419e6584b6a82ca1da7ea Mon Sep 17 00:00:00 2001 From: Storm Dragon Date: Tue, 5 Aug 2025 19:05:45 -0400 Subject: [PATCH] many updates to AI Assistant plugin. Improved UI, hopefully improved accuracy as well. --- src/cthulhu/cthulhu.py | 7 +- src/cthulhu/cthulhuVersion.py | 2 +- src/cthulhu/input_event.py | 4 +- src/cthulhu/learn_mode_presenter.py | 25 +++ src/cthulhu/plugin_system_manager.py | 12 +- src/cthulhu/plugins/AIAssistant/plugin.info | 15 +- src/cthulhu/plugins/AIAssistant/plugin.py | 234 +++++++++++++------- src/cthulhu/script.py | 34 +++ src/cthulhu/settings.py | 2 +- 9 files changed, 242 insertions(+), 93 deletions(-) diff --git a/src/cthulhu/cthulhu.py b/src/cthulhu/cthulhu.py index 98f29b6..26b6420 100644 --- a/src/cthulhu/cthulhu.py +++ b/src/cthulhu/cthulhu.py @@ -100,20 +100,21 @@ class APIHelper: # Create a keybinding handler class GestureHandler: - def __init__(self, function, description): + def __init__(self, function, description, learnModeEnabled=True): self.function = function self.description = description + self.learnModeEnabled = learnModeEnabled def __call__(self, script, inputEvent): try: - logger.info(f"=== DisplayVersion keybinding handler called! ===") + logger.info(f"=== Plugin keybinding handler called! ===") return function(script, inputEvent) except Exception as e: import logging logging.getLogger(__name__).error(f"Error in keybinding handler: {e}") return True - handler = GestureHandler(function, name) + handler = GestureHandler(function, name, learnModeEnabled) logger.info(f"Created handler: {handler}") # Create the binding object regardless of whether there's an active script diff --git a/src/cthulhu/cthulhuVersion.py b/src/cthulhu/cthulhuVersion.py index 3cf47f9..797d0b7 100644 --- a/src/cthulhu/cthulhuVersion.py +++ b/src/cthulhu/cthulhuVersion.py @@ -23,5 +23,5 @@ # Fork of Orca Screen Reader (GNOME) # Original source: https://gitlab.gnome.org/GNOME/orca -version = "2025.08.03" +version = "2025.08.05" codeName = "testing" diff --git a/src/cthulhu/input_event.py b/src/cthulhu/input_event.py index 77d3119..87fd66a 100644 --- a/src/cthulhu/input_event.py +++ b/src/cthulhu/input_event.py @@ -339,7 +339,9 @@ class KeyboardEvent(InputEvent): if KeyboardEvent.lastCthulhuModifierAlone: if _isPressed: KeyboardEvent.secondCthulhuModifierTime = now - if (KeyboardEvent.secondCthulhuModifierTime < + if (KeyboardEvent.secondCthulhuModifierTime is not None and + KeyboardEvent.lastCthulhuModifierAloneTime is not None and + KeyboardEvent.secondCthulhuModifierTime < KeyboardEvent.lastCthulhuModifierAloneTime + 0.5): # double-cthulhu, let the real action happen self._bypassCthulhu = True diff --git a/src/cthulhu/learn_mode_presenter.py b/src/cthulhu/learn_mode_presenter.py index 7a96970..6ff3fd3 100644 --- a/src/cthulhu/learn_mode_presenter.py +++ b/src/cthulhu/learn_mode_presenter.py @@ -191,10 +191,18 @@ class LearnModePresenter: handler = event.getHandler() if handler is None: + debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: No handler found for event", True) return True + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler found: {handler}", True) + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler description: {getattr(handler, 'description', 'No description')}", True) + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler learnModeEnabled: {getattr(handler, 'learnModeEnabled', 'No learnModeEnabled')}", True) + if handler.learnModeEnabled and handler.description: + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Presenting message: {handler.description}", True) cthulhu_state.activeScript.presentMessage(handler.description) + else: + debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: Handler missing required properties for learn mode", True) return True @@ -254,6 +262,23 @@ class LearnModePresenter: bindings[guilabels.KB_GROUP_ACTIONS] = bound items += len(bound) + # Add plugin keybindings + try: + debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: Getting plugin keybindings", True) + plugin_bindings = script.getPluginKeyBindings() + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Got plugin bindings object: {plugin_bindings}", True) + bound = plugin_bindings.getBoundBindings() + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Got {len(bound)} bound plugin keybindings", True) + if bound: + bindings["Plugins"] = bound + items += len(bound) + debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Added {len(bound)} plugin keybindings to learn mode", True) + except Exception as e: + msg = f"LEARN MODE PRESENTER: Could not get plugin keybindings: {e}" + debug.printMessage(debug.LEVEL_INFO, msg, True) + import traceback + debug.printMessage(debug.LEVEL_INFO, traceback.format_exc(), True) + title = messages.shortcutsFoundCthulhu(items) else: app_name = AXObject.get_name(script.app) or messages.APPLICATION_NO_NAME diff --git a/src/cthulhu/plugin_system_manager.py b/src/cthulhu/plugin_system_manager.py index 623cafd..c868da5 100644 --- a/src/cthulhu/plugin_system_manager.py +++ b/src/cthulhu/plugin_system_manager.py @@ -395,9 +395,19 @@ class PluginSystemManager: """Get the list of active plugin names.""" return self._active_plugins + def get_active_plugins(self): + """Get the list of active plugin instances.""" + active_instances = [] + for plugin_name in self._active_plugins: + if plugin_name in self._plugins: + plugin_info = self._plugins[plugin_name] + if plugin_info.loaded and plugin_info.instance: + active_instances.append(plugin_info.instance) + return active_instances + def setActivePlugins(self, activePlugins): """Set active plugins and sync their state.""" - logger.info(f"=== PluginSystemManager.setActivePlugins called ===") + logger.info(f"PLUGIN SYSTEM: setActivePlugins called with: {activePlugins}") logger.info(f"Setting active plugins: {activePlugins}") # Make sure we have scanned for plugins first diff --git a/src/cthulhu/plugins/AIAssistant/plugin.info b/src/cthulhu/plugins/AIAssistant/plugin.info index ed2b929..f9c5206 100644 --- a/src/cthulhu/plugins/AIAssistant/plugin.info +++ b/src/cthulhu/plugins/AIAssistant/plugin.info @@ -1,8 +1,7 @@ -name = AI Assistant -version = 1.0.0 -description = AI-powered accessibility assistant for analyzing screens and taking actions -authors = Stormux -website = https://stormux.org -copyright = Copyright 2025 -builtin = false -hidden = false \ No newline at end of file +[Plugin] +Name = AI Assistant +Module = AIAssistant +Description = AI-powered accessibility assistant for analyzing screens and taking actions +Authors = Stormux +Version = 1.0.0 +Category = Accessibility \ No newline at end of file diff --git a/src/cthulhu/plugins/AIAssistant/plugin.py b/src/cthulhu/plugins/AIAssistant/plugin.py index fbbc87f..3616a8c 100644 --- a/src/cthulhu/plugins/AIAssistant/plugin.py +++ b/src/cthulhu/plugins/AIAssistant/plugin.py @@ -46,15 +46,11 @@ class AIAssistant(Plugin): def __init__(self, *args, **kwargs): """Initialize the AI Assistant plugin.""" super().__init__(*args, **kwargs) - logger.info("AI Assistant plugin initialized") - print("DEBUG: AI Assistant plugin __init__ called") - # Write to a debug file so we can see if the plugin is being loaded - try: - with open('/tmp/ai_assistant_debug.log', 'a') as f: - f.write("AI Assistant plugin __init__ called\n") - except: - pass + # Use print to ensure we see this message + print("DEBUG: AI ASSISTANT __init__ called") + logger.info("AI ASSISTANT: Plugin __init__ starting") + logger.info("AI ASSISTANT: Plugin initialized successfully") # Menu and keybinding storage self._kb_binding_menu = None @@ -79,31 +75,45 @@ class AIAssistant(Plugin): if plugin is not None and plugin is not self: return + # Prevent multiple activations + if self._enabled: + logger.info("AI ASSISTANT: Already activated, skipping") + print("DEBUG: AI ASSISTANT already activated, skipping") + return + try: - logger.info("=== AI Assistant plugin activation starting ===") - print("DEBUG: AI Assistant plugin activation starting") + logger.info("AI ASSISTANT: === Plugin activation starting ===") + print("DEBUG: AI ASSISTANT activation starting") # Check if AI Assistant is enabled in settings enabled = self._settings_manager.getSetting('aiAssistantEnabled') - print(f"DEBUG: AI Assistant enabled setting: {enabled}") + logger.info(f"AI ASSISTANT: Enabled setting: {enabled}") + print(f"DEBUG: AI ASSISTANT enabled setting: {enabled}") if not enabled: logger.info("AI Assistant is disabled in settings, skipping activation") - print("DEBUG: AI Assistant is disabled in settings, skipping activation") + print("DEBUG: AI Assistant disabled, skipping activation") return # Load AI settings self._load_ai_settings() + print(f"DEBUG: AI settings loaded - provider: {self._provider_type}") # Check if we have valid configuration - if not self._validate_configuration(): + config_valid = self._validate_configuration() + logger.info(f"AI Assistant configuration valid: {config_valid}") + print(f"DEBUG: AI Assistant configuration valid: {config_valid}") + if not config_valid: logger.warning("AI Assistant configuration invalid, skipping activation") + print("DEBUG: AI Assistant configuration invalid, skipping activation") return # Initialize AI provider self._initialize_ai_provider() + print("DEBUG: AI provider initialized") # Register keybindings only if configuration is valid self._register_keybindings() + print("DEBUG: AI keybindings registered") self._enabled = True logger.info("AI Assistant plugin activated successfully") @@ -111,6 +121,7 @@ class AIAssistant(Plugin): except Exception as e: logger.error(f"Error activating AI Assistant plugin: {e}") + print(f"DEBUG: Error activating AI Assistant plugin: {e}") import traceback logger.error(traceback.format_exc()) @@ -156,21 +167,28 @@ class AIAssistant(Plugin): def _validate_configuration(self): """Validate AI Assistant configuration.""" + logger.info(f"Validating AI configuration - provider_type: {self._provider_type}") if not self._provider_type: logger.warning("No AI provider configured") return False # Providers that don't need API keys if self._provider_type == settings.AI_PROVIDER_OLLAMA: + logger.info("Checking Ollama availability") return self._check_ollama_availability() elif self._provider_type == settings.AI_PROVIDER_CLAUDE_CODE: - return self._check_claude_code_availability() + logger.info("Checking Claude Code availability") + result = self._check_claude_code_availability() + logger.info(f"Claude Code availability check result: {result}") + return result # Other providers need API keys + logger.info(f"Checking API key for provider {self._provider_type}") if not self._api_key: logger.warning(f"No API key configured for provider {self._provider_type}") return False + logger.info("Configuration validation passed") return True def _check_ollama_availability(self): @@ -243,7 +261,7 @@ class AIAssistant(Plugin): self._kb_binding_menu = self.registerGestureByString( self._show_ai_menu, "Show AI Assistant menu", - 'kb:cthulhu+control+shift+a' + 'kb:cthulhu+shift+control+a' ) logger.info("AI Assistant menu keybinding registered") @@ -261,19 +279,23 @@ class AIAssistant(Plugin): def _show_ai_menu(self, script=None, inputEvent=None): """Show the AI Assistant menu.""" try: - logger.info("Showing AI Assistant menu") + logger.info("AI ASSISTANT: _show_ai_menu called!") + print("DEBUG: AI ASSISTANT _show_ai_menu called!") # IMPORTANT: Capture screen data BEFORE showing menu # This ensures we get the actual screen content, not the menu itself self._pre_menu_screen_data = self._collect_ai_data() logger.info("Pre-captured screen data for menu actions") + print("DEBUG: Pre-captured screen data for menu actions") # Now show the menu self._menu_gui = AIAssistantMenu(self._handle_menu_selection) self._menu_gui.show_gui() + print("DEBUG: AI menu GUI shown") return True except Exception as e: logger.error(f"Error showing AI menu: {e}") + print(f"DEBUG: Error showing AI menu: {e}") import traceback traceback.print_exc() return False @@ -309,7 +331,8 @@ class AIAssistant(Plugin): self._present_message("AI provider not available. Check configuration.") return True - self._present_message("AI Assistant analyzing screen...") + provider_name = self._provider_type.replace('_', ' ').title() + self._present_message(f"AI Assistant ({provider_name}) analyzing screen...") # Use pre-captured data if data: @@ -453,7 +476,8 @@ class AIAssistant(Plugin): self._present_message("AI provider not available. Check configuration.") return True - self._present_message("AI Assistant analyzing screen...") + provider_name = self._provider_type.replace('_', ' ').title() + self._present_message(f"AI Assistant ({provider_name}) analyzing screen...") # Collect data and get AI description data = self._collect_ai_data() @@ -745,8 +769,9 @@ class AIAssistant(Plugin): def _show_question_dialog(self): """Show a dialog for the user to enter their question.""" try: + provider_name = self._provider_type.replace('_', ' ').title() dialog = Gtk.Dialog( - title="AI Assistant Question", + title=f"AI Assistant Question ({provider_name})", parent=None, flags=Gtk.DialogFlags.MODAL, buttons=( @@ -806,8 +831,9 @@ class AIAssistant(Plugin): for child in dialog.get_action_area().get_children(): dialog.get_action_area().remove(child) - # Change title - dialog.set_title("AI Assistant Response") + # Change title + provider_name = self._provider_type.replace('_', ' ').title() + dialog.set_title(f"AI Assistant Response ({provider_name})") # Show question and processing message question_label = Gtk.Label() @@ -816,12 +842,22 @@ class AIAssistant(Plugin): question_label.set_halign(Gtk.Align.START) content_area.pack_start(question_label, False, False, 10) - # Processing label (will be updated with response) - self._response_label = Gtk.Label(label="Processing your question...") - self._response_label.set_line_wrap(True) - self._response_label.set_halign(Gtk.Align.START) - self._response_label.set_selectable(True) # Allow text selection - content_area.pack_start(self._response_label, True, True, 10) + # Create scrollable text view for response (same as description dialog) + scrolled_window = Gtk.ScrolledWindow() + scrolled_window.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC) + scrolled_window.set_shadow_type(Gtk.ShadowType.IN) + + self._response_text_view = Gtk.TextView() + self._response_text_view.set_editable(False) # Read-only + self._response_text_view.set_cursor_visible(True) # Allow cursor navigation for screen readers + self._response_text_view.set_wrap_mode(Gtk.WrapMode.WORD) + + # Set initial processing text + text_buffer = self._response_text_view.get_buffer() + text_buffer.set_text("Processing your question...") + + scrolled_window.add(self._response_text_view) + content_area.pack_start(scrolled_window, True, True, 10) # Add close button close_button = dialog.add_button(Gtk.STOCK_CLOSE, Gtk.ResponseType.CLOSE) @@ -831,8 +867,8 @@ class AIAssistant(Plugin): dialog.set_default_size(600, 400) dialog.show_all() - # Focus the response label so screen reader announces it - self._response_label.grab_focus() + # Focus the response text view so screen reader announces it + self._response_text_view.grab_focus() # Process question asynchronously self._process_user_question_async(dialog, question) @@ -855,8 +891,9 @@ class AIAssistant(Plugin): data.get('accessibility') ) - # Update the response label - self._response_label.set_markup(f"Response:\n{response}") + # Update the response text view + text_buffer = self._response_text_view.get_buffer() + text_buffer.set_text(f"Response:\n{response}") # Also speak the response self._present_message(response) @@ -869,23 +906,27 @@ class AIAssistant(Plugin): except Exception as e: logger.error(f"Error getting AI response: {e}") - self._response_label.set_markup(f"Error: {e}") + text_buffer = self._response_text_view.get_buffer() + text_buffer.set_text(f"Error: {e}") self._present_message(f"Error getting AI response: {e}") else: - self._response_label.set_markup("Error: No screen data available") + text_buffer = self._response_text_view.get_buffer() + text_buffer.set_text("Error: No screen data available") self._present_message("No screen data available") except Exception as e: logger.error(f"Error processing user question: {e}") - self._response_label.set_markup(f"Error: {e}") + text_buffer = self._response_text_view.get_buffer() + text_buffer.set_text(f"Error: {e}") self._present_message(f"Error processing question: {e}") # ============================================================================ def _show_description_dialog(self, description): """Show a read-only dialog with the screen description.""" try: + provider_name = self._provider_type.replace('_', ' ').title() dialog = Gtk.Dialog( - title="AI Screen Description", + title=f"AI Screen Description ({provider_name})", parent=None, flags=Gtk.DialogFlags.MODAL, buttons=(Gtk.STOCK_CLOSE, Gtk.ResponseType.CLOSE) @@ -902,7 +943,7 @@ class AIAssistant(Plugin): text_view = Gtk.TextView() text_view.set_editable(False) # Read-only - text_view.set_cursor_visible(False) + text_view.set_cursor_visible(True) # Allow cursor navigation for screen readers text_view.set_wrap_mode(Gtk.WrapMode.WORD) # Set the description text @@ -1694,54 +1735,91 @@ class AIAssistant(Plugin): return None -class AIAssistantMenu(Gtk.Menu): - """A menu containing AI Assistant options.""" +class AIAssistantMenu(Gtk.Dialog): + """A dialog containing AI Assistant options.""" def __init__(self, handler): - super().__init__() - self.connect("popped-up", self._on_popped_up) + super().__init__(title="AI Assistant", transient_for=None, flags=0) self.on_option_selected = handler - # AI Assistant menu options - options = [ - ("ask_question", "Ask Question"), - ("describe_screen", "Describe Screen"), - ("request_action", "Request Action") - ] + # Set dialog properties for better screen reader accessibility + self.set_modal(True) + self.set_position(Gtk.WindowPosition.CENTER) + self.set_default_size(350, 250) - for action_id, label in options: - menu_item = Gtk.MenuItem(label=label) - menu_item.connect("activate", self._on_activate, action_id) - self.append(menu_item) + # Add OK and Cancel buttons + self.add_button("OK", Gtk.ResponseType.OK) + self.add_button("Cancel", Gtk.ResponseType.CANCEL) + + # Connect response signal + self.connect("response", self._on_response) + + # Add content to dialog + content_area = self.get_content_area() + + # Add label + label = Gtk.Label(label="Choose an AI Assistant action:") + content_area.pack_start(label, False, False, 10) + + # Create radio button group for options + self.radio_ask = Gtk.RadioButton.new_with_label(None, "Ask Question") + self.radio_describe = Gtk.RadioButton.new_with_label_from_widget(self.radio_ask, "Describe Screen") + self.radio_action = Gtk.RadioButton.new_with_label_from_widget(self.radio_ask, "Request Action") + + # Pack radio buttons + content_area.pack_start(self.radio_ask, False, False, 5) + content_area.pack_start(self.radio_describe, False, False, 5) + content_area.pack_start(self.radio_action, False, False, 5) + + # Set first option as selected by default + self.radio_ask.set_active(True) + + # Connect keyboard events for Enter key handling + self.connect("key-press-event", self._on_key_press) + + print("DEBUG: AIAssistantMenu dialog created with radio buttons") - def _on_activate(self, widget, action_id): - """Handler for menu item activation.""" - self.on_option_selected(action_id) + def _on_response(self, dialog, response_id): + """Handler for dialog response.""" + print(f"DEBUG: Dialog response: {response_id}") + + if response_id == Gtk.ResponseType.OK: + # Determine which radio button is selected + if self.radio_ask.get_active(): + action_id = "ask_question" + elif self.radio_describe.get_active(): + action_id = "describe_screen" + elif self.radio_action.get_active(): + action_id = "request_action" + else: + action_id = None + + if action_id: + print(f"DEBUG: Selected action: {action_id}") + self.on_option_selected(action_id) + + self.destroy() - def _on_popped_up(self, *args): - """Handler for menu popup.""" - logger.info("AI Assistant menu popped up") + def _on_key_press(self, widget, event): + """Handle key press events.""" + # Allow Enter key to activate OK button + if event.keyval == 65293: # Enter key + self.response(Gtk.ResponseType.OK) + return True + return False def show_gui(self): - """Shows the AI Assistant menu.""" - self.show_all() - display = Gdk.Display.get_default() - seat = display.get_default_seat() - device = seat.get_pointer() - screen, x, y = device.get_position() - - event = Gdk.Event.new(Gdk.EventType.BUTTON_PRESS) - event.set_screen(screen) - event.set_device(device) - event.time = Gtk.get_current_event_time() - event.x = x - event.y = y - - rect = Gdk.Rectangle() - rect.x = x - rect.y = y - rect.width = 1 - rect.height = 1 - - window = Gdk.get_default_root_window() - self.popup_at_rect(window, rect, Gdk.Gravity.NORTH_WEST, Gdk.Gravity.NORTH_WEST, event) + """Shows the AI Assistant dialog.""" + try: + print("DEBUG: Starting dialog show_gui") + self.show_all() + print("DEBUG: Dialog show_all() called - should be visible and accessible now") + + # Present the dialog to ensure it gets focus + self.present() + print("DEBUG: Dialog presented") + + except Exception as e: + print(f"DEBUG: Error in show_gui: {e}") + import traceback + traceback.print_exc() diff --git a/src/cthulhu/script.py b/src/cthulhu/script.py index 7763297..459e30f 100644 --- a/src/cthulhu/script.py +++ b/src/cthulhu/script.py @@ -180,6 +180,40 @@ class Script: return keybindings.KeyBindings() + def getPluginKeyBindings(self): + """Returns the plugin keybindings for this script.""" + + from . import debug + debug.printMessage(debug.LEVEL_INFO, "SCRIPT: getPluginKeyBindings() called", True) + + plugin_bindings = keybindings.KeyBindings() + + # Get the plugin system manager + try: + from . import plugin_system_manager + manager = plugin_system_manager.getManager() + debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Plugin manager: {manager}", True) + if manager: + # Get all active plugins + active_plugins = manager.get_active_plugins() + debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Found {len(active_plugins)} active plugins", True) + for plugin in active_plugins: + # Get bindings from each plugin + bindings = plugin.get_bindings() + debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Plugin {plugin.name} has bindings: {bindings}", True) + if bindings: + # Add each binding from the plugin to our collection + for binding in bindings.keyBindings: + plugin_bindings.add(binding) + debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Added plugin binding: {binding.asString()}", True) + except Exception as e: + debug.printMessage(debug.LEVEL_INFO, f"Could not get plugin keybindings: {e}", True) + import traceback + debug.printMessage(debug.LEVEL_INFO, traceback.format_exc(), True) + + debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Returning {len(plugin_bindings.keyBindings)} plugin bindings", True) + return plugin_bindings + def getBrailleBindings(self): """Defines the braille bindings for this script. diff --git a/src/cthulhu/settings.py b/src/cthulhu/settings.py index 70a3686..b5758d7 100644 --- a/src/cthulhu/settings.py +++ b/src/cthulhu/settings.py @@ -435,7 +435,7 @@ presentLiveRegionFromInactiveTab = False activePlugins = ['AIAssistant', 'DisplayVersion', 'PluginManager', 'HelloCthulhu', 'ByeCthulhu'] # AI Assistant settings (disabled by default for opt-in behavior) -aiAssistantEnabled = False +aiAssistantEnabled = True aiProvider = AI_PROVIDER_CLAUDE_CODE aiApiKeyFile = "" aiOllamaModel = "llama3.2-vision"