Many updates to the AI Assistant plugin: improved UI and, hopefully, improved accuracy as well.

This commit is contained in:
Storm Dragon
2025-08-05 19:05:45 -04:00
parent a322c1d9b2
commit 37bd89ab87
9 changed files with 242 additions and 93 deletions
+4 -3
View File
@@ -100,20 +100,21 @@ class APIHelper:
# Create a keybinding handler
class GestureHandler:
    """Callable wrapper that binds a plugin function to a keybinding.

    Attributes set on the instance:
      function         -- callable invoked as function(script, inputEvent)
      description      -- human-readable description of the gesture
      learnModeEnabled -- whether the gesture is flagged for learn mode
    """

    def __init__(self, function, description, learnModeEnabled=True):
        self.function = function
        self.description = description
        self.learnModeEnabled = learnModeEnabled

    def __call__(self, script, inputEvent):
        """Invoke the wrapped function and return its result.

        Any exception raised by the wrapped function is logged (with its
        traceback) and True is returned instead of propagating.
        """
        try:
            logger.info("=== Plugin keybinding handler called! ===")
            # Call the function stored on this instance rather than relying
            # on a variable captured from the enclosing scope.
            return self.function(script, inputEvent)
        except Exception as e:
            import logging
            logging.getLogger(__name__).error(
                f"Error in keybinding handler: {e}", exc_info=True
            )
            return True
handler = GestureHandler(function, name)
handler = GestureHandler(function, name, learnModeEnabled)
logger.info(f"Created handler: {handler}")
# Create the binding object regardless of whether there's an active script
+1 -1
View File
@@ -23,5 +23,5 @@
# Fork of Orca Screen Reader (GNOME)
# Original source: https://gitlab.gnome.org/GNOME/orca
version = "2025.08.03"
version = "2025.08.05"
codeName = "testing"
+3 -1
View File
@@ -339,7 +339,9 @@ class KeyboardEvent(InputEvent):
if KeyboardEvent.lastCthulhuModifierAlone:
if _isPressed:
KeyboardEvent.secondCthulhuModifierTime = now
if (KeyboardEvent.secondCthulhuModifierTime <
if (KeyboardEvent.secondCthulhuModifierTime is not None and
KeyboardEvent.lastCthulhuModifierAloneTime is not None and
KeyboardEvent.secondCthulhuModifierTime <
KeyboardEvent.lastCthulhuModifierAloneTime + 0.5):
# double-cthulhu, let the real action happen
self._bypassCthulhu = True
+25
View File
@@ -191,10 +191,18 @@ class LearnModePresenter:
handler = event.getHandler()
if handler is None:
debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: No handler found for event", True)
return True
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler found: {handler}", True)
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler description: {getattr(handler, 'description', 'No description')}", True)
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Handler learnModeEnabled: {getattr(handler, 'learnModeEnabled', 'No learnModeEnabled')}", True)
if handler.learnModeEnabled and handler.description:
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Presenting message: {handler.description}", True)
cthulhu_state.activeScript.presentMessage(handler.description)
else:
debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: Handler missing required properties for learn mode", True)
return True
@@ -254,6 +262,23 @@ class LearnModePresenter:
bindings[guilabels.KB_GROUP_ACTIONS] = bound
items += len(bound)
# Add plugin keybindings
try:
debug.printMessage(debug.LEVEL_INFO, "LEARN MODE PRESENTER: Getting plugin keybindings", True)
plugin_bindings = script.getPluginKeyBindings()
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Got plugin bindings object: {plugin_bindings}", True)
bound = plugin_bindings.getBoundBindings()
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Got {len(bound)} bound plugin keybindings", True)
if bound:
bindings["Plugins"] = bound
items += len(bound)
debug.printMessage(debug.LEVEL_INFO, f"LEARN MODE PRESENTER: Added {len(bound)} plugin keybindings to learn mode", True)
except Exception as e:
msg = f"LEARN MODE PRESENTER: Could not get plugin keybindings: {e}"
debug.printMessage(debug.LEVEL_INFO, msg, True)
import traceback
debug.printMessage(debug.LEVEL_INFO, traceback.format_exc(), True)
title = messages.shortcutsFoundCthulhu(items)
else:
app_name = AXObject.get_name(script.app) or messages.APPLICATION_NO_NAME
+11 -1
View File
@@ -395,9 +395,19 @@ class PluginSystemManager:
"""Get the list of active plugin names."""
return self._active_plugins
def get_active_plugins(self):
    """Return the instances of the active plugins that are loaded.

    Names in self._active_plugins that are not present in self._plugins are
    ignored, as are entries whose `loaded` flag or `instance` is falsy.
    Order follows self._active_plugins.
    """
    registry = self._plugins
    known = (registry[name] for name in self._active_plugins if name in registry)
    return [info.instance for info in known if info.loaded and info.instance]
def setActivePlugins(self, activePlugins):
"""Set active plugins and sync their state."""
logger.info(f"=== PluginSystemManager.setActivePlugins called ===")
logger.info(f"PLUGIN SYSTEM: setActivePlugins called with: {activePlugins}")
logger.info(f"Setting active plugins: {activePlugins}")
# Make sure we have scanned for plugins first
+7 -8
View File
@@ -1,8 +1,7 @@
name = AI Assistant
version = 1.0.0
description = AI-powered accessibility assistant for analyzing screens and taking actions
authors = Stormux <storm_dragon@stormux.org>
website = https://stormux.org
copyright = Copyright 2025
builtin = false
hidden = false
[Plugin]
Name = AI Assistant
Module = AIAssistant
Description = AI-powered accessibility assistant for analyzing screens and taking actions
Authors = Stormux <storm_dragon@stormux.org>
Version = 1.0.0
Category = Accessibility
+156 -78
View File
@@ -46,15 +46,11 @@ class AIAssistant(Plugin):
def __init__(self, *args, **kwargs):
"""Initialize the AI Assistant plugin."""
super().__init__(*args, **kwargs)
logger.info("AI Assistant plugin initialized")
print("DEBUG: AI Assistant plugin __init__ called")
# Write to a debug file so we can see if the plugin is being loaded
try:
with open('/tmp/ai_assistant_debug.log', 'a') as f:
f.write("AI Assistant plugin __init__ called\n")
except:
pass
# Use print to ensure we see this message
print("DEBUG: AI ASSISTANT __init__ called")
logger.info("AI ASSISTANT: Plugin __init__ starting")
logger.info("AI ASSISTANT: Plugin initialized successfully")
# Menu and keybinding storage
self._kb_binding_menu = None
@@ -79,31 +75,45 @@ class AIAssistant(Plugin):
if plugin is not None and plugin is not self:
return
# Prevent multiple activations
if self._enabled:
logger.info("AI ASSISTANT: Already activated, skipping")
print("DEBUG: AI ASSISTANT already activated, skipping")
return
try:
logger.info("=== AI Assistant plugin activation starting ===")
print("DEBUG: AI Assistant plugin activation starting")
logger.info("AI ASSISTANT: === Plugin activation starting ===")
print("DEBUG: AI ASSISTANT activation starting")
# Check if AI Assistant is enabled in settings
enabled = self._settings_manager.getSetting('aiAssistantEnabled')
print(f"DEBUG: AI Assistant enabled setting: {enabled}")
logger.info(f"AI ASSISTANT: Enabled setting: {enabled}")
print(f"DEBUG: AI ASSISTANT enabled setting: {enabled}")
if not enabled:
logger.info("AI Assistant is disabled in settings, skipping activation")
print("DEBUG: AI Assistant is disabled in settings, skipping activation")
print("DEBUG: AI Assistant disabled, skipping activation")
return
# Load AI settings
self._load_ai_settings()
print(f"DEBUG: AI settings loaded - provider: {self._provider_type}")
# Check if we have valid configuration
if not self._validate_configuration():
config_valid = self._validate_configuration()
logger.info(f"AI Assistant configuration valid: {config_valid}")
print(f"DEBUG: AI Assistant configuration valid: {config_valid}")
if not config_valid:
logger.warning("AI Assistant configuration invalid, skipping activation")
print("DEBUG: AI Assistant configuration invalid, skipping activation")
return
# Initialize AI provider
self._initialize_ai_provider()
print("DEBUG: AI provider initialized")
# Register keybindings only if configuration is valid
self._register_keybindings()
print("DEBUG: AI keybindings registered")
self._enabled = True
logger.info("AI Assistant plugin activated successfully")
@@ -111,6 +121,7 @@ class AIAssistant(Plugin):
except Exception as e:
logger.error(f"Error activating AI Assistant plugin: {e}")
print(f"DEBUG: Error activating AI Assistant plugin: {e}")
import traceback
logger.error(traceback.format_exc())
@@ -156,21 +167,28 @@ class AIAssistant(Plugin):
def _validate_configuration(self):
    """Validate AI Assistant configuration.

    Returns False when no provider type is set. For the Ollama and Claude
    Code providers the result of the matching availability check is
    returned; every other provider is valid only if an API key is set.
    """
    logger.info(f"Validating AI configuration - provider_type: {self._provider_type}")

    if not self._provider_type:
        logger.warning("No AI provider configured")
        return False

    # Providers that don't need API keys
    if self._provider_type == settings.AI_PROVIDER_OLLAMA:
        logger.info("Checking Ollama availability")
        result = self._check_ollama_availability()
        # Log the outcome the same way the Claude Code branch does.
        logger.info(f"Ollama availability check result: {result}")
        return result

    if self._provider_type == settings.AI_PROVIDER_CLAUDE_CODE:
        logger.info("Checking Claude Code availability")
        result = self._check_claude_code_availability()
        logger.info(f"Claude Code availability check result: {result}")
        return result

    # Other providers need API keys
    logger.info(f"Checking API key for provider {self._provider_type}")
    if not self._api_key:
        logger.warning(f"No API key configured for provider {self._provider_type}")
        return False

    logger.info("Configuration validation passed")
    return True
def _check_ollama_availability(self):
@@ -243,7 +261,7 @@ class AIAssistant(Plugin):
self._kb_binding_menu = self.registerGestureByString(
self._show_ai_menu,
"Show AI Assistant menu",
'kb:cthulhu+control+shift+a'
'kb:cthulhu+shift+control+a'
)
logger.info("AI Assistant menu keybinding registered")
@@ -261,19 +279,23 @@ class AIAssistant(Plugin):
def _show_ai_menu(self, script=None, inputEvent=None):
    """Show the AI Assistant menu.

    Screen data is collected before the menu is created and stored in
    self._pre_menu_screen_data. Returns True once the menu has been shown,
    or False if any step raised (the error is logged, not propagated).
    """
    try:
        logger.info("AI ASSISTANT: _show_ai_menu called!")
        print("DEBUG: AI ASSISTANT _show_ai_menu called!")

        # IMPORTANT: Capture screen data BEFORE showing menu
        # This ensures we get the actual screen content, not the menu itself
        self._pre_menu_screen_data = self._collect_ai_data()
        logger.info("Pre-captured screen data for menu actions")
        print("DEBUG: Pre-captured screen data for menu actions")

        # Now show the menu
        self._menu_gui = AIAssistantMenu(self._handle_menu_selection)
        self._menu_gui.show_gui()
        print("DEBUG: AI menu GUI shown")
        return True
    except Exception as e:
        logger.error(f"Error showing AI menu: {e}")
        print(f"DEBUG: Error showing AI menu: {e}")
        # Send the traceback to the log, as the activation error handler
        # does, instead of only writing it to stderr.
        import traceback
        logger.error(traceback.format_exc())
        return False
@@ -309,7 +331,8 @@ class AIAssistant(Plugin):
self._present_message("AI provider not available. Check configuration.")
return True
self._present_message("AI Assistant analyzing screen...")
provider_name = self._provider_type.replace('_', ' ').title()
self._present_message(f"AI Assistant ({provider_name}) analyzing screen...")
# Use pre-captured data
if data:
@@ -453,7 +476,8 @@ class AIAssistant(Plugin):
self._present_message("AI provider not available. Check configuration.")
return True
self._present_message("AI Assistant analyzing screen...")
provider_name = self._provider_type.replace('_', ' ').title()
self._present_message(f"AI Assistant ({provider_name}) analyzing screen...")
# Collect data and get AI description
data = self._collect_ai_data()
@@ -745,8 +769,9 @@ class AIAssistant(Plugin):
def _show_question_dialog(self):
"""Show a dialog for the user to enter their question."""
try:
provider_name = self._provider_type.replace('_', ' ').title()
dialog = Gtk.Dialog(
title="AI Assistant Question",
title=f"AI Assistant Question ({provider_name})",
parent=None,
flags=Gtk.DialogFlags.MODAL,
buttons=(
@@ -806,8 +831,9 @@ class AIAssistant(Plugin):
for child in dialog.get_action_area().get_children():
dialog.get_action_area().remove(child)
# Change title
dialog.set_title("AI Assistant Response")
# Change title
provider_name = self._provider_type.replace('_', ' ').title()
dialog.set_title(f"AI Assistant Response ({provider_name})")
# Show question and processing message
question_label = Gtk.Label()
@@ -816,12 +842,22 @@ class AIAssistant(Plugin):
question_label.set_halign(Gtk.Align.START)
content_area.pack_start(question_label, False, False, 10)
# Processing label (will be updated with response)
self._response_label = Gtk.Label(label="Processing your question...")
self._response_label.set_line_wrap(True)
self._response_label.set_halign(Gtk.Align.START)
self._response_label.set_selectable(True) # Allow text selection
content_area.pack_start(self._response_label, True, True, 10)
# Create scrollable text view for response (same as description dialog)
scrolled_window = Gtk.ScrolledWindow()
scrolled_window.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
scrolled_window.set_shadow_type(Gtk.ShadowType.IN)
self._response_text_view = Gtk.TextView()
self._response_text_view.set_editable(False) # Read-only
self._response_text_view.set_cursor_visible(True) # Allow cursor navigation for screen readers
self._response_text_view.set_wrap_mode(Gtk.WrapMode.WORD)
# Set initial processing text
text_buffer = self._response_text_view.get_buffer()
text_buffer.set_text("Processing your question...")
scrolled_window.add(self._response_text_view)
content_area.pack_start(scrolled_window, True, True, 10)
# Add close button
close_button = dialog.add_button(Gtk.STOCK_CLOSE, Gtk.ResponseType.CLOSE)
@@ -831,8 +867,8 @@ class AIAssistant(Plugin):
dialog.set_default_size(600, 400)
dialog.show_all()
# Focus the response label so screen reader announces it
self._response_label.grab_focus()
# Focus the response text view so screen reader announces it
self._response_text_view.grab_focus()
# Process question asynchronously
self._process_user_question_async(dialog, question)
@@ -855,8 +891,9 @@ class AIAssistant(Plugin):
data.get('accessibility')
)
# Update the response label
self._response_label.set_markup(f"<b>Response:</b>\n{response}")
# Update the response text view
text_buffer = self._response_text_view.get_buffer()
text_buffer.set_text(f"Response:\n{response}")
# Also speak the response
self._present_message(response)
@@ -869,23 +906,27 @@ class AIAssistant(Plugin):
except Exception as e:
logger.error(f"Error getting AI response: {e}")
self._response_label.set_markup(f"<b>Error:</b> {e}")
text_buffer = self._response_text_view.get_buffer()
text_buffer.set_text(f"Error: {e}")
self._present_message(f"Error getting AI response: {e}")
else:
self._response_label.set_markup("<b>Error:</b> No screen data available")
text_buffer = self._response_text_view.get_buffer()
text_buffer.set_text("Error: No screen data available")
self._present_message("No screen data available")
except Exception as e:
logger.error(f"Error processing user question: {e}")
self._response_label.set_markup(f"<b>Error:</b> {e}")
text_buffer = self._response_text_view.get_buffer()
text_buffer.set_text(f"Error: {e}")
self._present_message(f"Error processing question: {e}")
# ============================================================================
def _show_description_dialog(self, description):
"""Show a read-only dialog with the screen description."""
try:
provider_name = self._provider_type.replace('_', ' ').title()
dialog = Gtk.Dialog(
title="AI Screen Description",
title=f"AI Screen Description ({provider_name})",
parent=None,
flags=Gtk.DialogFlags.MODAL,
buttons=(Gtk.STOCK_CLOSE, Gtk.ResponseType.CLOSE)
@@ -902,7 +943,7 @@ class AIAssistant(Plugin):
text_view = Gtk.TextView()
text_view.set_editable(False) # Read-only
text_view.set_cursor_visible(False)
text_view.set_cursor_visible(True) # Allow cursor navigation for screen readers
text_view.set_wrap_mode(Gtk.WrapMode.WORD)
# Set the description text
@@ -1694,54 +1735,91 @@ class AIAssistant(Plugin):
return None
class AIAssistantMenu(Gtk.Dialog):
    """A dialog containing AI Assistant options.

    The three actions are presented as a radio-button group with OK and
    Cancel buttons. When OK is chosen, the handler passed to the constructor
    is called with one of "ask_question", "describe_screen" or
    "request_action".
    """

    def __init__(self, handler):
        """Build the dialog; `handler` is called with the chosen action id."""
        super().__init__(title="AI Assistant", transient_for=None, flags=0)
        self.on_option_selected = handler

        # Set dialog properties for better screen reader accessibility
        self.set_modal(True)
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_default_size(350, 250)

        # Add OK and Cancel buttons
        self.add_button("OK", Gtk.ResponseType.OK)
        self.add_button("Cancel", Gtk.ResponseType.CANCEL)

        # Connect response signal
        self.connect("response", self._on_response)

        # Add content to dialog
        content_area = self.get_content_area()

        # Add label
        label = Gtk.Label(label="Choose an AI Assistant action:")
        content_area.pack_start(label, False, False, 10)

        # Create radio button group for options
        self.radio_ask = Gtk.RadioButton.new_with_label(None, "Ask Question")
        self.radio_describe = Gtk.RadioButton.new_with_label_from_widget(
            self.radio_ask, "Describe Screen"
        )
        self.radio_action = Gtk.RadioButton.new_with_label_from_widget(
            self.radio_ask, "Request Action"
        )

        # Pair each radio button with the action id reported to the handler.
        self._actions = (
            (self.radio_ask, "ask_question"),
            (self.radio_describe, "describe_screen"),
            (self.radio_action, "request_action"),
        )

        # Pack radio buttons
        for radio, _action_id in self._actions:
            content_area.pack_start(radio, False, False, 5)

        # Set first option as selected by default
        self.radio_ask.set_active(True)

        # Connect keyboard events for Enter key handling
        self.connect("key-press-event", self._on_key_press)

        print("DEBUG: AIAssistantMenu dialog created with radio buttons")

    def _selected_action(self):
        """Return the action id of the active radio button, or None."""
        for radio, action_id in self._actions:
            if radio.get_active():
                return action_id
        return None

    def _on_response(self, dialog, response_id):
        """Handler for dialog response."""
        print(f"DEBUG: Dialog response: {response_id}")

        action_id = None
        if response_id == Gtk.ResponseType.OK:
            action_id = self._selected_action()

        # Destroy the dialog before dispatching so it is always closed, even
        # if the selection handler raises or opens a window of its own.
        self.destroy()

        if action_id:
            print(f"DEBUG: Selected action: {action_id}")
            self.on_option_selected(action_id)

    def _on_key_press(self, widget, event):
        """Handle key press events.

        Enter (main or keypad) confirms the dialog while focus is on one of
        the radio buttons or on nothing. When focus is on another widget,
        such as the Cancel button, the key is left to that widget.
        """
        if event.keyval not in (Gdk.KEY_Return, Gdk.KEY_KP_Enter):
            return False

        focused = self.get_focus()
        radios = [radio for radio, _action_id in self._actions]
        if focused is not None and focused not in radios:
            # Let the focused button handle Enter itself so that pressing
            # Enter on Cancel does not trigger OK.
            return False

        self.response(Gtk.ResponseType.OK)
        return True

    def show_gui(self):
        """Shows the AI Assistant dialog."""
        try:
            print("DEBUG: Starting dialog show_gui")
            self.show_all()
            print("DEBUG: Dialog show_all() called - should be visible and accessible now")

            # Present the dialog to ensure it gets focus
            self.present()
            print("DEBUG: Dialog presented")
        except Exception as e:
            print(f"DEBUG: Error in show_gui: {e}")
            import traceback
            traceback.print_exc()
+34
View File
@@ -180,6 +180,40 @@ class Script:
return keybindings.KeyBindings()
def getPluginKeyBindings(self):
    """Returns the plugin keybindings for this script.

    Builds a new KeyBindings collection containing every binding reported by
    the active plugins' get_bindings(). Errors are logged through the debug
    module and never propagated; a failure in one plugin does not prevent
    the bindings of the remaining plugins from being collected.
    """
    import traceback
    from . import debug

    debug.printMessage(debug.LEVEL_INFO, "SCRIPT: getPluginKeyBindings() called", True)

    plugin_bindings = keybindings.KeyBindings()
    active_plugins = []

    # Get the plugin system manager
    try:
        from . import plugin_system_manager
        manager = plugin_system_manager.getManager()
        debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Plugin manager: {manager}", True)
        if manager:
            # Get all active plugins
            active_plugins = manager.get_active_plugins()
            debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Found {len(active_plugins)} active plugins", True)
    except Exception as e:
        debug.printMessage(debug.LEVEL_INFO, f"Could not get plugin keybindings: {e}", True)
        debug.printMessage(debug.LEVEL_INFO, traceback.format_exc(), True)
        active_plugins = []

    for plugin in active_plugins:
        # Isolate each plugin so one misbehaving plugin cannot hide the
        # bindings of the others.
        try:
            # Get bindings from each plugin
            bindings = plugin.get_bindings()
            debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Plugin {plugin.name} has bindings: {bindings}", True)
            if bindings:
                # Add each binding from the plugin to our collection
                for binding in bindings.keyBindings:
                    plugin_bindings.add(binding)
                    debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Added plugin binding: {binding.asString()}", True)
        except Exception as e:
            debug.printMessage(debug.LEVEL_INFO, f"Could not get plugin keybindings: {e}", True)
            debug.printMessage(debug.LEVEL_INFO, traceback.format_exc(), True)

    debug.printMessage(debug.LEVEL_INFO, f"SCRIPT: Returning {len(plugin_bindings.keyBindings)} plugin bindings", True)
    return plugin_bindings
def getBrailleBindings(self):
"""Defines the braille bindings for this script.
+1 -1
View File
@@ -435,7 +435,7 @@ presentLiveRegionFromInactiveTab = False
activePlugins = ['AIAssistant', 'DisplayVersion', 'PluginManager', 'HelloCthulhu', 'ByeCthulhu']
# AI Assistant settings (enabled by default; set aiAssistantEnabled = False to opt out)
aiAssistantEnabled = False
aiAssistantEnabled = True
aiProvider = AI_PROVIDER_CLAUDE_CODE
aiApiKeyFile = ""
aiOllamaModel = "llama3.2-vision"