Initial work on OCR integration. It is currently broken.
This commit is contained in:
@@ -23,5 +23,5 @@
|
|||||||
# Fork of Orca Screen Reader (GNOME)
|
# Fork of Orca Screen Reader (GNOME)
|
||||||
# Original source: https://gitlab.gnome.org/GNOME/orca
|
# Original source: https://gitlab.gnome.org/GNOME/orca
|
||||||
|
|
||||||
version = "2025.08.19"
|
version = "2025.08.21"
|
||||||
codeName = "testing"
|
codeName = "testing"
|
||||||
|
23
src/cthulhu/plugins/OCR/__init__.py
Normal file
23
src/cthulhu/plugins/OCR/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025 Stormux
|
||||||
|
# Copyright (c) 2022 Chrys (original ocrdesktop)
|
||||||
|
#
|
||||||
|
# This library is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This library is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with this library; if not, write to the
|
||||||
|
# Free Software Foundation, Inc., Franklin Street, Fifth Floor,
|
||||||
|
# Boston MA 02110-1301 USA.
|
||||||
|
|
||||||
|
"""OCRDesktop plugin package."""
|
||||||
|
|
||||||
|
from .plugin import OCRDesktop
|
14
src/cthulhu/plugins/OCR/meson.build
Normal file
14
src/cthulhu/plugins/OCR/meson.build
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
ocrdesktop_python_sources = files([
|
||||||
|
'__init__.py',
|
||||||
|
'plugin.py'
|
||||||
|
])
|
||||||
|
|
||||||
|
python3.install_sources(
|
||||||
|
ocrdesktop_python_sources,
|
||||||
|
subdir: 'cthulhu/plugins/OCRDesktop'
|
||||||
|
)
|
||||||
|
|
||||||
|
install_data(
|
||||||
|
'plugin.info',
|
||||||
|
install_dir: python3.get_install_dir() / 'cthulhu' / 'plugins' / 'OCRDesktop'
|
||||||
|
)
|
8
src/cthulhu/plugins/OCR/plugin.info
Normal file
8
src/cthulhu/plugins/OCR/plugin.info
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name = OCR Desktop
|
||||||
|
version = 4.0.0
|
||||||
|
description = OCR accessibility tool for reading inaccessible windows and dialogs using Tesseract OCR
|
||||||
|
authors = Storm Dragon <storm_dragon@stormux.org>
|
||||||
|
website = https://github.com/chrys87/ocrdesktop
|
||||||
|
copyright = Copyright 2022 Chrys, Copyright 2025 Stormux
|
||||||
|
builtin = false
|
||||||
|
hidden = false
|
535
src/cthulhu/plugins/OCR/plugin.py
Normal file
535
src/cthulhu/plugins/OCR/plugin.py
Normal file
@@ -0,0 +1,535 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025 Stormux
|
||||||
|
# Copyright (c) 2022 Chrys (original ocrdesktop)
|
||||||
|
#
|
||||||
|
# This library is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
"""OCRDesktop plugin for Cthulhu screen reader."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import locale
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from mimetypes import MimeTypes
|
||||||
|
|
||||||
|
from cthulhu.plugin import Plugin, cthulhu_hookimpl
|
||||||
|
from cthulhu import debug
|
||||||
|
|
||||||
|
# Import Cthulhu's sound system
|
||||||
|
try:
|
||||||
|
from cthulhu import sound
|
||||||
|
from cthulhu.sound_generator import Tone
|
||||||
|
SOUND_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
SOUND_AVAILABLE = False
|
||||||
|
|
||||||
|
# PIL
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
from PIL import ImageOps
|
||||||
|
PIL_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PIL_AVAILABLE = False
|
||||||
|
|
||||||
|
# pytesseract
|
||||||
|
try:
|
||||||
|
import pytesseract
|
||||||
|
from pytesseract import Output
|
||||||
|
PYTESSERACT_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PYTESSERACT_AVAILABLE = False
|
||||||
|
|
||||||
|
# pdf2image
|
||||||
|
try:
|
||||||
|
from pdf2image import convert_from_path
|
||||||
|
PDF2IMAGE_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PDF2IMAGE_AVAILABLE = False
|
||||||
|
|
||||||
|
# scipy
|
||||||
|
try:
|
||||||
|
from scipy.spatial import KDTree
|
||||||
|
SCIPY_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
SCIPY_AVAILABLE = False
|
||||||
|
|
||||||
|
# webcolors
|
||||||
|
try:
|
||||||
|
from webcolors import CSS3_HEX_TO_NAMES
|
||||||
|
from webcolors import hex_to_rgb
|
||||||
|
WEBCOLORS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
WEBCOLORS_AVAILABLE = False
|
||||||
|
|
||||||
|
# GTK/GDK/Wnck
|
||||||
|
try:
|
||||||
|
import gi
|
||||||
|
gi.require_version("Gtk", "3.0")
|
||||||
|
gi.require_version("Gdk", "3.0")
|
||||||
|
gi.require_version("Wnck", "3.0")
|
||||||
|
from gi.repository import Gtk, Gdk, Wnck
|
||||||
|
GTK_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
GTK_AVAILABLE = False
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class OCRDesktop(Plugin):
|
||||||
|
"""OCR Desktop accessibility plugin for reading inaccessible windows."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Set up default OCR settings, runtime state and the sound player.

    All positional and keyword arguments are forwarded unchanged to the
    base class initializer. Missing dependencies are only logged here; the
    result of the dependency check is not acted upon.
    """
    super().__init__(*args, **kwargs)
    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Plugin initialized", True)

    # Values returned by registerGestureByString(), one per shortcut.
    self._kb_binding_window = self._kb_binding_desktop = self._kb_binding_clipboard = None

    # Tesseract language and image pre-processing options.
    self._languageCode = 'eng'
    self._scaleFactor = 3
    self._grayscaleImg = self._invertImg = self._blackWhiteImg = False
    self._blackWhiteImgValue = 200
    self._colorCalculation = False
    self._colorCalculationMax = 3

    # Captured images and the text recognised from them.
    self._img, self._modifiedImg = [], []
    self._OCRText = ''
    self._offsetXpos = self._offsetYpos = 0
    self._activated = False

    # Progress feedback while an OCR run is in flight.
    self._is_processing = False
    self._stop_beeping = False
    self._beep_thread = None
    self._player = None

    # Colour analysis caches.
    self._kdtDB = None
    self.colorNames, self.colorCache = [], {}

    # Set locale for tesseract.
    # NOTE(review): setlocale() affects the whole host process — confirm
    # this is intended for a plugin loaded into the screen reader.
    locale.setlocale(locale.LC_ALL, 'C')

    # Obtain the sound player for progress beeps; failure is non-fatal.
    if SOUND_AVAILABLE:
        try:
            self._player = sound.getPlayer()
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Sound player initialized", True)
        except Exception as err:
            debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Failed to initialize sound: {err}", True)

    # Log any missing dependencies up front.
    self._checkDependencies()
|
||||||
|
|
||||||
|
def _checkDependencies(self):
    """Report whether every mandatory dependency was importable.

    Returns:
        True when Pillow, pytesseract and the GTK/GDK/Wnck bindings were
        all imported successfully; otherwise False, after logging the names
        of the missing packages.
    """
    requirements = (
        (PIL_AVAILABLE, "python3-pillow"),
        (PYTESSERACT_AVAILABLE, "python-pytesseract"),
        (GTK_AVAILABLE, "GTK3/GDK/Wnck"),
    )
    missing = [package for available, package in requirements if not available]
    if not missing:
        return True

    debug.printMessage(debug.LEVEL_INFO,
                       f"OCRDesktop: Missing dependencies: {', '.join(missing)}", True)
    return False
|
||||||
|
|
||||||
|
@cthulhu_hookimpl
def activate(self, plugin=None):
    """Activate the plugin and register its keybindings.

    The call is ignored when it is addressed to a different plugin or when
    this plugin is already active. Activation is abandoned, with a log
    entry, if there is no application reference or a required dependency
    is missing. Exceptions are logged together with a traceback and are
    not propagated.
    """
    addressedToUs = plugin is None or plugin is self
    if not addressedToUs:
        return

    if self._activated:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Already activated", True)
        return

    try:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Plugin activation starting", True)

        if not self.app:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: ERROR - No app reference", True)
            return

        if not self._checkDependencies():
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Cannot activate - missing dependencies", True)
            return

        self._registerKeybindings()
        self._activated = True
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Plugin activated successfully", True)

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error activating: {err}", True)
        import traceback
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: {traceback.format_exc()}", True)
|
||||||
|
|
||||||
|
@cthulhu_hookimpl
def deactivate(self, plugin=None):
    """Mark the plugin as inactive.

    The call is ignored when it is addressed to a different plugin. Only
    the activation flag is cleared and a log entry written.
    """
    addressedToUs = plugin is None or plugin is self
    if not addressedToUs:
        return

    self._activated = False
    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Plugin deactivated", True)
|
||||||
|
|
||||||
|
def _registerKeybindings(self):
    """Register the three OCR shortcuts (window, desktop, clipboard).

    Each value returned by registerGestureByString() is stored on the
    matching ``_kb_binding_*`` attribute. Any exception is logged and
    stops the remaining registrations.
    """
    # (attribute to store the binding, handler, description, gesture)
    shortcuts = (
        ("_kb_binding_window", self._ocrActiveWindow,
         "OCR read active window", 'kb:cthulhu+control+w'),
        ("_kb_binding_desktop", self._ocrDesktop,
         "OCR read entire desktop", 'kb:cthulhu+control+d'),
        ("_kb_binding_clipboard", self._ocrClipboard,
         "OCR read image from clipboard", 'kb:cthulhu+control+shift+c'),
    )

    try:
        for attribute, handler, description, gesture in shortcuts:
            binding = self.registerGestureByString(handler, description, gesture)
            setattr(self, attribute, binding)

        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Keybindings registered", True)

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error registering keybindings: {err}", True)
|
||||||
|
|
||||||
|
def _startProgressBeeps(self):
    """Launch the background thread that emits progress beeps.

    Does nothing (beyond logging) when no sound player is available or a
    beep thread is already running.
    """
    if not self._player:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Cannot start beeps - no sound player", True)
        return

    current = self._beep_thread
    if current and current.is_alive():
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Beeps already running", True)
        return

    # Clear the stop flag before the worker starts polling it.
    self._stop_beeping = False
    worker = threading.Thread(target=self._beepLoop, daemon=True)
    self._beep_thread = worker
    worker.start()
    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Started progress beeps", True)
|
||||||
|
|
||||||
|
def _stopProgressBeeps(self):
    """Signal the beep thread to stop and wait up to one second for it."""
    self._stop_beeping = True
    worker = self._beep_thread
    if worker:
        worker.join(timeout=1.0)
    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Stopped progress beeps", True)
|
||||||
|
|
||||||
|
def _beepLoop(self):
    """Ring the system bell roughly every 0.5 seconds until told to stop.

    Runs until ``self._stop_beeping`` becomes true. The loop also ends if
    emitting the bell raises an exception, which is logged.
    """
    while not self._stop_beeping:
        try:
            # Emit only the BEL character (no trailing newline) and flush
            # at once, so the beep is not held back in a stdout buffer.
            print("\a", end="", flush=True)
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: System bell beep", True)

        except Exception as e:
            debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: System bell error: {e}", True)
            break

        # Wait 0.5 seconds in 0.01 second slices so a stop request is
        # noticed quickly.
        for _ in range(50):
            if self._stop_beeping:
                return
            time.sleep(0.01)
|
||||||
|
|
||||||
|
def _announceOCRStart(self, ocr_type):
    """Announce that an OCR run on *ocr_type* is starting.

    The message is presented through the active script when one is
    available and is always written to the debug log. Exceptions are
    logged and not propagated.
    """
    try:
        announcement = f"Performing OCR on {ocr_type}"

        state = None
        if self.app:
            state = self.app.getDynamicApiManager().getAPI('CthulhuState')
        script = state.activeScript if state else None
        if script:
            script.presentMessage(announcement, resetStyles=False)

        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: {announcement}", True)
    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error announcing OCR start: {err}", True)
|
||||||
|
|
||||||
|
def _runOCR(self, requestLabel, ocrType, capture):
    """Shared driver for the three OCR keybinding handlers.

    Args:
        requestLabel: Text used in the "OCR ... requested" log entry.
        ocrType: Short name used in the spoken announcement and in the
            error log entry.
        capture: Zero-argument callable that stores the image to process
            in ``self._img`` and returns True on success.

    Returns:
        True when the request was handled (including the case where a run
        is already in progress and the request is ignored), False when an
        exception occurred.
    """
    try:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: OCR {requestLabel} requested", True)

        if self._is_processing:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Already processing, ignoring request", True)
            return True

        self._is_processing = True
        try:
            self._announceOCRStart(ocrType)
            self._startProgressBeeps()
            if capture():
                self._performOCR()
                self._presentOCRResult()
        finally:
            # Single cleanup path: runs exactly once, on success and on
            # failure alike.
            self._stopProgressBeeps()
            self._is_processing = False

        return True

    except Exception as e:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error in OCR {ocrType}: {e}", True)
        return False

def _ocrActiveWindow(self, script=None, inputEvent=None):
    """OCR the active window; returns True if handled, False on error."""
    return self._runOCR("active window", "window", self._screenShotWindow)

def _ocrDesktop(self, script=None, inputEvent=None):
    """OCR the entire desktop; returns True if handled, False on error."""
    return self._runOCR("desktop", "desktop", self._screenShotDesktop)

def _ocrClipboard(self, script=None, inputEvent=None):
    """OCR the clipboard image; returns True if handled, False on error."""
    return self._runOCR("clipboard", "clipboard", self._readClipboard)
|
||||||
|
|
||||||
|
def _screenShotWindow(self):
    """Capture the active window into ``self._img``.

    The window geometry reported by Wnck is used to cut the window area
    out of the root window; its x/y origin is stored in
    ``self._offsetXpos`` / ``self._offsetYpos``.

    Returns:
        True when a screenshot was stored, False otherwise (GTK missing,
        no active window, capture failed, or an exception was logged).
    """
    if not GTK_AVAILABLE:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: GTK not available for screenshots", True)
        return False

    try:
        time.sleep(0.3)  # Brief delay
        rootWindow = Gdk.get_default_root_window()

        wnckScreen = Wnck.Screen.get_default()
        wnckScreen.force_update()
        activeWindow = wnckScreen.get_active_window()
        if not activeWindow:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: No active window found", True)
            return False

        xPos, yPos, width, height = activeWindow.get_geometry()
        self._offsetXpos, self._offsetYpos = xPos, yPos
        pixBuff = Gdk.pixbuf_get_from_window(rootWindow, self._offsetXpos, self._offsetYpos, width, height)

        if not pixBuff:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Failed to capture window screenshot", True)
            return False

        self._img = [self._pixbuf2image(pixBuff)]
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Window screenshot captured", True)
        return True

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error taking window screenshot: {err}", True)
        return False
|
||||||
|
|
||||||
|
def _screenShotDesktop(self):
    """Capture the whole root window into ``self._img``.

    Returns:
        True when a screenshot was stored, False otherwise (GTK missing,
        capture failed, or an exception was logged).
    """
    if not GTK_AVAILABLE:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: GTK not available for screenshots", True)
        return False

    try:
        time.sleep(0.3)  # Brief delay
        rootWindow = Gdk.get_default_root_window()
        pixBuff = Gdk.pixbuf_get_from_window(rootWindow, 0, 0, rootWindow.get_width(), rootWindow.get_height())

        if not pixBuff:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Failed to capture desktop screenshot", True)
            return False

        self._img = [self._pixbuf2image(pixBuff)]
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Desktop screenshot captured", True)
        return True

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error taking desktop screenshot: {err}", True)
        return False
|
||||||
|
|
||||||
|
def _readClipboard(self):
    """Load an image from the clipboard into ``self._img``.

    Returns:
        True when the clipboard held an image and it was stored, False
        otherwise (GTK missing, no image, or an exception was logged).
    """
    if not GTK_AVAILABLE:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: GTK not available for clipboard", True)
        return False

    try:
        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        pixBuff = clipboard.wait_for_image()

        if not pixBuff:
            debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: No image found in clipboard", True)
            return False

        self._img = [self._pixbuf2image(pixBuff)]
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Image read from clipboard", True)
        return True

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error reading clipboard: {err}", True)
        return False
|
||||||
|
|
||||||
|
def _pixbuf2image(self, pix):
    """Convert a GdkPixbuf into a PIL image.

    The pixel buffer is decoded with PIL's raw decoder as "RGBA" when the
    pixbuf reports an alpha channel and as "RGB" otherwise, using the
    pixbuf's row stride.
    """
    pixels = pix.get_pixels()
    size = (pix.props.width, pix.props.height)
    rowstride = pix.props.rowstride
    mode = "RGBA" if pix.props.has_alpha else "RGB"
    return Image.frombytes(mode, size, pixels, "raw", mode, rowstride)
|
||||||
|
|
||||||
|
def _scaleImg(self, img):
    """Return *img* enlarged by ``self._scaleFactor`` using bicubic resampling.

    Upscaling before recognition is done to improve OCR results. The
    target size is truncated to whole pixels so a non-integer scale
    factor is also accepted.
    """
    width, height = img.size
    scaledSize = (int(width * self._scaleFactor), int(height * self._scaleFactor))

    # Pillow < 9.1 has no Image.Resampling enum; the module-level
    # Image.BICUBIC constant exists there, so fall back to the module.
    resampling = getattr(Image, "Resampling", Image)
    return img.resize(scaledSize, resampling.BICUBIC)
|
||||||
|
|
||||||
|
def _transformImg(self, img):
|
||||||
|
"""Transform image with various filters for better OCR."""
|
||||||
|
modifiedImg = self._scaleImg(img)
|
||||||
|
|
||||||
|
if self._invertImg:
|
||||||
|
modifiedImg = ImageOps.invert(modifiedImg)
|
||||||
|
if self._grayscaleImg:
|
||||||
|
modifiedImg = ImageOps.grayscale(modifiedImg)
|
||||||
|
if self._blackWhiteImg:
|
||||||
|
lut = [255 if v > self._blackWhiteImgValue else 0 for v in range(256)]
|
||||||
|
modifiedImg = modifiedImg.point(lut)
|
||||||
|
|
||||||
|
return modifiedImg
|
||||||
|
|
||||||
|
def _performOCR(self):
    """Run Tesseract over every image in ``self._img``.

    The recognised text of all images is concatenated (one trailing
    newline per image), cleaned up and stored in ``self._OCRText``. A
    recognition error on one image is logged and that image is skipped.
    """
    # Reset first, so an early return never leaves the text of a previous
    # run behind for _presentOCRResult() to read out.
    self._OCRText = ''

    if not PYTESSERACT_AVAILABLE:
        debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Tesseract not available", True)
        return

    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: Starting OCR", True)

    chunks = []
    for img in self._img:
        modifiedImg = self._transformImg(img)
        try:
            # Simple text extraction
            chunks.append(
                pytesseract.image_to_string(modifiedImg, lang=self._languageCode, config='--psm 4'))
        except Exception as e:
            debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: OCR error: {e}", True)

    self._OCRText = ''.join(chunk + '\n' for chunk in chunks)

    # Clean up text
    self._cleanOCRText()
    debug.printMessage(debug.LEVEL_INFO, "OCRDesktop: OCR completed", True)
|
||||||
|
|
||||||
|
def _cleanOCRText(self):
|
||||||
|
"""Clean up OCR text output."""
|
||||||
|
# Remove multiple spaces
|
||||||
|
regexSpace = re.compile('[^\S\r\n]{2,}')
|
||||||
|
self._OCRText = regexSpace.sub(' ', self._OCRText)
|
||||||
|
|
||||||
|
# Remove empty lines
|
||||||
|
regexSpace = re.compile('\n\s*\n')
|
||||||
|
self._OCRText = regexSpace.sub('\n', self._OCRText)
|
||||||
|
|
||||||
|
# Remove trailing spaces
|
||||||
|
regexSpace = re.compile('\s*\n')
|
||||||
|
self._OCRText = regexSpace.sub('\n', self._OCRText)
|
||||||
|
|
||||||
|
# Remove leading spaces
|
||||||
|
regexSpace = re.compile('^\s')
|
||||||
|
self._OCRText = regexSpace.sub('', self._OCRText)
|
||||||
|
|
||||||
|
# Remove trailing newlines
|
||||||
|
self._OCRText = self._OCRText.strip()
|
||||||
|
|
||||||
|
def _presentOCRResult(self):
    """Present the recognised text, or a "nothing found" notice.

    The message is presented through the active script when one is
    available; the length of the recognised text is written to the debug
    log. Exceptions are logged and not propagated.
    """
    try:
        if self._OCRText.strip():
            message = f"OCR result: {self._OCRText}"
        else:
            message = "No text found in OCR scan"

        state = None
        if self.app:
            state = self.app.getDynamicApiManager().getAPI('CthulhuState')
        script = state.activeScript if state else None
        if script:
            script.presentMessage(message, resetStyles=False)

        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Presented result: {len(self._OCRText)} characters", True)

    except Exception as err:
        debug.printMessage(debug.LEVEL_INFO, f"OCRDesktop: Error presenting result: {err}", True)
|
@@ -5,6 +5,7 @@ subdir('Clipboard')
|
|||||||
subdir('DisplayVersion')
|
subdir('DisplayVersion')
|
||||||
subdir('HelloCthulhu')
|
subdir('HelloCthulhu')
|
||||||
subdir('IndentationAudio')
|
subdir('IndentationAudio')
|
||||||
|
subdir('OCRDesktop')
|
||||||
subdir('PluginManager')
|
subdir('PluginManager')
|
||||||
subdir('SimplePluginSystem')
|
subdir('SimplePluginSystem')
|
||||||
subdir('hello_world')
|
subdir('hello_world')
|
||||||
|
@@ -431,7 +431,7 @@ presentChatRoomLast = False
|
|||||||
presentLiveRegionFromInactiveTab = False
|
presentLiveRegionFromInactiveTab = False
|
||||||
|
|
||||||
# Plugins
|
# Plugins
|
||||||
activePlugins = ['AIAssistant', 'DisplayVersion', 'PluginManager', 'HelloCthulhu', 'ByeCthulhu']
|
activePlugins = ['AIAssistant', 'DisplayVersion', 'OCRDesktop', 'PluginManager', 'HelloCthulhu', 'ByeCthulhu']
|
||||||
|
|
||||||
# AI Assistant settings (disabled by default for opt-in behavior)
|
# AI Assistant settings (disabled by default for opt-in behavior)
|
||||||
aiAssistantEnabled = True
|
aiAssistantEnabled = True
|
||||||
|
Reference in New Issue
Block a user