Compare commits

..

2 Commits
v3.0 ... master

5 changed files with 194 additions and 7 deletions

2
I38.md
View File

@ -151,7 +151,7 @@ If you've enabled braille display support during setup, I38 will start XBrlAPI a
### OCR (Optical Character Recognition) ### OCR (Optical Character Recognition)
If installed, you can use OCR to read text from images or inaccessible applications: If required dependencies are installed, you can use OCR to read text from images or inaccessible applications:
- `MODKEY` + `F5`: Perform OCR on the entire screen and speak the content - `MODKEY` + `F5`: Perform OCR on the entire screen and speak the content
- In Ratpoison mode: `Print` or `MODKEY` + `r`: Perform OCR and save to clipboard - In Ratpoison mode: `Print` or `MODKEY` + `r`: Perform OCR and save to clipboard

View File

@ -25,15 +25,19 @@ An uppercase I looks like a 1, 3 from i3, and 8 because the song [We Are 138](ht
- lxsession: [optional] For GUI power options like shutdown - lxsession: [optional] For GUI power options like shutdown
- magic-wormhole: [optional] for file sharing with magic-wormhole GUI - magic-wormhole: [optional] for file sharing with magic-wormhole GUI
- notification-daemon: To handle notifications - notification-daemon: To handle notifications
- ocrdesktop: For getting contents of the current window with OCR.
- pamixer: for the mute-unmute script - pamixer: for the mute-unmute script
- pandoc or markdown: To generate html files. - pandoc or markdown: To generate html files.
- pcmanfm: [optional] Graphical file manager. - pcmanfm: [optional] Graphical file manager.
- playerctl: music controls - playerctl: music controls
- python-gobject: for applications menu. - python-gobject: for applications menu.
- python-i3ipc: for sounds etc. - python-i3ipc: for sounds etc.
- python-pillow: For OCR
- python-pytesseract: For OCR
- remind: [optional] For reminder notifications, Requires notify-daemon and notify-send for automatic reminders. - remind: [optional] For reminder notifications, Requires notify-daemon and notify-send for automatic reminders.
- scrot: For OCR
- sox: for sounds. - sox: for sounds.
- tesseract: For OCR
- tesseract-data-eng: For OCR
- transfersh: [optional] for file sharing GUI - transfersh: [optional] for file sharing GUI
- udiskie: [optional] for automatically mounting removable storage - udiskie: [optional] for automatically mounting removable storage
- x11bell: [optional] Bell support if you do not have a PC speaker. Available from https://github.com/jovanlanik/x11bell - x11bell: [optional] Bell support if you do not have a PC speaker. Available from https://github.com/jovanlanik/x11bell

13
i38.sh
View File

@ -516,9 +516,13 @@ bindsym Mod1+Tab focus right
bindsym \$mod+BackSpace fullscreen toggle bindsym \$mod+BackSpace fullscreen toggle
# move the currently focused window to the scratchpad # Move the currently focused window to the scratchpad
bindsym \$mod+Shift+minus move scratchpad bindsym \$mod+Shift+minus move scratchpad
# Bind the currently focused window to the scratchpad
# This means it will always open in the scratchpad
bindsym \$mod+Shift+equal exec --no-startup-id ${i3Path}/scripts/bind_to_scratchpad.sh
# Show the next scratchpad window or hide the focused scratchpad window. # Show the next scratchpad window or hide the focused scratchpad window.
# If there are multiple scratchpad windows, this command cycles through them. # If there are multiple scratchpad windows, this command cycles through them.
bindsym \$mod+minus scratchpad show bindsym \$mod+minus scratchpad show
@ -572,10 +576,8 @@ bindsym $mod+Shift+BackSpace mode "default"
EOF EOF
# ocrdesktop through speech-dispatcher # ocr through speech-dispatcher
if command -v ocrdesktop &> /dev/null ; then echo "bindsym ${mod}+F5 exec ${i3Path}/scripts/ocr.py" >> ${i3Path}/config
echo "bindsym ${mod}+F5 exec bash -c 'spd-say -Cw \"performing O C R\" && ocrdesktop -cnog | spd-say -e --'" >> ${i3Path}/config
fi
# Interrupt speech-dispatcher output # Interrupt speech-dispatcher output
echo "bindsym ${mod}+Shift+F5 exec spd-say -C" >> ${i3Path}/config echo "bindsym ${mod}+Shift+F5 exec spd-say -C" >> ${i3Path}/config
@ -770,6 +772,7 @@ exec --no-startup-id bash -c 'if [[ -f "${i3Path}/firstrun" ]]; then ${webBrowse
include "${i3Path}/customizations" include "${i3Path}/customizations"
EOF EOF
touch "${i3Path}/customizations" touch "${i3Path}/customizations"
touch "${i3Path}/scratchpad"
# Check for markdown or pandoc for converting the welcome document # Check for markdown or pandoc for converting the welcome document
if command -v pandoc &> /dev/null ; then if command -v pandoc &> /dev/null ; then
pandoc -f markdown -t html "I38.md" -so "${i3Path}/I38.html" --metadata title="Welcome to I38" pandoc -f markdown -t html "I38.md" -so "${i3Path}/I38.html" --metadata title="Welcome to I38"

32
scripts/bind_to_scratchpad.sh Executable file
View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Remember the class of the currently focused window in the window manager's
# "scratchpad" file as a for_window rule, then move every window of that
# class to the scratchpad right away.

# Find out if we're using i3: without WAYLAND_DISPLAY we talk to i3,
# otherwise to sway. The rule file lives in the matching config directory.
if [[ -z "${WAYLAND_DISPLAY}" ]]; then
    cmd="i3-msg"
    scratchConfig="${XDG_CONFIG_HOME:-$HOME/.config}/i3"
else
    cmd="swaymsg"
    scratchConfig="${XDG_CONFIG_HOME:-$HOME/.config}/sway"
fi
scratchConfig+="/scratchpad"
# Make sure the rule file exists so grep below does not fail on a missing file
touch "${scratchConfig}"

# Get the focused window ID
windowId=$(xdotool getactivewindow)

# Get the class name of the window: the second quoted value that xprop
# prints for WM_CLASS (field 4 when splitting on double quotes)
class=$(xprop -id "$windowId" WM_CLASS | awk -F '"' '{print $4}')
if [[ -z "$class" ]]; then
    notify-send "Unable to move to scratchpad."
    exit 1
fi

# Check if it's already in the config.
# -F compares the class literally: class names often contain regex
# metacharacters such as "." that could otherwise match a different entry.
# "--" keeps a pattern from ever being parsed as a grep option.
if ! grep -qF -- "class=\"$class\"" "$scratchConfig"; then
    echo "for_window [class=\"$class\"] move to scratchpad" >> "$scratchConfig"
    notify-send "Added window class $class to scratchpad"
fi

# Move the window to scratchpad now
"$cmd" "[class=\"$class\"] move to scratchpad"

148
scripts/ocr.py Executable file
View File

@ -0,0 +1,148 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This file is part of I38.
# I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with I38. If not, see <https://www.gnu.org/licenses/>.
"""
Simple OCR Screen Reader
A lightweight tool that performs OCR on the screen and speaks the results
"""
import os
import subprocess
import sys
import tempfile
import time

import pytesseract
from PIL import Image, ImageOps
def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot and return it as a fully loaded PIL image.

    The screenshot is written into a private temporary directory instead of
    a fixed, predictable path in /tmp. That way a stale or concurrently
    written file can never be picked up by mistake, and scrot always starts
    from a path that does not exist yet. The pixel data is loaded into
    memory before the directory is removed, so the returned image does not
    depend on the file any more.

    Args:
        max_retries: Maximum number of attempts to read the image
        initial_delay: Delay in seconds before the second attempt
            (doubles after every further failed attempt)

    Returns:
        The captured screen as a PIL image with its data loaded.

    Raises:
        subprocess.CalledProcessError: scrot exited with a non-zero status.
        OSError: scrot could not be started.
        RuntimeError: No valid screenshot could be read after max_retries
            attempts (also raised when max_retries is zero or negative).
    """
    try:
        with tempfile.TemporaryDirectory(prefix="i38-ocr-") as temp_dir:
            temp_file = os.path.join(temp_dir, "capture.png")

            # Capture the screen; run() only returns once scrot has exited
            subprocess.run(["scrot", temp_file], check=True)

            delay = initial_delay
            invalid_image_error = None
            for attempt in range(max_retries):
                # The first check happens immediately; only retries wait,
                # with an exponentially growing delay.
                if attempt > 0:
                    time.sleep(delay)
                    delay *= 2

                # File doesn't exist or is empty: try again later
                if not (os.path.exists(temp_file)
                        and os.path.getsize(temp_file) > 0):
                    invalid_image_error = None
                    continue

                try:
                    with Image.open(temp_file) as img:
                        # Decoding the whole image is the real validity
                        # check and detaches the image from the file.
                        img.load()
                    return img
                except OSError as e:
                    # Image exists but isn't valid (yet)
                    invalid_image_error = e

            if invalid_image_error is not None:
                raise RuntimeError(
                    f"Image file exists but is not valid after {max_retries} attempts"
                ) from invalid_image_error
            raise RuntimeError(
                f"Screenshot file not created properly after {max_retries} attempts"
            )
    except Exception as e:
        print(f"Error capturing screen: {e}")
        raise
def process_image(img, scale_factor=1.5):
    """
    Prepare a screenshot for OCR.

    Args:
        img: PIL image to process
        scale_factor: Multiplier applied to width and height; a value of 1
            skips the resize step

    Returns:
        A grayscale, contrast-stretched (and possibly resized) PIL image.
    """
    if scale_factor != 1:
        # Resize with bicubic resampling; target dimensions are truncated
        # to whole pixels
        new_size = tuple(int(dimension * scale_factor) for dimension in img.size)
        img = img.resize(new_size, Image.Resampling.BICUBIC)

    # Grayscale first, then stretch the contrast of the grayscale result
    return ImageOps.autocontrast(ImageOps.grayscale(img))
def perform_ocr(img, lang='eng'):
    """
    Run tesseract on the image and return the recognized text.

    Args:
        img: PIL image to recognize
        lang: Tesseract language code

    Returns:
        The text reported by pytesseract as a string.
    """
    # Tesseract settings:
    #   --oem 1: Use LSTM OCR Engine
    #   --psm 6: Assume a single uniform block of text
    tesseract_config = '--oem 1 --psm 6'
    return pytesseract.image_to_string(img, lang=lang, config=tesseract_config)
def speak_text(text):
    """
    Speak the text using speech-dispatcher.

    Blank lines are dropped and the remaining lines are stripped and joined
    with single spaces. If nothing is left, "No text detected" is spoken
    instead.

    Args:
        text: Raw OCR output, possibly spanning several lines
    """
    # Filter out empty lines and clean up the text
    stripped_lines = (line.strip() for line in text.split('\n'))
    cleaned_text = ' '.join(line for line in stripped_lines if line)

    message = cleaned_text or "No text detected"

    # "--" ends option parsing: recognized text may start with "-" and
    # must be spoken, not interpreted as a spd-say command line option.
    subprocess.run(["spd-say", "-Cw", "--", message])
def main():
    """
    Run one OCR pass: announce, capture, preprocess, recognize and speak.

    Any failure is reported on stderr and announced as "OCR failed".

    Returns:
        0 on success, 1 if any step failed.
    """
    # Limit tesseract thread usage to improve performance on Pi
    os.environ["OMP_THREAD_LIMIT"] = "4"
    try:
        # Announce start
        subprocess.run(["spd-say", "-Cw", "performing OCR"])
        # Capture screen
        img = capture_screen()
        # Process image
        processed_img = process_image(img, scale_factor=1.5)
        # Perform OCR
        text = perform_ocr(processed_img)
        # Speak the results
        speak_text(text)
    except Exception as e:
        # Let the user know something went wrong
        print(f"Error during OCR: {e}", file=sys.stderr)
        try:
            subprocess.run(["spd-say", "-Cw", "OCR failed"])
        except Exception:
            # Best effort only: if even speech fails there is nothing left
            # to report with. Unlike a bare except, this does not swallow
            # KeyboardInterrupt or SystemExit.
            pass
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())