I38/scripts/ocr.py
2025-05-31 20:13:34 -04:00

169 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This file is part of I38.
# I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with I38. If not, see <https://www.gnu.org/licenses/>.
"""
Simple OCR Screen Reader
A lightweight tool that performs OCR on the screen and speaks the results
"""
import os
import subprocess
import sys
import tempfile
import time

import pyperclip
import pytesseract
from PIL import Image, ImageOps
def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot with robust checking and retries

    The screenshot is written into a private temporary directory instead of a
    fixed, shared path, so a stale or foreign file can never be read back and
    concurrent runs cannot clobber each other. The image is fully decoded
    into memory before the directory is removed, so the returned image does
    not depend on any file on disk.

    Args:
        max_retries: Maximum number of attempts to read the image
        initial_delay: Initial delay in seconds (doubles after each failed attempt)

    Returns:
        An in-memory PIL image of the screen.

    Raises:
        subprocess.CalledProcessError: scrot exited with a non-zero status.
        OSError: scrot could not be started (e.g. it is not installed).
        RuntimeError: no readable screenshot was available after
            ``max_retries`` attempts.
    """
    try:
        # The directory (and the screenshot inside it) is removed when the
        # ``with`` block exits, on success and on error alike.
        with tempfile.TemporaryDirectory(prefix="ocr_capture_") as temp_dir:
            # scrot is pointed at a path that does not exist yet, so it
            # never has to decide whether to overwrite an existing file.
            temp_file = os.path.join(temp_dir, "capture.png")
            subprocess.run(["scrot", temp_file], check=True)

            delay = initial_delay
            failure = "Screenshot file not created properly"
            cause = None
            for _ in range(max_retries):
                time.sleep(delay)
                if os.path.exists(temp_file) and os.path.getsize(temp_file) > 0:
                    try:
                        with Image.open(temp_file) as img:
                            # copy() decodes the pixel data, which both
                            # validates the file and detaches the result
                            # from the soon-to-be-deleted file.
                            return img.copy()
                    except (OSError, SyntaxError) as e:
                        # Unreadable or truncated image data; the file may
                        # not be complete yet, so try again later.
                        failure = "Image file exists but is not valid"
                        cause = e
                else:
                    failure = "Screenshot file not created properly"
                    cause = None
                # Increase delay exponentially for the next attempt
                delay *= 2
            raise RuntimeError(f"{failure} after {max_retries} attempts") from cause
    except Exception as e:
        print(f"Error capturing screen: {e}")
        raise
def process_image(img, scale_factor=1.5):
    """
    Prepare a screenshot for OCR

    The image is optionally rescaled with bicubic resampling, then converted
    to grayscale and contrast-stretched.

    Args:
        img: PIL image to process
        scale_factor: Multiplier applied to both dimensions; 1 skips resizing

    Returns:
        The processed grayscale PIL image.
    """
    if scale_factor == 1:
        scaled = img
    else:
        # Enlarging the capture gives Tesseract bigger glyphs to work with
        src_width, src_height = img.size
        target_size = (int(src_width * scale_factor), int(src_height * scale_factor))
        scaled = img.resize(target_size, Image.Resampling.BICUBIC)
    # Grayscale first, then stretch the contrast for better text recognition
    return ImageOps.autocontrast(ImageOps.grayscale(scaled))
def perform_ocr(img, lang='eng'):
    """
    Run Tesseract on the image and return the recognised text

    Args:
        img: Image to recognise
        lang: Tesseract language code

    Returns:
        The text returned by pytesseract.
    """
    # Tesseract settings:
    #   --oem 1  use the LSTM OCR engine
    #   --psm 6  assume a single uniform block of text
    tesseract_config = '--oem 1 --psm 6'
    return pytesseract.image_to_string(img, lang=lang, config=tesseract_config)
def copy_to_clipboard(text):
    """
    Copy the non-blank lines of text to the clipboard using pyperclip

    Each line is stripped of surrounding whitespace and blank lines are
    dropped; the remaining lines keep their line breaks.

    Args:
        text: Text to clean up and copy

    Returns:
        True if something was copied, False if the text was blank or an
        error occurred (the error is printed).
    """
    try:
        stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
        # Newline-joined so the clipboard keeps the original line structure
        payload = '\n'.join(piece for piece in stripped_lines if piece)
        if not payload:
            return False
        pyperclip.copy(payload)
        return True
    except Exception as e:
        print(f"Error copying to clipboard: {e}")
        return False
def speak_text(text):
    """
    Speak the text using speech-dispatcher

    Lines are stripped, blank lines are dropped, and the rest is joined with
    single spaces into one utterance. If nothing remains, "No text detected"
    is spoken instead.

    Args:
        text: Text to speak
    """
    # Filter out empty lines and clean up the text
    stripped_lines = (line.strip() for line in text.split('\n'))
    cleaned_text = ' '.join(line for line in stripped_lines if line)
    # "--" ends option parsing: recognised text that starts with a dash
    # (e.g. a bullet list) must be spoken, not parsed as spd-say options.
    subprocess.run(["spd-say", "-Cw", "--", cleaned_text or "No text detected"])
def main():
    """
    Run one OCR pass: announce, capture, process, recognise, copy, speak

    Any failure in the pipeline is printed and announced as "OCR failed"
    instead of being propagated to the caller.
    """
    # Limit tesseract thread usage to improve performance on Pi
    os.environ["OMP_THREAD_LIMIT"] = "4"
    try:
        # Announce start
        subprocess.run(["spd-say", "-Cw", "performing OCR"])
        img = capture_screen()
        processed_img = process_image(img, scale_factor=1.5)
        text = perform_ocr(processed_img)
        # Clipboard copy is best effort: its result does not gate speech
        copy_to_clipboard(text)
        speak_text(text)
    except Exception as e:
        # Let the user know something went wrong
        print(f"Error during OCR: {e}")
        try:
            subprocess.run(["spd-say", "-Cw", "OCR failed"])
        except Exception:
            # If even speech fails there is nothing more to do. Only
            # ordinary errors are swallowed, so Ctrl-C and SystemExit
            # still propagate.
            pass
# Run the OCR pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()