#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simple OCR Screen Reader

A lightweight tool that performs OCR on the screen and speaks the results.
Optimized for Arch Linux ARM on Raspberry Pi with DWM.

External programs used: ``scrot`` (screen capture), ``tesseract`` (through
pytesseract) and ``spd-say`` (speech-dispatcher client).
"""

import os
import subprocess
import sys
import time

from PIL import Image, ImageOps
import pytesseract


def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot and return it as an in-memory image.

    The screenshot is written to a temporary file, validated, copied into
    memory and the temporary file is always removed before returning.

    Args:
        max_retries: Maximum number of attempts to read the image. Values
            below 1 are treated as 1 so the capture is always checked once.
        initial_delay: Delay in seconds before the first retry; it doubles
            after every further retry.

    Returns:
        A fully loaded PIL image that does not depend on the temporary file.

    Raises:
        subprocess.CalledProcessError: If scrot exits with a non-zero status.
        OSError: If scrot cannot be started or the stale file cannot be
            removed.
        RuntimeError: If no valid screenshot could be read after all attempts.
    """
    temp_file = "/tmp/ocr_capture.png"
    attempts = max(1, max_retries)

    try:
        # scrot does not overwrite an existing file unless asked to, so a
        # leftover capture from an interrupted run would be read instead of
        # a fresh one. Remove it before capturing.
        if os.path.exists(temp_file):
            os.remove(temp_file)

        subprocess.run(["scrot", temp_file], check=True)

        delay = initial_delay
        failure = ""
        cause = None
        for attempt in range(attempts):
            # subprocess.run() has already waited for scrot to exit, so the
            # first check happens immediately; only retries back off.
            if attempt:
                time.sleep(delay)
                delay *= 2

            if not (os.path.exists(temp_file) and os.path.getsize(temp_file) > 0):
                failure = (
                    f"Screenshot file not created properly after {max_retries} attempts"
                )
                cause = None
                continue

            try:
                with Image.open(temp_file) as img:
                    # Image.open() is lazy; copy() forces the pixel data to be
                    # decoded so the result survives deletion of the file.
                    return img.copy()
            except OSError as err:
                # The file exists but is not (yet) a readable image.
                failure = (
                    f"Image file exists but is not valid after {max_retries} attempts"
                )
                cause = err

        raise RuntimeError(failure) from cause
    except Exception as e:
        print(f"Error capturing screen: {e}", file=sys.stderr)
        raise
    finally:
        # Ensure the temporary file is removed even if an error occurs.
        if os.path.exists(temp_file):
            os.remove(temp_file)


def process_image(img, scale_factor=1.5):
    """
    Prepare an image for OCR: grayscale, optional upscaling, autocontrast.

    Args:
        img: Source PIL image.
        scale_factor: Multiplier applied to both dimensions; 1 disables
            resizing. Must be positive.

    Returns:
        A new grayscale PIL image with stretched contrast.

    Raises:
        ValueError: If scale_factor is not positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    # Convert to grayscale first: resampling a single channel is cheaper than
    # resampling the full-colour image and discarding the colour afterwards.
    img = ImageOps.grayscale(img)

    if scale_factor != 1:
        width, height = img.size
        new_size = (
            max(1, int(width * scale_factor)),
            max(1, int(height * scale_factor)),
        )
        # Image.Resampling exists on Pillow >= 9.1; older releases expose the
        # same constant directly on the Image module.
        bicubic = getattr(Image, "Resampling", Image).BICUBIC
        img = img.resize(new_size, bicubic)

    # Improve contrast for better text recognition.
    return ImageOps.autocontrast(img)


def perform_ocr(img, lang='eng', config='--oem 1 --psm 6'):
    """
    Perform OCR on the image and return the recognised text.

    Args:
        img: PIL image to recognise.
        lang: Tesseract language code.
        config: Extra tesseract command-line options. The default selects
            ``--oem 1`` (LSTM OCR engine) and ``--psm 6`` (assume a single
            uniform block of text).

    Returns:
        The text returned by pytesseract.
    """
    return pytesseract.image_to_string(img, lang=lang, config=config)


def _say(message):
    """Speak *message* with spd-say, cancelling earlier speech and waiting."""
    # "--" ends option parsing so text that begins with "-" (common in OCR
    # output) is spoken instead of being interpreted as spd-say options.
    subprocess.run(["spd-say", "-Cw", "--", message], check=False)


def speak_text(text):
    """
    Speak the text using speech-dispatcher.

    Blank lines are dropped and the remaining lines are stripped and joined
    with single spaces. If nothing is left, "No text detected" is spoken.

    Args:
        text: Raw OCR output.
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    cleaned_text = ' '.join(lines)
    _say(cleaned_text if cleaned_text else "No text detected")


def main():
    """
    Run one capture -> OCR -> speech cycle.

    Returns:
        0 on success, 1 if any step failed (the failure is also announced).
    """
    # Limit tesseract thread usage to improve performance on Pi.
    os.environ["OMP_THREAD_LIMIT"] = "4"

    try:
        # Announce start
        _say("performing OCR")

        img = capture_screen()
        processed_img = process_image(img, scale_factor=1.5)
        text = perform_ocr(processed_img)
        speak_text(text)
    except Exception as e:
        # Top-level boundary: report the problem and tell the user.
        print(f"Error during OCR: {e}", file=sys.stderr)
        try:
            _say("OCR failed")
        except Exception as speech_error:
            # Speech itself is unavailable; the message above is all we can do.
            print(f"Could not announce failure: {speech_error}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())