#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This file is part of I38.

# I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.

# I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with I38. If not, see <https://www.gnu.org/licenses/>.

"""
Simple OCR Screen Reader
A lightweight tool that performs OCR on the screen and speaks the results
"""

import os
import subprocess
import sys
import tempfile
import time

from PIL import Image, ImageOps
import pytesseract


def _load_screenshot(path, max_retries, initial_delay):
    """Load and fully decode the screenshot at *path*, retrying with backoff.

    Args:
        path: Location scrot was asked to write the screenshot to
        max_retries: Maximum number of attempts to read the image
        initial_delay: Delay in seconds before the first attempt (doubles
            before each further attempt)

    Returns:
        An in-memory copy of the image that no longer depends on the file.

    Raises:
        RuntimeError: If no valid image could be read within max_retries
            attempts.
    """
    delay = initial_delay
    invalid_error = None

    for _ in range(max_retries):
        time.sleep(delay)
        delay *= 2
        invalid_error = None

        # Skip decoding while the file is still missing or empty
        if not (os.path.exists(path) and os.path.getsize(path) > 0):
            continue

        try:
            with Image.open(path) as img:
                # copy() forces a full decode, so a truncated file is
                # detected here, and the result holds no open file handle
                return img.copy()
        except OSError as e:
            # Image exists but isn't valid yet; retry after a longer delay
            invalid_error = e

    if invalid_error is not None:
        raise RuntimeError(
            f"Image file exists but is not valid after {max_retries} attempts"
        ) from invalid_error
    raise RuntimeError(
        f"Screenshot file not created properly after {max_retries} attempts"
    )


def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot with robust checking and retries

    Args:
        max_retries: Maximum number of attempts to read the image
        initial_delay: Initial delay in seconds (will increase with retries)

    Returns:
        The captured screenshot as a fully loaded PIL image.

    Raises:
        Exception: If scrot fails or no valid screenshot could be read; the
            error is printed before being re-raised.
    """
    # A private temporary directory guarantees the target path does not exist
    # yet (so a stale capture can never be picked up) and is removed, together
    # with the screenshot, when the block exits - even if an error occurs.
    with tempfile.TemporaryDirectory(prefix="ocr_capture_") as temp_dir:
        temp_file = os.path.join(temp_dir, "ocr_capture.png")
        try:
            # Capture the screen
            subprocess.run(["scrot", temp_file], check=True)
            return _load_screenshot(temp_file, max_retries, initial_delay)
        except Exception as e:
            print(f"Error capturing screen: {e}")
            raise


def process_image(img, scale_factor=1.5):
    """Process the image to improve OCR accuracy

    Args:
        img: PIL image to prepare for OCR
        scale_factor: Factor applied to both width and height; 1 disables
            scaling

    Returns:
        The scaled, grayscale, auto-contrasted image.
    """
    # Scale the image to improve OCR
    if scale_factor != 1:
        width, height = img.size
        new_size = (int(width * scale_factor), int(height * scale_factor))
        img = img.resize(new_size, Image.Resampling.BICUBIC)

    # Convert to grayscale for faster processing
    img = ImageOps.grayscale(img)

    # Improve contrast for better text recognition
    img = ImageOps.autocontrast(img)

    return img


def perform_ocr(img, lang='eng'):
    """Perform OCR on the image

    Args:
        img: PIL image to recognise text in
        lang: Tesseract language code

    Returns:
        The recognised text as returned by pytesseract.
    """
    # Use tesseract with optimized settings
    # --oem 1: Use LSTM OCR Engine
    # --psm 6: Assume a single uniform block of text
    text = pytesseract.image_to_string(img, lang=lang, config='--oem 1 --psm 6')

    return text


def _say(message):
    """Speak *message* through speech-dispatcher's spd-say command."""
    # "--" ends option parsing so a message starting with "-" (common in OCR
    # output) is spoken instead of being interpreted as a spd-say option.
    subprocess.run(["spd-say", "-Cw", "--", message])


def speak_text(text):
    """Speak the text using speech-dispatcher

    Args:
        text: Raw OCR output; blank lines are dropped and the remaining lines
            are joined with single spaces before speaking
    """
    # Filter out empty lines and clean up the text
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    cleaned_text = ' '.join(lines)

    # Use speech-dispatcher to speak the text
    if cleaned_text:
        _say(cleaned_text)
    else:
        _say("No text detected")


def main():
    """Capture the screen, run OCR on it and speak the recognised text."""
    # Limit tesseract thread usage to improve performance on Pi
    os.environ["OMP_THREAD_LIMIT"] = "4"

    try:
        # Announce start
        _say("performing OCR")

        # Capture screen
        img = capture_screen()

        # Process image
        processed_img = process_image(img, scale_factor=1.5)

        # Perform OCR
        text = perform_ocr(processed_img)

        # Speak the results
        speak_text(text)

    except Exception as e:
        # Let the user know something went wrong
        error_msg = f"Error during OCR: {str(e)}"
        print(error_msg)
        try:
            _say("OCR failed")
        except Exception:
            # Deliberate best effort: if even speech fails, at least we tried
            pass


if __name__ == "__main__":
    main()