#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This file is part of I38.

# I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.

# I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with I38. If not, see <https://www.gnu.org/licenses/>.

"""
Simple OCR Screen Reader
A lightweight tool that performs OCR on the screen and speaks the results
"""

import os
import subprocess
import sys
import tempfile
import time

import pyperclip
import pytesseract
from PIL import Image, ImageOps


def _clean_lines(text):
    """Return the stripped, non-empty lines of ``text`` in their original order."""
    stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
    return [line for line in stripped_lines if line]


def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot with robust checking and retries

    The screenshot is written into a private temporary directory, decoded
    fully into memory, and the directory is removed before returning, so the
    returned image does not depend on any file on disk.

    Args:
        max_retries: Maximum number of attempts to read the image
        initial_delay: Initial delay in seconds (doubles after each failed attempt)

    Returns:
        A fully loaded PIL image of the screen.

    Raises:
        subprocess.CalledProcessError: If scrot exits with a non-zero status.
        RuntimeError: If no valid screenshot could be read after max_retries attempts.
    """
    try:
        # A private directory avoids a predictable, shared path in /tmp and
        # guarantees scrot writes to a file name that does not exist yet.
        with tempfile.TemporaryDirectory(prefix="ocr_capture_") as temp_dir:
            temp_file = os.path.join(temp_dir, "capture.png")

            # Capture the screen
            subprocess.run(["scrot", temp_file], check=True)

            delay = initial_delay
            decode_error = None
            for _ in range(max_retries):
                time.sleep(delay)

                if os.path.exists(temp_file) and os.path.getsize(temp_file) > 0:
                    try:
                        with Image.open(temp_file) as img:
                            # Image.open is lazy: load() decodes the pixel data,
                            # which both validates the file and lets us return
                            # an in-memory copy that outlives the temp file.
                            img.load()
                            return img.copy()
                    except OSError as e:
                        # Image exists but isn't valid (yet); retry
                        decode_error = e
                else:
                    # File doesn't exist or is empty
                    decode_error = None

                # Increase delay exponentially for next attempt
                delay *= 2

            if decode_error is not None:
                raise RuntimeError(
                    f"Image file exists but is not valid after {max_retries} attempts"
                ) from decode_error
            raise RuntimeError(
                f"Screenshot file not created properly after {max_retries} attempts"
            )
    except Exception as e:
        print(f"Error capturing screen: {e}", file=sys.stderr)
        raise


def process_image(img, scale_factor=1.5):
    """
    Process the image to improve OCR accuracy

    Args:
        img: PIL image to process
        scale_factor: Multiplier applied to width and height; 1 disables scaling

    Returns:
        The scaled, grayscale, auto-contrasted PIL image.
    """
    # Scale the image to improve OCR
    if scale_factor != 1:
        width, height = img.size
        # Image.Resampling only exists on Pillow >= 9.1; older releases expose
        # the same constant directly on the Image module.
        resampling = getattr(Image, "Resampling", Image)
        img = img.resize(
            (int(width * scale_factor), int(height * scale_factor)),
            resampling.BICUBIC,
        )

    # Convert to grayscale for faster processing
    img = ImageOps.grayscale(img)

    # Improve contrast for better text recognition
    img = ImageOps.autocontrast(img)

    return img


def perform_ocr(img, lang='eng'):
    """
    Perform OCR on the image

    Args:
        img: PIL image to recognize
        lang: Tesseract language code

    Returns:
        The recognized text as returned by pytesseract.
    """
    # Use tesseract with optimized settings
    # --oem 1: Use LSTM OCR Engine
    # --psm 6: Assume a single uniform block of text
    return pytesseract.image_to_string(img, lang=lang, config='--oem 1 --psm 6')


def copy_to_clipboard(text):
    """
    Copy text to clipboard using pyperclip

    Empty lines are dropped and each line is stripped; line breaks between the
    remaining lines are preserved.

    Returns:
        True if non-empty text was copied, False if there was nothing to copy
        or the clipboard operation failed.
    """
    try:
        cleaned_text = '\n'.join(_clean_lines(text))  # Preserve line breaks for clipboard

        if not cleaned_text:
            return False

        pyperclip.copy(cleaned_text)
        return True
    except Exception as e:
        # Clipboard failure must not prevent the text from being spoken
        print(f"Error copying to clipboard: {e}", file=sys.stderr)
        return False


def speak_text(text):
    """
    Speak the text using speech-dispatcher

    Non-empty lines are joined with spaces; if nothing remains,
    "No text detected" is spoken instead.
    """
    cleaned_text = ' '.join(_clean_lines(text))
    message = cleaned_text if cleaned_text else "No text detected"

    # "--" ends option parsing so OCR text that starts with "-" is spoken
    # rather than being interpreted as a spd-say option.
    subprocess.run(["spd-say", "-Cw", "--", message])


def main():
    """Announce, capture the screen, OCR it, copy the text and speak it."""
    # Limit tesseract thread usage to improve performance on Pi
    os.environ["OMP_THREAD_LIMIT"] = "4"

    try:
        # Announce start
        subprocess.run(["spd-say", "-Cw", "performing OCR"])

        # Capture screen
        img = capture_screen()

        # Process image
        processed_img = process_image(img, scale_factor=1.5)

        # Perform OCR
        text = perform_ocr(processed_img)

        # Copy to clipboard (best effort; failures are reported by the callee)
        copy_to_clipboard(text)

        # Speak the results
        speak_text(text)

    except Exception as e:
        # Let the user know something went wrong
        print(f"Error during OCR: {str(e)}", file=sys.stderr)
        try:
            subprocess.run(["spd-say", "-Cw", "OCR failed"])
        except Exception:
            # If even speech fails, at least we tried
            pass


if __name__ == "__main__":
    main()