I38/scripts/ocr.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This file is part of I38.

# I38 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.

# I38 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with I38. If not, see <https://www.gnu.org/licenses/>.


"""
Simple OCR Screen Reader
A lightweight tool that performs OCR on the screen and speaks the results
"""

import os
import sys
import time
import subprocess
from PIL import Image, ImageOps
import pytesseract
import pyperclip

def capture_screen(max_retries=3, initial_delay=0.2):
    """
    Capture the screen using scrot with robust checking and retries

    The screenshot is written into a freshly created private temporary
    directory, decoded fully into memory, and the directory is removed before
    returning, so the returned image does not depend on any file on disk.

    Args:
        max_retries: Maximum number of attempts to read the image
        initial_delay: Initial delay in seconds (will increase with retries)

    Returns:
        A fully loaded PIL image of the screen.

    Raises:
        subprocess.CalledProcessError: If scrot exits with a non-zero status.
        Exception: If no valid screenshot could be read after max_retries
            attempts (including when max_retries is 0).
    """
    # Imported locally so the file's top-level import block stays unchanged.
    import tempfile

    try:
        # A unique private directory replaces the fixed /tmp path: concurrent
        # runs cannot clobber each other, a stale file from an earlier run can
        # never be mistaken for the new capture, and the target path never
        # pre-exists when scrot is asked to write to it.
        # NOTE(review): some scrot versions reportedly refuse to overwrite an
        # existing file unless told to - confirm against the installed version.
        with tempfile.TemporaryDirectory(prefix="ocr_capture_") as temp_dir:
            temp_file = os.path.join(temp_dir, "ocr_capture.png")

            # Capture the screen
            subprocess.run(["scrot", temp_file], check=True)

            delay = initial_delay
            invalid_error = None
            for _ in range(max_retries):
                time.sleep(delay)
                # Back off exponentially before any further attempt
                delay *= 2

                # File missing or still empty: try again after a longer wait
                if not os.path.exists(temp_file) or os.path.getsize(temp_file) == 0:
                    invalid_error = None
                    continue

                try:
                    with Image.open(temp_file) as img:
                        # copy() decodes the pixel data, which both validates
                        # the file and detaches the result from it, so the
                        # temporary file can be deleted safely afterwards.
                        return img.copy()
                except OSError as e:
                    # Image exists but isn't valid (yet); remember why
                    invalid_error = e

            # All attempts used up (or none allowed): report the last state seen
            if invalid_error is not None:
                raise Exception(
                    f"Image file exists but is not valid after {max_retries} attempts"
                ) from invalid_error
            raise Exception(f"Screenshot file not created properly after {max_retries} attempts")

    except Exception as e:
        print(f"Error capturing screen: {e}")
        raise

def process_image(img, scale_factor=1.5):
    """
    Prepare a captured image for OCR

    Args:
        img: PIL image to process
        scale_factor: Multiplier applied to both dimensions; 1 skips resizing

    Returns:
        The (optionally resized) image converted to grayscale with
        autocontrast applied.
    """
    if scale_factor != 1:
        # Enlarge (or shrink) both dimensions by the same factor, truncating
        # to whole pixels
        scaled_size = tuple(int(dimension * scale_factor) for dimension in img.size)
        img = img.resize(scaled_size, Image.Resampling.BICUBIC)

    # Grayscale first, then stretch the contrast of the result
    return ImageOps.autocontrast(ImageOps.grayscale(img))

def perform_ocr(img, lang='eng'):
    """
    Run Tesseract OCR on an image and return the recognized text

    Args:
        img: Image to recognize
        lang: Tesseract language code passed through to pytesseract

    Returns:
        The text returned by pytesseract.image_to_string.
    """
    # --oem 1: Use LSTM OCR Engine
    # --psm 6: Assume a single uniform block of text
    tesseract_config = '--oem 1 --psm 6'
    return pytesseract.image_to_string(img, lang=lang, config=tesseract_config)

def copy_to_clipboard(text):
    """
    Copy the non-blank lines of text to the clipboard using pyperclip

    Each line is stripped of surrounding whitespace and blank lines are
    dropped; the remaining lines are joined with newlines.

    Args:
        text: Text to place on the clipboard

    Returns:
        True if something was copied, False if there was nothing to copy or
        any error occurred (the error is printed).
    """
    try:
        stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
        # Keep line breaks so the clipboard content stays readable
        payload = '\n'.join(piece for piece in stripped_lines if piece)

        if not payload:
            return False

        pyperclip.copy(payload)
        return True
    except Exception as e:
        print(f"Error copying to clipboard: {e}")
        return False

def speak_text(text):
    """
    Speak the text using speech-dispatcher

    Lines are stripped, blank lines are dropped, and the rest is joined with
    spaces into a single message. If nothing remains, "No text detected" is
    spoken instead.

    Args:
        text: Text to speak (typically raw OCR output)
    """
    # Filter out empty lines and clean up the text
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    cleaned_text = ' '.join(lines)

    message = cleaned_text if cleaned_text else "No text detected"

    # "--" ends option parsing so recognized text that happens to start with
    # a dash (e.g. "-v" or "--help" read off the screen) is spoken rather
    # than being interpreted as an spd-say command-line option.
    subprocess.run(["spd-say", "-Cw", "--", message])

def main():
    """
    Run the full OCR workflow: announce, capture, process, recognize,
    copy the result to the clipboard, and speak it

    Any error raised along the way is printed and announced as "OCR failed"
    instead of propagating.
    """
    # Limit tesseract thread usage to improve performance on Pi
    os.environ["OMP_THREAD_LIMIT"] = "4"

    try:
        # Announce start
        subprocess.run(["spd-say", "-Cw", "performing OCR"])

        # Capture screen
        img = capture_screen()

        # Process image
        processed_img = process_image(img, scale_factor=1.5)

        # Perform OCR
        text = perform_ocr(processed_img)

        # Copy to clipboard (best effort; failures are reported by the helper
        # and must not prevent the text from being spoken)
        copy_to_clipboard(text)

        # Speak the results
        speak_text(text)

    except Exception as e:
        # Let the user know something went wrong
        error_msg = f"Error during OCR: {str(e)}"
        print(error_msg)
        try:
            subprocess.run(["spd-say", "-Cw", "OCR failed"])
        except Exception:
            # If even speech fails, at least we tried. Only ordinary errors
            # are suppressed so Ctrl-C / SystemExit still terminate the script.
            pass

# Run the OCR workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()