Files
fenrir/tools/cleanup_cache.py

288 lines
9.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fenrir Cache Cleanup Tool
Removes Python cache files and directories from the repository.
These files should never be committed and can cause issues.
Usage:
    python3 tools/cleanup_cache.py           # Show what would be removed
    python3 tools/cleanup_cache.py --remove  # Actually remove cache files
    python3 tools/cleanup_cache.py --check   # Exit with error if cache files found
"""
import argparse
import fnmatch
import os
import shutil
import sys
from pathlib import Path
class CacheCleanup:
    """Find, report, and remove Python bytecode caches under a directory tree.

    ``find_cache_files`` fills ``cache_dirs`` and ``cache_files``;
    ``show_findings`` and ``remove_cache_files`` then operate on those lists.
    """

    # ANSI escape sequences used to colour each log level.
    _COLORS = {
        "INFO": "\033[0;36m",  # Cyan
        "SUCCESS": "\033[0;32m",  # Green
        "WARNING": "\033[1;33m",  # Yellow
        "ERROR": "\033[0;31m",  # Red
        "HEADER": "\033[1;34m",  # Bold Blue
    }
    _RESET = "\033[0m"

    def __init__(self, verbose=True):
        """Create a cleaner.

        Args:
            verbose: When false, ``INFO`` messages are suppressed; every
                other level is still printed.
        """
        self.verbose = verbose
        # Paths of the ``__pycache__`` directories found so far.
        self.cache_dirs = []
        # Paths of ``.pyc`` files found outside ``__pycache__`` directories.
        self.cache_files = []

    def log(self, message: str, level: str = "INFO") -> None:
        """Print *message* to stdout, coloured according to *level*.

        Args:
            message: Text to print.
            level: One of ``INFO``, ``SUCCESS``, ``WARNING``, ``ERROR`` or
                ``HEADER``. Unknown levels are printed without a colour.
                ``HEADER`` frames the message between two 60-character
                rules.
        """
        if not self.verbose and level == "INFO":
            return

        color = self._COLORS.get(level, "")
        if level == "HEADER":
            rule = '=' * 60
            print(f"\n{color}{rule}")
            print(f"{message}")
            print(f"{rule}{self._RESET}")
        else:
            # Non-header lines are: colour code, one space, then the message.
            print(f"{color} {message}{self._RESET}")

    def find_cache_files(self, directory) -> None:
        """Walk *directory* and record every cache directory and loose ``.pyc``.

        Results are appended to ``self.cache_dirs`` and ``self.cache_files``,
        so calling this for several directories accumulates the findings.

        Args:
            directory: Root of the tree to scan (``str`` or ``Path``).
        """
        directory = Path(directory)
        for root, dirs, files in os.walk(directory):
            root_path = Path(root)

            # A start directory that already lies inside .git is ignored
            # completely; clearing ``dirs`` stops os.walk from descending.
            if '.git' in root_path.parts:
                dirs.clear()
                continue

            # Prune .git in place so its (often very large) subtree is never
            # visited at all.
            if '.git' in dirs:
                dirs.remove('.git')

            if '__pycache__' in dirs:
                self.cache_dirs.append(root_path / '__pycache__')
                # The directory is removed as a unit, so do not walk into it.
                dirs.remove('__pycache__')

            # .pyc files that live outside of a __pycache__ directory.
            self.cache_files.extend(
                root_path / name for name in files if name.endswith('.pyc')
            )

    def show_findings(self) -> bool:
        """Print a summary of the recorded cache items.

        Returns:
            ``True`` when nothing was found, ``False`` otherwise.
        """
        total_items = len(self.cache_dirs) + len(self.cache_files)
        if total_items == 0:
            self.log("No Python cache files found", "SUCCESS")
            return True

        self.log(f"Found {total_items} cache items:", "WARNING")

        if self.cache_dirs:
            self.log(f"\n__pycache__ directories ({len(self.cache_dirs)}):", "WARNING")
            for cache_dir in sorted(self.cache_dirs):
                size = self.get_directory_size(cache_dir)
                self.log(f" {cache_dir} ({size} files)", "WARNING")

        if self.cache_files:
            self.log(f"\nLoose .pyc files ({len(self.cache_files)}):", "WARNING")
            for cache_file in sorted(self.cache_files):
                try:
                    size_str = self.format_size(cache_file.stat().st_size)
                except OSError:
                    # The file may have vanished or be unreadable; still list it.
                    self.log(f" {cache_file} (size unknown)", "WARNING")
                else:
                    self.log(f" {cache_file} ({size_str})", "WARNING")

        return False

    def get_directory_size(self, directory) -> int:
        """Return the number of regular files below *directory* (recursive).

        Subdirectories themselves are not counted, so the figure matches the
        "files" wording used in the report.

        Args:
            directory: ``Path`` of the directory to inspect.

        Returns:
            The file count, or ``0`` if the directory cannot be read.
        """
        try:
            return sum(1 for entry in directory.rglob('*') if entry.is_file())
        except OSError:
            return 0

    def format_size(self, size_bytes: int) -> str:
        """Format a byte count as ``B``, ``KB`` or ``MB``.

        Values are truncated (not rounded) to whole units.
        """
        if size_bytes < 1024:
            return f"{size_bytes} B"
        if size_bytes < 1024 * 1024:
            return f"{size_bytes // 1024} KB"
        return f"{size_bytes // (1024 * 1024)} MB"

    def remove_cache_files(self) -> bool:
        """Delete every recorded cache directory and loose ``.pyc`` file.

        Directories are removed first, then files. Items that no longer exist
        are skipped silently. A failure on one item is logged and does not
        stop the remaining removals.

        Returns:
            ``True`` if no removal raised ``OSError``, ``False`` otherwise.
        """
        removed_count = 0
        errors = []

        jobs = [("directory", path, shutil.rmtree) for path in self.cache_dirs]
        jobs += [("file", path, Path.unlink) for path in self.cache_files]

        for label, path, remover in jobs:
            try:
                if path.exists():
                    remover(path)
                    self.log(f"Removed {label}: {path}", "SUCCESS")
                    removed_count += 1
            except OSError as e:
                error_msg = f"Failed to remove {path}: {e}"
                errors.append(error_msg)
                self.log(error_msg, "ERROR")

        if errors:
            self.log(f"Encountered {len(errors)} errors during cleanup", "ERROR")
            return False

        self.log(f"Successfully removed {removed_count} cache items", "SUCCESS")
        return True

    @staticmethod
    def _active_gitignore_patterns(content: str) -> list:
        """Return the ignore patterns in *content*, without noise lines.

        Blank lines, ``#`` comments and ``!`` negations are dropped because
        none of them cause a path to be ignored.
        """
        stripped = (line.strip() for line in content.splitlines())
        return [line for line in stripped if line and line[0] not in '#!']

    @staticmethod
    def _ignores_pyc(pattern: str) -> bool:
        """Heuristically decide whether *pattern* ignores ``.pyc`` files.

        Accepts any pattern containing ``*.pyc`` and glob variants such as
        ``*.py[cod]`` or ``*.py?`` whose last path component matches a sample
        ``.pyc`` file name.
        """
        if '*.pyc' in pattern:
            return True
        last_component = pattern.rsplit('/', 1)[-1]
        return last_component.startswith('*.py') and fnmatch.fnmatchcase(
            'module.pyc', last_component
        )

    def check_gitignore(self) -> bool:
        """Check that ``.gitignore`` in the current directory excludes caches.

        Only real ignore patterns are considered: a comment that merely
        mentions ``__pycache__`` or ``*.pyc`` does not count.

        Returns:
            ``True`` when both ``__pycache__`` and ``.pyc`` files are covered;
            ``False`` when the file is missing, unreadable, or incomplete.
        """
        gitignore_path = Path('.gitignore')
        if not gitignore_path.exists():
            self.log("Warning: No .gitignore file found", "WARNING")
            return False

        try:
            # utf-8-sig drops a leading BOM; errors="replace" keeps odd bytes
            # from raising instead of being reported.
            content = gitignore_path.read_text(encoding="utf-8-sig", errors="replace")
        except OSError as e:
            self.log(f"Could not read .gitignore: {e}", "ERROR")
            return False

        patterns = self._active_gitignore_patterns(content)
        has_pycache = any('__pycache__' in pattern for pattern in patterns)
        has_pyc = any(self._ignores_pyc(pattern) for pattern in patterns)

        if has_pycache and has_pyc:
            self.log("✓ .gitignore properly excludes Python cache files", "SUCCESS")
            return True

        missing = []
        if not has_pycache:
            missing.append("__pycache__/")
        if not has_pyc:
            missing.append("*.pyc")
        self.log(f"Warning: .gitignore missing: {', '.join(missing)}", "WARNING")
        return False

    def suggest_gitignore_additions(self) -> None:
        """Print a recommended set of Python ``.gitignore`` entries."""
        self.log("\nRecommended .gitignore entries for Python:", "INFO")
        # The block is printed verbatim, so its lines stay at column 0.
        print("""
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
""")
def main():
    """Command-line entry point for the cache cleanup tool.

    Parses the options, verifies the working directory is the Fenrir project
    root, scans for cache items and then, depending on the flags, removes
    them (``--remove``), fails validation (``--check``) or just reports them
    (dry run). The process always ends through ``sys.exit``: status 0 on
    success or dry run, status 1 on any failure.
    """
    arg_parser = argparse.ArgumentParser(
        description='Clean Python cache files from Fenrir repository'
    )
    arg_parser.add_argument(
        '--remove',
        action='store_true',
        help='Actually remove cache files (default is dry-run)',
    )
    arg_parser.add_argument(
        '--check',
        action='store_true',
        help='Exit with non-zero code if cache files found',
    )
    arg_parser.add_argument(
        '--quiet',
        action='store_true',
        help='Reduce output verbosity',
    )
    arg_parser.add_argument(
        '--directory',
        default='.',
        help='Directory to scan (default: current directory)',
    )
    options = arg_parser.parse_args()

    # The tool only runs from the project root, identified by its source tree.
    project_marker = Path("src/fenrirscreenreader")
    if not project_marker.exists():
        print("Error: Must be run from Fenrir project root directory")
        sys.exit(1)

    tool = CacheCleanup(verbose=not options.quiet)
    tool.log("FENRIR CACHE CLEANUP", "HEADER")
    scan_root = Path(options.directory).absolute()
    tool.log(f"Scanning directory: {scan_root}")

    tool.find_cache_files(options.directory)

    # show_findings() returns True when there is nothing to clean up.
    if tool.show_findings():
        # The .gitignore check is still worth reporting on a clean tree.
        tool.check_gitignore()
        tool.log("\n✅ Repository is clean of Python cache files", "SUCCESS")
        sys.exit(0)

    if not tool.check_gitignore():
        tool.suggest_gitignore_additions()

    if options.remove:
        tool.log("\n🧹 REMOVING CACHE FILES", "HEADER")
        if tool.remove_cache_files():
            summary, level, status = "\n✅ Cache cleanup completed successfully", "SUCCESS", 0
        else:
            summary, level, status = "\n❌ Cache cleanup completed with errors", "ERROR", 1
        tool.log(summary, level)
        sys.exit(status)
    elif options.check:
        tool.log("\n❌ Cache files found - validation failed", "ERROR")
        tool.log("Run with --remove to clean up cache files", "INFO")
        sys.exit(1)
    else:
        # Neither flag given: report only, change nothing.
        tool.log("\n💡 DRY RUN MODE", "HEADER")
        tool.log("Add --remove to actually delete these files", "INFO")
        tool.log("Add --check to fail if cache files are present", "INFO")
        sys.exit(0)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()