#!/usr/bin/env python3
"""
Cleanup manager for GNSS Guard

Handles cleanup of database tables and log files.
"""

import contextlib
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional

logger = logging.getLogger("gnss_guard.cleanup")


class CleanupManager:
    """Manages cleanup of old data from database and logs."""

    def __init__(
        self,
        database_path: Path,
        logs_base_path: Path,
        positions_raw_retention_days: int = 14,
        positions_validation_retention_days: int = 31,
        logs_retention_days: int = 14,
        demo_mode: bool = False
    ):
        """
        Initialize cleanup manager

        Args:
            database_path: Path to SQLite database file
            logs_base_path: Base path for logs directory
            positions_raw_retention_days: Days to retain positions_raw records (default: 14)
            positions_validation_retention_days: Days to retain positions_validation records (default: 31)
            logs_retention_days: Days to retain log files (default: 14)
            demo_mode: If True, skip database cleanup (data isn't growing in demo mode)
        """
        self.database_path = Path(database_path)
        self.logs_base_path = Path(logs_base_path)
        self.positions_raw_retention_days = positions_raw_retention_days
        self.positions_validation_retention_days = positions_validation_retention_days
        self.logs_retention_days = logs_retention_days
        self.demo_mode = demo_mode
        # Date string ("%Y-%m-%d") of the last successful cleanup; used to
        # ensure cleanup runs at most once per calendar day.
        self._last_cleanup_date: Optional[str] = None

    def run_cleanup_if_needed(self):
        """Run cleanup once per day (checks if already ran today)

        In demo mode, only log cleanup runs (database cleanup is skipped
        since data isn't growing - records are created and deleted in
        demo mode).
        """
        today = datetime.now().strftime("%Y-%m-%d")
        if self._last_cleanup_date == today:
            return  # Already ran today

        # In demo mode, skip database cleanup entirely but still clean logs
        if self.demo_mode:
            logger.info("Demo mode: skipping database cleanup (data not growing)")
            self._run_demo_cleanup(today)
            return

        logger.info("Starting daily cleanup...")
        try:
            raw_deleted = self._cleanup_positions_raw()
            validation_deleted = self._cleanup_positions_validation()
            files_deleted, dirs_deleted = self._cleanup_logs()

            # Optimize database after cleanup (VACUUM reclaims space,
            # ANALYZE updates statistics)
            space_saved = self._optimize_database()

            # Mark done only on success so a failed run is retried on the
            # next call rather than silently skipped for the rest of the day.
            self._last_cleanup_date = today
            logger.info(
                f"Daily cleanup completed: "
                f"{raw_deleted} raw positions, "
                f"{validation_deleted} validations, "
                f"{files_deleted} log files, "
                f"{dirs_deleted} empty directories"
                f"{f', {space_saved}' if space_saved else ''}"
            )
        except Exception as e:
            logger.error(f"Cleanup failed: {e}")

    def _run_demo_cleanup(self, today: str):
        """Demo-mode daily cleanup: log files only, no database work."""
        try:
            files_deleted, dirs_deleted = self._cleanup_logs()
            self._last_cleanup_date = today
            if files_deleted > 0 or dirs_deleted > 0:
                logger.info(
                    f"Demo mode cleanup completed: "
                    f"{files_deleted} log files, "
                    f"{dirs_deleted} empty directories"
                )
        except Exception as e:
            logger.error(f"Demo mode log cleanup failed: {e}")

    def _cleanup_positions_raw(self) -> int:
        """
        Delete positions_raw records older than retention period

        Returns:
            Number of records deleted
        """
        return self._cleanup_table(
            "positions_raw", "timestamp_unix", self.positions_raw_retention_days
        )

    def _cleanup_positions_validation(self) -> int:
        """
        Delete positions_validation records older than retention period

        Returns:
            Number of records deleted
        """
        return self._cleanup_table(
            "positions_validation",
            "validation_timestamp_unix",
            self.positions_validation_retention_days,
        )

    def _cleanup_table(self, table: str, ts_column: str, retention_days: int) -> int:
        """
        Delete rows from *table* whose *ts_column* is older than *retention_days*.

        Table and column names are internal constants (never user input), so
        interpolating them into the SQL is safe; the cutoff value itself is
        bound as a parameter.

        Args:
            table: Table name to clean up
            ts_column: Unix-timestamp column used for the age comparison
            retention_days: Rows older than this many days are deleted

        Returns:
            Number of records deleted (0 on failure; errors are logged,
            not raised, so one failed table doesn't abort the whole cleanup)
        """
        cutoff_timestamp = (
            datetime.now() - timedelta(days=retention_days)
        ).timestamp()

        deleted_count = 0
        try:
            # closing() guarantees the connection is released even when the
            # DELETE raises (e.g. database locked), avoiding a handle leak.
            with contextlib.closing(
                sqlite3.connect(str(self.database_path), timeout=30.0)
            ) as conn:
                # Single DELETE; cursor.rowcount reports how many rows went,
                # avoiding the count-then-delete race of two statements.
                cursor = conn.execute(
                    f"DELETE FROM {table} WHERE {ts_column} < ?",
                    (cutoff_timestamp,)
                )
                deleted_count = cursor.rowcount
                conn.commit()

            if deleted_count > 0:
                logger.info(
                    f"Cleaned up {deleted_count} {table} records "
                    f"(> {retention_days} days)"
                )
        except Exception as e:
            logger.error(f"Failed to cleanup {table}: {e}")

        return deleted_count

    def _cleanup_logs(self) -> tuple:
        """
        Delete log files and empty directories older than retention period

        Returns:
            Tuple of (files_deleted, directories_deleted)
        """
        cutoff_timestamp = (
            datetime.now() - timedelta(days=self.logs_retention_days)
        ).timestamp()

        deleted_files = 0
        deleted_dirs = 0
        try:
            if not self.logs_base_path.exists():
                return (0, 0)

            # Delete old log files (age judged by filesystem mtime)
            for log_file in self.logs_base_path.rglob("app_*.json"):
                try:
                    if log_file.stat().st_mtime < cutoff_timestamp:
                        log_file.unlink()
                        deleted_files += 1
                except Exception as e:
                    logger.debug(f"Failed to delete log file {log_file}: {e}")

            # Clean up empty directories. Sort by path length descending so
            # the deepest directories are removed first; a parent emptied by
            # deleting its children is then removable in the same pass.
            all_dirs = sorted(
                [d for d in self.logs_base_path.rglob("*") if d.is_dir()],
                key=lambda p: len(str(p)),
                reverse=True
            )
            for dir_path in all_dirs:
                try:
                    # Only delete if empty
                    if not any(dir_path.iterdir()):
                        dir_path.rmdir()
                        deleted_dirs += 1
                except Exception:
                    pass  # Directory not empty or other error

            if deleted_files > 0 or deleted_dirs > 0:
                logger.info(
                    f"Cleaned up {deleted_files} log files and "
                    f"{deleted_dirs} empty directories "
                    f"(> {self.logs_retention_days} days)"
                )
        except Exception as e:
            logger.error(f"Failed to cleanup logs: {e}")

        return (deleted_files, deleted_dirs)

    def _optimize_database(self) -> str:
        """
        Optimize database after cleanup operations.

        Runs VACUUM to reclaim disk space from deleted records and
        ANALYZE to update query planner statistics.

        Returns:
            String describing space saved, or empty string if no
            optimization needed
        """
        try:
            # Get database size before optimization
            size_before = self.database_path.stat().st_size if self.database_path.exists() else 0

            # closing() ensures the connection is released even if
            # ANALYZE/VACUUM fails (VACUUM needs exclusive access and can
            # raise while another writer holds the database).
            with contextlib.closing(
                sqlite3.connect(str(self.database_path), timeout=60.0)
            ) as conn:
                cursor = conn.cursor()

                # ANALYZE updates statistics used by the query planner
                cursor.execute("ANALYZE")

                # VACUUM rebuilds the database file, reclaiming unused space
                # Note: VACUUM requires exclusive access and can't run inside
                # a transaction
                cursor.execute("VACUUM")

            # Get database size after optimization
            size_after = self.database_path.stat().st_size if self.database_path.exists() else 0

            # Calculate space saved and format it for logging
            space_saved = size_before - size_after
            if space_saved > 0:
                if space_saved >= 1024 * 1024:
                    saved_str = f"{space_saved / (1024 * 1024):.1f} MB"
                elif space_saved >= 1024:
                    saved_str = f"{space_saved / 1024:.1f} KB"
                else:
                    saved_str = f"{space_saved} bytes"

                logger.info(f"Database optimized: reclaimed {saved_str}")
                return f"reclaimed {saved_str}"
            else:
                logger.debug("Database optimized (no space reclaimed)")
                return ""

        except Exception as e:
            logger.error(f"Failed to optimize database: {e}")
            return ""