Refactor golden image handling in backup upload process

Update the _set_golden_from_path function to improve the handling of existing golden image files. Replace the existing unlink logic with a more robust method that safely removes files or broken symlinks using the missing_ok parameter. This change enhances the reliability of the backup upload process by ensuring that stale references are properly cleared before setting a new golden image path.
Author: nearxos
Date: 2026-02-24 00:19:40 +02:00
Parent: df180120aa
Commit: 808fbf5c7c
136 changed files with 407837 additions and 2 deletions
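The commit message above maps naturally onto Path.unlink(missing_ok=True), available since Python 3.8, which removes a regular file or a broken symlink and simply returns if nothing exists. A minimal sketch of the described change, assuming the golden image is tracked through a symlink; the function name comes from the commit message, but the signature and symlink handling are illustrative:

from pathlib import Path

def _set_golden_from_path(golden_link: Path, new_golden: Path) -> None:
    # Hypothetical signature for illustration; only the unlink pattern is from the commit message.
    # missing_ok=True clears a stale file or broken symlink without raising FileNotFoundError.
    golden_link.unlink(missing_ok=True)
    golden_link.symlink_to(new_golden)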


@@ -0,0 +1,4 @@
"""
Storage modules for GNSS Guard
"""


@@ -0,0 +1,286 @@
#!/usr/bin/env python3
"""
Cleanup manager for GNSS Guard
Handles cleanup of database tables and log files
"""
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
logger = logging.getLogger("gnss_guard.cleanup")
class CleanupManager:
"""Manages cleanup of old data from database and logs"""
def __init__(
self,
database_path: Path,
logs_base_path: Path,
positions_raw_retention_days: int = 14,
positions_validation_retention_days: int = 31,
logs_retention_days: int = 14,
demo_mode: bool = False
):
"""
Initialize cleanup manager
Args:
database_path: Path to SQLite database file
logs_base_path: Base path for logs directory
positions_raw_retention_days: Days to retain positions_raw records (default: 14)
positions_validation_retention_days: Days to retain positions_validation records (default: 31)
logs_retention_days: Days to retain log files (default: 14)
demo_mode: If True, skip database cleanup (data isn't growing in demo mode)
"""
self.database_path = Path(database_path)
self.logs_base_path = Path(logs_base_path)
self.positions_raw_retention_days = positions_raw_retention_days
self.positions_validation_retention_days = positions_validation_retention_days
self.logs_retention_days = logs_retention_days
self.demo_mode = demo_mode
self._last_cleanup_date: Optional[str] = None
def run_cleanup_if_needed(self):
"""Run cleanup once per day (checks if already ran today)
In demo mode, only log cleanup runs (database cleanup is skipped
since data isn't growing - records are created and deleted in demo mode).
"""
today = datetime.now().strftime("%Y-%m-%d")
if self._last_cleanup_date == today:
return # Already ran today
# In demo mode, skip database cleanup entirely but still clean logs
if self.demo_mode:
logger.info("Demo mode: skipping database cleanup (data not growing)")
try:
files_deleted, dirs_deleted = self._cleanup_logs()
self._last_cleanup_date = today
if files_deleted > 0 or dirs_deleted > 0:
logger.info(
f"Demo mode cleanup completed: "
f"{files_deleted} log files, "
f"{dirs_deleted} empty directories"
)
except Exception as e:
logger.error(f"Demo mode log cleanup failed: {e}")
return
logger.info("Starting daily cleanup...")
try:
raw_deleted = self._cleanup_positions_raw()
validation_deleted = self._cleanup_positions_validation()
files_deleted, dirs_deleted = self._cleanup_logs()
# Optimize database after cleanup (VACUUM reclaims space, ANALYZE updates statistics)
space_saved = self._optimize_database()
self._last_cleanup_date = today
logger.info(
f"Daily cleanup completed: "
f"{raw_deleted} raw positions, "
f"{validation_deleted} validations, "
f"{files_deleted} log files, "
f"{dirs_deleted} empty directories"
f"{f', {space_saved}' if space_saved else ''}"
)
except Exception as e:
logger.error(f"Cleanup failed: {e}")
def _cleanup_positions_raw(self) -> int:
"""
Delete positions_raw records older than retention period
Returns:
Number of records deleted
"""
cutoff_timestamp = (
datetime.now() - timedelta(days=self.positions_raw_retention_days)
).timestamp()
deleted_count = 0
try:
conn = sqlite3.connect(str(self.database_path), timeout=30.0)
cursor = conn.cursor()
# Count before delete
cursor.execute(
"SELECT COUNT(*) FROM positions_raw WHERE timestamp_unix < ?",
(cutoff_timestamp,)
)
deleted_count = cursor.fetchone()[0]
if deleted_count > 0:
cursor.execute(
"DELETE FROM positions_raw WHERE timestamp_unix < ?",
(cutoff_timestamp,)
)
conn.commit()
logger.info(
f"Cleaned up {deleted_count} positions_raw records "
f"(> {self.positions_raw_retention_days} days)"
)
conn.close()
except Exception as e:
logger.error(f"Failed to cleanup positions_raw: {e}")
return deleted_count
def _cleanup_positions_validation(self) -> int:
"""
Delete positions_validation records older than retention period
Returns:
Number of records deleted
"""
cutoff_timestamp = (
datetime.now() - timedelta(days=self.positions_validation_retention_days)
).timestamp()
deleted_count = 0
try:
conn = sqlite3.connect(str(self.database_path), timeout=30.0)
cursor = conn.cursor()
# Count before delete
cursor.execute(
"SELECT COUNT(*) FROM positions_validation WHERE validation_timestamp_unix < ?",
(cutoff_timestamp,)
)
deleted_count = cursor.fetchone()[0]
if deleted_count > 0:
cursor.execute(
"DELETE FROM positions_validation WHERE validation_timestamp_unix < ?",
(cutoff_timestamp,)
)
conn.commit()
logger.info(
f"Cleaned up {deleted_count} positions_validation records "
f"(> {self.positions_validation_retention_days} days)"
)
conn.close()
except Exception as e:
logger.error(f"Failed to cleanup positions_validation: {e}")
return deleted_count
def _cleanup_logs(self) -> tuple:
"""
Delete log files and empty directories older than retention period
Returns:
Tuple of (files_deleted, directories_deleted)
"""
cutoff_timestamp = (
datetime.now() - timedelta(days=self.logs_retention_days)
).timestamp()
deleted_files = 0
deleted_dirs = 0
try:
if not self.logs_base_path.exists():
return (0, 0)
# Delete old log files
for log_file in self.logs_base_path.rglob("app_*.json"):
try:
if log_file.stat().st_mtime < cutoff_timestamp:
log_file.unlink()
deleted_files += 1
except Exception as e:
logger.debug(f"Failed to delete log file {log_file}: {e}")
# Clean up empty directories (must iterate multiple times for nested dirs)
# Sort by path length descending to delete deepest first
all_dirs = sorted(
[d for d in self.logs_base_path.rglob("*") if d.is_dir()],
key=lambda p: len(str(p)),
reverse=True
)
for dir_path in all_dirs:
try:
# Only delete if empty
if not any(dir_path.iterdir()):
dir_path.rmdir()
deleted_dirs += 1
except Exception:
pass # Directory not empty or other error
if deleted_files > 0 or deleted_dirs > 0:
logger.info(
f"Cleaned up {deleted_files} log files and "
f"{deleted_dirs} empty directories "
f"(> {self.logs_retention_days} days)"
)
except Exception as e:
logger.error(f"Failed to cleanup logs: {e}")
return (deleted_files, deleted_dirs)
def _optimize_database(self) -> str:
"""
Optimize database after cleanup operations.
Runs VACUUM to reclaim disk space from deleted records and
ANALYZE to update query planner statistics.
Returns:
String describing space saved, or empty string if no optimization needed
"""
try:
# Get database size before optimization
size_before = self.database_path.stat().st_size if self.database_path.exists() else 0
conn = sqlite3.connect(str(self.database_path), timeout=60.0)
cursor = conn.cursor()
# ANALYZE updates statistics used by the query planner
cursor.execute("ANALYZE")
# VACUUM rebuilds the database file, reclaiming unused space
# Note: VACUUM requires exclusive access and can't run inside a transaction
cursor.execute("VACUUM")
conn.close()
# Get database size after optimization
size_after = self.database_path.stat().st_size if self.database_path.exists() else 0
# Calculate space saved
space_saved = size_before - size_after
if space_saved > 0:
# Format size for logging
if space_saved >= 1024 * 1024:
saved_str = f"{space_saved / (1024 * 1024):.1f} MB"
elif space_saved >= 1024:
saved_str = f"{space_saved / 1024:.1f} KB"
else:
saved_str = f"{space_saved} bytes"
logger.info(f"Database optimized: reclaimed {saved_str}")
return f"reclaimed {saved_str}"
else:
logger.debug("Database optimized (no space reclaimed)")
return ""
except Exception as e:
logger.error(f"Failed to optimize database: {e}")
return ""


@@ -0,0 +1,316 @@
#!/usr/bin/env python3
"""
SQLite database storage for GNSS Guard
Manages positions_raw and positions_validation tables
"""
import json
import logging
import sqlite3
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Any, Optional, List
logger = logging.getLogger("gnss_guard.database")
class Database:
"""SQLite database manager for GNSS Guard"""
def __init__(self, database_path: Path):
"""
Initialize database
Args:
database_path: Path to SQLite database file
"""
self.database_path = Path(database_path)
self.database_path.parent.mkdir(parents=True, exist_ok=True)
self._init_database()
def _init_database(self):
"""Initialize database schema and configure SQLite for optimal performance"""
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False)
cursor = conn.cursor()
# Configure SQLite for better performance and concurrency
# WAL mode allows concurrent reads during writes
cursor.execute("PRAGMA journal_mode=WAL")
# Set busy timeout to 30 seconds (in milliseconds)
cursor.execute("PRAGMA busy_timeout=30000")
# NORMAL synchronous is faster than FULL while still being safe with WAL
cursor.execute("PRAGMA synchronous=NORMAL")
# Enable foreign key constraints (good practice)
cursor.execute("PRAGMA foreign_keys=ON")
# Create positions_raw table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS positions_raw (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source TEXT NOT NULL,
timestamp TEXT NOT NULL,
timestamp_unix REAL NOT NULL,
latitude REAL,
longitude REAL,
altitude REAL,
position_uncertainty_m REAL,
supplementary_data TEXT,
created_at REAL NOT NULL,
UNIQUE(source, timestamp_unix)
)
"""
)
# Add position_uncertainty_m column if it doesn't exist (migration for existing databases)
try:
cursor.execute("ALTER TABLE positions_raw ADD COLUMN position_uncertainty_m REAL")
except sqlite3.OperationalError:
# Column already exists, ignore
pass
# Create positions_validation table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS positions_validation (
id INTEGER PRIMARY KEY AUTOINCREMENT,
validation_timestamp TEXT NOT NULL,
validation_timestamp_unix REAL NOT NULL,
is_valid INTEGER NOT NULL,
sources_missing TEXT,
sources_stale TEXT,
coordinate_differences TEXT,
source_coordinates TEXT,
validation_details TEXT,
created_at REAL NOT NULL
)
"""
)
# Create indexes
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_positions_raw_source_timestamp
ON positions_raw(source, timestamp_unix DESC)
"""
)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_positions_validation_timestamp
ON positions_validation(validation_timestamp_unix DESC)
"""
)
conn.commit()
conn.close()
logger.info(f"Database initialized at {self.database_path}")
except Exception as e:
logger.error(f"Failed to initialize database: {e}")
raise
def store_position(self, position: Dict[str, Any]) -> bool:
"""
Store or update a position in positions_raw table
Args:
position: Dictionary with position data (source, latitude, longitude, etc.)
Returns:
True if successful, False otherwise
"""
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False, timeout=5.0)
cursor = conn.cursor()
# INSERT OR REPLACE: a duplicate (source, timestamp_unix) pair hits the UNIQUE constraint and updates the existing row
cursor.execute(
"""
INSERT OR REPLACE INTO positions_raw
(source, timestamp, timestamp_unix, latitude, longitude, altitude, position_uncertainty_m, supplementary_data, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
position.get("source"),
position.get("timestamp"),
position.get("timestamp_unix"),
position.get("latitude"),
position.get("longitude"),
position.get("altitude"),
position.get("position_uncertainty_m"),
json.dumps(position.get("supplementary_data", {})),
time.time(),
),
)
conn.commit()
conn.close()
return True
except sqlite3.OperationalError as e:
if "database is locked" in str(e):
# Retry once after short delay
time.sleep(0.01)
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False, timeout=5.0)
cursor = conn.cursor()
cursor.execute(
"""
INSERT OR REPLACE INTO positions_raw
(source, timestamp, timestamp_unix, latitude, longitude, altitude, position_uncertainty_m, supplementary_data, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
position.get("source"),
position.get("timestamp"),
position.get("timestamp_unix"),
position.get("latitude"),
position.get("longitude"),
position.get("altitude"),
position.get("position_uncertainty_m"),
json.dumps(position.get("supplementary_data", {})),
time.time(),
),
)
conn.commit()
conn.close()
return True
except Exception:
pass
logger.error(f"Failed to store position: {e}")
return False
except Exception as e:
logger.error(f"Failed to store position: {e}")
return False
def store_validation(self, validation_result: Dict[str, Any]) -> bool:
"""
Store validation result in positions_validation table
Args:
validation_result: Dictionary with validation data
Returns:
True if successful, False otherwise
"""
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False, timeout=5.0)
cursor = conn.cursor()
cursor.execute(
"""
INSERT INTO positions_validation
(validation_timestamp, validation_timestamp_unix, is_valid, sources_missing,
sources_stale, coordinate_differences, source_coordinates, validation_details, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
validation_result.get("validation_timestamp"),
validation_result.get("validation_timestamp_unix"),
1 if validation_result.get("is_valid") else 0,
json.dumps(validation_result.get("sources_missing", [])),
json.dumps(validation_result.get("sources_stale", [])),
json.dumps(validation_result.get("coordinate_differences", {})),
json.dumps(validation_result.get("source_coordinates", {})),
json.dumps(validation_result.get("validation_details", {})),
time.time(),
),
)
conn.commit()
conn.close()
return True
except Exception as e:
logger.error(f"Failed to store validation result: {e}")
return False
def get_latest_positions(self) -> Dict[str, Dict[str, Any]]:
"""
Get latest position for each source
Returns:
Dictionary mapping source names to their latest positions
"""
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False, timeout=5.0)
cursor = conn.cursor()
cursor.execute(
"""
SELECT source, timestamp, timestamp_unix, latitude, longitude, altitude, position_uncertainty_m, supplementary_data
FROM positions_raw
WHERE (source, timestamp_unix) IN (
SELECT source, MAX(timestamp_unix)
FROM positions_raw
GROUP BY source
)
"""
)
positions = {}
for row in cursor.fetchall():
source, timestamp, timestamp_unix, lat, lon, alt, pos_uncertainty, supp_data = row
positions[source] = {
"source": source,
"timestamp": timestamp,
"timestamp_unix": timestamp_unix,
"latitude": lat,
"longitude": lon,
"altitude": alt,
"position_uncertainty_m": pos_uncertainty,
"supplementary_data": json.loads(supp_data) if supp_data else {},
}
conn.close()
return positions
except Exception as e:
logger.error(f"Failed to get latest positions: {e}")
return {}
def get_latest_validation(self) -> Optional[Dict[str, Any]]:
"""
Get the most recent validation result from the database.
Used to restore state after app restart.
Returns:
Dictionary with validation data or None if not found
"""
try:
conn = sqlite3.connect(str(self.database_path), check_same_thread=False, timeout=5.0)
cursor = conn.cursor()
cursor.execute(
"""
SELECT validation_timestamp, validation_timestamp_unix, is_valid,
sources_missing, sources_stale, coordinate_differences,
source_coordinates, validation_details
FROM positions_validation
ORDER BY validation_timestamp_unix DESC
LIMIT 1
"""
)
row = cursor.fetchone()
conn.close()
if row:
return {
"validation_timestamp": row[0],
"validation_timestamp_unix": row[1],
"is_valid": row[2] == 1,
"sources_missing": json.loads(row[3]) if row[3] else [],
"sources_stale": json.loads(row[4]) if row[4] else [],
"coordinate_differences": json.loads(row[5]) if row[5] else {},
"source_coordinates": json.loads(row[6]) if row[6] else {},
"validation_details": json.loads(row[7]) if row[7] else {},
}
return None
except Exception as e:
logger.error(f"Failed to get latest validation: {e}")
return None
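
The Database class opens a short-lived connection per call and relies on WAL mode plus a busy timeout for concurrency. A hedged sketch of how the rest of the service might exercise it; the module path and all field values are made up for illustration:

from pathlib import Path
from database import Database  # module name assumed

db = Database(Path("/var/lib/gnss_guard/gnss_guard.db"))

# Upsert the latest fix for one source; values are illustrative only.
db.store_position({
    "source": "gnss_receiver",
    "timestamp": "2026-02-24T00:19:40+02:00",
    "timestamp_unix": 1771885180.0,
    "latitude": 60.1699,
    "longitude": 24.9384,
    "altitude": 12.5,
    "position_uncertainty_m": 3.2,
    "supplementary_data": {"satellites": 11},
})

latest = db.get_latest_positions()       # {"gnss_receiver": {...}}
last_check = db.get_latest_validation()  # None until a validation row exists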


@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Structured JSON logging for GNSS Guard
Logs to date-based folders with daily rotation and cleanup
"""
import json
import logging
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Any, Optional
logger = logging.getLogger("gnss_guard.logger")
class StructuredLogger:
"""Structured JSON logger with date-based folders"""
def __init__(self, logs_base_path: Path, retention_days: int = 14):
"""
Initialize structured logger
Args:
logs_base_path: Base path for logs directory
retention_days: Number of days to retain logs
"""
self.logs_base_path = Path(logs_base_path)
self.retention_days = retention_days
self.current_log_file: Optional[Path] = None
self.current_date: Optional[str] = None
self.log_file_handle = None
self._closed = False
def _get_log_path(self, date: datetime) -> Path:
"""Get log file path for a given date"""
year = date.strftime("%Y")
month = date.strftime("%m")
day = date.strftime("%d")
date_str = date.strftime("%Y-%m-%d")
log_dir = self.logs_base_path / year / month / day
log_dir.mkdir(parents=True, exist_ok=True)
return log_dir / f"app_{date_str}.json"
def _ensure_log_file(self):
"""Ensure log file is open for current date"""
today = datetime.now()
today_str = today.strftime("%Y-%m-%d")
if self.current_date != today_str or self.current_log_file is None:
# Close previous file if open
if self.log_file_handle:
self.log_file_handle.close()
self.log_file_handle = None
# Cleanup old logs
self._cleanup_old_logs()
# Open new log file
self.current_log_file = self._get_log_path(today)
self.current_date = today_str
# Open file in append mode
self.log_file_handle = open(self.current_log_file, "a")
logger.info(f"Opened log file: {self.current_log_file}")
def _cleanup_old_logs(self):
"""Delete log files older than retention_days"""
try:
cutoff_date = datetime.now() - timedelta(days=self.retention_days)
cutoff_timestamp = cutoff_date.timestamp()
deleted_count = 0
# Walk through all log directories
if self.logs_base_path.exists():
for log_file in self.logs_base_path.rglob("app_*.json"):
try:
if log_file.stat().st_mtime < cutoff_timestamp:
log_file.unlink()
deleted_count += 1
except Exception as e:
logger.debug(f"Failed to delete old log file {log_file}: {e}")
if deleted_count > 0:
logger.info(f"Cleaned up {deleted_count} old log file(s) (> {self.retention_days} days)")
except Exception as e:
logger.error(f"Error during log cleanup: {e}")
def log(self, level: str, source: str, message: str, data: Optional[Dict[str, Any]] = None):
"""
Write structured log entry
Args:
level: Log level (INFO, WARNING, ERROR, DEBUG)
source: Source identifier
message: Log message
data: Optional additional data dictionary
"""
try:
# Don't write if logger is explicitly closed
if self._closed:
return
# Ensure log file is open
self._ensure_log_file()
# Check if file handle is still None (shouldn't happen, but be safe)
if self.log_file_handle is None:
logger.warning(f"Cannot write log entry: logger file handle is None")
return
log_entry = {
"timestamp": datetime.now().isoformat(),
"level": level,
"source": source,
"message": message,
}
if data:
log_entry["data"] = data
# Write as JSON line
json_line = json.dumps(log_entry, separators=(",", ":"))
self.log_file_handle.write(json_line + "\n")
self.log_file_handle.flush()
except Exception as e:
logger.error(f"Failed to write log entry: {e}")
def info(self, source: str, message: str, data: Optional[Dict[str, Any]] = None):
"""Log info message"""
self.log("INFO", source, message, data)
def warning(self, source: str, message: str, data: Optional[Dict[str, Any]] = None):
"""Log warning message"""
self.log("WARNING", source, message, data)
def error(self, source: str, message: str, data: Optional[Dict[str, Any]] = None):
"""Log error message"""
self.log("ERROR", source, message, data)
def debug(self, source: str, message: str, data: Optional[Dict[str, Any]] = None):
"""Log debug message"""
self.log("DEBUG", source, message, data)
def close(self):
"""Close log file handle"""
self._closed = True
if self.log_file_handle:
self.log_file_handle.close()
self.log_file_handle = None
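
Finally, a hedged sketch of the structured logger in use; the module path and the event fields are assumptions for illustration:

from pathlib import Path
from structured_logger import StructuredLogger  # module name assumed

slog = StructuredLogger(Path("/var/log/gnss_guard"), retention_days=14)

# Each call appends one JSON line to <base>/<YYYY>/<MM>/<DD>/app_<YYYY-MM-DD>.json,
# opening a new file (and pruning old ones) when the date rolls over.
slog.info("validator", "position validated", data={"sources": ["gnss_receiver", "ntrip"]})
slog.warning("collector", "source stale", data={"source": "ntrip", "age_s": 42})

slog.close()  # flush and release the file handle on shutdown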