"""Data cleanup utilities for stale entries.""" from __future__ import annotations import logging import threading import time from typing import Any logger = logging.getLogger('intercept.cleanup') class DataStore: """Thread-safe data store with automatic cleanup of stale entries.""" def __init__(self, max_age_seconds: float = 300.0, name: str = 'data'): """ Initialize data store. Args: max_age_seconds: Maximum age of entries before cleanup (default 5 minutes) name: Name for logging purposes """ self.data: dict[str, Any] = {} self.timestamps: dict[str, float] = {} self.max_age = max_age_seconds self.name = name self._lock = threading.Lock() def set(self, key: str, value: Any) -> None: """Add or update an entry.""" with self._lock: self.data[key] = value self.timestamps[key] = time.time() def get(self, key: str, default: Any = None) -> Any: """Get an entry.""" with self._lock: return self.data.get(key, default) def update(self, key: str, updates: dict) -> None: """Update an existing entry with new values.""" with self._lock: if key in self.data: if isinstance(self.data[key], dict): self.data[key].update(updates) else: self.data[key] = updates else: self.data[key] = updates self.timestamps[key] = time.time() def touch(self, key: str) -> None: """Update timestamp for an entry without changing data.""" with self._lock: if key in self.data: self.timestamps[key] = time.time() def delete(self, key: str) -> bool: """Delete an entry.""" with self._lock: if key in self.data: del self.data[key] del self.timestamps[key] return True return False def clear(self) -> None: """Clear all entries.""" with self._lock: self.data.clear() self.timestamps.clear() def all(self) -> dict[str, Any]: """Get a copy of all data.""" with self._lock: return dict(self.data) def keys(self) -> list[str]: """Get all keys.""" with self._lock: return list(self.data.keys()) def values(self) -> list[Any]: """Get all values.""" with self._lock: return list(self.data.values()) def items(self) -> list[tuple[str, Any]]: """Get all items.""" with self._lock: return list(self.data.items()) def __len__(self) -> int: with self._lock: return len(self.data) def __contains__(self, key: str) -> bool: with self._lock: return key in self.data def __getitem__(self, key: str) -> Any: """Get an entry using subscript notation.""" with self._lock: return self.data[key] def __setitem__(self, key: str, value: Any) -> None: """Set an entry using subscript notation.""" with self._lock: self.data[key] = value self.timestamps[key] = time.time() def __delitem__(self, key: str) -> None: """Delete an entry using subscript notation.""" with self._lock: del self.data[key] del self.timestamps[key] def cleanup(self) -> int: """ Remove entries older than max_age. Returns: Number of entries removed """ now = time.time() expired = [] with self._lock: for key, timestamp in self.timestamps.items(): if now - timestamp > self.max_age: expired.append(key) for key in expired: del self.data[key] del self.timestamps[key] if expired: logger.debug(f"{self.name}: Cleaned up {len(expired)} stale entries") return len(expired) class CleanupManager: """Manages periodic cleanup of multiple data stores and database tables.""" def __init__(self, interval: float = 60.0): """ Initialize cleanup manager. Args: interval: Cleanup interval in seconds """ self.stores: list[DataStore] = [] self.db_cleanup_funcs: list[tuple[callable, int]] = [] # (func, interval_multiplier) self.interval = interval self._timer: threading.Timer | None = None self._running = False self._cleanup_count = 0 self._lock = threading.Lock() def register(self, store: DataStore) -> None: """Register a data store for cleanup.""" with self._lock: if store not in self.stores: self.stores.append(store) def unregister(self, store: DataStore) -> None: """Unregister a data store.""" with self._lock: if store in self.stores: self.stores.remove(store) def register_db_cleanup(self, func: callable, interval_multiplier: int = 60) -> None: """ Register a database cleanup function. Args: func: Cleanup function to call (should return number of deleted rows) interval_multiplier: How many cleanup cycles to wait between calls (default: 60 = 1 hour if interval is 60s) """ with self._lock: self.db_cleanup_funcs.append((func, interval_multiplier)) def start(self) -> None: """Start the cleanup timer.""" with self._lock: if self._running: return self._running = True self._schedule_cleanup() def stop(self) -> None: """Stop the cleanup timer.""" with self._lock: self._running = False if self._timer: self._timer.cancel() self._timer = None def _schedule_cleanup(self) -> None: """Schedule the next cleanup.""" if not self._running: return self._timer = threading.Timer(self.interval, self._run_cleanup) self._timer.daemon = True self._timer.start() def _run_cleanup(self) -> None: """Run cleanup on all registered stores and database tables.""" total_cleaned = 0 # Cleanup in-memory data stores with self._lock: stores = list(self.stores) db_funcs = list(self.db_cleanup_funcs) self._cleanup_count += 1 current_count = self._cleanup_count for store in stores: try: total_cleaned += store.cleanup() except Exception as e: logger.error(f"Error cleaning up {store.name}: {e}") # Cleanup database tables (less frequently) for func, interval_multiplier in db_funcs: if current_count % interval_multiplier == 0: try: deleted = func() if deleted > 0: logger.info(f"Database cleanup: {func.__name__} removed {deleted} rows") total_cleaned += deleted except Exception as e: logger.error(f"Error in database cleanup {func.__name__}: {e}") if total_cleaned > 0: logger.info(f"Cleanup complete: removed {total_cleaned} stale entries") self._schedule_cleanup() def cleanup_now(self) -> int: """Run cleanup immediately.""" total = 0 with self._lock: stores = list(self.stores) for store in stores: try: total += store.cleanup() except Exception as e: logger.error(f"Error cleaning up {store.name}: {e}") return total # Global cleanup manager cleanup_manager = CleanupManager(interval=60.0) def cleanup_dict( data: dict[str, Any], timestamps: dict[str, float], max_age_seconds: float = 300.0 ) -> list[str]: """ Clean up stale entries from a dictionary. Args: data: Dictionary to clean timestamps: Dictionary of key -> last_seen timestamp max_age_seconds: Maximum age in seconds Returns: List of removed keys """ now = time.time() expired = [] for key, timestamp in list(timestamps.items()): if now - timestamp > max_age_seconds: expired.append(key) for key in expired: data.pop(key, None) timestamps.pop(key, None) return expired