Python Snippets

Real-time System Resource Monitor with Alerts

import psutil
import time
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
from typing import Dict, Optional
import json
import threading
from dataclasses import dataclass

@dataclass
class ResourceThresholds:
    """Alert limits, as usage percentages, for each monitored resource.

    A reading strictly greater than the corresponding limit is treated as
    an alert by the monitor's threshold check.
    """

    cpu_percent: float = 80.0      # CPU utilisation limit (%)
    memory_percent: float = 85.0   # virtual-memory utilisation limit (%)
    disk_percent: float = 90.0     # disk utilisation limit (%)

class SystemResourceMonitor:
    """Sample CPU, memory and disk usage and raise alerts on threshold breaches.

    Alerts are e-mailed when both ``alert_email`` and ``smtp_config`` are
    provided; otherwise they are printed to the console. Monitoring can be
    run once (``monitor_once``) or continuously in a daemon thread
    (``start_monitoring`` / ``stop_monitoring``).
    """

    # (alert key, label used in the message, name shared by the metrics dict
    # entry and the thresholds attribute)
    _RESOURCE_CHECKS = (
        ('cpu', 'CPU', 'cpu_percent'),
        ('memory', 'Memory', 'memory_percent'),
        ('disk', 'Disk', 'disk_percent'),
    )

    def __init__(self, thresholds: Optional["ResourceThresholds"] = None,
                 alert_email: Optional[str] = None, smtp_config: Optional[Dict] = None,
                 disk_path: str = '/', alert_cooldown_seconds: float = 300):
        """Create a monitor.

        Args:
            thresholds: Alert limits. When omitted, a fresh
                ``ResourceThresholds()`` with default limits is created.
            alert_email: Recipient address for alert e-mails.
            smtp_config: SMTP settings read with the keys ``server``,
                ``port``, ``username``, ``password`` and (optionally)
                ``timeout`` in seconds (default 30).
            disk_path: Path whose filesystem usage is reported.
            alert_cooldown_seconds: Minimum time between two alerts that
                concern the same set of resources.
        """
        # Build the default here rather than in the signature: a default
        # evaluated at definition time would be one mutable object shared by
        # every monitor created without explicit thresholds.
        self.thresholds = thresholds if thresholds is not None else ResourceThresholds()
        self.alert_email = alert_email
        self.smtp_config = smtp_config or {}
        self.disk_path = disk_path
        self.alert_cooldown_seconds = alert_cooldown_seconds
        self.alert_history = []
        self.monitoring = False
        self.monitor_thread = None
        # Set by stop_monitoring() so the loop wakes up immediately instead of
        # sleeping out the rest of its interval.
        self._stop_event = threading.Event()

    def get_system_metrics(self) -> Dict:
        """Collect current system resource usage metrics.

        Blocks for about one second while the CPU percentage is sampled.

        Returns:
            A dict with an ISO timestamp, CPU/memory/disk percentages,
            used/total sizes in GiB (rounded to 2 decimals) and the boot time
            as an ISO string.
        """
        gib = 1024 ** 3
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage(self.disk_path)

        return {
            'timestamp': datetime.now().isoformat(),
            'cpu_percent': cpu_percent,
            'memory_percent': memory.percent,
            'memory_used_gb': round(memory.used / gib, 2),
            'memory_total_gb': round(memory.total / gib, 2),
            'disk_percent': disk.percent,
            'disk_used_gb': round(disk.used / gib, 2),
            'disk_total_gb': round(disk.total / gib, 2),
            'boot_time': datetime.fromtimestamp(psutil.boot_time()).isoformat()
        }

    def check_thresholds(self, metrics: Dict) -> Dict:
        """Check if any resource exceeds configured thresholds.

        Args:
            metrics: Dict containing ``cpu_percent``, ``memory_percent`` and
                ``disk_percent``.

        Returns:
            A dict keyed by ``'cpu'`` / ``'memory'`` / ``'disk'`` for every
            resource strictly above its limit; each value holds ``current``,
            ``threshold`` and a human-readable ``message``. Empty when all
            readings are within limits.
        """
        alerts = {}
        for resource, label, field in self._RESOURCE_CHECKS:
            current = metrics[field]
            limit = getattr(self.thresholds, field)
            if current > limit:
                alerts[resource] = {
                    'current': current,
                    'threshold': limit,
                    'message': f"{label} usage {current:.1f}% exceeds threshold {limit}%"
                }
        return alerts

    def _build_alert_message(self, alerts: Dict, metrics: Dict) -> MIMEMultipart:
        """Build the alert e-mail (headers plus plain-text body)."""
        # One timestamp so the subject and the body cannot disagree.
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        msg = MIMEMultipart()
        msg['From'] = self.smtp_config.get('username')
        msg['To'] = self.alert_email
        msg['Subject'] = f"System Resource Alert - {now}"

        body = (
            "\nSystem Resource Alert Detected:\n\n"
            f"{json.dumps(alerts, indent=2)}\n\n"
            "Full System Metrics:\n"
            f"{json.dumps(metrics, indent=2)}\n\n"
            f"Time: {now}\n"
        )
        msg.attach(MIMEText(body, 'plain'))
        return msg

    def send_alert(self, alerts: Dict, metrics: Dict):
        """Send alert notifications via email.

        Falls back to printing the alerts to the console when no recipient
        or no SMTP configuration is set. Delivery is best-effort: failures
        are reported on the console and never raised to the caller.
        """
        if not self.alert_email or not self.smtp_config:
            # Print to console if no email config
            print("ALERT:", json.dumps(alerts, indent=2))
            return

        try:
            msg = self._build_alert_message(alerts, metrics)
            # The context manager closes the connection even when starttls,
            # login or send fails; the timeout keeps a stalled server from
            # hanging the monitoring thread indefinitely.
            with smtplib.SMTP(self.smtp_config.get('server'),
                              self.smtp_config.get('port'),
                              timeout=self.smtp_config.get('timeout', 30)) as server:
                server.starttls()
                server.login(self.smtp_config.get('username'),
                             self.smtp_config.get('password'))
                server.send_message(msg)

            print(f"Alert sent to {self.alert_email}")
        except Exception as e:
            # Deliberately broad: a failed notification must not stop monitoring.
            print(f"Failed to send alert: {e}")

    def _should_alert(self, alerts: Dict) -> bool:
        """Return True unless the same resources alerted within the cooldown."""
        if not self.alert_history:
            return True

        last_alert = self.alert_history[-1]
        if set(alerts) != set(last_alert.get('resources', [])):
            return True

        # total_seconds() covers the whole gap; timedelta.seconds would drop
        # the days component and wrongly suppress alerts after long gaps.
        elapsed = (datetime.now() - last_alert['timestamp']).total_seconds()
        return elapsed >= self.alert_cooldown_seconds

    def monitor_once(self) -> Dict:
        """Perform a single monitoring check.

        Collects metrics, evaluates the thresholds and, unless the same set
        of resources already alerted within the cooldown window, sends an
        alert and records it in ``alert_history``.

        Returns:
            ``{'metrics': <metrics dict>, 'alerts': <alerts dict>}``.
        """
        metrics = self.get_system_metrics()
        alerts = self.check_thresholds(metrics)

        if alerts and self._should_alert(alerts):
            self.send_alert(alerts, metrics)
            self.alert_history.append({
                'timestamp': datetime.now(),
                'resources': list(alerts.keys()),
                'metrics': metrics
            })

        return {'metrics': metrics, 'alerts': alerts}

    def start_monitoring(self, interval: int = 60):
        """Start continuous monitoring in a background thread.

        Args:
            interval: Seconds to wait between two checks.
        """
        if self.monitoring:
            print("Monitoring already running")
            return

        self._stop_event.clear()
        self.monitoring = True
        self.monitor_thread = threading.Thread(
            target=self._monitor_loop, args=(interval,), daemon=True
        )
        self.monitor_thread.start()
        print(f"System monitoring started (interval: {interval}s)")

    def stop_monitoring(self):
        """Stop continuous monitoring and wait for the background thread to exit."""
        self.monitoring = False
        self._stop_event.set()  # wake the loop if it is waiting between checks
        thread = self.monitor_thread
        # A thread cannot join itself; skip the join if called from the loop.
        if thread and thread is not threading.current_thread():
            thread.join()
        print("System monitoring stopped")

    def _monitor_loop(self, interval: int):
        """Internal monitoring loop: check, then wait ``interval`` seconds."""
        while self.monitoring:
            try:
                self.monitor_once()
            except Exception as e:
                # Keep the loop alive; a single failed check is not fatal.
                print(f"Monitoring error: {e}")
            # Interruptible wait: returns True as soon as stop is requested.
            if self._stop_event.wait(interval):
                break

    def print_current_status(self):
        """Print current system status to console."""
        metrics = self.get_system_metrics()
        alerts = self.check_thresholds(metrics)

        print(f"\n--- System Status at {metrics['timestamp']} ---")
        print(f"CPU Usage:     {metrics['cpu_percent']:>5.1f}%")
        print(f"Memory Usage:  {metrics['memory_percent']:>5.1f}% ({metrics['memory_used_gb']}GB / {metrics['memory_total_gb']}GB)")
        print(f"Disk Usage:    {metrics['disk_percent']:>5.1f}% ({metrics['disk_used_gb']}GB / {metrics['disk_total_gb']}GB)")
        print(f"System Uptime: {self._format_uptime(metrics['boot_time'])}")

        if alerts:
            print("\n⚠️  ALERTS:")
            for resource, alert in alerts.items():
                print(f"  {resource.upper()}: {alert['message']}")
        else:
            print("\n✅ All systems normal")

    def _format_uptime(self, boot_time_str: str) -> str:
        """Format system uptime in human-readable form.

        Args:
            boot_time_str: Boot time as an ISO-8601 string.

        Returns:
            Non-zero components such as ``"2d 3h 5m"``, or ``"< 1m"`` when
            the uptime is under one minute.
        """
        boot_time = datetime.fromisoformat(boot_time_str)
        uptime = datetime.now() - boot_time

        days = uptime.days
        hours, remainder = divmod(uptime.seconds, 3600)
        minutes, _ = divmod(remainder, 60)

        parts = [
            f"{value}{unit}"
            for value, unit in ((days, "d"), (hours, "h"), (minutes, "m"))
            if value
        ]
        return " ".join(parts) if parts else "< 1m"

# Demo: runs only when the file is executed as a script
if __name__ == "__main__":
    # Custom alert limits (omit any argument to keep its default)
    thresholds = ResourceThresholds(
        cpu_percent=75.0,      # alert above 75% CPU usage
        memory_percent=80.0,   # alert above 80% memory usage
        disk_percent=85.0,     # alert above 85% disk usage
    )

    # No e-mail settings are given here, so alerts go to the console
    monitor = SystemResourceMonitor(thresholds=thresholds)

    # To have alerts e-mailed instead, uncomment and adapt this section:
    # smtp_config = {
    #     'server': 'smtp.gmail.com',
    #     'port': 587,
    #     'username': 'your-email@gmail.com',
    #     'password': 'your-app-password'
    # }
    # monitor = SystemResourceMonitor(
    #     thresholds=thresholds,
    #     alert_email='admin@company.com',
    #     smtp_config=smtp_config
    # )

    # Show a human-readable snapshot of the current readings
    monitor.print_current_status()

    # Perform one check and report how many resources are over their limit
    check = monitor.monitor_once()
    alert_count = len(check['alerts'])
    print(f"\nMonitoring result: {alert_count} alerts")

    # To monitor continuously (here: for 5 minutes), uncomment this section:
    # monitor.start_monitoring(interval=30)  # Check every 30 seconds
    # time.sleep(300)  # Run for 5 minutes
    # monitor.stop_monitoring()

What This Code Does

This real-time system resource monitor continuously tracks critical system metrics including CPU usage, memory consumption, and disk space utilization. It compares these metrics against configurable thresholds and sends alerts when resources exceed defined limits.

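The monitor provides:

  - Configurable CPU, memory, and disk usage thresholds
  - Alerts by email when SMTP settings are supplied, or on the console otherwise
  - Alert de-duplication: repeated alerts for the same set of resources are suppressed for five minutes
  - One-off checks as well as continuous monitoring in a background thread
  - A human-readable status summary, including system uptime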

Why This is Useful

System administrators and developers need to proactively monitor server resources to prevent performance degradation or outages. This tool helps:

  1. Prevent service disruptions by alerting before resources are exhausted
  2. Optimize resource allocation by identifying usage patterns
  3. Enable proactive maintenance by detecting gradual resource consumption
  4. Reduce manual monitoring overhead through automated checks
  5. Support incident response with detailed system metrics at time of alert

How to Run It

  1. Install required dependencies:
    pip install psutil
    
  2. Basic usage (console output only):
    python system_monitor.py
    
  3. Configure email alerts by uncommenting and modifying the SMTP configuration section with your email provider settings

  4. Adjust thresholds by modifying the ResourceThresholds values to match your system requirements

  5. Enable continuous monitoring by uncommenting the start_monitoring() section

The script provides immediate system status when run and can be configured for unattended operation with email notifications for critical alerts. The monitoring interval is configurable to balance between responsiveness and system overhead.