Python Snippets

Automatic Backup Creator with Compression and Scheduling

import os
import shutil
import zipfile
import datetime
import schedule
import time
from pathlib import Path

def create_backup(source_dir, backup_dir, compress=True):
    """
    Creates a timestamped backup of the specified directory with optional compression.

    The source is validated before anything is written, so a failed call does
    not leave an empty backup directory behind. The backup being written is
    never included in itself, which makes it safe to keep backup_dir inside
    source_dir (older backups already stored there are still included).

    Args:
        source_dir (str): Path to the directory to backup
        backup_dir (str): Path to the backup storage directory (created if missing)
        compress (bool): Whether to compress the backup as ZIP

    Returns:
        str: Path to the created backup (ZIP file, or directory when compress is False)

    Raises:
        FileNotFoundError: If source_dir does not exist
    """
    source_path = Path(source_dir)
    backup_path = Path(backup_dir)

    # Validate first so a bad source never creates the backup directory.
    if not source_path.exists():
        raise FileNotFoundError(f"Source directory not found: {source_dir}")

    # Create backup directory if it doesn't exist
    backup_path.mkdir(parents=True, exist_ok=True)

    # Generate timestamp for backup naming
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_name = f"backup_{timestamp}"

    if compress:
        # Create compressed ZIP backup
        zip_path = backup_path / f"{backup_name}.zip"
        zip_resolved = zip_path.resolve()
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_path in source_path.rglob('*'):
                if not file_path.is_file():
                    continue
                # The archive already exists on disk while the tree is walked;
                # skip it so a half-written ZIP is not stored inside itself.
                if file_path.resolve() == zip_resolved:
                    continue
                zipf.write(file_path, file_path.relative_to(source_path))
        print(f"Compressed backup created: {zip_path}")
        return str(zip_path)

    # Create uncompressed backup (copy directory)
    dest_path = backup_path / backup_name
    dest_resolved = dest_path.resolve()

    def _skip_destination(directory, names):
        # Exclude the copy target itself; otherwise a destination located
        # inside the source would be copied into itself recursively.
        return [
            name for name in names
            if (Path(directory) / name).resolve() == dest_resolved
        ]

    shutil.copytree(source_path, dest_path, ignore=_skip_destination)
    print(f"Uncompressed backup created: {dest_path}")
    return str(dest_path)

def cleanup_old_backups(backup_dir, days_to_keep=7):
    """
    Removes backups older than the specified number of days.

    Regular files are judged by their modification time. Directories are only
    considered when they are named like an uncompressed backup
    ("backup_YYYYMMDD_HHMMSS"); their age is taken from that name, because
    shutil.copytree copies the source directory's modification time onto the
    copy, so the directory's own mtime does not say when the backup was made.
    Any other directory is left untouched.

    Args:
        backup_dir (str): Path to the backup storage directory
        days_to_keep (int): Number of days to keep backups
    """
    backup_path = Path(backup_dir)
    if not backup_path.exists():
        return

    cutoff_date = datetime.datetime.now() - datetime.timedelta(days=days_to_keep)
    # Computed once; the cutoff does not change while iterating.
    cutoff_timestamp = cutoff_date.timestamp()

    for entry in backup_path.iterdir():
        if entry.is_file():
            if entry.stat().st_mtime >= cutoff_timestamp:
                continue
            entry.unlink()
        elif entry.is_dir() and not entry.is_symlink():
            try:
                created = datetime.datetime.strptime(
                    entry.name, "backup_%Y%m%d_%H%M%S"
                )
            except ValueError:
                # Not a directory produced by the backup naming scheme.
                continue
            if created >= cutoff_date:
                continue
            shutil.rmtree(entry)
        else:
            continue
        print(f"Removed old backup: {entry}")

def scheduled_backup():
    """Run one backup-then-cleanup cycle; meant to be registered as a scheduled job."""
    # Configuration - modify these paths for your environment
    BACKUP_DIR = "./backups"          # Where the archives are stored
    SOURCE_DIR = "./important_files"  # Directory being backed up
    retention_days = 7                # Backups older than this are removed

    try:
        # A new compressed backup first, then prune whatever has expired.
        create_backup(SOURCE_DIR, BACKUP_DIR, compress=True)
        cleanup_old_backups(BACKUP_DIR, days_to_keep=retention_days)
    except Exception as err:
        # Any failure in either step is reported here instead of being raised.
        print(f"Backup error: {err}")

# Example usage
if __name__ == "__main__":
    # One-off backup, executed as soon as the script is started directly
    try:
        create_backup(source_dir="./sample_data", backup_dir="./backups", compress=True)
    except Exception as err:
        print(f"Error: {err}")

    # Schedule daily backups (uncomment to enable scheduling)
    # schedule.every().day.at("02:00").do(scheduled_backup)
    #
    # print("Backup scheduler started. Press Ctrl+C to stop.")
    # try:
    #     while True:
    #         schedule.run_pending()
    #         time.sleep(60)  # Check every minute
    # except KeyboardInterrupt:
    #     print("Scheduler stopped.")

What This Code Does

This Python snippet creates an automated backup system that compresses important files and manages storage by automatically removing old backups. The solution provides:

  1. Backup Creation: Compresses a specified directory into a ZIP file with timestamped naming
  2. Space Management: Automatically removes backups older than a specified number of days
  3. Scheduling Capability: Can be configured to run backups at specific intervals
  4. Error Handling: Gracefully handles missing directories and other common issues

Why It’s Useful

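This backup utility solves several common data protection problems: it guards against accidental deletion or corruption by keeping timestamped copies, limits disk usage through compression and automatic removal of old backups, and removes the need to remember manual backups by supporting scheduled runs.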

How to Run It

  1. Install Requirements: Install the third-party schedule package (pip install schedule), which the script imports for scheduling; everything else comes from the standard library

  2. Configure Paths:
    • Modify SOURCE_DIR to point to the directory you want to back up
    • Modify BACKUP_DIR to specify where backups should be stored
  3. Run Immediately:
    python backup_script.py
    
  4. Enable Scheduling:
    • Uncomment the scheduling section in the if __name__ == "__main__": block
    • Adjust the time in schedule.every().day.at("02:00") to your preferred backup time
  5. Customize Settings:
    • Change days_to_keep in cleanup_old_backups() to control how long backups are retained
    • Set compress=False in create_backup() if you prefer uncompressed backups

The script creates timestamped backups in your specified backup directory. When backups run through scheduled_backup(), it also removes backups older than your retention period, so only the most recent ones are kept.