Python Snippets

Automatic PDF Merger with Bookmark Generation

import os
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from datetime import datetime

def merge_pdfs_with_bookmarks(input_folder, output_filename="merged_document.pdf"):
    """
    Merge every PDF in a folder into one PDF with a bookmark per source file.

    Files are merged in alphabetical order of filename. Each source file gets
    one top-level bookmark, titled with the filename minus its extension,
    placed at the first page contributed by that file. The merged document is
    written into ``input_folder``.

    Args:
        input_folder (str): Path to folder containing PDF files
        output_filename (str): Name of the output merged PDF file

    Returns:
        str: Path to the merged PDF file

    Raises:
        ValueError: If the folder contains no PDF files to merge.
    """
    # The output lands in the input folder, so a file left over from an
    # earlier run under the same name must not be merged into itself.
    # Directories that merely end in ".pdf" are skipped as well.
    pdf_files = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith('.pdf')
        and name != output_filename
        and os.path.isfile(os.path.join(input_folder, name))
    )

    # Validate before allocating the merger so nothing needs cleaning up.
    if not pdf_files:
        raise ValueError("No PDF files found in the specified folder")

    output_path = os.path.join(input_folder, output_filename)

    merger = PdfMerger()
    try:
        for filename in pdf_files:
            title = os.path.splitext(filename)[0]
            # outline_item makes the merger place the bookmark at the start of
            # the appended file, so no manual page counting or second pass
            # over the merged output is needed. import_outline=False keeps the
            # outline flat: one entry per source file.
            merger.append(
                os.path.join(input_folder, filename),
                outline_item=title,
                import_outline=False,
            )

        merger.write(output_path)
    finally:
        # The merger keeps the source files open until closed; release them
        # even when an append or the write fails.
        merger.close()

    return output_path

def main():
    """
    Prompt for a folder, merge its PDFs, and report what was merged.

    The merged file is given a timestamped name and written into the folder
    the user entered. Any failure during listing or merging is reported on
    stdout instead of propagating.
    """
    print("PDF Merger with Bookmarks")
    print("=" * 30)

    # Get input folder from user
    input_folder = input("Enter the path to folder containing PDFs: ").strip()

    if not os.path.exists(input_folder):
        print(f"Error: Folder '{input_folder}' does not exist")
        return
    if not os.path.isdir(input_folder):
        # A regular file passes the existence check but cannot be listed.
        print(f"Error: '{input_folder}' is not a folder")
        return

    try:
        # Snapshot the inputs BEFORE merging: the output is written into the
        # same folder, so listing afterwards would count the merged file as
        # one of its own inputs.
        pdf_files = sorted(
            name for name in os.listdir(input_folder)
            if name.lower().endswith('.pdf')
            and os.path.isfile(os.path.join(input_folder, name))
        )

        # Generate output filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"merged_pdfs_{timestamp}.pdf"

        result_path = merge_pdfs_with_bookmarks(input_folder, output_filename)
    except Exception as e:
        # Top-level boundary of an interactive script: report and stop.
        print(f"Error: {e}")
        return

    print("\nSuccess! PDFs merged successfully.")
    print(f"Output file: {result_path}")

    # Show what was merged
    print(f"\nMerged {len(pdf_files)} PDF files:")
    for i, filename in enumerate(pdf_files, 1):
        print(f"  {i}. {filename}")

# Run the interactive merger only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

What This Code Does

This Python script merges all PDF files in a specified folder into a single PDF document and automatically generates a bookmark for each one. Each bookmark corresponds to one of the original PDF files, making navigation through the merged document much easier.

The script works by:

  1. Scanning a user-specified folder for all PDF files
  2. Merging them in alphabetical order into one document
  3. Creating bookmarks for each original file based on their filenames
  4. Saving the result as a new PDF with a timestamp in the filename

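Why This Is Useful

This tool solves a common problem: combining many separate PDF files into one document that is still easy to navigate, without merging the files or adding bookmarks by hand.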

How to Run It

  1. Install Required Dependencies:
    pip install PyPDF2
    
  2. Prepare Your Files:
    • Place all PDF files you want to merge in a single folder
    • Files will be merged in alphabetical order by filename
  3. Run the Script:
    python pdf_merger.py
    
  4. Usage:
    • When prompted, enter the path to your folder containing PDFs
    • The merged PDF will be saved in the same folder with a timestamped filename
    • Bookmarks will appear in the PDF sidebar with the original filenames as titles

Features

The resulting PDF will have clickable bookmarks that let you jump directly to each original document section, making it ideal for creating comprehensive reports, combining research papers, or organizing multi-part documents.