Python Snippets

Email Validator with SMTP Verification

This Python script validates email addresses using regular expressions and optionally verifies if the email actually exists by connecting to the SMTP server of the email domain. It’s useful for contact forms, user registration systems, or email marketing lists where you want to ensure email addresses are both properly formatted and likely to be deliverable.

import re
import smtplib
import dns.resolver
import socket
from email.utils import parseaddr

class EmailValidator:
    """Validate e-mail addresses in up to three increasingly expensive steps.

    1. Format check against a regular expression (no network access).
    2. Domain check: the domain must publish at least one MX record.
    3. Optional SMTP probe: ask the domain's preferred mail exchanger
       whether it would accept a message for the address.

    Steps 2 and 3 rely on ``dns.resolver`` (dnspython) and perform network
    I/O; every failure in those steps is reported as ``False`` rather than
    raised.
    """

    def __init__(self):
        # Pragmatic address pattern: a permissive local part, followed by
        # one or more dot-separated labels of at most 63 characters that
        # neither start nor end with a hyphen.
        self.email_regex = re.compile(
            r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
        )

    @staticmethod
    def _domain_of(email):
        """Return the part after the last '@', or None if there is none."""
        if not isinstance(email, str):
            return None
        _, at_sign, domain = email.rpartition('@')
        if not at_sign or not domain:
            return None
        return domain

    def validate_format(self, email):
        """Check if email has a valid format using regex.

        Args:
            email: Candidate address. Non-string values are rejected.

        Returns:
            bool: True when the whole string matches the address pattern.
        """
        if not isinstance(email, str):
            return False
        # fullmatch() instead of match(): with match(), the trailing "$"
        # also succeeds just before a final newline, so "a@b.com\n" would
        # be accepted as valid.
        return self.email_regex.fullmatch(email) is not None

    def validate_domain(self, email):
        """Check if the domain of the email exists (has MX record).

        Args:
            email (str): Address whose domain part is looked up.

        Returns:
            bool: True when the MX query returns at least one record;
            False when the address has no domain part or the lookup
            fails for any reason.
        """
        domain = self._domain_of(email)
        if domain is None:
            return False
        try:
            mx_records = dns.resolver.resolve(domain, 'MX')
        except Exception:
            # Deliberately best-effort: NXDOMAIN, NoAnswer, timeouts and
            # any other resolver failure all mean "could not confirm".
            return False
        return len(mx_records) > 0

    def verify_email(self, email, timeout=10):
        """
        Attempt to verify if email exists by connecting to its SMTP server.
        Note: Many servers will not reveal if an email exists for security reasons.

        Args:
            email (str): Address to probe.
            timeout (int | float): Socket timeout in seconds handed to
                ``smtplib.SMTP``.

        Returns:
            bool: True only when the server answers RCPT TO with code 250.
            Any DNS, network or protocol failure yields False.
        """
        domain = self._domain_of(email)
        if domain is None:
            return False
        try:
            mx_records = dns.resolver.resolve(domain, 'MX')
            # The lowest preference value is the most preferred exchanger.
            preferred = min(mx_records, key=lambda record: record.preference)
            # The textual exchange name may carry a trailing root dot;
            # strip it. An exchange of just "." is a null MX, meaning the
            # domain accepts no mail at all.
            mx_host = str(preferred.exchange).rstrip('.')
            if not mx_host:
                return False

            server = smtplib.SMTP(timeout=timeout)
            try:
                server.connect(mx_host)
                server.helo()
                server.mail('test@example.com')
                code, _ = server.rcpt(email)
            finally:
                # Always release the connection, including when one of the
                # commands above raised; otherwise the socket leaks.
                try:
                    server.quit()
                except (smtplib.SMTPException, OSError):
                    server.close()

            # 250 means email address accepted
            return code == 250
        except Exception:
            # Best-effort probe: any failure means "not verified".
            return False

    def validate(self, email, check_domain=True, verify=False):
        """
        Validate email address with configurable checks.

        Args:
            email (str): Email address to validate
            check_domain (bool): Whether to check if domain exists
            verify (bool): Whether to attempt SMTP verification

        Returns:
            dict: Validation results with the keys 'email', 'valid_format',
            'valid_domain', 'verified' and 'confidence'. 'confidence' is
            one of 'invalid', 'format_valid_domain_invalid',
            'format_valid_domain_unchecked', 'format_and_domain_valid'
            or 'verified'.
        """
        result = {
            'email': email,
            'valid_format': False,
            'valid_domain': False,
            'verified': False,
            'confidence': 'invalid'
        }

        # Basic format validation; nothing else is attempted on failure.
        result['valid_format'] = self.validate_format(email)
        if not result['valid_format']:
            return result

        # Domain validation
        if check_domain:
            result['valid_domain'] = self.validate_domain(email)
            if not result['valid_domain']:
                result['confidence'] = 'format_valid_domain_invalid'
                return result

        # SMTP verification
        if verify:
            result['verified'] = self.verify_email(email)

        # Determine confidence level. The format is known to be valid
        # here, and 'verified' / 'valid_domain' can only be True when the
        # corresponding check was requested.
        if result['verified']:
            result['confidence'] = 'verified'
        elif result['valid_domain']:
            result['confidence'] = 'format_and_domain_valid'
        else:
            result['confidence'] = 'format_valid_domain_unchecked'

        return result

# Example usage
if __name__ == "__main__":
    # Demo: run the format and MX checks (no SMTP probe) over a few sample
    # addresses and print a short report for each one.
    checker = EmailValidator()

    samples = (
        "user@example.com",
        "invalid.email",
        "test@nonexistentdomain12345.com",
        "user@gmail.com",
    )

    for address in samples:
        outcome = checker.validate(address, check_domain=True, verify=False)
        report_lines = (
            f"Email: {address}",
            f"  Format valid: {outcome['valid_format']}",
            f"  Domain valid: {outcome['valid_domain']}",
            f"  Confidence: {outcome['confidence']}",
            "",  # blank separator line between reports
        )
        print("\n".join(report_lines))

Explanation

This email validator provides a multi-level approach to email validation:

  1. Format Validation: Uses a comprehensive regular expression to check if the email address follows standard formatting rules.

  2. Domain Validation: Checks if the domain part of the email exists by looking up MX (Mail Exchange) DNS records. This helps identify typos in domain names.

  3. SMTP Verification (Optional): Attempts to connect to the email provider’s SMTP server and checks if it accepts the email address. Note that many email providers disable this verification for privacy reasons.

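The implementation is useful because each level is optional and more expensive than the one before it: callers can stop at the cheap format check, add the DNS lookup, or opt in to the slower SMTP probe, and the returned confidence value records how far validation got.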

To run this code, you’ll need to install the dnspython package:

pip install dnspython

For full SMTP verification, you also need to allow outbound network connections to SMTP servers (port 25, the default used by smtplib), but be aware that many providers rate-limit or block these requests. For production applications, it’s recommended to use only format and domain validation to avoid potential blocking or performance issues.