import json
import re
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Dict, List, Tuple
@dataclass
class EmailValidationResult:
    """Outcome of validating a single email address."""

    # Address as it was checked: stripped and lower-cased once the input is
    # known to be a non-empty string, otherwise the raw input unchanged.
    email: str
    # True only when no validation error was recorded.
    is_valid: bool
    # Part after "@"; empty string when validation stopped before extraction.
    domain: str
    # Human-readable messages, one per failed check, in detection order.
    validation_errors: List[str]


class EmailValidator:
    """Syntax-level email validator with bulk processing and reporting.

    All checks are textual (pattern match, dot placement, length limits and
    a disposable-domain lookup); no DNS or mailbox verification is done.
    """

    def __init__(self):
        # Comprehensive regex pattern for email validation. The local part
        # may contain dots anywhere, so dot placement is checked separately
        # in validate_email().
        self.email_pattern = re.compile(
            r'^[a-zA-Z0-9.!#$%&\'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$'
        )
        # Common disposable email domains to flag (exact-match lookup).
        self.disposable_domains = {
            'mailinator.com', 'tempmail.org', 'guerrillamail.com',
            '10minutemail.com', 'throwaway.email', 'yopmail.com'
        }

    def validate_email(self, email: str) -> EmailValidationResult:
        """Validate a single email address and return detailed results.

        Args:
            email: Candidate address. Surrounding whitespace is stripped and
                the address is lower-cased before the checks run.

        Returns:
            An EmailValidationResult. A non-string/empty input or a pattern
            mismatch ends validation immediately with one error and an empty
            domain; otherwise every remaining check runs and all failures
            are collected.
        """
        errors: List[str] = []

        # Type is tested first so truthiness is only evaluated on strings.
        if not isinstance(email, str) or not email:
            errors.append("Email must be a non-empty string")
            return EmailValidationResult(email, False, "", errors)

        email = email.strip().lower()

        if not self.email_pattern.match(email):
            errors.append("Email format is invalid")
            return EmailValidationResult(email, False, "", errors)

        # Neither character class in the pattern admits "@", so a matching
        # address contains exactly one and partition() cannot miss.
        local_part, _, domain = email.partition('@')

        if domain in self.disposable_domains:
            errors.append("Disposable email addresses are not allowed")

        if '..' in email:
            errors.append("Email contains consecutive dots")

        # The pattern already forbids a dot at either end of the domain, so
        # the only place a stray leading/trailing dot can occur is the local
        # part. Checking the whole address would miss "name.@example.com".
        if local_part.startswith('.') or local_part.endswith('.'):
            errors.append("Email cannot start or end with a dot")

        if len(email) > 254:
            errors.append("Email is too long (max 254 characters)")

        if len(local_part) > 64:
            errors.append("Local part of email is too long (max 64 characters)")

        return EmailValidationResult(email, not errors, domain, errors)

    def bulk_validate(self, emails: List[str], max_workers: int = 10) -> List[EmailValidationResult]:
        """Validate multiple emails using a thread pool.

        Args:
            emails: Addresses to validate.
            max_workers: Upper bound on worker threads in the pool.

        Returns:
            One result per input, in the same order as ``emails``.

        Note:
            validate_email() performs no I/O, so on standard (GIL) CPython
            builds the pool is unlikely to give a real speed-up.
        """
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            return list(executor.map(self.validate_email, emails))

    def get_validation_summary(self, results: List[EmailValidationResult]) -> Dict:
        """Generate a summary report of validation results.

        Args:
            results: Results as returned by validate_email()/bulk_validate().

        Returns:
            A dict with total/valid/invalid counts, ``validity_rate`` as a
            percentage rounded to two decimals (0 for an empty input),
            ``error_distribution`` mapping each error message to its count,
            and ``domain_distribution`` holding the ten most frequent
            non-empty domains, most frequent first.
        """
        total = len(results)
        valid = sum(1 for r in results if r.is_valid)

        error_counts: Counter = Counter()
        domain_counts: Counter = Counter()
        for result in results:
            error_counts.update(result.validation_errors)
            # Results that failed before domain extraction carry "".
            if result.domain:
                domain_counts[result.domain] += 1

        return {
            'total_emails': total,
            'valid_emails': valid,
            'invalid_emails': total - valid,
            'validity_rate': round((valid/total)*100, 2) if total > 0 else 0,
            # Plain dicts keep the report shape independent of Counter.
            'error_distribution': dict(error_counts),
            'domain_distribution': dict(domain_counts.most_common(10)),
        }

    def export_results(self, results: List[EmailValidationResult], filename: str = "email_validation_results.json"):
        """Export validation results to JSON file.

        Writes a local-time timestamp, one entry per result and the summary
        from get_validation_summary() to ``filename`` (UTF-8, overwriting
        any existing file), then prints a confirmation line.
        """
        export_data = {
            'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
            'results': [
                {
                    'email': r.email,
                    'is_valid': r.is_valid,
                    'domain': r.domain,
                    'errors': r.validation_errors
                }
                for r in results
            ],
            'summary': self.get_validation_summary(results)
        }
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2, ensure_ascii=False)
        print(f"Results exported to {filename}")
# Example usage
def main() -> None:
    """Validate a sample list, print per-address results and a summary, then export."""
    # Sample email list for validation
    sample_emails = [
        "user@example.com",
        "invalid.email",
        "test@nonexistentdomain12345.com",
        "user@gmail.com",
        "disposable@mailinator.com",
        "valid.email+tag@domain.org",
        "too..many..dots@example.com",
        ".startswithdot@example.com",
        "endswithdot.@example.com",
        "normal@outlook.com",
        "another.valid@yahoo.com"
    ]

    # Initialize validator
    validator = EmailValidator()

    # Validate emails
    print("Validating emails...")
    results = validator.bulk_validate(sample_emails)

    # Display results: one status line per address, then its errors (if any)
    # and a blank separator line.
    print("\nValidation Results:")
    print("-" * 50)
    for result in results:
        status = "✓ VALID" if result.is_valid else "✗ INVALID"
        print(f"{status:10} {result.email}")
        for error in result.validation_errors:
            print(f" └─ {error}")
        print()

    # Print summary
    summary = validator.get_validation_summary(results)
    print("Validation Summary:")
    print("-" * 50)
    print(f"Total emails: {summary['total_emails']}")
    print(f"Valid: {summary['valid_emails']}")
    print(f"Invalid: {summary['invalid_emails']}")
    print(f"Validity rate: {summary['validity_rate']}%")

    # Show common errors, most frequent first
    if summary['error_distribution']:
        print("\nCommon validation errors:")
        for error, count in sorted(summary['error_distribution'].items(), key=lambda x: x[1], reverse=True):
            print(f" {error}: {count}")

    # Export results
    validator.export_results(results)


if __name__ == "__main__":
    main()
This is a comprehensive email validation system that provides detailed validation for individual or bulk email addresses. It goes beyond simple regex matching to provide meaningful error messages and insights about email quality.
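Email validation is crucial for any system that collects or sends to email addresses, such as applications, marketing campaigns, and user registration flows.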
This implementation is particularly valuable for developers who need to validate email lists for applications, marketing campaigns, or user registration systems.
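To try it, save the code to a file (e.g. email_validator.py) and run it with: python email_validator.py. To use it from your own code instead, import the class:
from email_validator import EmailValidator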
validator = EmailValidator()
result = validator.validate_email("user@example.com")
print(result.is_valid) # True or False
You can adjust max_workers in bulk_validate() for performance tuning, extend the disposable_domains set, and add custom checks to validate_email().
The code will automatically generate a JSON report file with detailed validation results and statistics when run.