This snippet demonstrates how to efficiently process multiple files in parallel using Python’s concurrent.futures.ThreadPoolExecutor. This is particularly useful for IO-bound tasks like reading/writing files or processing large datasets.
```python
import os
from concurrent.futures import ThreadPoolExecutor


def process_file(file_path):
    """Example function to process a single file."""
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        # Example processing: count lines
        line_count = len(content.split('\n'))
        print(f"Processed {file_path}: {line_count} lines")
        return line_count
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None


def process_files_parallel(directory, max_workers=4):
    """Process all files in a directory in parallel."""
    files = [
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    ]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(process_file, files))

    # Filter out None results (errors)
    successful_results = [r for r in results if r is not None]
    print(f"\nTotal files processed: {len(successful_results)}")
    print(f"Total lines counted: {sum(successful_results)}")


if __name__ == "__main__":
    directory = "./sample_files"  # Change to your target directory
    process_files_parallel(directory, max_workers=8)
```
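If you want results reported as each file finishes rather than in the input order that `executor.map` preserves, `concurrent.futures.as_completed` can be used with the same `process_file` function. The sketch below is a minimal variation on the script above, not part of it; the function name `process_files_as_completed` is just an illustrative placeholder.

```python
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_files_as_completed(files, max_workers=4):
    """Variant that handles each result as soon as its file finishes."""
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its file path so results can be attributed
        future_to_path = {executor.submit(process_file, p): p for p in files}
        for future in as_completed(future_to_path):
            path = future_to_path[future]
            line_count = future.result()  # process_file already catches exceptions
            if line_count is not None:
                results.append((path, line_count))
    return results
```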
This approach uses `concurrent.futures` for easy parallelism. To try it:

- Save the script (`parallel_processor.py`).
- Create the target directory (`./sample_files`) and populate it with text files.
- Adjust `max_workers` based on your system's capabilities (default: 8).
- Run `python parallel_processor.py`.
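If you don't already have a directory of text files to test with, you could generate a few throwaway ones first. This is only a convenience sketch; the file names and contents are placeholders, not part of the original example.

```python
import os

# Create the sample directory the script expects and fill it with small text files
os.makedirs("./sample_files", exist_ok=True)
for i in range(5):
    with open(f"./sample_files/example_{i}.txt", "w") as f:
        # Each placeholder file gets a different number of lines
        f.write("\n".join(f"line {n}" for n in range(i + 1)))
```

After running this once, `python parallel_processor.py` should print a per-file line count followed by the totals.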