This Python snippet demonstrates how to efficiently process multiple files in parallel using `ThreadPoolExecutor`. It's particularly useful for I/O-bound tasks where you need to handle multiple files simultaneously (e.g., log parsing or data transformation); for CPU-bound work such as batch image resizing, Python's GIL makes `ProcessPoolExecutor` the better choice (see the notes at the end).
```python
import os
from concurrent.futures import ThreadPoolExecutor

def process_file(file_path):
    """Example function to process a single file (modify for your use case)."""
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        # Replace this with your actual processing logic
        processed = f"Processed {file_path}: {len(content)} chars"
        return processed
    except Exception as e:
        return f"Error processing {file_path}: {str(e)}"

def parallel_process_files(directory, max_workers=4):
    """Process all files in a directory in parallel."""
    if not os.path.isdir(directory):
        raise ValueError(f"Directory not found: {directory}")
    file_paths = [
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    ]
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(process_file, file_paths))
    return results

# Example usage:
if __name__ == "__main__":
    processed_results = parallel_process_files("./data_files")
    for result in processed_results:
        print(result)
```
**How it works:**

- `process_file`: A placeholder function for your file processing logic (e.g., parsing, transforming, or analyzing).
- `parallel_process_files`: Collects every regular file in the directory and uses `ThreadPoolExecutor` to process them concurrently (default: 4 workers).
- The example usage processes all files in a sample directory (`./data_files`).

**To adapt it:**

- Replace the `process_file` function with your actual processing logic.
- Tune `max_workers` based on your system's capabilities.
- For CPU-bound tasks, use `ProcessPoolExecutor` instead (requires modifying imports); see the sketch below.
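If your per-file work is CPU-bound, the swap is small. Here is a minimal sketch of that variant (the name `parallel_process_files_cpu` is just for illustration); note that the worker function must live at module top level so it can be pickled, and the `__main__` guard becomes mandatory on platforms that spawn worker processes:

```python
import os
from concurrent.futures import ProcessPoolExecutor

def process_file(file_path):
    """Top-level function so worker processes can pickle it."""
    with open(file_path, 'r') as f:
        # Placeholder: real CPU-heavy work (parsing, transforming) goes here
        return f"Processed {file_path}: {len(f.read())} chars"

def parallel_process_files_cpu(directory, max_workers=None):
    """Like parallel_process_files above, but uses processes to sidestep the GIL."""
    file_paths = [
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    ]
    # max_workers=None lets the pool default to os.cpu_count()
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(process_file, file_paths))

if __name__ == "__main__":
    # Spawned workers re-import this module, so keep usage under the guard
    for result in parallel_process_files_cpu("./data_files"):
        print(result)
```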