import asyncio
import aiohttp
import aiofiles
from pathlib import Path
from typing import List, Tuple
import sys
async def download_file(session: aiohttp.ClientSession, url: str, filename: str, progress_callback=None) -> Tuple[str, bool, str]:
    """
    Download a single file asynchronously with progress tracking.

    Args:
        session: aiohttp client session
        url: URL to download from
        filename: local filename to save as
        progress_callback: optional callback for progress updates

    Returns:
        Tuple of (filename, success, message)
    """
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return filename, False, f"HTTP {response.status}"

            # Get file size for progress tracking
            file_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            # Create directory if needed
            Path(filename).parent.mkdir(parents=True, exist_ok=True)

            async with aiofiles.open(filename, 'wb') as f:
                async for chunk in response.content.iter_chunked(8192):
                    await f.write(chunk)
                    downloaded += len(chunk)

                    # Report progress if callback provided
                    if progress_callback and file_size > 0:
                        progress = (downloaded / file_size) * 100
                        await progress_callback(filename, progress)

            return filename, True, f"Downloaded {downloaded} bytes"

    except Exception as e:
        return filename, False, str(e)


async def progress_reporter(filename: str, progress: float):
    """Simple progress reporter that updates console line."""
    sys.stdout.write(f"\r{filename}: {progress:.1f}%")
    sys.stdout.flush()


async def download_files_concurrently(urls_and_filenames: List[Tuple[str, str]], max_concurrent: int = 5):
    """
    Download multiple files concurrently with progress tracking.

    Args:
        urls_and_filenames: List of (url, filename) tuples
        max_concurrent: Maximum number of concurrent downloads
    """
    # Create semaphore to limit concurrent downloads
    semaphore = asyncio.Semaphore(max_concurrent)

    async def limited_download(session, url, filename):
        async with semaphore:
            return await download_file(session, url, filename, progress_reporter)

    async with aiohttp.ClientSession() as session:
        # Create tasks for all downloads
        tasks = [
            limited_download(session, url, filename)
            for url, filename in urls_and_filenames
        ]

        # Execute downloads concurrently
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Print results
        print("\n\nDownload Results:")
        success_count = 0

        for result in results:
            if isinstance(result, Exception):
                print(f"Error: {result}")
                continue

            filename, success, message = result
            status = "✓" if success else "✗"
            print(f"{status} {filename}: {message}")

            if success:
                success_count += 1

        print(f"\nCompleted: {success_count}/{len(urls_and_filenames)} downloads")


# Example usage
if __name__ == "__main__":
    # List of files to download (URL, local filename)
    downloads = [
        ("https://httpbin.org/json", "downloads/sample1.json"),
        ("https://httpbin.org/xml", "downloads/sample2.xml"),
        ("https://httpbin.org/html", "downloads/sample3.html"),
        ("https://httpbin.org/robots.txt", "downloads/robots.txt"),
        ("https://httpbin.org/uuid", "downloads/uuid.json")
    ]

    # Run the downloader
    asyncio.run(download_files_concurrently(downloads, max_concurrent=3))
This concurrent file downloader solves the common problem of efficiently downloading multiple files from the internet. Instead of downloading files one at a time, which can be slow, this snippet uses asyncio to download several files simultaneously while providing real-time progress updates.
The download_file function handles downloading a single file using an async HTTP session. It writes data chunks to disk as they arrive, enabling streaming for large files. The progress_reporter callback updates the console with download progress percentages, and the download_files_concurrently function manages the concurrent execution, limiting how many downloads happen at once.

To use it, install the dependencies with pip install aiohttp aiofiles, update the downloads list with your URLs and desired filenames, and run python downloader.py.

A few points are easy to customize:

- Adjust the max_concurrent parameter to control how many files download simultaneously.
- Modify the progress_reporter function for a different progress display (GUI updates, logging, etc.); a sketch of a logging-based callback follows below.
- Change the chunk size in iter_chunked() for different memory usage patterns.

This approach is particularly useful when downloading many files from stable connections, handling large datasets, or scraping content from multiple sources.
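As one example of swapping out the progress display, here is a minimal sketch of a logging-based callback. The logger name, the module-level _last_reported dictionary, and the 10% reporting step are illustrative choices, not part of the original snippet; the callback plugs into download_file exactly as progress_reporter does.

import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
logger = logging.getLogger("downloader")  # hypothetical logger name

# Remembers the last percentage logged per file so we only log every ~10%.
_last_reported = {}

async def logging_progress_reporter(filename: str, progress: float):
    """Log progress in roughly 10% steps instead of rewriting the console line."""
    last = _last_reported.get(filename, -10.0)
    if progress - last >= 10.0 or progress >= 100.0:
        _last_reported[filename] = progress
        logger.info("%s: %.1f%%", filename, progress)

# Usage: pass it in place of progress_reporter, e.g. inside limited_download:
#     return await download_file(session, url, filename, logging_progress_reporter)

A GUI progress bar would slot into the same hook; only the callback body changes. Likewise, the 8192 passed to iter_chunked() can be raised or lowered to trade memory use against the number of write calls.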