"""This tool extracts product prices from an e-commerce website and saves them to a CSV file for price tracking and analysis."""
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime
import time
def scrape_product_prices(url, product_selector, price_selector,
                          output_file='product_prices.csv', timeout=30,
                          name_selector=None):
    """
    Scrape product names and prices from a web page and save them to CSV.

    Args:
        url (str): The URL of the webpage to scrape.
        product_selector (str): CSS selector matching each product element.
        price_selector (str): CSS selector for the price, relative to a
            product element.
        output_file (str): Name of the output CSV file. NOTE: the file is
            overwritten ('w' mode) on every call.
        timeout (int | float): Seconds to wait for the HTTP response before
            aborting — prevents the request from hanging indefinitely.
        name_selector (str | None): Optional CSS selector for the product
            name, relative to a product element. When None (the default,
            matching the original behavior), the product element's full
            text is used as the name.
    """
    # Mimic a real browser so simple bot filters don't reject the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # Send GET request; a missing timeout would let this block forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an exception for 4xx/5xx codes

        # Parse the HTML content and collect all product elements.
        soup = BeautifulSoup(response.content, 'html.parser')
        products = soup.select(product_selector)

        with open(output_file, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            # Write header row
            writer.writerow(['Timestamp', 'Product Name', 'Price'])

            for product in products:
                try:
                    # BUG FIX: the original concatenated product_selector with
                    # its own last token to build a nested selector that almost
                    # never matches, so it always fell back to the whole
                    # product element. Use the explicit name_selector when
                    # provided; otherwise use the product element itself.
                    name_element = None
                    if name_selector:
                        name_element = product.select_one(name_selector)
                    if name_element is None:
                        name_element = product
                    product_name = name_element.get_text(strip=True)

                    # Extract and clean the price text (keep digits, '.' and
                    # '-'; drops currency symbols, commas, whitespace).
                    price_element = product.select_one(price_selector)
                    if price_element:
                        price = price_element.get_text(strip=True)
                        price = ''.join(c for c in price
                                        if c.isdigit() or c in '.-')
                    else:
                        price = 'N/A'

                    writer.writerow([datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                     product_name, price])
                except Exception as e:
                    # Best-effort per product: skip a malformed entry rather
                    # than aborting the whole scrape.
                    print(f"Error processing product: {e}")
                    continue

        print(f"Data successfully saved to {output_file}")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
def monitor_prices(url, product_selector, price_selector, interval=3600,
                   output_file='price_history.csv'):
    """
    Repeatedly scrape product prices on a fixed schedule until interrupted.

    Runs scrape_product_prices() in a loop, sleeping `interval` seconds
    between passes. Ctrl-C (KeyboardInterrupt) stops the loop cleanly; any
    other error is reported and retried after a one-minute back-off.

    Args:
        url (str): The URL of the webpage to scrape.
        product_selector (str): CSS selector for product elements.
        price_selector (str): CSS selector for price elements.
        interval (int): Seconds between scrapes (default: 1 hour).
        output_file (str): Name of the output CSV file.
            NOTE(review): scrape_product_prices opens this file in 'w' mode,
            so each pass appears to overwrite the previous history — confirm
            whether append semantics were intended.
    """
    print(f"Starting price monitoring. Interval: {interval} seconds")
    running = True
    while running:
        try:
            scrape_product_prices(url, product_selector, price_selector,
                                  output_file)
            print(f"Waiting {interval} seconds until next scrape...")
            time.sleep(interval)
        except KeyboardInterrupt:
            print("Monitoring stopped by user.")
            running = False
        except Exception as e:
            print(f"Error during monitoring: {e}")
            # Brief back-off before retrying after an unexpected failure.
            time.sleep(60)
# Example usage
if __name__ == "__main__":
    # Example: Scraping product prices from a fictional electronics store.
    URL = "https://example-electronics-store.com/laptops"
    PRODUCT_SELECTOR = ".product-item"
    PRICE_SELECTOR = ".price"

    # Run one scrape pass and write the results to the default CSV file.
    scrape_product_prices(URL, PRODUCT_SELECTOR, PRICE_SELECTOR)

    # Uncomment the following line to start continuous monitoring
    # monitor_prices(URL, PRODUCT_SELECTOR, PRICE_SELECTOR, interval=1800)  # Every 30 minutes
This web scraping tool is designed to extract product information (names and prices) from e-commerce websites and save the data to a CSV file for analysis and price tracking. It is particularly useful for watching for price drops, comparing sellers, and building a historical record of prices over time.
The tool works by sending HTTP requests to a specified URL, parsing the HTML content using BeautifulSoup, and extracting product names and prices using CSS selectors. It adds realistic browser headers to avoid being blocked by websites and handles common errors gracefully.
The script includes two main functions:
- scrape_product_prices(): performs a single scraping operation and saves the results to a CSV file.
- monitor_prices(): continuously monitors prices at specified intervals (useful for tracking price changes over time).

To use this tool, you need to install the dependencies:
    pip install requests beautifulsoup4

The output CSV file includes timestamps, making it easy to track price changes over time. For best results, ensure you comply with the website's robots.txt and terms of service.