import pandas as pd
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
def csv_to_excel_converter(csv_file_path, excel_file_path=None, validate_data=True):
"""
Convert CSV file to Excel format with optional data validation and formatting.
Args:
csv_file_path (str): Path to the input CSV file
excel_file_path (str, optional): Path for output Excel file.
If None, uses same name as CSV with .xlsx extension
validate_data (bool): Whether to perform basic data validation
Returns:
str: Path to the created Excel file
"""
# Read CSV file
df = pd.read_csv(csv_file_path, low_memory=False)
# Auto-generate Excel file path if not provided
if excel_file_path is None:
csv_path = Path(csv_file_path)
excel_file_path = csv_path.with_suffix('.xlsx')
# Data validation (if enabled)
if validate_data:
print("Performing data validation...")
# Check for missing values
missing_data = df.isnull().sum()
if missing_data.sum() > 0:
print(f"Warning: Found {missing_data.sum()} missing values across columns")
for col, count in missing_data[missing_data > 0].items():
print(f" - {col}: {count} missing values")
# Check data types
print("\nData types detected:")
for col in df.columns:
print(f" - {col}: {df[col].dtype}")
# Create Excel with multiple sheets for better organization
with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
# Main data sheet
df.to_excel(writer, sheet_name='Data', index=False)
# Summary statistics sheet (only for numeric columns)
numeric_df = df.select_dtypes(include=[np.number])
if not numeric_df.empty:
summary_stats = pd.DataFrame({
'Count': numeric_df.count(),
'Mean': numeric_df.mean(),
'Std Dev': numeric_df.std(),
'Min': numeric_df.min(),
'25%': numeric_df.quantile(0.25),
'50%': numeric_df.quantile(0.50),
'75%': numeric_df.quantile(0.75),
'Max': numeric_df.max()
}).round(2)
summary_stats.to_excel(writer, sheet_name='Summary')
# Data info sheet
info_data = pd.DataFrame({
'Column': df.columns,
'Data Type': [str(dtype) for dtype in df.dtypes],
'Non-Null Count': [df[col].count() for col in df.columns],
'Null Count': [df[col].isnull().sum() for col in df.columns],
'Unique Values': [df[col].nunique() for col in df.columns]
})
info_data.to_excel(writer, sheet_name='Info', index=False)
print(f"\nConversion complete! Excel file saved as: {excel_file_path}")
return str(excel_file_path)
# Example usage function
def example_usage():
"""
Demonstrates how to use the CSV to Excel converter with sample data
"""
# Create sample CSV data for demonstration
sample_data = """Name,Age,Salary,Department,Join_Date
John Doe,28,75000,Engineering,2023-01-15
Jane Smith,32,82000,Marketing,2022-03-22
Bob Johnson,45,95000,Sales,2020-11-08
Alice Brown,29,68000,HR,2024-02-10
Charlie Wilson,35,87000,Engineering,2021-07-19
Diana Lee,27,72000,Marketing,2023-09-05
"""
# Write sample data to CSV file
with open('sample_employees.csv', 'w') as f:
f.write(sample_data)
# Convert CSV to Excel
excel_file = csv_to_excel_converter('sample_employees.csv')
print(f"Sample Excel file created: {excel_file}")
print("The Excel file contains 3 sheets:")
print(" 1. Data - The main data from CSV")
print(" 2. Summary - Statistical summary of numeric columns")
print(" 3. Info - Metadata about each column")
return excel_file
if __name__ == "__main__":
# Run example if script is executed directly
example_usage()
This Python script provides a robust solution for converting CSV files to Excel format while adding valuable data analysis features. The main function csv_to_excel_converter() reads a CSV file and creates a professionally formatted Excel workbook with multiple sheets:
The converter includes optional data validation that identifies missing values and reports on data types, helping users understand their data quality before further processing.
Many organizations still rely on CSV files for data exchange, but Excel provides better visualization, formatting, and analytical capabilities. This tool bridges that gap by:
This is particularly valuable for data analysts, business professionals, and anyone who regularly works with data exports from databases or web services.
pip install pandas openpyxl
# Convert a CSV file to Excel
csv_to_excel_converter('your_file.csv')
# Convert with custom output path and validation
csv_to_excel_converter('input.csv', 'output.xlsx', validate_data=True)
python csv_to_excel_converter.py
The script will create a sample CSV file, convert it to Excel, and generate an Excel workbook with three organized sheets as described above.