import csv
import json
from pathlib import Path
from typing import Dict, List, Optional
def csv_to_json(
    csv_file_path: str,
    json_file_path: str,
    required_fields: Optional[List[str]] = None,
    delimiter: str = ','
) -> Dict[str, int]:
    """
    Convert a CSV file to a JSON array of row objects, with optional validation.

    The first CSV row is treated as the header. Each following row becomes one
    JSON object keyed by the header names; values are written as read by
    ``csv.DictReader`` (no type conversion is attempted).

    Args:
        csv_file_path: Path to input CSV file (UTF-8, with or without a BOM).
        json_file_path: Path to output JSON file (overwritten if it exists).
        required_fields: Fields that must be present in the header and
            non-empty in a row for that row to be kept. ``None`` or an empty
            list skips validation.
        delimiter: CSV field delimiter.

    Returns:
        Dictionary with conversion statistics:
        {
            'total_rows': int,    # data rows read (header excluded)
            'valid_rows': int,    # rows written to the JSON file
            'skipped_rows': int   # rows dropped for an empty required field
        }

    Raises:
        FileNotFoundError: If ``csv_file_path`` does not exist.
        ValueError: If a required field is missing from the CSV header
            (including the case of an empty file, which has no header).
    """
    stats = {'total_rows': 0, 'valid_rows': 0, 'skipped_rows': 0}
    data = []
    csv_path = Path(csv_file_path)
    json_path = Path(json_file_path)

    if not csv_path.exists():
        raise FileNotFoundError(f"CSV file not found: {csv_file_path}")

    # newline='' hands line-ending handling to the csv module so that line
    # breaks inside quoted fields survive unchanged. 'utf-8-sig' decodes plain
    # UTF-8 identically but also drops a leading BOM, which would otherwise
    # become part of the first header name.
    with csv_path.open('r', encoding='utf-8-sig', newline='') as csv_file:
        reader = csv.DictReader(csv_file, delimiter=delimiter)

        if required_fields:
            # fieldnames is None when the file has no header row at all;
            # treat that as "every required field is missing".
            header = reader.fieldnames or []
            missing_fields = [f for f in required_fields if f not in header]
            if missing_fields:
                raise ValueError(f"Missing required fields: {', '.join(missing_fields)}")

        for row in reader:
            stats['total_rows'] += 1
            # A row is incomplete when any required value is empty or absent
            # (short rows are padded with None by DictReader).
            if required_fields and not all(row.get(field) for field in required_fields):
                stats['skipped_rows'] += 1
                continue
            data.append(row)
            stats['valid_rows'] += 1

    with json_path.open('w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=2, ensure_ascii=False)

    return stats
# Example usage
if __name__ == "__main__":
    # Demo run: convert input.csv to output.json, keeping only rows whose
    # id, name and email values are all non-empty.
    example_args = {
        "csv_file_path": "input.csv",
        "json_file_path": "output.json",
        "required_fields": ["id", "name", "email"],
        "delimiter": ",",
    }
    try:
        summary = csv_to_json(**example_args)
        print(f"Conversion complete: {summary}")
    except Exception as err:
        # Top-level boundary of the script: report any failure as a message.
        print(f"Error: {err}")
This Python snippet provides a robust CSV to JSON converter with built-in data validation capabilities. Here’s why it’s useful:
Data Validation: The converter verifies that all required fields exist in the CSV header before processing, and skips any row in which a required field is empty.
Type Safety: Uses Python’s type hints for better code clarity and IDE support.
Flexible Delimiters: Works with different CSV formats (comma, tab, pipe, etc.).
Statistics Tracking: Returns conversion metrics (total rows, valid rows, skipped rows).
Error Handling: Properly handles file operations and provides clear error messages.
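To use this code:
1. Save the code in a file (e.g. csv_to_json.py).
2. Prepare your CSV file (e.g. input.csv).
3. Run the script (python csv_to_json.py).
4. Check the output file (output.json) and console for results.
You can customize the behavior by:
- Changing the delimiter for other formats (e.g. delimiter='\t' for tab-separated files).
- Adjusting the required_fields parameter.
- Changing the indent parameter in json.dump.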
The script handles file paths with pathlib for cross-platform compatibility and includes proper encoding support for international characters.