Skip to content

Instantly share code, notes, and snippets.

@nobucshirai
Last active February 18, 2025 07:21
Show Gist options
  • Save nobucshirai/1bb6b5254935d5dda90e5d5b7bbc2410 to your computer and use it in GitHub Desktop.
Save nobucshirai/1bb6b5254935d5dda90e5d5b7bbc2410 to your computer and use it in GitHub Desktop.
A Python script to convert .xlsx files to .csv with UTF-8 encoding, supporting optional output file specification and overwrite protection.
#!/usr/bin/env python3
"""
Convert an Excel (.xlsx) file to a CSV (UTF-8) file.
Usage:
python3 xlsx_to_csv.py [--method {pandas,soffice}] input.xlsx [output.csv]
Options:
-h, --help Show this help message and exit.
--method {pandas,soffice} Conversion method to use (default: soffice).
Features:
- Accepts an input .xlsx file as an argument.
- Optionally allows specifying an output .csv filename.
- With the soffice method, defaults to the input file's path with a .csv extension when no output file is specified.
- With the pandas method, outputs to stdout when no output filename is provided.
- Prompts before overwriting an existing file.
- Uses the 'soffice' command (LibreOffice) when selected and available.
"""
import argparse
import os
import sys
import subprocess
import tempfile
import shutil
import pandas as pd
def convert_with_pandas(input_file: str, output_file: str | None = None) -> None:
"""Converts an Excel file to a CSV file using pandas."""
try:
df = pd.read_excel(input_file, engine='openpyxl')
if output_file:
if os.path.exists(output_file):
confirm = input(f"File '{output_file}' exists. Overwrite? (y/n): ").strip().lower()
if confirm != 'y':
print("Operation cancelled.")
return
df.to_csv(output_file, index=False, encoding='utf-8')
print(f"Converted '{input_file}' to '{output_file}'")
else:
df.to_csv(sys.stdout, index=False, encoding='utf-8')
except Exception as e:
print(f"Error during pandas conversion: {e}", file=sys.stderr)
def convert_with_soffice(input_file: str, output_file: str | None = None) -> None:
"""Converts an Excel file to a CSV file using the soffice command."""
soffice_path = shutil.which('soffice')
if not soffice_path:
print("Error: 'soffice' command not found. Please install LibreOffice or select the pandas method.", file=sys.stderr)
sys.exit(1)
# Determine the output directory and target filename.
input_basename = os.path.splitext(os.path.basename(input_file))[0] + ".csv"
if output_file:
outdir = os.path.abspath(os.path.dirname(output_file)) or os.getcwd()
target_csv = os.path.join(outdir, input_basename)
# Check if output_file exists and prompt for overwrite.
if os.path.exists(output_file) and os.path.abspath(output_file) != os.path.abspath(target_csv):
confirm = input(f"File '{output_file}' exists. Overwrite? (y/n): ").strip().lower()
if confirm != 'y':
print("Operation cancelled.")
return
try:
# Run soffice conversion with specified output directory.
subprocess.run([soffice_path, "--headless", "--convert-to", "csv", "--outdir", outdir, input_file],
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
print(f"Error during soffice conversion: {e}", file=sys.stderr)
return
# If the generated file name does not match the desired output_file, rename it.
abs_target_csv = os.path.abspath(target_csv)
abs_output_file = os.path.abspath(output_file)
if abs_target_csv != abs_output_file:
try:
os.replace(abs_target_csv, abs_output_file)
target_csv = abs_output_file
except Exception as e:
print(f"Error renaming output file: {e}", file=sys.stderr)
return
print(f"Converted '{input_file}' to '{target_csv}'")
else:
# When no output file is provided, convert to a temporary directory and print to stdout.
with tempfile.TemporaryDirectory() as tmpdir:
try:
subprocess.run([soffice_path, "--headless", "--convert-to", "csv", "--outdir", tmpdir, input_file],
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
print(f"Error during soffice conversion: {e}", file=sys.stderr)
return
temp_csv = os.path.join(tmpdir, input_basename)
try:
with open(temp_csv, encoding='utf-8') as f:
sys.stdout.write(f.read())
except Exception as e:
print(f"Error reading temporary CSV file: {e}", file=sys.stderr)
def main() -> None:
    """Parse the command line and run the selected conversion backend.

    When no output file is given, the soffice method writes next to the
    input (same path with a ``.csv`` extension), while the pandas method
    receives ``None`` and therefore writes to standard output.
    """
    cli = argparse.ArgumentParser(
        description="Convert an Excel (.xlsx) file to a CSV (UTF-8) file."
    )
    cli.add_argument("input_file", help="Path to the input .xlsx file.")
    cli.add_argument("output_file", nargs="?", help="Optional output .csv filename.")
    cli.add_argument(
        "--method",
        choices=["pandas", "soffice"],
        default="soffice",
        help="Conversion method to use (default: soffice).",
    )
    opts = cli.parse_args()

    source = opts.input_file
    destination = opts.output_file
    use_soffice = opts.method == "soffice"

    if not destination:
        # Only soffice gets a derived default path; pandas falls back to stdout.
        if use_soffice:
            destination = f"{os.path.splitext(source)[0]}.csv"
        else:
            destination = None

    converter = convert_with_soffice if use_soffice else convert_with_pandas
    converter(source, destination)
# Entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment