Last active
February 18, 2025 07:21
-
-
Save nobucshirai/1bb6b5254935d5dda90e5d5b7bbc2410 to your computer and use it in GitHub Desktop.
A Python script to convert .xlsx files to .csv with UTF-8 encoding, supporting optional output file specification and overwrite protection.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Convert an Excel (.xlsx) file to a CSV (UTF-8) file. | |
Usage: | |
python3 xlsx_to_csv.py [--method {pandas,soffice}] input.xlsx [output.csv] | |
Options: | |
-h, --help Show this help message and exit. | |
--method {pandas,soffice} Conversion method to use (default: soffice).
Features: | |
- Accepts an input .xlsx file as an argument. | |
- Optionally allows specifying an output .csv filename. | |
- Defaults to using the same basename for the output file if not specified (soffice method).
- Outputs to stdout if no output filename is provided (only for pandas method). | |
- Prompts before overwriting an existing file. | |
- Uses the 'soffice' command (LibreOffice) when selected and available. | |
""" | |
import argparse | |
import os | |
import sys | |
import subprocess | |
import tempfile | |
import shutil | |
import pandas as pd | |
def convert_with_pandas(input_file: str, output_file: str | None = None) -> None: | |
"""Converts an Excel file to a CSV file using pandas.""" | |
try: | |
df = pd.read_excel(input_file, engine='openpyxl') | |
if output_file: | |
if os.path.exists(output_file): | |
confirm = input(f"File '{output_file}' exists. Overwrite? (y/n): ").strip().lower() | |
if confirm != 'y': | |
print("Operation cancelled.") | |
return | |
df.to_csv(output_file, index=False, encoding='utf-8') | |
print(f"Converted '{input_file}' to '{output_file}'") | |
else: | |
df.to_csv(sys.stdout, index=False, encoding='utf-8') | |
except Exception as e: | |
print(f"Error during pandas conversion: {e}", file=sys.stderr) | |
def convert_with_soffice(input_file: str, output_file: str | None = None) -> None: | |
"""Converts an Excel file to a CSV file using the soffice command.""" | |
soffice_path = shutil.which('soffice') | |
if not soffice_path: | |
print("Error: 'soffice' command not found. Please install LibreOffice or select the pandas method.", file=sys.stderr) | |
sys.exit(1) | |
# Determine the output directory and target filename. | |
input_basename = os.path.splitext(os.path.basename(input_file))[0] + ".csv" | |
if output_file: | |
outdir = os.path.abspath(os.path.dirname(output_file)) or os.getcwd() | |
target_csv = os.path.join(outdir, input_basename) | |
# Check if output_file exists and prompt for overwrite. | |
if os.path.exists(output_file) and os.path.abspath(output_file) != os.path.abspath(target_csv): | |
confirm = input(f"File '{output_file}' exists. Overwrite? (y/n): ").strip().lower() | |
if confirm != 'y': | |
print("Operation cancelled.") | |
return | |
try: | |
# Run soffice conversion with specified output directory. | |
subprocess.run([soffice_path, "--headless", "--convert-to", "csv", "--outdir", outdir, input_file], | |
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
except subprocess.CalledProcessError as e: | |
print(f"Error during soffice conversion: {e}", file=sys.stderr) | |
return | |
# If the generated file name does not match the desired output_file, rename it. | |
abs_target_csv = os.path.abspath(target_csv) | |
abs_output_file = os.path.abspath(output_file) | |
if abs_target_csv != abs_output_file: | |
try: | |
os.replace(abs_target_csv, abs_output_file) | |
target_csv = abs_output_file | |
except Exception as e: | |
print(f"Error renaming output file: {e}", file=sys.stderr) | |
return | |
print(f"Converted '{input_file}' to '{target_csv}'") | |
else: | |
# When no output file is provided, convert to a temporary directory and print to stdout. | |
with tempfile.TemporaryDirectory() as tmpdir: | |
try: | |
subprocess.run([soffice_path, "--headless", "--convert-to", "csv", "--outdir", tmpdir, input_file], | |
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
except subprocess.CalledProcessError as e: | |
print(f"Error during soffice conversion: {e}", file=sys.stderr) | |
return | |
temp_csv = os.path.join(tmpdir, input_basename) | |
try: | |
with open(temp_csv, encoding='utf-8') as f: | |
sys.stdout.write(f.read()) | |
except Exception as e: | |
print(f"Error reading temporary CSV file: {e}", file=sys.stderr) | |
def main() -> None:
    """Parse the command line and run the selected converter.

    Positional arguments are the input .xlsx path and an optional output
    .csv path; ``--method`` picks between the pandas and soffice converters
    and defaults to soffice.
    """
    cli = argparse.ArgumentParser(
        description="Convert an Excel (.xlsx) file to a CSV (UTF-8) file."
    )
    cli.add_argument("input_file", help="Path to the input .xlsx file.")
    cli.add_argument("output_file", nargs="?", help="Optional output .csv filename.")
    cli.add_argument(
        "--method",
        choices=["pandas", "soffice"],
        default="soffice",
        help="Conversion method to use (default: soffice).",
    )
    options = cli.parse_args()

    source = options.input_file
    # An omitted (or empty) output name means "not provided".
    destination = options.output_file or None
    if destination is None and options.method == "soffice":
        # soffice falls back to the input path with a .csv extension;
        # pandas keeps None, which makes it write to stdout.
        destination = f"{os.path.splitext(source)[0]}.csv"

    converters = {
        "soffice": convert_with_soffice,
        "pandas": convert_with_pandas,
    }
    converters[options.method](source, destination)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment