Skip to content

Instantly share code, notes, and snippets.

@TheHumanistX
Forked from slhck/ndjson_to_csv.py
Created March 10, 2022 04:24
Show Gist options
  • Save TheHumanistX/aa2e7de75eb9f65290ae9f161c65a572 to your computer and use it in GitHub Desktop.
Save TheHumanistX/aa2e7de75eb9f65290ae9f161c65a572 to your computer and use it in GitHub Desktop.
Convert LD-JSON files to CSV
#!/usr/bin/env python3
#
# ldjson_to_csv.py
#
# Author: Werner Robitza
#
# Convert .ldjson files to CSV.
# This assumes that the same keys are used in every line of the input file.
# It works on a line-by-line basis, so it should be fast and memory-efficient.
import json
import argparse
import sys
import csv
def convert_to_csv(ldjson_file):
    """Convert a line-delimited JSON file to CSV on standard output.

    Each non-blank line of the input is parsed as one JSON object. The
    sorted keys of the first object are written as the header row; every
    object (including the first) then contributes one row holding its
    values ordered by sorted key. Rows only line up with the header when
    every object uses the same set of keys.

    The input is read one line at a time, so the whole file is never held
    in memory.

    Args:
        ldjson_file: Path of the line-delimited JSON file to read.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    writer = csv.writer(sys.stdout, quoting=csv.QUOTE_NONNUMERIC)
    wrote_headers = False
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(ldjson_file, 'r', encoding='utf-8') as in_f:
        for line in in_f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. an empty line at the end of the file)
                # carry no record and would make json.loads raise.
                continue
            data = json.loads(line)
            sorted_data = sorted(data.items())
            if not wrote_headers:
                writer.writerow([k for k, _ in sorted_data])
                wrote_headers = True
            writer.writerow([v for _, v in sorted_data])
def main():
    """Read the input path from the command line and convert that file."""
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Single required positional argument: the file to convert.
    arg_parser.add_argument('input', help="input .ldjson")
    options = arg_parser.parse_args()
    convert_to_csv(options.input)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment