Last active
January 10, 2020 11:37
-
-
Save YaYaB/ea60604bb655c953750ef58573e1d1cb to your computer and use it in GitHub Desktop.
Transform a JSON document into an LJSON document (one JSON value per line)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
from glob import glob | |
# Path to convert: either a single JSON document or a folder containing only
# JSON documents.
input_doc = "TODO"  # can be a json doc or a folder containing only jsons

# Output path: the input path minus its extension, suffixed "_output.ljson".
# os.path.splitext only strips an extension found on the last path component,
# so an extension-less input (e.g. a folder name) keeps its base name instead
# of collapsing to "_output.ljson". Trailing separators are dropped first so a
# folder written as "data/" maps to "data_output.ljson" rather than to a file
# inside the folder being read.
output_ljson = "{}_output.ljson".format(
    os.path.splitext(input_doc.rstrip("/" + os.sep))[0]
)
def transform_input_to_dumpable(input_doc):
    """Load a JSON file and return its content as a list of elements.

    Args:
        input_doc: Path of the JSON file to read.

    Returns:
        The parsed value itself when the top-level JSON value is an array;
        otherwise a one-element list wrapping the parsed value (object,
        string, number, boolean or null), so callers can always iterate the
        result element by element.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or not valid JSON
            (json.JSONDecodeError and UnicodeDecodeError both derive from it).
    """
    # Read input. Decode explicitly as UTF-8 rather than relying on the
    # platform's default text encoding.
    with open(input_doc, encoding="utf-8") as f:
        content = json.load(f)

    # Wrap any non-array top-level value in a list. Wrapping only dicts would
    # let a bare string be iterated character by character by the caller, and
    # a bare number or null would not be iterable at all.
    if not isinstance(content, list):
        content = [content]
    return content
# Parse json content and write in the output_file

# Resolve the inputs first: a directory contributes every entry matched by
# "*" inside it, anything else is treated as a single JSON file. The matches
# are sorted because glob's ordering depends on the filesystem, which would
# otherwise make the line order of the output vary between machines.
if os.path.isdir(input_doc):
    input_docs = sorted(glob(os.path.join(input_doc, "*")))
else:
    input_docs = [input_doc]  # by default take the file

# json.dumps escapes non-ASCII characters by default, so the lines written are
# plain ASCII; the explicit encoding just keeps the output independent of the
# platform's default.
with open(output_ljson, 'w', encoding="utf-8") as f:
    # Parse every json file
    for doc in input_docs:
        # Each document yields a list of elements; emit one JSON value per line.
        f.writelines(
            json.dumps(elem) + "\n"
            for elem in transform_input_to_dumpable(doc)
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment