Created
July 11, 2018 12:13
-
-
Save sameerg07/4e0337ed2f77845edbb319633bd324e5 to your computer and use it in GitHub Desktop.
Converts the JSON file downloaded from the Dataturks image classifier tool into a dataset folder.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script was created solely under Dataturks. All copyrights are reserved.
# EXAMPLE USAGE
# python3 tensorflow_json_parser.py --json_file "flower.json" --dataset_path "Dataset5/"
import argparse
import glob
import json
import os
import random
import sys
import urllib.request
def downloader(image_url, i):
    """Download *image_url* and save it as ``<i>.jpg``.

    Args:
        image_url: URL of the image to fetch; handed unchanged to
            ``urllib.request.urlretrieve``.
        i: Target file name without extension. It is converted with ``str``
            and ``.jpg`` is appended. A relative name is resolved against
            the current working directory; a name containing directory
            components is written to that location.

    Any exception raised by ``urllib.request.urlretrieve`` is not caught
    here and propagates to the caller.
    """
    full_file_name = str(i) + '.jpg'
    urllib.request.urlretrieve(image_url, full_file_name)
def _load_records(json_path):
    """Decode every non-blank line of *json_path* with ``json.loads``.

    The input is read as JSON lines (one JSON document per line). Blank
    lines, such as a trailing empty line, are skipped instead of raising
    ``json.JSONDecodeError``.
    """
    with open(json_path) as json_lines:
        return [json.loads(line) for line in json_lines if line.strip()]


def _group_urls_by_label(records):
    """Map each record's first label to the list of its ``content`` values.

    Only ``annotation.labels[0]`` is used as the key. Records whose
    ``annotation`` is missing/empty or whose ``labels`` list is empty are
    skipped, since there is no folder to file them under.
    """
    label_to_urls = {}
    for record in records:
        annotation = record.get('annotation') or {}
        labels = annotation.get('labels') or []
        if not labels:
            continue
        # setdefault gives O(1) grouping without a parallel list of names.
        label_to_urls.setdefault(labels[0], []).append(record['content'])
    return label_to_urls


if __name__ == "__main__":
    a = argparse.ArgumentParser()
    a.add_argument("--json_file", help="path to json")
    a.add_argument("--dataset_path", help="path to the dataset")
    args = a.parse_args()
    # Both options are needed below, so bail out when EITHER one is missing.
    if args.json_file is None or args.dataset_path is None:
        a.print_help()
        sys.exit(1)

    label_to_urls = _group_urls_by_label(_load_records(args.json_file))
    print(label_to_urls.keys())

    # exist_ok lets the script be re-run against an existing dataset folder,
    # and makedirs also copes with a nested --dataset_path.
    os.makedirs(args.dataset_path, exist_ok=True)
    for label, urls in label_to_urls.items():
        label_dir = os.path.join(args.dataset_path, str(label))
        os.makedirs(label_dir, exist_ok=True)
        # Files are named <label><index>.jpg inside the label's folder; the
        # target path is passed explicitly rather than changing directory.
        for index, url in enumerate(urls):
            downloader(url, os.path.join(label_dir, str(label) + str(index)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment