-
-
Save allenday/22da30122526b321e32e8b8967a26efb to your computer and use it in GitHub Desktop.
Converts the JSON file downloaded using the image classifier tool of Dataturks into a dataset folder.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script was created solely under Dataturks. All copyrights are reserved.
# EXAMPLE USAGE
# python3 tensorflow_json_parser.py --json_file "flower.json" --dataset_path "Dataset5/"
import json | |
import glob | |
import urllib.request | |
import argparse | |
import random | |
import os | |
from pathlib import Path | |
def downloader(image_url, i):
    """Download one image and save it under the name ``i``.

    Args:
        image_url: Address of the image. A leading ``http://`` or
            ``https://`` (any letter case) is recognised; a URL without
            either prefix is fetched over plain HTTP.
        i: Name (or path) of the output file. ``.jpg`` is appended only when
            the name has no extension, so a name such as ``photo.png`` is
            kept as is and ``7`` becomes ``7.jpg``. A relative name is
            resolved against the current working directory.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` (for example
        ``urllib.error.URLError``) is propagated to the caller.
    """
    full_file_name = str(i)
    # Appending ".jpg" unconditionally would turn "photo.jpg" into
    # "photo.jpg.jpg", which callers checking for "photo.jpg" never find.
    if not os.path.splitext(full_file_name)[1]:
        full_file_name += '.jpg'

    # Detect the protocol without assuming its letter case, and never cut
    # characters off a URL that has no scheme prefix at all.
    lowered = image_url.lower()
    if lowered.startswith('https://'):
        prefix, offset = 'https://', 8
    elif lowered.startswith('http://'):
        prefix, offset = 'http://', 7
    else:
        prefix, offset = 'http://', 0

    # Keep URL delimiters and existing percent-escapes untouched so ports,
    # query strings and already-escaped URLs survive; only characters such
    # as spaces or non-ASCII letters get percent-encoded.
    url_safe_chars = "%/:=&?~#+!$,;'@()*[]"
    escaped_url = prefix + urllib.request.quote(image_url[offset:], safe=url_safe_chars)
    print(escaped_url)
    urllib.request.urlretrieve(escaped_url, full_file_name)
def _load_records(json_path):
    """Read a JSON-lines file: one JSON object per non-blank line."""
    with open(json_path) as handle:
        # Blank lines (e.g. a trailing newline) are skipped instead of
        # crashing json.loads.
        return [json.loads(line) for line in handle if line.strip()]


def _group_urls_by_label(records):
    """Map the first label of every labelled record to its content URLs."""
    label_to_urls = {}
    for record in records:
        # NOTE(review): tolerates a missing or null "annotation"/"labels"
        # entry by treating the record as unlabelled -- confirm this matches
        # the export format.
        labels = (record.get('annotation') or {}).get('labels') or []
        if not labels:
            continue
        label_to_urls.setdefault(labels[0], []).append(record['content'])
    return label_to_urls


def _is_downloaded(label_dir, name):
    """Tell whether the image already exists under ``label_dir``.

    The downloader may append ".jpg" to the name it is given, so both the
    plain name and the ".jpg"-suffixed one count as "already there".
    """
    return (label_dir / name).exists() or (label_dir / (name + '.jpg')).exists()


def main(argv=None):
    """Build a ``<dataset_path>/<label>/<image>`` tree from a JSON-lines export.

    Each record's ``content`` URL is downloaded into the folder named after
    the record's first label. Records without labels are skipped, and images
    already present on disk are not downloaded again.

    Args:
        argv: Command-line arguments; ``None`` makes argparse read
            ``sys.argv``.

    Raises:
        SystemExit: With code 1 (after printing the help text) when
            ``--json_file`` or ``--dataset_path`` is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json_file", help="path to json")
    parser.add_argument("--dataset_path", help="path to the dataset")
    args = parser.parse_args(argv)
    # Both options are required; either one missing is a usage error.
    if args.json_file is None or args.dataset_path is None:
        parser.print_help()
        raise SystemExit(1)

    label_to_urls = _group_urls_by_label(_load_records(args.json_file))
    print(label_to_urls.keys())

    dataset_dir = Path(args.dataset_path)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    for label, urls in label_to_urls.items():
        # Explicit paths instead of chdir()/chdir("../"): the relative hop
        # back breaks as soon as a label contains a path separator.
        label_dir = dataset_dir / str(label)
        label_dir.mkdir(parents=True, exist_ok=True)
        for index, url in enumerate(urls):
            # Keep the original file name; fall back to the running index
            # when the URL ends with "/" and therefore has no base name.
            name = os.path.basename(url) or str(index)
            if _is_downloaded(label_dir, name):
                continue
            downloader(url, str(label_dir / name))


if __name__ == "__main__":
    main()
escape urls as needed
detect protocol
only download if not exists
keep original filename
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
skip empty label sets