Created
March 24, 2022 23:31
-
-
Save ola0x/b320500bd5fb4b6cd053a5c463c5b01e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import json | |
import numpy as np | |
import pandas as pd | |
import xml.etree.ElementTree as ET | |
# Destination file for the generated COCO-format annotation JSON.
save_json_path = 'val_coco.json'
def _child_text(parent, tag, position):
    """Return the text of ``parent``'s ``tag`` child, else of its child at ``position``."""
    node = parent.find(tag)
    if node is None:
        # Tag not present under that name: keep the legacy positional lookup.
        node = parent[position]
    return node.text


def xml_to_csv(path):
    """Collect every annotated object found in the XML files of a directory.

    Each ``*.xml`` file directly inside ``path`` is parsed and one row is
    emitted per ``<object>`` element. The files are presumably Pascal VOC
    style annotations (``filename``, ``size``, ``object``/``bndbox`` tags).

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``; it is
        empty (but still has those columns) when nothing was found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # sorted() keeps the row order reproducible regardless of the order in
    # which the operating system lists the directory.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Files without objects contribute no rows and are not inspected further.
            continue
        filename = root.find('filename').text
        size = root.find('size')
        # Children are looked up by tag name so that optional or reordered
        # elements cannot shift the values into the wrong column.
        width = int(_child_text(size, 'width', 0))
        height = int(_child_text(size, 'height', 1))
        for member in members:
            box = member.find('bndbox')
            if box is None:
                box = member[4]
            xml_list.append((
                filename,
                width,
                height,
                _child_text(member, 'name', 0),
                int(_child_text(box, 'xmin', 0)),
                int(_child_text(box, 'ymin', 1)),
                int(_child_text(box, 'xmax', 2)),
                int(_child_text(box, 'ymax', 3)),
            ))
    return pd.DataFrame(xml_list, columns=column_name)
# Directory with the XML annotations, relative to the current working
# directory. The components are joined separately so the path is valid on
# every platform, not only where "\\" is the separator.
image_path = os.path.join(os.getcwd(), "validation", "XML")
data = xml_to_csv(image_path)
# data.to_csv(('XML' + '_labels.csv'), index=None)
print('Successfully converted {} xml to csv.'.format(image_path))

# Accumulators for the three top-level sections of the output JSON.
images = []
categories = []
annotations = []

# Placeholder category occupying id 0; real classes are numbered from 1 below.
categories.append({"supercategory": 'none', "id": 0, "name": 'None'})

# One integer id per distinct file name.
data['fileid'] = data['filename'].astype('category').cat.codes
# One integer id per distinct class name, shifted by one to leave id 0 free.
data['categoryid'] = pd.Categorical(data['class'], ordered=True).codes + 1
# The row index doubles as the annotation id.
data['annid'] = data.index
def image(row):
    """Build the ``images`` entry of the output JSON for one row.

    Args:
        row: Record exposing ``height``, ``width``, ``fileid`` and
            ``filename`` attributes.

    Returns:
        A dict with the keys ``height``, ``width``, ``id`` and ``file_name``.
    """
    return {
        "height": row.height,
        "width": row.width,
        "id": row.fileid,
        "file_name": row.filename,
    }
def category(row):
    """Build the ``categories`` entry of the output JSON for one row.

    Args:
        row: Record exposing a ``categoryid`` attribute and carrying the
            class name at position 4.

    Returns:
        A dict with the keys ``supercategory``, ``id`` and ``name``.
    """
    return {
        "supercategory": 'None',
        "id": row.categoryid,
        # The class name is read by position: its column is called "class",
        # a Python keyword, so it cannot be spelled as ``row.class``.
        # Position 4 is that column when the row carries the index first.
        "name": row[4],
    }
def annotation(row):
    """Build the ``annotations`` entry of the output JSON for one row.

    Args:
        row: Record exposing ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``fileid``, ``categoryid`` and ``annid`` attributes.

    Returns:
        A dict with the keys ``segmentation``, ``iscrowd``, ``area``,
        ``image_id``, ``bbox``, ``category_id`` and ``id``. ``bbox`` is
        ``[xmin, ymin, width, height]`` and ``area`` is ``width * height``.
    """
    # Corner coordinates are converted to an extent once and reused below.
    box_width = row.xmax - row.xmin
    box_height = row.ymax - row.ymin
    return {
        "segmentation": [],
        "iscrowd": 0,
        "area": box_width * box_height,
        "image_id": row.fileid,
        "bbox": [row.xmin, row.ymin, box_width, box_height],
        "category_id": row.categoryid,
        "id": row.annid,
    }
# One annotation entry per row.
annotations.extend(annotation(row) for row in data.itertuples())

# One image entry per distinct file id, in ascending id order.
imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
images.extend(image(row) for row in imagedf.itertuples())

# One category entry per distinct category id, in ascending id order.
catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
categories.extend(category(row) for row in catdf.itertuples())

data_coco = {
    "images": images,
    "categories": categories,
    "annotations": annotations,
}

# The context manager closes (and thereby flushes) the file even if
# serialisation fails part-way through.
with open(save_json_path, "w") as json_file:
    json.dump(data_coco, json_file, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment