ola0x · March 24, 2022 23:31
diff --git a/xml_to_coco.py b/xml_to_coco.py
 import os
 import glob
 import json
 import numpy as np
 import pandas as pd
 import xml.etree.ElementTree as ET

 # Path of the JSON file that the final json.dump call in this script writes.
 save_json_path = 'val_coco.json'

 def xml_to_csv(path):
     """Collect the object annotations of every XML file in a directory.

     Each ``*.xml`` file directly inside ``path`` is parsed and one row is
     emitted per ``<object>`` element.

     Args:
         path: Directory that holds the ``.xml`` annotation files.

     Returns:
         pandas.DataFrame with the columns ``filename``, ``width``,
         ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``
         (one row per object).  The frame is empty, but still has those
         columns, when no object is found.

     Raises:
         xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
         ValueError: If a size or coordinate value is not an integer literal.
     """
     def _child(parent, tag, position):
         # Prefer a lookup by tag so that optional or reordered sibling
         # elements cannot shift the value that is read.  The tag names
         # assume the Pascal VOC layout (TODO confirm for other exporters);
         # when the tag is absent the historical fixed position is used.
         found = parent.find(tag)
         # Compare with None explicitly: a childless Element is falsy.
         return parent[position] if found is None else found

     rows = []
     # glob yields matches in arbitrary order; sorting keeps the row order
     # (and every id later derived from it) reproducible between runs.
     for xml_file in sorted(glob.glob(path + '/*.xml')):
         root = ET.parse(xml_file).getroot()
         for member in root.findall('object'):
             size = root.find('size')
             box = _child(member, 'bndbox', 4)
             rows.append((
                 root.find('filename').text,
                 int(_child(size, 'width', 0).text),
                 int(_child(size, 'height', 1).text),
                 _child(member, 'name', 0).text,
                 int(_child(box, 'xmin', 0).text),
                 int(_child(box, 'ymin', 1).text),
                 int(_child(box, 'xmax', 2).text),
                 int(_child(box, 'ymax', 3).text),
             ))
     column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
     return pd.DataFrame(rows, columns=column_name)

 # The annotation files are read from <current working directory>/validation/XML.
 # The two components are passed separately so os.path.join inserts the
 # separator of the running platform; a literal backslash only names a
 # sub-directory on Windows.
 image_path = os.path.join(os.getcwd(), "validation", "XML")
 data = xml_to_csv(image_path)
 # data.to_csv(('XML' + '_labels.csv'), index=None)
 print('Successfully converted {} xml to csv.'.format(image_path))

 # Output containers for the three top-level sections of the JSON document.
 images, annotations = [], []

 # Placeholder category with id 0; the categories derived from the data
 # are numbered from 1 (see the "+ 1" below).
 category = {"supercategory": 'none', "id": 0, "name": 'None'}
 categories = [category]

 # Integer ids: one code per distinct file name, one per distinct class
 # label (shifted by one), and the row index as the annotation id.
 filename_codes = data['filename'].astype('category').cat.codes
 data['fileid'] = filename_codes
 data['categoryid'] = pd.Categorical(data['class'], ordered=True).codes + 1
 data['annid'] = data.index

 def image(row):
     """Build the ``images`` entry for one record.

     Args:
         row: Record exposing ``height``, ``width``, ``fileid`` and
             ``filename`` attributes (the script passes tuples produced by
             ``DataFrame.itertuples``).

     Returns:
         dict with the keys ``height``, ``width``, ``id`` and ``file_name``.
     """
     return {
         "height": row.height,
         "width": row.width,
         "id": row.fileid,
         "file_name": row.filename,
     }

 def category(row):
     """Build the ``categories`` entry for one record.

     Args:
         row: Indexable record with a ``categoryid`` attribute whose item
             at position 4 is the class label.  In the tuples the script
             obtains from ``DataFrame.itertuples`` position 0 is the index,
             which puts the ``class`` column at position 4.

     Returns:
         dict with the keys ``supercategory``, ``id`` and ``name``.
     """
     label = row[4]
     return {"supercategory": 'None', "id": row.categoryid, "name": label}

 def annotation(row):
     """Build the ``annotations`` entry for one bounding box.

     Args:
         row: Record exposing ``xmin``, ``ymin``, ``xmax``, ``ymax``,
             ``fileid``, ``categoryid`` and ``annid`` attributes.

     Returns:
         dict with the keys ``segmentation`` (always empty), ``iscrowd``
         (always 0), ``area``, ``image_id``, ``bbox``, ``category_id`` and
         ``id``.  ``bbox`` is ``[xmin, ymin, width, height]`` and ``area``
         is ``width * height``.
     """
     box_width = row.xmax - row.xmin
     box_height = row.ymax - row.ymin
     return {
         "segmentation": [],
         "iscrowd": 0,
         "area": box_width * box_height,
         "image_id": row.fileid,
         "bbox": [row.xmin, row.ymin, box_width, box_height],
         "category_id": row.categoryid,
         "id": row.annid,
     }

 # One annotation entry per row of the table.
 annotations.extend(annotation(record) for record in data.itertuples())

 # One image entry per distinct file id, in ascending id order.
 imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
 images.extend(image(record) for record in imagedf.itertuples())

 # One category entry per distinct category id, in ascending id order.
 catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
 categories.extend(category(record) for record in catdf.itertuples())

 # Assemble the final document from the three collected sections.
 data_coco = {
     "images": images,
     "categories": categories,
     "annotations": annotations,
 }

 # Write through a context manager so the file is flushed and closed as
 # soon as the dump finishes, including when json.dump raises.
 with open(save_json_path, "w") as json_file:
     json.dump(data_coco, json_file, indent=4)
	import os
	import glob
	import json
	import numpy as np
	import pandas as pd
	import xml.etree.ElementTree as ET

	# Path of the JSON file that the final json.dump call in this script writes.
	save_json_path = 'val_coco.json'

	def xml_to_csv(path):
	    """Collect the object annotations of every XML file in a directory.

	    Each ``*.xml`` file directly inside ``path`` is parsed and one row is
	    emitted per ``<object>`` element.

	    Args:
	        path: Directory that holds the ``.xml`` annotation files.

	    Returns:
	        pandas.DataFrame with the columns ``filename``, ``width``,
	        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``
	        (one row per object).  The frame is empty, but still has those
	        columns, when no object is found.

	    Raises:
	        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
	        ValueError: If a size or coordinate value is not an integer literal.
	    """
	    def _child(parent, tag, position):
	        # Prefer a lookup by tag so that optional or reordered sibling
	        # elements cannot shift the value that is read.  The tag names
	        # assume the Pascal VOC layout (TODO confirm for other exporters);
	        # when the tag is absent the historical fixed position is used.
	        found = parent.find(tag)
	        # Compare with None explicitly: a childless Element is falsy.
	        return parent[position] if found is None else found

	    rows = []
	    # glob yields matches in arbitrary order; sorting keeps the row order
	    # (and every id later derived from it) reproducible between runs.
	    for xml_file in sorted(glob.glob(path + '/*.xml')):
	        root = ET.parse(xml_file).getroot()
	        for member in root.findall('object'):
	            size = root.find('size')
	            box = _child(member, 'bndbox', 4)
	            rows.append((
	                root.find('filename').text,
	                int(_child(size, 'width', 0).text),
	                int(_child(size, 'height', 1).text),
	                _child(member, 'name', 0).text,
	                int(_child(box, 'xmin', 0).text),
	                int(_child(box, 'ymin', 1).text),
	                int(_child(box, 'xmax', 2).text),
	                int(_child(box, 'ymax', 3).text),
	            ))
	    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
	    return pd.DataFrame(rows, columns=column_name)

	# The annotation files are read from <current working directory>/validation/XML.
	# The two components are passed separately so os.path.join inserts the
	# separator of the running platform; a literal backslash only names a
	# sub-directory on Windows.
	image_path = os.path.join(os.getcwd(), "validation", "XML")
	data = xml_to_csv(image_path)
	# data.to_csv(('XML' + '_labels.csv'), index=None)
	print('Successfully converted {} xml to csv.'.format(image_path))

	# Output containers for the three top-level sections of the JSON document.
	images, annotations = [], []

	# Placeholder category with id 0; the categories derived from the data
	# are numbered from 1 (see the "+ 1" below).
	category = {"supercategory": 'none', "id": 0, "name": 'None'}
	categories = [category]

	# Integer ids: one code per distinct file name, one per distinct class
	# label (shifted by one), and the row index as the annotation id.
	filename_codes = data['filename'].astype('category').cat.codes
	data['fileid'] = filename_codes
	data['categoryid'] = pd.Categorical(data['class'], ordered=True).codes + 1
	data['annid'] = data.index

	def image(row):
	    """Build the ``images`` entry for one record.

	    Args:
	        row: Record exposing ``height``, ``width``, ``fileid`` and
	            ``filename`` attributes (the script passes tuples produced by
	            ``DataFrame.itertuples``).

	    Returns:
	        dict with the keys ``height``, ``width``, ``id`` and ``file_name``.
	    """
	    return {
	        "height": row.height,
	        "width": row.width,
	        "id": row.fileid,
	        "file_name": row.filename,
	    }

	def category(row):
	    """Build the ``categories`` entry for one record.

	    Args:
	        row: Indexable record with a ``categoryid`` attribute whose item
	            at position 4 is the class label.  In the tuples the script
	            obtains from ``DataFrame.itertuples`` position 0 is the index,
	            which puts the ``class`` column at position 4.

	    Returns:
	        dict with the keys ``supercategory``, ``id`` and ``name``.
	    """
	    label = row[4]
	    return {"supercategory": 'None', "id": row.categoryid, "name": label}

	def annotation(row):
	    """Build the ``annotations`` entry for one bounding box.

	    Args:
	        row: Record exposing ``xmin``, ``ymin``, ``xmax``, ``ymax``,
	            ``fileid``, ``categoryid`` and ``annid`` attributes.

	    Returns:
	        dict with the keys ``segmentation`` (always empty), ``iscrowd``
	        (always 0), ``area``, ``image_id``, ``bbox``, ``category_id`` and
	        ``id``.  ``bbox`` is ``[xmin, ymin, width, height]`` and ``area``
	        is ``width * height``.
	    """
	    box_width = row.xmax - row.xmin
	    box_height = row.ymax - row.ymin
	    return {
	        "segmentation": [],
	        "iscrowd": 0,
	        "area": box_width * box_height,
	        "image_id": row.fileid,
	        "bbox": [row.xmin, row.ymin, box_width, box_height],
	        "category_id": row.categoryid,
	        "id": row.annid,
	    }

	# One annotation entry per row of the table.
	for row in data.itertuples():
	    annotations.append(annotation(row))

	# One image entry per distinct file id, in ascending id order.
	imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
	for row in imagedf.itertuples():
	    images.append(image(row))

	# One category entry per distinct category id, in ascending id order.
	catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
	for row in catdf.itertuples():
	    categories.append(category(row))

	# Assemble the final document from the three collected sections.
	data_coco = {
	    "images": images,
	    "categories": categories,
	    "annotations": annotations,
	}

	# Write through a context manager so the file is flushed and closed as
	# soon as the dump finishes, including when json.dump raises.
	with open(save_json_path, "w") as json_file:
	    json.dump(data_coco, json_file, indent=4)