Skip to content

Instantly share code, notes, and snippets.

@myounus96
Last active November 12, 2019 09:50
Show Gist options
  • Save myounus96/26e3f5532e2772742f1793c6eefc8fef to your computer and use it in GitHub Desktop.
Save myounus96/26e3f5532e2772742f1793c6eefc8fef to your computer and use it in GitHub Desktop.
Scripts for converting COCO to Pascal VOC and then to YOLO: extract_person converts COCO data to Pascal VOC, then voc_label converts that Pascal VOC data to YOLO.
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
# Root folder for the COCO -> VOC results; images and annotations go into the sub-folders below.
savepath="./coco/cow/train/"
img_dir=savepath+'images/'
anno_dir=savepath+'Annotations/'
# COCO subsets to convert; each name is used both as the image folder and as the
# suffix of the instances_<name>.json annotation file.
# datasets_list=['train2014', 'val2014']
datasets_list=['train2014']
# Only annotations whose category name is listed here are exported.
classes_names = ['cow']
# Folder that holds the COCO annotations and the train2014/val2014/... image folders.
dataDir= 'data/coco/'
# Header of a VOC annotation file; filled with (filename, width, height, depth).
headstr = """\
<annotation>
<folder>VOC</folder>
<filename>%s</filename>
<source>
<database>My Database</database>
<annotation>COCO</annotation>
<image>flickr</image>
<flickrid>NULL</flickrid>
</source>
<owner>
<flickrid>NULL</flickrid>
<name>company</name>
</owner>
<size>
<width>%d</width>
<height>%d</height>
<depth>%d</depth>
</size>
<segmented>0</segmented>
"""
# One <object> entry; filled with (class name, xmin, ymin, xmax, ymax).
objstr = """\
<object>
<name>%s</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>%d</xmin>
<ymin>%d</ymin>
<xmax>%d</xmax>
<ymax>%d</ymax>
</bndbox>
</object>
"""
# Closes the <annotation> element opened by headstr.
tailstr = '''\
</annotation>
'''
def mkr(path):
    """Create ``path`` as an empty directory.

    If ``path`` already exists it is removed together with everything in it
    and then recreated, so the caller always starts from an empty directory.
    Missing parent directories are created as well.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs rather than mkdir: the output root may not exist yet on a first
    # run, and mkdir refuses to create more than the last path component.
    os.makedirs(path)
# Start from empty output folders: anything already inside them is deleted.
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    """Return a dict mapping every category id in ``coco.dataset`` to its name."""
    return {
        category['id']: category['name']
        for category in coco.dataset['categories']
    }
def write_xml(anno_path,head, objs, tail):
    """Write one Pascal VOC annotation file.

    anno_path: destination path of the .xml file (overwritten if present).
    head:      already formatted header text, written first.
    objs:      iterable of [name, xmin, ymin, xmax, ymax] entries; each one is
               rendered through the module-level ``objstr`` template.
    tail:      closing text, written after all objects.
    """
    # The context manager closes (and flushes) the file even when a write fails.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco,dataset,filename,objs):
    """Copy one source image into ``img_dir`` and write its VOC XML into ``anno_dir``.

    coco:     not used here; kept so existing callers keep working.
    dataset:  sub-folder of ``dataDir`` that holds the image (e.g. 'train2014').
    filename: image file name inside that sub-folder.
    objs:     [name, xmin, ymin, xmax, ymax] entries to store for the image.

    An image that OpenCV cannot read is reported and skipped: nothing is
    copied and no annotation file is written for it.
    """
    # eg: COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml
    # splitext copes with any extension length; chopping three characters
    # produced names such as "photo.jxml" for "photo.jpeg".
    anno_path = anno_dir + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + dataset + '/' + filename
    print(img_path)
    dst_imgpath = img_dir + filename
    img = cv2.imread(img_path)
    if img is None:
        # imread reports a missing or undecodable file by returning None, not by raising.
        print(img_path + " could not be read, skipped")
        return
    shutil.copy(img_path, dst_imgpath)
    # img.shape is (height, width, channels); the VOC header wants width first.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco,dataset,img,classes,cls_id,show=True):
    """Collect the VOC-style boxes of one image and optionally display them.

    coco:    COCO API object used to look up the annotations.
    dataset: sub-folder of ``dataDir`` that holds the image file.
    img:     image record; its 'id' and 'file_name' entries are used.
    classes: dict mapping category id to category name.
    cls_id:  category ids used to filter the annotations of the image.
    show:    when True, draw the boxes on the image and display it with
             matplotlib. When False the image file is not touched at all.

    Returns a list of [class_name, xmin, ymin, xmax, ymax] entries with
    integer pixel coordinates, restricted to names in ``classes_names``.
    """
    # Get the annotated information by image id
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)
    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names:
            continue
        print(class_name)
        if 'bbox' not in ann:
            continue
        # bbox is [x, y, width, height]; VOC stores the two corners instead.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])
    if show:
        # Open and draw only when the picture is actually displayed; doing it
        # for every image of a batch conversion decoded each file for nothing.
        image = Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name']))
        draw = ImageDraw.Draw(image)
        for _, xmin, ymin, xmax, ymax in objs:
            draw.rectangle([xmin, ymin, xmax, ymax])
        plt.figure()
        plt.axis('off')
        plt.imshow(image)
        plt.show()
        image.close()
    return objs
# Convert every configured COCO subset to VOC: one copied image plus one XML per image.
for dataset in datasets_list:
    # e.g. data/coco//annotations/instances_train2014.json
    annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataset)
    # Creating the COCO object parses the JSON and builds its indexes; it
    # prints progress lines such as "loading annotations into memory...".
    coco = COCO(annFile)
    # show all classes in coco
    classes = id2name(coco)
    print(classes)
    # ids of all requested classes, used to filter the annotations per image
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)
    # An image that contains several requested classes is returned for each
    # of them; remember what was converted so it is written only once.
    seen_img_ids = set()
    for cls in classes_names:
        # Get ID number of this class
        cls_id = coco.getCatIds(catNms=[cls])
        if not cls_id:
            # With an empty category filter getImgIds returns EVERY image of
            # the subset, so a misspelled class name must not reach it.
            print(cls, 'not found in', annFile, '- skipped')
            continue
        img_ids = coco.getImgIds(catIds=cls_id)
        print(cls, len(img_ids))
        for imgId in tqdm(img_ids):
            if imgId in seen_img_ids:
                continue
            seen_img_ids.add(imgId)
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# (year, split) pairs; each pair names one "<year>_<split>.txt" image list file.
sets=[('2014', 'train')]
# Class names to export; the position of a name in this list is its class id in the label files.
classes = ["cow"]
def convert(size, box):
    """Scale a pixel box to image-relative (x_center, y_center, width, height).

    size: (image_width, image_height)
    box:  (xmin, xmax, ymin, ymax) -- both x values first, then both y values

    The centre is moved by -1 pixel before scaling, and every value is
    multiplied by the reciprocal of the matching image dimension.
    """
    inv_width = 1. / (size[0])
    inv_height = 1. / (size[1])
    left, right, top, bottom = box[0], box[1], box[2], box[3]
    center_x = (left + right) / 2.0 - 1
    center_y = (top + bottom) / 2.0 - 1
    return (
        center_x * inv_width,
        center_y * inv_height,
        (right - left) * inv_width,
        (bottom - top) * inv_height,
    )
def convert_annotation(image_id, annotations_dir='coco/cow/train/Annotations', labels_dir='coco/cow/train/labels'):
    """Convert the VOC XML of one image into a YOLO label file.

    image_id:        file name without extension, shared by the XML and the label file.
    annotations_dir: folder containing ``<image_id>.xml``.
    labels_dir:      existing folder that receives ``<image_id>.txt``.

    Each kept object becomes one line "<class index> <x> <y> <w> <h>".
    Objects whose name is not in ``classes`` or that are flagged difficult
    are left out. The label file is written even when no object is kept.
    """
    in_path = '%s/%s.xml' % (annotations_dir, image_id)
    out_path = '%s/%s.txt' % (labels_dir, image_id)
    # Parse before creating the output so an unreadable XML does not leave an
    # empty label file behind; ET.parse opens and closes the file itself.
    root = ET.parse(in_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    lines = []
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects (xmin, xmax, ymin, ymax), not the XML element order.
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    # The context manager guarantees the label file is flushed and closed.
    with open(out_path, 'w') as out_file:
        out_file.writelines(lines)
wd = getcwd()
# Write one image list per (year, split) pair and one YOLO label file per image.
for image_set in sets:
    if not os.path.exists('coco/cow/train/labels/'):
        os.makedirs('coco/cow/train/labels/')
    path = 'coco/cow/train/images/'
    # Collect the ids of ALL images first. splitext removes just the extension,
    # whereas str.strip('.jpg') removes any leading/trailing '.', 'j', 'p', 'g'
    # characters and so mangles names such as "pig.jpg".
    image_ids = sorted(
        os.path.splitext(filename)[0]
        for filename in os.listdir(path)
        if filename.endswith('.jpg')
    )
    # Open the list once so every image ends up in it, and close it reliably.
    with open('%s_%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('/images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
#os.system("cat 2007_train.txt 2012_train.txt > train.txt")
@myounus96
Copy link
Author

directory info
The output should look like this (image below):
image
The input should look like this (image below):
image

Note: make sure the directory paths in the scripts match your own layout.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment