Last active
November 12, 2019 09:50
-
-
Save myounus96/26e3f5532e2772742f1793c6eefc8fef to your computer and use it in GitHub Desktop.
Scripts for converting COCO annotations to Pascal VOC and then to YOLO: extract_person converts the COCO data to Pascal VOC, then voc_label converts that Pascal VOC data to YOLO.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pycocotools.coco import COCO | |
import os | |
import shutil | |
from tqdm import tqdm | |
import skimage.io as io | |
import matplotlib.pyplot as plt | |
import cv2 | |
from PIL import Image, ImageDraw | |
# Root output directory for the COCO -> VOC conversion results.
savepath = "./coco/cow/train/"
# Copied images and generated VOC XML files go into these sub-folders.
img_dir = savepath + 'images/'
anno_dir = savepath + 'Annotations/'
# COCO splits to convert, e.g. ['train2014', 'val2014'].
datasets_list = ['train2014']
# Category names to extract from COCO.
classes_names = ['cow']
# Folder that stores `annotations/` and the train2014/val2014/... image folders.
dataDir = 'data/coco/'
# VOC annotation header; format with (filename, width, height, depth).
headstr = """\
<annotation>
<folder>VOC</folder>
<filename>%s</filename>
<source>
<database>My Database</database>
<annotation>COCO</annotation>
<image>flickr</image>
<flickrid>NULL</flickrid>
</source>
<owner>
<flickrid>NULL</flickrid>
<name>company</name>
</owner>
<size>
<width>%d</width>
<height>%d</height>
<depth>%d</depth>
</size>
<segmented>0</segmented>
"""
# One VOC <object> entry; format with (name, xmin, ymin, xmax, ymax).
objstr = """\
<object>
<name>%s</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>%d</xmin>
<ymin>%d</ymin>
<xmax>%d</xmax>
<ymax>%d</ymax>
</bndbox>
</object>
"""
# Closes the root element opened by headstr.
tailstr = '''\
</annotation>
'''
def mkr(path):
    """Create `path` as an empty directory.

    If `path` already exists it is removed together with everything inside
    it and then recreated. Missing parent directories are created as well,
    so nested output paths work on a fresh checkout.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs (not mkdir) so that a nested path whose parents are missing
    # does not raise FileNotFoundError.
    os.makedirs(path)
# Recreate both output folders; anything already stored in them is deleted.
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    """Return a dict mapping category id to category name.

    The mapping is built from the entries of ``coco.dataset['categories']``,
    reading the ``'id'`` and ``'name'`` keys of each entry.
    """
    return {cat['id']: cat['name'] for cat in coco.dataset['categories']}
def write_xml(anno_path, head, objs, tail):
    """Write one annotation file to `anno_path`.

    The file consists of `head`, then one `objstr` entry per element of
    `objs`, then `tail`. Each element of `objs` is indexed as
    ``[name, xmin, ymin, xmax, ymax]``.
    """
    # The context manager closes the file even if formatting an entry fails;
    # the handle was previously never closed.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image into `img_dir` and write its annotation into `anno_dir`.

    The source image is read from ``dataDir + dataset + '/' + filename``.
    `objs` is the list of ``[name, xmin, ymin, xmax, ymax]`` entries to
    store. `coco` is accepted for call compatibility but is not used.
    An image that cannot be read is reported and skipped: neither the image
    copy nor the annotation is produced.
    """
    # e.g. COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml
    # splitext copes with extensions that are not exactly three characters.
    anno_path = anno_dir + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + dataset + '/' + filename
    print(img_path)
    dst_imgpath = img_dir + filename

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        print(img_path + " could not be read, skipping")
        return

    shutil.copy(img_path, dst_imgpath)
    # img.shape is (height, width, channels); the header wants width first.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the bounding boxes of the wanted classes for one image.

    Annotations of `img` are looked up through `coco`, filtered by the
    category ids in `cls_id`, and kept only when their class name (via the
    `classes` id-to-name mapping) is listed in `classes_names`.

    Returns a list of ``[class_name, xmin, ymin, xmax, ymax]`` entries with
    integer corner coordinates. When `show` is true the image is also opened,
    the boxes are drawn on it and it is displayed with matplotlib.
    """
    # Get the annotation records by image id and category ids.
    ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(ann_ids)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names:
            continue
        print(class_name)
        if 'bbox' not in ann:
            continue
        bbox = ann['bbox']
        # bbox is read as [x, y, width, height]; convert to corner form.
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        # Only touch the image file when it is actually displayed, and make
        # sure it is closed afterwards.
        with Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name'])) as I:
            draw = ImageDraw.Draw(I)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(I)
            plt.show()
    return objs
# Convert every configured COCO split into VOC-style images + XML annotations.
for dataset in datasets_list:
    # e.g. data/coco/annotations/instances_train2014.json
    annFile = os.path.join(dataDir, 'annotations', 'instances_{}.json'.format(dataset))

    # Creating the COCO object parses the JSON file and indexes the images
    # together with their annotations.
    coco = COCO(annFile)

    # Show all classes in COCO.
    classes = id2name(coco)
    print(classes)

    # Category ids of the classes we want to extract.
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)

    # Images already converted for this split; showimg collects the boxes of
    # all wanted classes at once, so an image never needs a second pass.
    processed = set()
    for cls in classes_names:
        # Get the id of this class and the images that contain it.
        cls_id = coco.getCatIds(catNms=[cls])
        img_ids = coco.getImgIds(catIds=cls_id)
        print(cls, len(img_ids))

        for imgId in tqdm(img_ids):
            if imgId in processed:
                continue
            processed.add(imgId)
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.ElementTree as ET | |
import pickle | |
import os | |
from os import listdir, getcwd | |
from os.path import join | |
# (year, split) pairs; each pair names one generated image list file.
sets = [('2014', 'train')]
# Class names to export; a label's class id is its index in this list.
classes = ["cow"]
def convert(size, box):
    """Convert a pixel box into normalised centre/size form.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension.
    """
    img_w, img_h = size[0], size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    # No "- 1" on the centre: that offset compensates for 1-based VOC pixel
    # coordinates, whereas the boxes produced by the COCO export are 0-based.
    x = (xmin + xmax) / 2.0 / img_w
    y = (ymin + ymax) / 2.0 / img_h
    w = (xmax - xmin) / float(img_w)
    h = (ymax - ymin) / float(img_h)
    return (x, y, w, h)
def convert_annotation(image_id,
                       annotations_dir='coco/cow/train/Annotations',
                       labels_dir='coco/cow/train/labels'):
    """Convert one VOC XML annotation into a YOLO label text file.

    Reads ``<annotations_dir>/<image_id>.xml`` and writes
    ``<labels_dir>/<image_id>.txt`` with one line per kept object:
    ``class_id x_center y_center width height``. Objects whose name is not in
    `classes`, or that are marked difficult, are left out.

    Args:
        image_id: file name stem shared by the XML and the label file.
        annotations_dir: folder holding the XML annotations.
        labels_dir: folder receiving the label files; it must already exist.
    """
    tree = ET.parse(os.path.join(annotations_dir, '%s.xml' % image_id))
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed.
    with open(os.path.join(labels_dir, '%s.txt' % image_id), 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # A missing <difficult> tag is treated as "not difficult".
            difficult = int(difficult_node.text) if difficult_node is not None else 0
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box ordered as (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()

# For every configured set, write the image list file and one YOLO label file
# per image found in the images folder.
for image_set in sets:
    os.makedirs('coco/cow/train/labels/', exist_ok=True)
    path = 'coco/cow/train/images/'

    # Open the list file once per set; reopening it in 'w' mode for every
    # image would truncate it and keep only the last entry.
    with open('%s_%s.txt' % image_set, 'w') as list_file:
        for filename in sorted(os.listdir(path)):
            # splitext removes the real extension; str.strip('.jpg') would
            # strip any of the characters '.', 'j', 'p', 'g' from both ends.
            image_id, ext = os.path.splitext(filename)
            if ext.lower() != '.jpg':
                continue
            list_file.write('/images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
directory info


The output should look like the image below.
The input should look like the image below.
Note: make the directory paths in the scripts match your own directory layout.