[45a3e1]: / darkflow / utils / pascal_voc_clean_xml.py

Download this file

81 lines (66 with data), 2.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Parse PASCAL VOC XML annotation files into plain Python lists.
"""
import os
import sys
import xml.etree.ElementTree as ET
import glob
def _pp(l): # pretty printing
for i in l: print('{}: {}'.format(i, l[i]))
def pascal_voc_clean_xml(ANN, pick, exclusive=False):
print('Parsing for {} {}'.format(
pick, 'exclusively' * int(exclusive)))
dumps = list()
cur_dir = os.getcwd()
os.chdir(ANN)
annotations = os.listdir('.')
annotations = glob.glob(str(annotations) + '*.xml')
size = len(annotations)
for i, file in enumerate(annotations):
# progress bar
sys.stdout.write('\r')
percentage = 1. * (i + 1) / size
progress = int(percentage * 20)
bar_arg = [progress * '=', ' ' * (19 - progress), percentage * 100]
bar_arg += [file]
sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg))
sys.stdout.flush()
# actual parsing
in_file = open(file)
tree = ET.parse(in_file)
root = tree.getroot()
jpg = str(root.find('filename').text)
imsize = root.find('size')
w = int(imsize.find('width').text)
h = int(imsize.find('height').text)
all = list()
for obj in root.iter('object'):
current = list()
name = obj.find('name').text
if name not in pick:
continue
xmlbox = obj.find('bndbox')
xn = int(float(xmlbox.find('xmin').text))
xx = int(float(xmlbox.find('xmax').text))
yn = int(float(xmlbox.find('ymin').text))
yx = int(float(xmlbox.find('ymax').text))
current = [name, xn, yn, xx, yx]
all += [current]
add = [[jpg, [w, h, all]]]
dumps += add
in_file.close()
# gather all stats
stat = dict()
for dump in dumps:
all = dump[1][2]
for current in all:
if current[0] in pick:
if current[0] in stat:
stat[current[0]] += 1
else:
stat[current[0]] = 1
print('\nStatistics:')
_pp(stat)
print('Dataset size: {}'.format(len(dumps)))
os.chdir(cur_dir)
return dumps