SSD/predict.py
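# predict.py evaluates a trained SSD512 checkpoint on a CSV-labeled test set:
# it writes each image's top predicted box to result.csv and the ground-truth
# box to label.csv, then prints the fraction of images where the two overlap.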
|
|
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
import csv

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

def bbox_iou(a, b):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    IoU is the ratio of the area of the intersection to the area of the union.

    Args:
        a: (list of 4 numbers) [y1, x1, y2, x2]
        b: (list of 4 numbers) [y1, x1, y2, x2]
    Returns:
        iou: the IoU value of the two bboxes
    """
    epsilon = 1e-5  # small value to prevent division by zero

    # Coordinates of the intersection box
    y1 = max(a[0], b[0])
    x1 = max(a[1], b[1])
    y2 = min(a[2], b[2])
    x2 = min(a[3], b[3])

    # Area of overlap: the region where the boxes intersect
    width = x2 - x1
    height = y2 - y1
    # Handle the case where there is no overlap
    if (width < 0) or (height < 0):
        return 0.0
    area_overlap = width * height

    # Combined (union) area
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    area_combined = area_a + area_b - area_overlap

    # Ratio of the overlap area to the combined area
    iou = area_overlap / (area_combined + epsilon)
    return iou
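
# A minimal sanity check of bbox_iou with hypothetical coordinates (not from
# the dataset): identical boxes should give an IoU near 1, disjoint boxes 0.
assert abs(bbox_iou([0, 0, 10, 10], [0, 0, 10, 10]) - 1.0) < 1e-3
assert bbox_iou([0, 0, 10, 10], [20, 20, 30, 30]) == 0.0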
|
|
|
img_height = 512  # Height of the model input images
img_width = 512  # Width of the model input images
img_channels = 3  # Number of color channels of the model input images
mean_color = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
n_classes = 1  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
scales = scales_coco
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer
clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
normalize_coords = True
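
# Note: most of the build parameters above are only consumed by the (commented
# out) ssd_300() call below. Because the model is instead loaded from a saved
# .h5 checkpoint, only img_height, img_width, and normalize_coords are used
# here, by the Resize transformation and by decode_detections.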
|
|
|
# 1: Build the Keras model.

K.clear_session()  # Clear previous models from memory.

# model = ssd_300(image_size=(img_height, img_width, img_channels),
#                 n_classes=n_classes,
#                 mode='training',
#                 l2_regularization=0.0005,
#                 scales=scales,
#                 aspect_ratios_per_layer=aspect_ratios,
#                 two_boxes_for_ar1=two_boxes_for_ar1,
#                 steps=steps,
#                 offsets=offsets,
#                 clip_boxes=clip_boxes,
#                 variances=variances,
#                 normalize_coords=normalize_coords,
#                 subtract_mean=mean_color,
#                 swap_channels=swap_channels)

model_path = './ssd512_mine.h5'

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                               'L2Normalization': L2Normalization,
                                               'compute_loss': ssd_loss.compute_loss})
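
# load_model() can only deserialize the checkpoint if the custom layers and the
# custom loss are registered via custom_objects; stock Keras does not know
# AnchorBoxes, L2Normalization, or SSDLoss.compute_loss.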
|
|
testimages_dir = './data/testImages'
testlabels = './data/testlabels.csv'

convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)

input_format = ['image_name', 'class_id', 'ymin', 'xmin', 'ymax', 'xmax']

val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

val_dataset.parse_csv(images_dir=testimages_dir,
                      labels_filename=testlabels,
                      input_format=input_format,
                      include_classes='all',
                      random_sample=False,
                      ret=False,
                      verbose=True)

val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)
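
# Serializing the parsed dataset into an HDF5 file lets later runs stream the
# images from one file instead of re-reading the individual image files.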
|
|
|
predict_generator = val_dataset.generate(batch_size=1,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=None,
                                         returns={'processed_images',
                                                  'filenames',
                                                  'inverse_transform',
                                                  'original_images',
                                                  'original_labels'},
                                         keep_images_without_gt=False)
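
# generate() yields the requested outputs in a fixed order defined by
# DataGenerator, not in the iteration order of this set; the tuple unpacking
# in the loop below relies on that fixed order.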
|
|
|
np.set_printoptions(precision=2, suppress=True, linewidth=90)

hits = 0
total = 33  # number of images in the test set

rescsv = open('result.csv', 'w', newline='')
writer1 = csv.writer(rescsv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
labcsv = open('label.csv', 'w', newline='')
writer2 = csv.writer(labcsv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
|
|
|
for i in range(total):
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(predict_generator)
    print(batch_filenames[0])
    y_pred = model.predict(batch_images)

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.0005,
                                       iou_threshold=0.0001,
                                       top_k=1,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
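
    # With confidence_thresh near zero and top_k=1, decoding keeps exactly the
    # single highest-scoring box per image; the tiny iou_threshold makes the
    # non-maximum suppression drop effectively all overlapping candidates.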
|
|
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)

    if len(y_pred_decoded_inv[0]) == 0:
        # No box survived decoding for this image; count it as a miss.
        continue

    writer1.writerow([batch_filenames[0],
                      y_pred_decoded_inv[0][0][2], y_pred_decoded_inv[0][0][3],
                      y_pred_decoded_inv[0][0][4], y_pred_decoded_inv[0][0][5],
                      y_pred_decoded_inv[0][0][1]])

    result_bbox = [y_pred_decoded_inv[0][0][2], y_pred_decoded_inv[0][0][3],
                   y_pred_decoded_inv[0][0][4], y_pred_decoded_inv[0][0][5]]
    print(result_bbox)

    writer2.writerow([batch_original_labels[0][0][0], batch_original_labels[0][0][1],
                      batch_original_labels[0][0][2], batch_original_labels[0][0][3],
                      batch_original_labels[0][0][4]])

    label_bbox = [batch_original_labels[0][0][1], batch_original_labels[0][0][2],
                  batch_original_labels[0][0][3], batch_original_labels[0][0][4]]
    print(label_bbox)

    if bbox_iou(result_bbox, label_bbox) > 0:
        hits += 1
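
# "precision" here is the fraction of test images whose single predicted box
# has any overlap (IoU > 0) with its ground-truth box.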
|
|
|
precision = hits / total
print(precision)

rescsv.close()
labcsv.close()