|
a |
|
b/features/pysift.py |
|
|
1 |
from numpy import all, any, array, arctan2, cos, sin, exp, dot, log, logical_and, roll, sqrt, stack, trace, unravel_index, pi, deg2rad, rad2deg, where, zeros, floor, full, nan, isnan, round, float32 |
|
|
2 |
from numpy.linalg import det, lstsq, norm |
|
|
3 |
from cv2 import resize, GaussianBlur, subtract, KeyPoint, INTER_LINEAR, INTER_NEAREST |
|
|
4 |
from functools import cmp_to_key |
|
|
5 |
import logging |
|
|
6 |
import matplotlib.pyplot as plt |
|
|
7 |
|
|
|
8 |
#################### |
|
|
9 |
# Global variables # |
|
|
10 |
#################### |
|
|
11 |
|
|
|
12 |
logger = logging.getLogger(__name__) |
|
|
13 |
float_tolerance = 1e-7 |
|
|
14 |
|
|
|
15 |
################# |
|
|
16 |
# Main function # |
|
|
17 |
################# |
|
|
18 |
|
|
|
19 |
def computeKeypointsAndDescriptors(image, sigma=1.6, num_intervals=3, assumed_blur=0.5, image_border_width=5): |
|
|
20 |
"""Compute SIFT keypoints and descriptors for an input image |
|
|
21 |
""" |
|
|
22 |
image = image.astype('float32') |
|
|
23 |
base_image = generateBaseImage(image, sigma, assumed_blur) |
|
|
24 |
num_octaves = computeNumberOfOctaves(base_image.shape) |
|
|
25 |
gaussian_kernels = generateGaussianKernels(sigma, num_intervals) |
|
|
26 |
gaussian_images = generateGaussianImages(base_image, num_octaves, gaussian_kernels) |
|
|
27 |
dog_images = generateDoGImages(gaussian_images) |
|
|
28 |
keypoints = findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width) |
|
|
29 |
# print("keypoint:", keypoints) |
|
|
30 |
keypoints = removeDuplicateKeypoints(keypoints) |
|
|
31 |
keypoints = convertKeypointsToInputImageSize(keypoints) |
|
|
32 |
descriptors = generateDescriptors(keypoints, gaussian_images) |
|
|
33 |
return keypoints, descriptors |
|
|
34 |
|
|
|
35 |
######################### |
|
|
36 |
# Image pyramid related # |
|
|
37 |
######################### |
|
|
38 |
|
|
|
39 |
def generateBaseImage(image, sigma, assumed_blur): |
|
|
40 |
"""Generate base image from input image by upsampling by 2 in both directions and blurring |
|
|
41 |
""" |
|
|
42 |
logger.debug('Generating base image...') |
|
|
43 |
image = resize(image, (0, 0), fx=2, fy=2, interpolation=INTER_LINEAR) |
|
|
44 |
sigma_diff = sqrt(max((sigma ** 2) - ((2 * assumed_blur) ** 2), 0.01)) |
|
|
45 |
return GaussianBlur(image, (0, 0), sigmaX=sigma_diff, sigmaY=sigma_diff) # the image blur is now sigma instead of assumed_blur |
|
|
46 |
|
|
|
47 |
def computeNumberOfOctaves(image_shape): |
|
|
48 |
"""Compute number of octaves in image pyramid as function of base image shape (OpenCV default) |
|
|
49 |
""" |
|
|
50 |
# print("the number of octave:",int(round(log(min(image_shape)) / log(2) - 4))) |
|
|
51 |
return int(round(log(min(image_shape)) / log(2) - 1)) |
|
|
52 |
|
|
|
53 |
def generateGaussianKernels(sigma, num_intervals): |
|
|
54 |
"""Generate list of gaussian kernels at which to blur the input image. Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper. |
|
|
55 |
""" |
|
|
56 |
logger.debug('Generating scales...') |
|
|
57 |
num_images_per_octave = num_intervals + 3 |
|
|
58 |
k = 2 ** (1. / num_intervals) |
|
|
59 |
gaussian_kernels = zeros(num_images_per_octave) # scale of gaussian blur necessary to go from one blur scale to the next within an octave |
|
|
60 |
gaussian_kernels[0] = sigma |
|
|
61 |
|
|
|
62 |
for image_index in range(1, num_images_per_octave): |
|
|
63 |
sigma_previous = (k ** (image_index - 1)) * sigma |
|
|
64 |
sigma_total = k * sigma_previous |
|
|
65 |
gaussian_kernels[image_index] = sqrt(sigma_total ** 2 - sigma_previous ** 2) |
|
|
66 |
return gaussian_kernels |
|
|
67 |
|
|
|
68 |
def generateGaussianImages(image, num_octaves, gaussian_kernels): |
|
|
69 |
"""Generate scale-space pyramid of Gaussian images |
|
|
70 |
""" |
|
|
71 |
logger.debug('Generating Gaussian images...') |
|
|
72 |
gaussian_images = [] |
|
|
73 |
|
|
|
74 |
for octave_index in range(num_octaves): |
|
|
75 |
gaussian_images_in_octave = [] |
|
|
76 |
gaussian_images_in_octave.append(image) # first image in octave already has the correct blur |
|
|
77 |
for gaussian_kernel in gaussian_kernels[1:]: |
|
|
78 |
image = GaussianBlur(image, (0, 0), sigmaX=gaussian_kernel, sigmaY=gaussian_kernel) |
|
|
79 |
# show the picture |
|
|
80 |
# plt.imshow(image, cmap = "gray") |
|
|
81 |
# plt.show() |
|
|
82 |
gaussian_images_in_octave.append(image) |
|
|
83 |
gaussian_images.append(gaussian_images_in_octave) |
|
|
84 |
octave_base = gaussian_images_in_octave[-3] |
|
|
85 |
image = resize(octave_base, (int(octave_base.shape[1] / 2), int(octave_base.shape[0] / 2)), interpolation=INTER_NEAREST) |
|
|
86 |
return array(gaussian_images) |
|
|
87 |
|
|
|
88 |
def generateDoGImages(gaussian_images): |
|
|
89 |
"""Generate Difference-of-Gaussians image pyramid |
|
|
90 |
""" |
|
|
91 |
logger.debug('Generating Difference-of-Gaussian images...') |
|
|
92 |
dog_images = [] |
|
|
93 |
|
|
|
94 |
for gaussian_images_in_octave in gaussian_images: |
|
|
95 |
dog_images_in_octave = [] |
|
|
96 |
for first_image, second_image in zip(gaussian_images_in_octave, gaussian_images_in_octave[1:]): |
|
|
97 |
dog_images_in_octave.append(subtract(second_image, first_image)) # ordinary subtraction will not work because the images are unsigned integers |
|
|
98 |
dog_images.append(dog_images_in_octave) |
|
|
99 |
return array(dog_images) |
|
|
100 |
|
|
|
101 |
############################### |
|
|
102 |
# Scale-space extrema related # |
|
|
103 |
############################### |
|
|
104 |
|
|
|
105 |
def findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width, contrast_threshold=0.03): |
|
|
106 |
"""Find pixel positions of all scale-space extrema in the image pyramid |
|
|
107 |
""" |
|
|
108 |
logger.debug('Finding scale-space extrema...') |
|
|
109 |
threshold = floor(0.5 * contrast_threshold / num_intervals * 255) # from OpenCV implementation |
|
|
110 |
# threshold = contrast_threshold |
|
|
111 |
# print("threshold:", threshold) |
|
|
112 |
keypoints = [] |
|
|
113 |
|
|
|
114 |
for octave_index, dog_images_in_octave in enumerate(dog_images): |
|
|
115 |
for image_index, (first_image, second_image, third_image) in enumerate(zip(dog_images_in_octave, dog_images_in_octave[1:], dog_images_in_octave[2:])): |
|
|
116 |
# (i, j) is the center of the 3x3 array |
|
|
117 |
for i in range(image_border_width, first_image.shape[0] - image_border_width): |
|
|
118 |
for j in range(image_border_width, first_image.shape[1] - image_border_width): |
|
|
119 |
if isPixelAnExtremum(first_image[i-1:i+2, j-1:j+2], second_image[i-1:i+2, j-1:j+2], third_image[i-1:i+2, j-1:j+2], threshold): |
|
|
120 |
localization_result = localizeExtremumViaQuadraticFit(i, j, image_index + 1, octave_index, num_intervals, dog_images_in_octave, sigma, contrast_threshold, image_border_width) |
|
|
121 |
if localization_result is not None: |
|
|
122 |
keypoint, localized_image_index = localization_result |
|
|
123 |
keypoints_with_orientations = computeKeypointsWithOrientations(keypoint, octave_index, gaussian_images[octave_index][localized_image_index]) |
|
|
124 |
for keypoint_with_orientation in keypoints_with_orientations: |
|
|
125 |
keypoints.append(keypoint_with_orientation) |
|
|
126 |
return keypoints |
|
|
127 |
|
|
|
128 |
def isPixelAnExtremum(first_subimage, second_subimage, third_subimage, threshold): |
|
|
129 |
"""Return True if the center element of the 3x3x3 input array is strictly greater than or less than all its neighbors, False otherwise |
|
|
130 |
""" |
|
|
131 |
center_pixel_value = second_subimage[1, 1] |
|
|
132 |
if abs(center_pixel_value) >= threshold: |
|
|
133 |
if center_pixel_value > 0: |
|
|
134 |
return all(center_pixel_value >= first_subimage) and \ |
|
|
135 |
all(center_pixel_value >= third_subimage) and \ |
|
|
136 |
all(center_pixel_value >= second_subimage[0, :]) and \ |
|
|
137 |
all(center_pixel_value >= second_subimage[2, :]) and \ |
|
|
138 |
center_pixel_value >= second_subimage[1, 0] and \ |
|
|
139 |
center_pixel_value >= second_subimage[1, 2] |
|
|
140 |
elif center_pixel_value < 0: |
|
|
141 |
return all(center_pixel_value <= first_subimage) and \ |
|
|
142 |
all(center_pixel_value <= third_subimage) and \ |
|
|
143 |
all(center_pixel_value <= second_subimage[0, :]) and \ |
|
|
144 |
all(center_pixel_value <= second_subimage[2, :]) and \ |
|
|
145 |
center_pixel_value <= second_subimage[1, 0] and \ |
|
|
146 |
center_pixel_value <= second_subimage[1, 2] |
|
|
147 |
return False |
|
|
148 |
|
|
|
149 |
def localizeExtremumViaQuadraticFit(i, j, image_index, octave_index, num_intervals, dog_images_in_octave, sigma, contrast_threshold, image_border_width, eigenvalue_ratio=10, num_attempts_until_convergence=5): |
|
|
150 |
"""Iteratively refine pixel positions of scale-space extrema via quadratic fit around each extremum's neighbors |
|
|
151 |
""" |
|
|
152 |
logger.debug('Localizing scale-space extrema...') |
|
|
153 |
extremum_is_outside_image = False |
|
|
154 |
image_shape = dog_images_in_octave[0].shape |
|
|
155 |
for attempt_index in range(num_attempts_until_convergence): |
|
|
156 |
# need to convert from uint8 to float32 to compute derivatives and need to rescale pixel values to [0, 1] to apply Lowe's thresholds |
|
|
157 |
first_image, second_image, third_image = dog_images_in_octave[image_index-1:image_index+2] |
|
|
158 |
pixel_cube = stack([first_image[i-1:i+2, j-1:j+2], |
|
|
159 |
second_image[i-1:i+2, j-1:j+2], |
|
|
160 |
third_image[i-1:i+2, j-1:j+2]]).astype('float32') / 255. |
|
|
161 |
gradient = computeGradientAtCenterPixel(pixel_cube) |
|
|
162 |
hessian = computeHessianAtCenterPixel(pixel_cube) |
|
|
163 |
extremum_update = -lstsq(hessian, gradient, rcond=None)[0] |
|
|
164 |
if abs(extremum_update[0]) < 0.5 and abs(extremum_update[1]) < 0.5 and abs(extremum_update[2]) < 0.5: |
|
|
165 |
break |
|
|
166 |
j += int(round(extremum_update[0])) |
|
|
167 |
i += int(round(extremum_update[1])) |
|
|
168 |
image_index += int(round(extremum_update[2])) |
|
|
169 |
# make sure the new pixel_cube will lie entirely within the image |
|
|
170 |
if i < image_border_width or i >= image_shape[0] - image_border_width or j < image_border_width or j >= image_shape[1] - image_border_width or image_index < 1 or image_index > num_intervals: |
|
|
171 |
extremum_is_outside_image = True |
|
|
172 |
break |
|
|
173 |
if extremum_is_outside_image: |
|
|
174 |
logger.debug('Updated extremum moved outside of image before reaching convergence. Skipping...') |
|
|
175 |
return None |
|
|
176 |
if attempt_index >= num_attempts_until_convergence - 1: |
|
|
177 |
logger.debug('Exceeded maximum number of attempts without reaching convergence for this extremum. Skipping...') |
|
|
178 |
return None |
|
|
179 |
functionValueAtUpdatedExtremum = pixel_cube[1, 1, 1] + 0.5 * dot(gradient, extremum_update) |
|
|
180 |
if abs(functionValueAtUpdatedExtremum) * num_intervals >= contrast_threshold: |
|
|
181 |
xy_hessian = hessian[:2, :2] |
|
|
182 |
xy_hessian_trace = trace(xy_hessian) |
|
|
183 |
xy_hessian_det = det(xy_hessian) |
|
|
184 |
if xy_hessian_det > 0 and eigenvalue_ratio * (xy_hessian_trace ** 2) < ((eigenvalue_ratio + 1) ** 2) * xy_hessian_det: |
|
|
185 |
# Contrast check passed -- construct and return OpenCV KeyPoint object |
|
|
186 |
keypoint = KeyPoint() |
|
|
187 |
keypoint.pt = ((j + extremum_update[0]) * (2 ** octave_index), (i + extremum_update[1]) * (2 ** octave_index)) |
|
|
188 |
keypoint.octave = octave_index + image_index * (2 ** 8) + int(round((extremum_update[2] + 0.5) * 255)) * (2 ** 16) |
|
|
189 |
keypoint.size = sigma * (2 ** ((image_index + extremum_update[2]) / float32(num_intervals))) * (2 ** (octave_index + 1)) # octave_index + 1 because the input image was doubled |
|
|
190 |
keypoint.response = abs(functionValueAtUpdatedExtremum) |
|
|
191 |
return keypoint, image_index |
|
|
192 |
return None |
|
|
193 |
|
|
|
194 |
def computeGradientAtCenterPixel(pixel_array): |
|
|
195 |
"""Approximate gradient at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size |
|
|
196 |
""" |
|
|
197 |
# With step size h, the central difference formula of order O(h^2) for f'(x) is (f(x + h) - f(x - h)) / (2 * h) |
|
|
198 |
# Here h = 1, so the formula simplifies to f'(x) = (f(x + 1) - f(x - 1)) / 2 |
|
|
199 |
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis |
|
|
200 |
dx = 0.5 * (pixel_array[1, 1, 2] - pixel_array[1, 1, 0]) |
|
|
201 |
dy = 0.5 * (pixel_array[1, 2, 1] - pixel_array[1, 0, 1]) |
|
|
202 |
ds = 0.5 * (pixel_array[2, 1, 1] - pixel_array[0, 1, 1]) |
|
|
203 |
return array([dx, dy, ds]) |
|
|
204 |
|
|
|
205 |
def computeHessianAtCenterPixel(pixel_array): |
|
|
206 |
"""Approximate Hessian at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size |
|
|
207 |
""" |
|
|
208 |
# With step size h, the central difference formula of order O(h^2) for f''(x) is (f(x + h) - 2 * f(x) + f(x - h)) / (h ^ 2) |
|
|
209 |
# Here h = 1, so the formula simplifies to f''(x) = f(x + 1) - 2 * f(x) + f(x - 1) |
|
|
210 |
# With step size h, the central difference formula of order O(h^2) for (d^2) f(x, y) / (dx dy) = (f(x + h, y + h) - f(x + h, y - h) - f(x - h, y + h) + f(x - h, y - h)) / (4 * h ^ 2) |
|
|
211 |
# Here h = 1, so the formula simplifies to (d^2) f(x, y) / (dx dy) = (f(x + 1, y + 1) - f(x + 1, y - 1) - f(x - 1, y + 1) + f(x - 1, y - 1)) / 4 |
|
|
212 |
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis |
|
|
213 |
center_pixel_value = pixel_array[1, 1, 1] |
|
|
214 |
dxx = pixel_array[1, 1, 2] - 2 * center_pixel_value + pixel_array[1, 1, 0] |
|
|
215 |
dyy = pixel_array[1, 2, 1] - 2 * center_pixel_value + pixel_array[1, 0, 1] |
|
|
216 |
dss = pixel_array[2, 1, 1] - 2 * center_pixel_value + pixel_array[0, 1, 1] |
|
|
217 |
dxy = 0.25 * (pixel_array[1, 2, 2] - pixel_array[1, 2, 0] - pixel_array[1, 0, 2] + pixel_array[1, 0, 0]) |
|
|
218 |
dxs = 0.25 * (pixel_array[2, 1, 2] - pixel_array[2, 1, 0] - pixel_array[0, 1, 2] + pixel_array[0, 1, 0]) |
|
|
219 |
dys = 0.25 * (pixel_array[2, 2, 1] - pixel_array[2, 0, 1] - pixel_array[0, 2, 1] + pixel_array[0, 0, 1]) |
|
|
220 |
return array([[dxx, dxy, dxs], |
|
|
221 |
[dxy, dyy, dys], |
|
|
222 |
[dxs, dys, dss]]) |
|
|
223 |
|
|
|
224 |
######################### |
|
|
225 |
# Keypoint orientations # |
|
|
226 |
######################### |
|
|
227 |
|
|
|
228 |
def computeKeypointsWithOrientations(keypoint, octave_index, gaussian_image, radius_factor=3, num_bins=36, peak_ratio=0.8, scale_factor=1.5): |
|
|
229 |
"""Compute orientations for each keypoint |
|
|
230 |
""" |
|
|
231 |
logger.debug('Computing keypoint orientations...') |
|
|
232 |
keypoints_with_orientations = [] |
|
|
233 |
image_shape = gaussian_image.shape |
|
|
234 |
|
|
|
235 |
scale = scale_factor * keypoint.size / float32(2 ** (octave_index + 1)) # compare with keypoint.size computation in localizeExtremumViaQuadraticFit() |
|
|
236 |
radius = int(round(radius_factor * scale)) |
|
|
237 |
weight_factor = -0.5 / (scale ** 2) |
|
|
238 |
raw_histogram = zeros(num_bins) |
|
|
239 |
smooth_histogram = zeros(num_bins) |
|
|
240 |
|
|
|
241 |
for i in range(-radius, radius + 1): |
|
|
242 |
region_y = int(round(keypoint.pt[1] / float32(2 ** octave_index))) + i |
|
|
243 |
if region_y > 0 and region_y < image_shape[0] - 1: |
|
|
244 |
for j in range(-radius, radius + 1): |
|
|
245 |
region_x = int(round(keypoint.pt[0] / float32(2 ** octave_index))) + j |
|
|
246 |
if region_x > 0 and region_x < image_shape[1] - 1: |
|
|
247 |
dx = gaussian_image[region_y, region_x + 1] - gaussian_image[region_y, region_x - 1] |
|
|
248 |
dy = gaussian_image[region_y - 1, region_x] - gaussian_image[region_y + 1, region_x] |
|
|
249 |
gradient_magnitude = sqrt(dx * dx + dy * dy) |
|
|
250 |
gradient_orientation = rad2deg(arctan2(dy, dx)) |
|
|
251 |
weight = exp(weight_factor * (i ** 2 + j ** 2)) # constant in front of exponential can be dropped because we will find peaks later |
|
|
252 |
histogram_index = int(round(gradient_orientation * num_bins / 360.)) |
|
|
253 |
raw_histogram[histogram_index % num_bins] += weight * gradient_magnitude |
|
|
254 |
|
|
|
255 |
for n in range(num_bins): |
|
|
256 |
smooth_histogram[n] = (6 * raw_histogram[n] + 4 * (raw_histogram[n - 1] + raw_histogram[(n + 1) % num_bins]) + raw_histogram[n - 2] + raw_histogram[(n + 2) % num_bins]) / 16. |
|
|
257 |
orientation_max = max(smooth_histogram) |
|
|
258 |
orientation_peaks = where(logical_and(smooth_histogram > roll(smooth_histogram, 1), smooth_histogram > roll(smooth_histogram, -1)))[0] |
|
|
259 |
for peak_index in orientation_peaks: |
|
|
260 |
peak_value = smooth_histogram[peak_index] |
|
|
261 |
if peak_value >= peak_ratio * orientation_max: |
|
|
262 |
# Quadratic peak interpolation |
|
|
263 |
# The interpolation update is given by equation (6.30) in https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html |
|
|
264 |
left_value = smooth_histogram[(peak_index - 1) % num_bins] |
|
|
265 |
right_value = smooth_histogram[(peak_index + 1) % num_bins] |
|
|
266 |
interpolated_peak_index = (peak_index + 0.5 * (left_value - right_value) / (left_value - 2 * peak_value + right_value)) % num_bins |
|
|
267 |
orientation = 360. - interpolated_peak_index * 360. / num_bins |
|
|
268 |
if abs(orientation - 360.) < float_tolerance: |
|
|
269 |
orientation = 0 |
|
|
270 |
new_keypoint = KeyPoint(*keypoint.pt, keypoint.size, orientation, keypoint.response, keypoint.octave) |
|
|
271 |
keypoints_with_orientations.append(new_keypoint) |
|
|
272 |
return keypoints_with_orientations |
|
|
273 |
|
|
|
274 |
############################## |
|
|
275 |
# Duplicate keypoint removal # |
|
|
276 |
############################## |
|
|
277 |
|
|
|
278 |
def compareKeypoints(keypoint1, keypoint2): |
|
|
279 |
"""Return True if keypoint1 is less than keypoint2 |
|
|
280 |
""" |
|
|
281 |
if keypoint1.pt[0] != keypoint2.pt[0]: |
|
|
282 |
return keypoint1.pt[0] - keypoint2.pt[0] |
|
|
283 |
if keypoint1.pt[1] != keypoint2.pt[1]: |
|
|
284 |
return keypoint1.pt[1] - keypoint2.pt[1] |
|
|
285 |
if keypoint1.size != keypoint2.size: |
|
|
286 |
return keypoint2.size - keypoint1.size |
|
|
287 |
if keypoint1.angle != keypoint2.angle: |
|
|
288 |
return keypoint1.angle - keypoint2.angle |
|
|
289 |
if keypoint1.response != keypoint2.response: |
|
|
290 |
return keypoint2.response - keypoint1.response |
|
|
291 |
if keypoint1.octave != keypoint2.octave: |
|
|
292 |
return keypoint2.octave - keypoint1.octave |
|
|
293 |
return keypoint2.class_id - keypoint1.class_id |
|
|
294 |
|
|
|
295 |
def removeDuplicateKeypoints(keypoints): |
|
|
296 |
"""Sort keypoints and remove duplicate keypoints |
|
|
297 |
""" |
|
|
298 |
if len(keypoints) < 2: |
|
|
299 |
return keypoints |
|
|
300 |
|
|
|
301 |
keypoints.sort(key=cmp_to_key(compareKeypoints)) |
|
|
302 |
unique_keypoints = [keypoints[0]] |
|
|
303 |
|
|
|
304 |
for next_keypoint in keypoints[1:]: |
|
|
305 |
last_unique_keypoint = unique_keypoints[-1] |
|
|
306 |
if last_unique_keypoint.pt[0] != next_keypoint.pt[0] or \ |
|
|
307 |
last_unique_keypoint.pt[1] != next_keypoint.pt[1] or \ |
|
|
308 |
last_unique_keypoint.size != next_keypoint.size or \ |
|
|
309 |
last_unique_keypoint.angle != next_keypoint.angle: |
|
|
310 |
unique_keypoints.append(next_keypoint) |
|
|
311 |
return unique_keypoints |
|
|
312 |
|
|
|
313 |
############################# |
|
|
314 |
# Keypoint scale conversion # |
|
|
315 |
############################# |
|
|
316 |
|
|
|
317 |
def convertKeypointsToInputImageSize(keypoints): |
|
|
318 |
"""Convert keypoint point, size, and octave to input image size |
|
|
319 |
""" |
|
|
320 |
converted_keypoints = [] |
|
|
321 |
for keypoint in keypoints: |
|
|
322 |
keypoint.pt = tuple(0.5 * array(keypoint.pt)) |
|
|
323 |
keypoint.size *= 0.5 |
|
|
324 |
keypoint.octave = (keypoint.octave & ~255) | ((keypoint.octave - 1) & 255) |
|
|
325 |
converted_keypoints.append(keypoint) |
|
|
326 |
return converted_keypoints |
|
|
327 |
|
|
|
328 |
######################### |
|
|
329 |
# Descriptor generation # |
|
|
330 |
######################### |
|
|
331 |
|
|
|
332 |
def unpackOctave(keypoint): |
|
|
333 |
"""Compute octave, layer, and scale from a keypoint |
|
|
334 |
""" |
|
|
335 |
octave = keypoint.octave & 255 |
|
|
336 |
layer = (keypoint.octave >> 8) & 255 |
|
|
337 |
if octave >= 128: |
|
|
338 |
octave = octave | -128 |
|
|
339 |
scale = 1 / float32(1 << octave) if octave >= 0 else float32(1 << -octave) |
|
|
340 |
return octave, layer, scale |
|
|
341 |
|
|
|
342 |
def generateDescriptors(keypoints, gaussian_images, window_width=4, num_bins=8, scale_multiplier=3, descriptor_max_value=0.2): |
|
|
343 |
"""Generate descriptors for each keypoint |
|
|
344 |
""" |
|
|
345 |
logger.debug('Generating descriptors...') |
|
|
346 |
descriptors = [] |
|
|
347 |
|
|
|
348 |
for keypoint in keypoints: |
|
|
349 |
octave, layer, scale = unpackOctave(keypoint) |
|
|
350 |
gaussian_image = gaussian_images[octave + 1, layer] |
|
|
351 |
num_rows, num_cols = gaussian_image.shape |
|
|
352 |
point = round(scale * array(keypoint.pt)).astype('int') |
|
|
353 |
bins_per_degree = num_bins / 360. |
|
|
354 |
angle = 360. - keypoint.angle |
|
|
355 |
cos_angle = cos(deg2rad(angle)) |
|
|
356 |
sin_angle = sin(deg2rad(angle)) |
|
|
357 |
weight_multiplier = -0.5 / ((0.5 * window_width) ** 2) |
|
|
358 |
row_bin_list = [] |
|
|
359 |
col_bin_list = [] |
|
|
360 |
magnitude_list = [] |
|
|
361 |
orientation_bin_list = [] |
|
|
362 |
histogram_tensor = zeros((window_width + 2, window_width + 2, num_bins)) # first two dimensions are increased by 2 to account for border effects |
|
|
363 |
|
|
|
364 |
# Descriptor window size (described by half_width) follows OpenCV convention |
|
|
365 |
hist_width = scale_multiplier * 0.5 * scale * keypoint.size |
|
|
366 |
half_width = int(round(hist_width * sqrt(2) * (window_width + 1) * 0.5)) # sqrt(2) corresponds to diagonal length of a pixel |
|
|
367 |
half_width = int(min(half_width, sqrt(num_rows ** 2 + num_cols ** 2))) # ensure half_width lies within image |
|
|
368 |
|
|
|
369 |
for row in range(-half_width, half_width + 1): |
|
|
370 |
for col in range(-half_width, half_width + 1): |
|
|
371 |
row_rot = col * sin_angle + row * cos_angle |
|
|
372 |
col_rot = col * cos_angle - row * sin_angle |
|
|
373 |
row_bin = (row_rot / hist_width) + 0.5 * window_width - 0.5 |
|
|
374 |
col_bin = (col_rot / hist_width) + 0.5 * window_width - 0.5 |
|
|
375 |
if row_bin > -1 and row_bin < window_width and col_bin > -1 and col_bin < window_width: |
|
|
376 |
window_row = int(round(point[1] + row)) |
|
|
377 |
window_col = int(round(point[0] + col)) |
|
|
378 |
if window_row > 0 and window_row < num_rows - 1 and window_col > 0 and window_col < num_cols - 1: |
|
|
379 |
dx = gaussian_image[window_row, window_col + 1] - gaussian_image[window_row, window_col - 1] |
|
|
380 |
dy = gaussian_image[window_row - 1, window_col] - gaussian_image[window_row + 1, window_col] |
|
|
381 |
gradient_magnitude = sqrt(dx * dx + dy * dy) |
|
|
382 |
gradient_orientation = rad2deg(arctan2(dy, dx)) % 360 |
|
|
383 |
weight = exp(weight_multiplier * ((row_rot / hist_width) ** 2 + (col_rot / hist_width) ** 2)) |
|
|
384 |
row_bin_list.append(row_bin) |
|
|
385 |
col_bin_list.append(col_bin) |
|
|
386 |
magnitude_list.append(weight * gradient_magnitude) |
|
|
387 |
orientation_bin_list.append((gradient_orientation - angle) * bins_per_degree) |
|
|
388 |
|
|
|
389 |
for row_bin, col_bin, magnitude, orientation_bin in zip(row_bin_list, col_bin_list, magnitude_list, orientation_bin_list): |
|
|
390 |
# Smoothing via trilinear interpolation |
|
|
391 |
# Notations follows https://en.wikipedia.org/wiki/Trilinear_interpolation |
|
|
392 |
# Note that we are really doing the inverse of trilinear interpolation here (we take the center value of the cube and distribute it among its eight neighbors) |
|
|
393 |
row_bin_floor, col_bin_floor, orientation_bin_floor = floor([row_bin, col_bin, orientation_bin]).astype(int) |
|
|
394 |
row_fraction, col_fraction, orientation_fraction = row_bin - row_bin_floor, col_bin - col_bin_floor, orientation_bin - orientation_bin_floor |
|
|
395 |
if orientation_bin_floor < 0: |
|
|
396 |
orientation_bin_floor += num_bins |
|
|
397 |
if orientation_bin_floor >= num_bins: |
|
|
398 |
orientation_bin_floor -= num_bins |
|
|
399 |
|
|
|
400 |
c1 = magnitude * row_fraction |
|
|
401 |
c0 = magnitude * (1 - row_fraction) |
|
|
402 |
c11 = c1 * col_fraction |
|
|
403 |
c10 = c1 * (1 - col_fraction) |
|
|
404 |
c01 = c0 * col_fraction |
|
|
405 |
c00 = c0 * (1 - col_fraction) |
|
|
406 |
c111 = c11 * orientation_fraction |
|
|
407 |
c110 = c11 * (1 - orientation_fraction) |
|
|
408 |
c101 = c10 * orientation_fraction |
|
|
409 |
c100 = c10 * (1 - orientation_fraction) |
|
|
410 |
c011 = c01 * orientation_fraction |
|
|
411 |
c010 = c01 * (1 - orientation_fraction) |
|
|
412 |
c001 = c00 * orientation_fraction |
|
|
413 |
c000 = c00 * (1 - orientation_fraction) |
|
|
414 |
|
|
|
415 |
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, orientation_bin_floor] += c000 |
|
|
416 |
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c001 |
|
|
417 |
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, orientation_bin_floor] += c010 |
|
|
418 |
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c011 |
|
|
419 |
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, orientation_bin_floor] += c100 |
|
|
420 |
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c101 |
|
|
421 |
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, orientation_bin_floor] += c110 |
|
|
422 |
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c111 |
|
|
423 |
|
|
|
424 |
descriptor_vector = histogram_tensor[1:-1, 1:-1, :].flatten() # Remove histogram borders |
|
|
425 |
# Threshold and normalize descriptor_vector |
|
|
426 |
threshold = norm(descriptor_vector) * descriptor_max_value |
|
|
427 |
descriptor_vector[descriptor_vector > threshold] = threshold |
|
|
428 |
descriptor_vector /= max(norm(descriptor_vector), float_tolerance) |
|
|
429 |
# Multiply by 512, round, and saturate between 0 and 255 to convert from float32 to unsigned char (OpenCV convention) |
|
|
430 |
descriptor_vector = round(512 * descriptor_vector) |
|
|
431 |
descriptor_vector[descriptor_vector < 0] = 0 |
|
|
432 |
descriptor_vector[descriptor_vector > 255] = 255 |
|
|
433 |
descriptors.append(descriptor_vector) |
|
|
434 |
return array(descriptors, dtype='float32') |