|
a |
|
b/DatasetAnalysis.py |
|
|
1 |
# %% Importing packages |
|
|
2 |
|
|
|
3 |
import numpy as np |
|
|
4 |
import cv2 as cv |
|
|
5 |
import matplotlib.pyplot as plt |
|
|
6 |
import os |
|
|
7 |
from joblib import Parallel, delayed |
|
|
8 |
|
|
|
9 |
# %% Defining Functions |
|
|
10 |
############################################################# |
|
|
11 |
############################################################# |
|
|
12 |
|
|
|
13 |
def load_image_names(folder):
    """Return the names of the non-hidden entries in ``folder``.

    Entries whose name starts with ``'.'`` are skipped, which filters out the
    invisible files that macOS adds to folders.

    Args:
        folder: Path of the directory to list.

    Returns:
        A list of entry names (not full paths), in the order reported by
        ``os.listdir``.
    """
    return [
        file_name
        for file_name in os.listdir(folder)
        if not file_name.startswith('.')
    ]
|
|
25 |
|
|
|
26 |
############################################################# |
|
|
27 |
|
|
|
28 |
def basic_image_seg_visualization(image_name):
    """Show an image's color channels next to its segmentation channel.

    The file is read unchanged with OpenCV. The first three channels are
    displayed as the color image and the fourth channel as the segmentation.

    Args:
        image_name: Path of the image file to display.

    Returns:
        An empty tuple, kept for compatibility with the original
        implementation.

    Raises:
        FileNotFoundError: If OpenCV could not read ``image_name``.
        ValueError: If the image does not have at least four channels.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None instead of raising, so
    # check explicitly rather than failing later with an opaque TypeError
    if image is None:
        raise FileNotFoundError(f'could not read image: {image_name}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected an image with at least 4 channels, '
            f'got shape {image.shape}: {image_name}'
        )

    # change the color order, because OpenCV reads color in as BGR, not RGB
    color_image = cv.cvtColor(image[:, :, 0:3], cv.COLOR_BGR2RGB)

    # create our subplots: color image on the left, segmentation on the right
    _, (ax1, ax2) = plt.subplots(1, 2)

    ax1.imshow(color_image)
    # fixed color limits keep the label-to-color mapping the same across
    # figures (presumably labels run 0..5 -- confirm against the dataset)
    ax2.imshow(image[:, :, 3], vmin=0, vmax=5)
    plt.show()
    return ()
|
|
43 |
|
|
|
44 |
############################################################# |
|
|
45 |
|
|
|
46 |
def includes_segmentation(image_name, class_id):
    """Report whether, and how much of, a class is present in an image.

    This function is meant to be run in parallel, but can be run individually.
    The fourth channel of the image is treated as the segmentation and is
    compared against ``class_id``.

    Args:
        image_name: Path of the image file to inspect.
        class_id: Segmentation label value to look for.

    Returns:
        A tuple ``(image_name, present, fraction)``. ``present`` is True when
        at least one pixel has the label; ``fraction`` is the share of pixels
        with that label (between 0 and 1), or 0 when the class is absent.

    Raises:
        FileNotFoundError: If OpenCV could not read ``image_name``.
        ValueError: If the image does not have at least four channels.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None instead of raising, so
    # check explicitly rather than failing later with an opaque TypeError
    if image is None:
        raise FileNotFoundError(f'could not read image: {image_name}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected an image with at least 4 channels, '
            f'got shape {image.shape}: {image_name}'
        )

    segmentation = image[:, :, 3]

    # how many pixels are a part of the class?
    class_pixels = np.count_nonzero(segmentation == class_id)

    # returning early also avoids dividing by zero for an empty image
    if class_pixels == 0:
        return (image_name, False, 0)

    # the class is in the image: report the share of all pixels it covers
    return (image_name, True, class_pixels / segmentation.size)
|
|
64 |
|
|
|
65 |
############################################################# |
|
|
66 |
############################################################# |
|
|
67 |
|
|
|
68 |
dataset_directory = '/media/briancottle/3a7b7bdc-6753-4423-b5ac-ff074ad75013/sub_sampled_20220526'

# work from inside the dataset folder so the bare file names collected below
# can be opened directly by the functions that read images
os.chdir(dataset_directory)

file_names = load_image_names('.')

# %%
# preview one randomly chosen image next to its segmentation; skip the
# preview instead of failing with an IndexError when the folder is empty
if file_names:
    random_index = np.random.randint(len(file_names))
    basic_image_seg_visualization(file_names[random_index])
else:
    print(f'no images found in {dataset_directory}')
|
|
77 |
|
|
|
78 |
# %% |
|
|
79 |
# segmentation label values scanned for in this cell
VASCULAR_CLASS_ID = 5
NEURAL_CLASS_ID = 4
# number of worker processes used for each scan
N_JOBS = 20

# not used in this cell; kept in case later cells rely on it
class_id = VASCULAR_CLASS_ID

# check for vasculature
contains_names_vascular = Parallel(n_jobs=N_JOBS, verbose=1)(
    delayed(includes_segmentation)(name, VASCULAR_CLASS_ID)
    for name in file_names
)
# check for neural tissue
contains_names_neural = Parallel(n_jobs=N_JOBS, verbose=1)(
    delayed(includes_segmentation)(name, NEURAL_CLASS_ID)
    for name in file_names
)
|
|
87 |
|
|
|
88 |
|
|
|
89 |
# %% |
|
|
90 |
|
|
|
91 |
# each evaluation is the (name, present, fraction) tuple returned by
# includes_segmentation; the "percentages" are fractions between 0 and 1

# get only the names and percentages for images that contain vasculature
vascular_images = [
    name for name, present, _ in contains_names_vascular if present
]
vascular_percentages = [
    fraction for _, present, fraction in contains_names_vascular if present
]

# get only the names and percentages for images that contain neural tissue
neural_images = [
    name for name, present, _ in contains_names_neural if present
]
neural_percentages = [
    fraction for _, present, fraction in contains_names_neural if present
]
|
|
114 |
|
|
|
115 |
# %% |
|
|
116 |
|
|
|
117 |
|
|
|
118 |
# reporting on values found for vasculature
print(f'the number of images containing vasculature is: {len(vascular_images)}')

# np.argmax raises ValueError on an empty sequence, so only report the
# details when at least one image contains vasculature
if vascular_images:
    # locate the image with the largest share of vasculature once and reuse it
    largest_index = int(np.argmax(vascular_percentages))
    print(f'the largest percentage was found in file {vascular_images[largest_index]}, and was {vascular_percentages[largest_index]}')
    basic_image_seg_visualization(vascular_images[largest_index])

    # distribution of the vasculature share across the images that contain it
    plt.hist(vascular_percentages)
    plt.show()
# %%
# show a randomly chosen image that contains vasculature
if vascular_images:
    random_index = np.random.randint(len(vascular_images))
    basic_image_seg_visualization(vascular_images[random_index])
|
|
128 |
# %% |