# Diff of /DatasetAnalysis.py [000000] .. [3b7fea]

# Switch to unified view

# a b/DatasetAnalysis.py
1
# %% Importing packages
2
3
import numpy as np
4
import cv2 as cv
5
import matplotlib.pyplot as plt
6
import os
7
from joblib import Parallel, delayed
8
9
# %% Defining Functions
10
#############################################################
11
#############################################################
12
13
def load_image_names(folder):
    """Return the names of the non-hidden entries in ``folder``.

    Entries whose name starts with a '.' are skipped; this filters out the
    invisible files that macOS includes in its folders.

    Parameters
    ----------
    folder : str
        Path of the directory to list.

    Returns
    -------
    list of str
        Entry names (not full paths), in the order ``os.listdir`` yields them.
    """
    return [
        file_name
        for file_name in os.listdir(folder)
        if not file_name.startswith('.')
    ]
25
26
#############################################################
27
28
def basic_image_seg_visualization(image_name):
    """Show an image's color channels and its segmentation side by side.

    The file is read with ``cv.IMREAD_UNCHANGED``. Channels 0-2 are shown as
    the color image (left) and channel 3 is shown as the segmentation (right).

    Parameters
    ----------
    image_name : str
        Path of the image file to display.

    Returns
    -------
    tuple
        Always the empty tuple (kept for backward compatibility).

    Raises
    ------
    OSError
        If OpenCV could not read the file.
    ValueError
        If the image does not have a fourth channel to use as segmentation.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None instead of raising
    if image is None:
        raise OSError(f'could not read image: {image_name!r}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected an image with at least 4 channels, got shape '
            f'{image.shape} for {image_name!r}'
        )

    # change the color order, because OpenCV reads color in as BGR, not RGB
    color_image = cv.cvtColor(image[:, :, 0:3], cv.COLOR_BGR2RGB)

    # create our subplots
    _, (ax1, ax2) = plt.subplots(1, 2)

    # show both images; vmin/vmax pin the color scale so a given label value
    # gets the same color in every figure (presumably labels run 0-5; confirm)
    ax1.imshow(color_image)
    ax2.imshow(image[:, :, 3], vmin=0, vmax=5)
    plt.show()
    return ()
43
44
#############################################################
45
46
def includes_segmentation(image_name, class_id):
    """Determine whether, and how much of, a class is present in an image.

    This function is meant to be run in parallel, but can be run
    individually. The segmentation is taken from channel 3 of the image as
    read with ``cv.IMREAD_UNCHANGED``.

    Parameters
    ----------
    image_name : str
        Path of the image file to inspect.
    class_id : int
        Segmentation label value to look for.

    Returns
    -------
    tuple
        ``(image_name, True, fraction)`` when at least one pixel equals
        ``class_id``, where ``fraction`` is the matching pixel count divided
        by the total pixel count (a value in (0, 1], not a percentage in
        0-100); otherwise ``(image_name, False, 0)``.

    Raises
    ------
    OSError
        If OpenCV could not read the file.
    ValueError
        If the image does not have a fourth channel to use as segmentation.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None instead of raising
    if image is None:
        raise OSError(f'could not read image: {image_name!r}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected an image with at least 4 channels, got shape '
            f'{image.shape} for {image_name!r}'
        )
    segmentation = image[:, :, 3]

    # how many pixels are a part of the class?
    seg_sum = np.sum(segmentation == class_id)
    # total number of pixels?
    total_sum = segmentation.shape[0] * segmentation.shape[1]

    # if the class is in the image, return True and the fraction it covers
    if seg_sum > 0:
        return (image_name, True, seg_sum / total_sum)
    return (image_name, False, 0)
64
65
#############################################################
66
#############################################################
67
68
# folder holding the sub-sampled dataset images
dataset_directory = '/media/briancottle/3a7b7bdc-6753-4423-b5ac-ff074ad75013/sub_sampled_20220526'

# work from inside the dataset folder so the bare file names returned below
# can be passed directly to the image-reading functions
os.chdir(dataset_directory)

file_names = load_image_names('.')
73
74
# %%
# sanity check: show one randomly chosen image next to its segmentation
if file_names:
    # np.random.random() is in [0, 1), so the index is always < len(file_names)
    random_index = int(np.random.random()*len(file_names))
    basic_image_seg_visualization(file_names[random_index])
else:
    # indexing an empty list would raise IndexError, so report instead
    print(f'no images found in {dataset_directory}')
77
78
# %%
# NOTE(review): class_id is not referenced by the calls below, which pass the
# label values literally; it is kept in case other cells rely on it.
class_id = 5

# check for vasculature (label value 5); each result is the
# (name, present, fraction) tuple returned by includes_segmentation
contains_names_vascular = Parallel(n_jobs=20, verbose=1)(
    delayed(includes_segmentation)(name, 5) for name in file_names
)
# check for neural tissue (label value 4)
contains_names_neural = Parallel(n_jobs=20, verbose=1)(
    delayed(includes_segmentation)(name, 4) for name in file_names
)
87
88
89
# %%

# get only the names and fractions for images that contain vasculature
vascular_images = []
vascular_percentages = []
for name, present, percentage in contains_names_vascular:
    if present:
        vascular_images.append(name)
        vascular_percentages.append(percentage)

# get only the names and fractions for images that contain neural tissue
neural_images = []
neural_percentages = []
for name, present, percentage in contains_names_neural:
    if present:
        neural_images.append(name)
        neural_percentages.append(percentage)
114
115
# %%

# reporting on values found for vasculature
print(f'the number of images containing vasculature is: {len(vascular_images)}')
if vascular_images:
    # np.argmax raises ValueError on an empty sequence, so only look up the
    # largest entry when at least one image contained vasculature
    largest_index = np.argmax(vascular_percentages)
    print(f'the largest percentage was found in file {vascular_images[largest_index]}, and was {vascular_percentages[largest_index]}')
    basic_image_seg_visualization(vascular_images[largest_index])

# distribution of the per-image vasculature fractions
plt.hist(vascular_percentages)
plt.show()
# %%
# show a randomly chosen image that contains vasculature
if vascular_images:
    random_index = int(np.random.random()*len(vascular_images))
    basic_image_seg_visualization(vascular_images[random_index])
128
# %%