[3b7fea]: / DatasetAnalysis.py

Download this file

129 lines (96 with data), 4.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# %% Importing packages
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import os
from joblib import Parallel, delayed
# %% Defining Functions
#############################################################
#############################################################
def load_image_names(folder):
    """Return the names of the non-hidden entries found in *folder*.

    Every entry reported by os.listdir is kept unless its name begins with
    a '.', which filters out the invisible files macOS places in folders.
    Names are returned bare (not joined with *folder*) and in the order
    os.listdir produced them.
    """
    return [
        entry
        for entry in os.listdir(folder)
        if not entry.startswith('.')
    ]
#############################################################
def basic_image_seg_visualization(image_name):
    """Show an image's colour channels next to its segmentation channel.

    The file is read unchanged with OpenCV. Channels 0-2 are displayed as
    the colour picture and channel 3 as the segmentation map, side by side
    in a single figure.

    Args:
        image_name: path of the image file to display.

    Returns:
        An empty tuple (kept for compatibility with the original code).

    Raises:
        OSError: if OpenCV could not read the file.
        ValueError: if the image does not have at least four channels.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None rather than raising, so
    # check explicitly instead of failing later with an opaque TypeError
    if image is None:
        raise OSError(f'could not read image file: {image_name}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected at least 4 channels (segmentation in the 4th), '
            f'got shape {image.shape} for {image_name}'
        )
    # change the color order, because OpenCV reads color in as BGR, not RGB
    color_image = cv.cvtColor(image[:, :, 0:3], cv.COLOR_BGR2RGB)
    # create our subplots
    _, (ax1, ax2) = plt.subplots(1, 2)
    # show both the images
    ax1.imshow(color_image)
    # fixed colour limits; presumably the label values span 0-5 -- confirm
    ax2.imshow(image[:, :, 3], vmin=0, vmax=5)
    plt.show()
    return ()
#############################################################
def includes_segmentation(image_name, class_id):
    """Report whether, and how much of, a class appears in an image.

    Meant to be dispatched in parallel (one call per image), but it can be
    called on its own as well.

    Args:
        image_name: path of an image whose fourth channel holds the
            segmentation labels.
        class_id: label value to look for in the segmentation channel.

    Returns:
        A tuple (image_name, present, fraction). `present` is True when at
        least one pixel equals class_id. `fraction` is the number of
        matching pixels divided by the total pixel count, or 0 when the
        class is absent.

    Raises:
        OSError: if OpenCV could not read the file.
        ValueError: if the image does not have at least four channels.
    """
    image = cv.imread(image_name, cv.IMREAD_UNCHANGED)
    # cv.imread signals failure by returning None rather than raising; fail
    # with a message naming the file so a parallel run is easy to debug
    if image is None:
        raise OSError(f'could not read image file: {image_name}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(
            f'expected at least 4 channels (segmentation in the 4th), '
            f'got shape {image.shape} for {image_name}'
        )
    segmentation = image[:, :, 3]
    # how many pixels are a part of the class?
    seg_sum = np.sum(segmentation == class_id)
    # if the class is in the image, return true and the fraction of pixels
    if seg_sum > 0:
        # segmentation is 2-D, so .size equals height * width
        return (image_name, True, seg_sum / segmentation.size)
    return (image_name, False, 0)
#############################################################
#############################################################
# folder that holds the sub-sampled dataset on the local machine
dataset_directory = '/media/briancottle/3a7b7bdc-6753-4423-b5ac-ff074ad75013/sub_sampled_20220526'
# work from inside the dataset folder so the bare file names returned below
# can be opened directly by the functions defined earlier in this file
os.chdir(dataset_directory)
file_names = load_image_names('.')
# %%
# preview one image of the dataset, picked at random
random_index = int(len(file_names) * np.random.random())
basic_image_seg_visualization(file_names[random_index])
# %%
# label values looked up in the segmentation channel; named once here so the
# scans below do not rely on repeated magic numbers
vascular_class_id = 5
neural_class_id = 4
# original variable name, kept so interactive sessions that read it still work
class_id = vascular_class_id
# check for vasculature: one (name, present, fraction) tuple per image
contains_names_vascular = Parallel(n_jobs=20, verbose=1)(
    delayed(includes_segmentation)(name, vascular_class_id)
    for name in file_names
)
# check for neural tissue: same scan with the neural label
contains_names_neural = Parallel(n_jobs=20, verbose=1)(
    delayed(includes_segmentation)(name, neural_class_id)
    for name in file_names
)
# %%
# Each scan result is a (name, present, fraction) tuple. Keep the names and
# fractions only for the images in which the class was actually found.
# vasculature
vascular_images = [
    name for name, present, _ in contains_names_vascular if present
]
vascular_percentages = [
    fraction for _, present, fraction in contains_names_vascular if present
]
# neural tissue
neural_images = [
    name for name, present, _ in contains_names_neural if present
]
neural_percentages = [
    fraction for _, present, fraction in contains_names_neural if present
]
# %%
# reporting on values found for vasculature
print(f'the number of images containing vasculature is: {len(vascular_images)}')
# np.argmax / np.max raise ValueError on an empty sequence, so only report
# the maximum and plot when at least one image contained vasculature
if vascular_images:
    # locate the image with the largest vascular fraction once and reuse it
    largest_index = int(np.argmax(vascular_percentages))
    print(f'the largest percentage was found in file {vascular_images[largest_index]}, and was {np.max(vascular_percentages)}')
    basic_image_seg_visualization(vascular_images[largest_index])
    # distribution of the vascular fraction over the images that contain it
    plt.hist(vascular_percentages)
    plt.show()
else:
    print('no images containing vasculature were found; nothing to plot')
# %%
# preview a random image that contains vasculature (skipped when none exist,
# since indexing an empty list would raise IndexError)
if vascular_images:
    random_index = int(np.random.random()*len(vascular_images))
    basic_image_seg_visualization(vascular_images[random_index])
# %%