Histology-ML / Git / Diff of /DatasetAnalysis.py

Models:
DanielG/
Histology-ML
Downloads: 1
Diff of /DatasetAnalysis.py [000000] .. [3b7fea]
Switch to side-by-side view

--- a
+++ b/DatasetAnalysis.py
@@ -0,0 +1,128 @@
+# %% Importing packages
+
+import numpy as np
+import cv2 as cv
+import matplotlib.pyplot as plt
+import os
+from joblib import Parallel, delayed
+
+# %% Defining Functions
+#############################################################
+#############################################################
+
+def load_image_names(folder):
+    # This function reads the images within a folder while filtering 
+    # out the weird invisible files that macos includes in their folders
+    
+    file_list = []
+    for file_name in os.listdir(folder):
+
+        # check if the first character of the name is a '.', skip if so
+        if file_name[0] != '.': 
+            file_list.append(file_name)
+
+    return(file_list)
+
+#############################################################
+
+def basic_image_seg_visualization(image_name):
+    # this function shows both the RGB image and its corresponding segmentation
+    # next to each other
+    image = cv.imread(image_name,cv.IMREAD_UNCHANGED)
+    # change the color order, because openCV2 reads color in as BGR, not RGB
+    color_image = cv.cvtColor(image[:,:,0:3],cv.COLOR_BGR2RGB)
+
+    # create our subplots
+    fig, (ax1, ax2) = plt.subplots(1,2)
+
+    # show both the images
+    ax1.imshow(color_image)
+    ax2.imshow(image[:,:,3], vmin=0,vmax=5)
+    plt.show()
+    return()
+
+#############################################################
+
+def includes_segmentation(image_name, class_id):
+    # this function is meant to be run in parallel, but can be run individually.
+    # it receives an image name and a class_id, and determins whether and how 
+    # much of a certain class is contained in the image.
+
+    image = cv.imread(image_name,cv.IMREAD_UNCHANGED)
+    segmentation = image[:,:,3]
+
+    # how many pixels are a part of the class?
+    seg_sum = np.sum((segmentation==class_id))
+    # total number of pixels?
+    total_sum = segmentation.shape[0]*segmentation.shape[1]
+
+    # if the class is in the image, return true and the percentage
+    if seg_sum>0:
+        return(image_name,True,seg_sum/total_sum)
+    else:
+        return(image_name,False,0)
+
+#############################################################
+#############################################################
+
+dataset_directory = '/media/briancottle/3a7b7bdc-6753-4423-b5ac-ff074ad75013/sub_sampled_20220526'
+
+os.chdir(dataset_directory)
+
+file_names = load_image_names('.')
+
+# %%
+random_index = int(np.random.random()*len(file_names))
+basic_image_seg_visualization(file_names[random_index])
+
+# %%
+class_id = 5
+
+# check for vasculature
+contains_names_vascular = Parallel(n_jobs=20, verbose=1)(delayed(includes_segmentation) \
+                                    (name,5) for name in file_names)
+# check for neural tissue 
+contains_names_neural = Parallel(n_jobs=20, verbose=1)(delayed(includes_segmentation) \
+                                    (name,4) for name in file_names)
+
+
+# %%
+
+# get only the names and percentages for images that contain vasculature
+vascular_images = []
+vascular_percentages = []
+for evaluation in contains_names_vascular:
+    name = evaluation[0]
+    present = evaluation[1]
+    percentage = evaluation[2]
+
+    if present:
+        vascular_images.append(name)
+        vascular_percentages.append(percentage)
+
+# get only the names and percentages for images that contain vasculature
+neural_images = []
+neural_percentages = []
+for evaluation in contains_names_neural:
+    name = evaluation[0]
+    present = evaluation[1]
+    percentage = evaluation[2]
+
+    if present:
+        neural_images.append(name)
+        neural_percentages.append(percentage)
+
+# %% 
+
+
+# reporting on valuse found for vasculature
+print(f'the number of images containing vasculature is: {len(vascular_images)}')
+print(f'the largest percentage was found in file {vascular_images[np.argmax(vascular_percentages)]}, and was {np.max(vascular_percentages)}')
+basic_image_seg_visualization(vascular_images[np.argmax(vascular_percentages)])
+
+plt.hist(vascular_percentages)
+plt.show()
+# %%
+random_index = int(np.random.random()*len(vascular_images))
+basic_image_seg_visualization(vascular_images[random_index])
+# %%