Diff of /src/data_flow.py [000000] .. [fb2ce2]

Switch to side-by-side view

--- a
+++ b/src/data_flow.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# Custom module for dealing with global project paths and functions related to injesting and accessing raw data
+
+import sys
+import os
+import ast
+import parse_config
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+import pydicom
+
+
+TRAIN_DATA_PATH = parse_config.TRAIN_DATA_PATH
+TEST_DATA_PATH = parse_config.TEST_DATA_PATH
+CSV_PATHS = parse_config.CSV_PATHS
+
+
+def translate_dicom(filename, apply_window=True):
+    """
+    Transform a medical DICOM file to a standardized pixel based array
+    Arguments:
+        filename {string}
+        path {string} -- file path to data, set in config.ini
+        apply_window {bool} -- if True (default) then windowed png of dicom data is returned
+    """
+    data = pydicom.dcmread(filename)
+    
+    if apply_window:
+        window_center, window_width, intercept, slope = get_windowing(data)
+        img = window_image(data.pixel_array, window_center, window_width, intercept, slope)
+        return np.array(img, dtype=float)
+
+    img = np.array(data.pixel_array, dtype=float)
+    standardized_array = np.divide(np.subtract(img,img.mean()),img.std())
+    return standardized_array
+
+
+def window_image(img, window_center, window_width, intercept, slope):
+    """
+    Given a CT scan img apply a windowing to the image
+    Arguments:
+        img {np.array} -- array of a dicom img processed by pydicom.dcmread()
+        window_center,window_width,intercept,slope {floats} -- values provided by dicom file metadata
+    Attribution: This code comes from Richard McKinley's Kaggle kernel
+    """
+    img = (img * slope + intercept)
+    img_min = window_center - window_width // 2
+    img_max = window_center + window_width // 2
+    img[img < img_min] = img_min
+    img[img > img_max] = img_max
+    return img 
+
+
+def get_first_of_dicom_field_as_int(x):
+    """
+    Converts pydicom obj into an int
+    Arguments:
+        x {pydicom obj} -- either a single or multivalue obj
+    Attribution: This code comes from Richard McKinley's Kaggle kernel
+    """
+    #get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)
+    if type(x) == pydicom.multival.MultiValue:
+        return int(x[0])
+    else:
+        return int(x)
+
+
+def get_windowing(data):
+    """
+    Retrieves windowing data from dicom metadata
+    Arguments:
+        data {pydicom data obj} -- object returned from pydicom dcmread() 
+    Attribution: This code comes from Richard McKinley's Kaggle kernel
+    """
+    dicom_fields = [data.WindowCenter,
+                    data.WindowWidth,
+                    data.RescaleIntercept,
+                    data.RescaleSlope]
+    return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]