In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fastai.core import *

%matplotlib notebook

In [2]:
! ls -R 

LICENSE         MRNet_EDA.ipynb README.md


In [3]:
# Locations of the data root and of the train / valid directories used below.
# NOTE(review): Path is never imported explicitly; presumably it is provided by
# the `from fastai.core import *` star import -- confirm.
data_path = Path('../data')
train_path = data_path/'smalltrain'/'train'
valid_path = data_path/'smallvalid'/'valid'

In [5]:
# Read the "abnormal" labels. header=None means the file is read as having no
# header row, so the column names are supplied here; Case is kept as a string.
train_abnl = pd.read_csv(data_path/'train-abnormal.csv', header=None,
                       names=['Case', 'Abnormal'], 
                       dtype={'Case': str, 'Abnormal': np.int64})
# Number of cases for each label value.
print(train_abnl.groupby('Abnormal').count())
train_abnl.head()

          Case
Abnormal      
0          217
1          913


Unnamed: 0,Case,Abnormal
0,0,1
1,1,1
2,2,1
3,3,1
4,4,1


In [6]:
# Read the ACL-tear labels. header=None means the file is read as having no
# header row, so the column names are supplied here; Case is kept as a string.
train_acl = pd.read_csv(data_path/'train-acl.csv', header=None,
                       names=['Case', 'ACL_tear'], 
                       dtype={'Case': str, 'ACL_tear': np.int64})
# Number of cases for each label value.
print(train_acl.groupby('ACL_tear').count())
train_acl.head()

          Case
ACL_tear      
0          922
1          208


Unnamed: 0,Case,ACL_tear
0,0,0
1,1,1
2,2,0
3,3,0
4,4,0


In [7]:
# Read the meniscus-tear labels. header=None means the file is read as having
# no header row, so the column names are supplied here; Case is kept as a string.
train_meniscus = pd.read_csv(data_path/'train-meniscus.csv', header=None,
                       names=['Case', 'Meniscus_tear'], 
                       dtype={'Case': str, 'Meniscus_tear': np.int64})
# Number of cases for each label value.
print(train_meniscus.groupby('Meniscus_tear').count())
train_meniscus.head()

               Case
Meniscus_tear      
0               733
1               397


Unnamed: 0,Case,Meniscus_tear
0,0,0
1,1,1
2,2,0
3,3,1
4,4,0


### Co-occurrence of ACL and Meniscus tears

In [None]:
# Join the abnormal and ACL labels on the shared Case key
# (pd.merge performs an inner join by default).
train = pd.merge(train_abnl, train_acl, on='Case')

In [10]:
# Add the meniscus labels to the merged table, again joining on Case.
# NOTE(review): this rebinds `train`; re-running this cell on its own would
# merge the meniscus labels a second time (pandas then suffixes the duplicated
# column with _x/_y).
train = pd.merge(train, train_meniscus, on='Case')

In [16]:
# Preview the merged label table, then count the cases for every observed
# combination of the three labels.
display(train.head())
display(train.groupby(['Abnormal','ACL_tear','Meniscus_tear']).count())

Unnamed: 0,Case,Abnormal,ACL_tear,Meniscus_tear
0,0,1,0,0
1,1,1,1,1
2,2,1,0,0
3,3,1,0,1
4,4,1,0,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Case
Abnormal,ACL_tear,Meniscus_tear,Unnamed: 3_level_1
0,0,0,217
1,0,0,433
1,0,1,272
1,1,0,83
1,1,1,125


Note that cases considered Abnormal but with neither an ACL tear nor a Meniscus tear are the most common category (433 cases), and ACL tears without a Meniscus tear are the least common category (83 cases) in the training sample.

## Load stacks/sequences of images from each plane
Files are saved as NumPy arrays. Scans were taken from each of three planes: axial, coronal, and sagittal. For each plane, the scan results in a stack (sequence) of images.  

First, let's check for variation in the number of images per sequence, and in the image dimensions.

In [67]:
def collect_stack_dims(case_df, data_path=train_path):
    """Collect the array shape of each plane's image stack for every case.

    Parameters
    ----------
    case_df : pandas.DataFrame
        Must have a ``Case`` column; each value is used as the file stem of
        ``<data_path>/<plane>/<case>.npy``.
    data_path : path-like supporting the ``/`` operator
        Directory holding the ``axial``, ``coronal`` and ``sagittal``
        sub-directories. Defaults to ``train_path``.

    Returns
    -------
    pandas.DataFrame
        One row per case for which all three plane files exist, with columns
        ``Case`` followed by ``<plane>_s``, ``<plane>_w``, ``<plane>_h`` (the
        three entries of the array shape, in order) for each plane.

    Notes
    -----
    Cases with a missing file for any plane are skipped silently. Each stored
    array is expected to be 3-dimensional; any other rank raises ValueError
    when the shape is unpacked.
    """
    planes = ['axial', 'coronal', 'sagittal']
    columns = ['Case'] + ['{}_{}'.format(plane, dim)
                          for plane in planes for dim in 'swh']
    rows = []
    for case in case_df.Case:
        row = [case]
        for plane in planes:
            fpath = data_path/plane/'{}.npy'.format(case)
            try:
                # Memory-map instead of reading the whole volume: only the
                # shape is needed here.
                s, w, h = np.load(fpath, mmap_mode='r').shape
            except FileNotFoundError:
                # An incomplete case is dropped anyway, so stop probing the
                # remaining planes.
                break
            row.extend([s, w, h])
        else:
            # Reached only when all three planes were found.
            rows.append(row)
    return pd.DataFrame(rows, columns=columns)

In [68]:
# Stack dimensions for every case in `train` whose three plane files are
# present under the default data directory (train_path).
dimdf = collect_stack_dims(train)

In [70]:
# Summary statistics of the per-plane stack dimensions.
dimdf.describe()

Unnamed: 0,axial_s,axial_w,axial_h,coronal_s,coronal_w,coronal_h,sagittal_s,sagittal_w,sagittal_h
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,35.86,256.0,256.0,31.36,256.0,256.0,31.72,256.0,256.0
std,7.050865,0.0,0.0,7.899264,0.0,0.0,6.35687,0.0,0.0
min,22.0,256.0,256.0,18.0,256.0,256.0,19.0,256.0,256.0
25%,32.0,256.0,256.0,24.0,256.0,256.0,26.25,256.0,256.0
50%,37.5,256.0,256.0,32.0,256.0,256.0,32.0,256.0,256.0
75%,40.0,256.0,256.0,37.75,256.0,256.0,36.0,256.0,256.0
max,51.0,256.0,256.0,46.0,256.0,256.0,46.0,256.0,256.0


The number of images in a set varies from case (patient) to case, while the dimensions of every image are the same, 256x256. In the sample of data collected here, axial sequences range in length from 22 to 51 images; coronal, from 18 to 46; sagittal, from 19 to 46.

In [20]:
def load_one_stack(case, data_path=train_path, plane='coronal'):
    """Load the image stack of one case for a single plane.

    Reads ``<data_path>/<plane>/<case>.npy`` with ``np.load`` and returns the
    loaded array.
    """
    filename = '{}.npy'.format(case)
    return np.load(data_path / plane / filename)

def load_stacks(case, data_path=None):
    """Load the axial, coronal and sagittal stacks of one case.

    Parameters
    ----------
    case : value identifying the case; passed straight to ``load_one_stack``.
    data_path : optional
        Directory to load from. When omitted (``None``) the default of
        ``load_one_stack`` is used, which matches the previous behaviour.

    Returns
    -------
    dict
        Maps each plane name (``'axial'``, ``'coronal'``, ``'sagittal'``, in
        that order) to the value returned by ``load_one_stack`` for it.
    """
    # Forward data_path only when given, so load_one_stack's own default
    # applies otherwise.
    extra = {} if data_path is None else {'data_path': data_path}
    return {plane: load_one_stack(case, plane=plane, **extra)
            for plane in ('axial', 'coronal', 'sagittal')}

In [29]:
# Load the coronal stack of the first case listed in the abnormal-label table
# and inspect its shape and its largest value.
case = train_abnl.Case[0]
x = load_one_stack(case, plane='coronal')
print(x.shape)
print(x.max())

(36, 256, 256)
255


In [9]:
# Load the stacks of all three planes for `case`.
x_multi = load_stacks(case)
# NOTE(review): echoing the dict prints the raw arrays; showing each stack's
# .shape instead would keep the output readable.
x_multi

{'axial': array([[[ 0,  0,  0,  0, ...,  4,  5,  4,  3],
         [ 0,  0,  0,  0, ...,  8,  8,  6,  8],
         [ 0,  0,  0,  0, ..., 14, 14, 11, 11],
         [ 0,  0,  0,  0, ..., 16, 16, 14, 15],
         ...,
         [ 0,  0,  0,  0, ..., 14, 15, 18, 16],
         [ 0,  0,  0,  0, ..., 15, 16, 15, 12],
         [ 0,  0,  0,  0, ..., 11, 12, 13, 12],
         [ 0,  0,  0,  0, ...,  8, 11,  7,  9]],
 
        [[ 0,  0,  0,  0, ...,  4,  3,  2,  2],
         [ 0,  0,  0,  0, ...,  5,  9,  7,  7],
         [ 0,  0,  0,  0, ..., 10, 13, 10, 10],
         [ 0,  0,  0,  0, ..., 14, 14, 19, 17],
         ...,
         [ 0,  0,  0,  0, ..., 18, 16, 16, 17],
         [ 0,  0,  0,  0, ..., 13, 12, 15, 13],
         [ 0,  0,  0,  0, ..., 16, 14, 12, 12],
         [ 0,  0,  0,  0, ...,  8,  6,  5,  7]],
 
        [[ 0,  0,  0,  0, ...,  1,  1,  1,  1],
         [ 0,  0,  0,  0, ...,  7,  8,  6,  6],
         [ 0,  0,  0,  0, ..., 12, 11, 13, 10],
         [ 0,  0,  0,  0, ..., 12, 18, 18, 16

In [30]:
from ipywidgets import interactive
from IPython.display import display

# Switch matplotlib to its 'grayscale' style sheet for the figures drawn below.
plt.style.use('grayscale')

class KneePlot():
    """Interactive viewer for one image stack.

    Wraps an ipywidgets ``interactive`` control whose ``im_slice`` value
    selects which slice ``x[im_slice, :, :]`` is drawn with ``imshow``.

    Attributes set by the instance:
      x                 -- the stack passed in; indexed as x[slice, :, :]
      slice_range       -- (0, x.shape[0] - 1), the range given to the control
      figsize           -- figure size passed to plt.subplots
      interactive_plot  -- the ``interactive`` widget
      output            -- last child of the widget; its layout height is fixed
    """

    def __init__(self, x, figsize=(10, 10)):
        """Store the stack and build the widget for the given figure size."""
        self.x = x
        last_index = self.x.shape[0] - 1
        self.slice_range = (0, last_index)
        self.resize(figsize)

    def resize(self, figsize):
        """Record ``figsize`` and rebuild the interactive widget.

        The output area's height is set to 60 px per unit of figure height.
        """
        self.figsize = figsize
        widget = interactive(self._plot_slice, im_slice=self.slice_range)
        self.interactive_plot = widget
        # The widget's last child is the area the plot is rendered into.
        self.output = widget.children[-1]
        height_px = 60 * self.figsize[1]
        self.output.layout.height = '{}px'.format(height_px)

    def _plot_slice(self, im_slice):
        """Draw slice number ``im_slice`` of the stack on a fresh figure."""
        _, axis = plt.subplots(1, 1, figsize=self.figsize)
        axis.imshow(self.x[im_slice, :, :])
        plt.show()

    def show(self):
        """Display the interactive widget."""
        display(self.interactive_plot)


In [31]:
# Browse the slices of the stack loaded into `x` with a slider.
plot = KneePlot(x)
plot.show()


interactive(children=(IntSlider(value=17, description='im_slice', max=35), Output(layout=Layout(height='600px'…

In [12]:
# Rebuild the viewer with a larger figure and display it again.
plot.resize(figsize=(12, 12))
plot.show()


interactive(children=(IntSlider(value=17, description='im_slice', max=35), Output(layout=Layout(height='720px'…

In [15]:
from ipywidgets import interact, Dropdown, IntSlider

class MultiKneePlot():
    """Interactive viewer for the coronal, sagittal and axial stacks of a case.

    A dropdown selects the plane and a slider selects the slice within it;
    the slider's upper bound follows the selected plane's slice count.

    Attributes set by the instance:
      x           -- mapping from plane name to stack, indexed as stack[slice, :, :]
      planes      -- plane names offered in the dropdown, in display order
      slice_nums  -- number of slices (shape[0]) of each plane's stack
      figsize     -- figure size passed to plt.subplots
    """

    def __init__(self, x_multi, figsize=(10, 10)):
        """Store the stacks and record how many slices each plane has."""
        self.x = x_multi
        self.planes = ['coronal', 'sagittal', 'axial']
        self.slice_nums = {}
        for plane in self.planes:
            self.slice_nums[plane] = self.x[plane].shape[0]
        self.figsize = figsize

    def resize(self, figsize):
        """Set the figure size used for subsequently drawn slices."""
        self.figsize = figsize

    def _plot_slices(self, plane, im_slice):
        """Draw slice ``im_slice`` of the given plane on a fresh figure."""
        _, axis = plt.subplots(1, 1, figsize=self.figsize)
        stack = self.x[plane]
        axis.imshow(stack[im_slice, :, :])
        plt.show()

    def draw(self):
        """Build the dropdown and slider and hook them up to the plot."""
        planes_widget = Dropdown(options=self.planes)
        first_plane = self.planes[0]
        last_index = self.slice_nums[first_plane] - 1
        slices_widget = IntSlider(min=0, max=last_index, value=last_index//2)

        def update_slices_widget(*args):
            # Re-bound the slider for the newly selected plane, then move it
            # to the middle slice.
            selected = planes_widget.value
            slices_widget.max = self.slice_nums[selected] - 1
            slices_widget.value = slices_widget.max // 2

        planes_widget.observe(update_slices_widget, 'value')
        interact(self._plot_slices, plane=planes_widget, im_slice=slices_widget)


In [16]:
# Browse all three planes of the case loaded into `x_multi`.
plot_multi = MultiKneePlot(x_multi)
plot_multi.draw()

interactive(children=(Dropdown(description='plane', options=('coronal', 'sagittal', 'axial'), value='coronal')…