--- a +++ b/1. Applying AI to 2D Medical Imaging Data/4. Prepare DICOM Images for ML Exercise/solution.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pydicom\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "## First, collect the paths of all DICOM files (*.dcm) in the working directory into a list\n", + "mydicoms = glob.glob(\"*.dcm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's look at the contents of the first DICOM:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "dcm1 = pydicom.dcmread(mydicoms[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0008, 0016) SOP Class UID UI: Secondary Capture Image Storage\n", + "(0008, 0018) SOP Instance UID UI: 1.3.6.1.4.1.11129.5.5.162426174634548301003630270411628292460952\n", + "(0008, 0060) Modality CS: 'DX'\n", + "(0008, 1030) Study Description LO: 'Effusion|Nodule|Pleural_Thickening|Mass'\n", + "(0010, 0020) Patient ID LO: '29579'\n", + "(0010, 0040) Patient's Sex CS: 'F'\n", + "(0010, 1010) Patient's Age AS: '24'\n", + "(0020, 000d) Study Instance UID UI: 1.3.6.1.4.1.11129.5.5.113025392650823751977671880960497589856674\n", + "(0020, 000e) Series Instance UID UI: 1.3.6.1.4.1.11129.5.5.168055162156043936178718006100964727334210\n", + "(0028, 0002) Samples per Pixel US: 1\n", + "(0028, 0004) Photometric Interpretation CS: 'MONOCHROME2'\n", + "(0028, 0010) Rows US: 1024\n", + "(0028, 0011) Columns US: 1024\n", + "(0028, 0100) Bits Allocated US: 8\n", + "(0028, 0101) Bits Stored US: 8\n", + "(0028, 0102) High Bit US: 7\n", + "(0028, 0103) Pixel Representation US: 0\n", + "(7fe0, 0010) Pixel Data OW: Array of 1048576 elements" + ] + }, +
"execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looking at the attributes listed above, these are the ones I want to extract:\n", + "* Modality\n", + "* Study Description\n", + "* Patient ID\n", + "* Patient's Sex\n", + "* Patient's Age\n", + "* Rows\n", + "* Columns" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'DX'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.Modality" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Effusion|Nodule|Pleural_Thickening|Mass'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.StudyDescription" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'29579'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.PatientID" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'F'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.PatientSex" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'24'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.PatientAge" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1024" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type":
"execute_result" + } + ], + "source": [ + "dcm1.Rows" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1024" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dcm1.Columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now, let's create the dataframe that we want, and populate it in a loop with all of our DICOMS:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Only header attributes are used below, so stop_before_pixels=True skips loading each file's pixel data.\n", + "all_data = []\n", + "for dcm_path in mydicoms:\n", + "    dcm = pydicom.dcmread(dcm_path, stop_before_pixels=True)\n", + "    fields = [dcm.PatientID, int(dcm.PatientAge), dcm.PatientSex, dcm.Modality, dcm.StudyDescription,\n", + "              dcm.Rows, dcm.Columns]\n", + "    all_data.append(fields)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "mydata = pd.DataFrame(all_data, \n", + " columns = ['PatientID','PatientAge','PatientSex','Modality','Findings','Rows','Columns'])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PatientID</th>\n", + " <th>PatientAge</th>\n", + " <th>PatientSex</th>\n", + " <th>Modality</th>\n", + " <th>Findings</th>\n", + " <th>Rows</th>\n", + " <th>Columns</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>29579</td>\n", + " <td>24</td>\n", + " <td>F</td>\n", + "
<td>DX</td>\n", + " <td>Effusion|Nodule|Pleural_Thickening|Mass</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1688</td>\n", + " <td>59</td>\n", + " <td>F</td>\n", + " <td>DX</td>\n", + " <td>Infiltration|Nodule</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13659</td>\n", + " <td>62</td>\n", + " <td>F</td>\n", + " <td>DX</td>\n", + " <td>Consolidation|Mass|Pneumonia|Pneumothorax</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13118</td>\n", + " <td>69</td>\n", + " <td>M</td>\n", + " <td>DX</td>\n", + " <td>Atelectasis</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>10172</td>\n", + " <td>59</td>\n", + " <td>F</td>\n", + " <td>DX</td>\n", + " <td>Atelectasis|Effusion</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>5066</td>\n", + " <td>52</td>\n", + " <td>M</td>\n", + " <td>DX</td>\n", + " <td>Cardiomegaly|Effusion|Infiltration</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>23075</td>\n", + " <td>31</td>\n", + " <td>M</td>\n", + " <td>DX</td>\n", + " <td>Mass</td>\n", + " <td>1024</td>\n", + " <td>1024</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PatientID PatientAge PatientSex Modality \\\n", + "0 29579 24 F DX \n", + "1 1688 59 F DX \n", + "2 13659 62 F DX \n", + "3 13118 69 M DX \n", + "4 10172 59 F DX \n", + "5 5066 52 M DX \n", + "6 23075 31 M DX \n", + "\n", + " Findings Rows Columns \n", + "0 Effusion|Nodule|Pleural_Thickening|Mass 1024 1024 \n", + "1 Infiltration|Nodule 1024 1024 \n", + "2 Consolidation|Mass|Pneumonia|Pneumothorax 1024 1024 \n", + "3 Atelectasis 1024 1024 \n", + "4 Atelectasis|Effusion 1024 1024 \n", + "5 Cardiomegaly|Effusion|Infiltration 
1024 1024 \n", + "6 Mass 1024 1024 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mydata" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}