428 lines (427 with data), 11.3 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pydicom\n",
"import glob"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"## First, collect the paths of all of my DICOM files into a list\n",
"mydicoms = glob.glob(\"*.dcm\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's look at the contents of the first DICOM:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"dcm1 = pydicom.dcmread(mydicoms[0])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0008, 0016) SOP Class UID UI: Secondary Capture Image Storage\n",
"(0008, 0018) SOP Instance UID UI: 1.3.6.1.4.1.11129.5.5.162426174634548301003630270411628292460952\n",
"(0008, 0060) Modality CS: 'DX'\n",
"(0008, 1030) Study Description LO: 'Effusion|Nodule|Pleural_Thickening|Mass'\n",
"(0010, 0020) Patient ID LO: '29579'\n",
"(0010, 0040) Patient's Sex CS: 'F'\n",
"(0010, 1010) Patient's Age AS: '24'\n",
"(0020, 000d) Study Instance UID UI: 1.3.6.1.4.1.11129.5.5.113025392650823751977671880960497589856674\n",
"(0020, 000e) Series Instance UID UI: 1.3.6.1.4.1.11129.5.5.168055162156043936178718006100964727334210\n",
"(0028, 0002) Samples per Pixel US: 1\n",
"(0028, 0004) Photometric Interpretation CS: 'MONOCHROME2'\n",
"(0028, 0010) Rows US: 1024\n",
"(0028, 0011) Columns US: 1024\n",
"(0028, 0100) Bits Allocated US: 8\n",
"(0028, 0101) Bits Stored US: 8\n",
"(0028, 0102) High Bit US: 7\n",
"(0028, 0103) Pixel Representation US: 0\n",
"(7fe0, 0010) Pixel Data OW: Array of 1048576 elements"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From the attributes listed above, these are the ones I want to extract: \n",
"* Modality\n",
"* Study Description\n",
"* Patient ID\n",
"* Patient's Sex\n",
"* Patient's Age\n",
"* Rows\n",
"* Columns"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'DX'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.Modality"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Effusion|Nodule|Pleural_Thickening|Mass'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.StudyDescription"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'29579'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.PatientID"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'F'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.PatientSex"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'24'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.PatientAge"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1024"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.Rows"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"1024"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dcm1.Columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Now, let's loop over all of our DICOMs to collect these attributes, and then create the dataframe that we want:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Collect one row of header attributes per DICOM file.\n",
"# The order of the fields must match the column names passed to pd.DataFrame in the next cell.\n",
"all_data = []\n",
"\n",
"for dcm_path in mydicoms:\n",
"    # Only header attributes are extracted below, so skip loading the pixel data.\n",
"    dcm = pydicom.dcmread(dcm_path, stop_before_pixels=True)\n",
"    fields = [dcm.PatientID, int(dcm.PatientAge), dcm.PatientSex, dcm.Modality, dcm.StudyDescription,\n",
"              dcm.Rows, dcm.Columns]\n",
"    all_data.append(fields)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Column labels, in the same order as the fields collected for each DICOM.\n",
"# StudyDescription holds the pipe-delimited findings, hence the 'Findings' label.\n",
"column_names = ['PatientID', 'PatientAge', 'PatientSex', 'Modality', 'Findings', 'Rows', 'Columns']\n",
"\n",
"mydata = pd.DataFrame(all_data, columns=column_names)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PatientID</th>\n",
" <th>PatientAge</th>\n",
" <th>PatientSex</th>\n",
" <th>Modality</th>\n",
" <th>Findings</th>\n",
" <th>Rows</th>\n",
" <th>Columns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29579</td>\n",
" <td>24</td>\n",
" <td>F</td>\n",
" <td>DX</td>\n",
" <td>Effusion|Nodule|Pleural_Thickening|Mass</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1688</td>\n",
" <td>59</td>\n",
" <td>F</td>\n",
" <td>DX</td>\n",
" <td>Infiltration|Nodule</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13659</td>\n",
" <td>62</td>\n",
" <td>F</td>\n",
" <td>DX</td>\n",
" <td>Consolidation|Mass|Pneumonia|Pneumothorax</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13118</td>\n",
" <td>69</td>\n",
" <td>M</td>\n",
" <td>DX</td>\n",
" <td>Atelectasis</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10172</td>\n",
" <td>59</td>\n",
" <td>F</td>\n",
" <td>DX</td>\n",
" <td>Atelectasis|Effusion</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5066</td>\n",
" <td>52</td>\n",
" <td>M</td>\n",
" <td>DX</td>\n",
" <td>Cardiomegaly|Effusion|Infiltration</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>23075</td>\n",
" <td>31</td>\n",
" <td>M</td>\n",
" <td>DX</td>\n",
" <td>Mass</td>\n",
" <td>1024</td>\n",
" <td>1024</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PatientID PatientAge PatientSex Modality \\\n",
"0 29579 24 F DX \n",
"1 1688 59 F DX \n",
"2 13659 62 F DX \n",
"3 13118 69 M DX \n",
"4 10172 59 F DX \n",
"5 5066 52 M DX \n",
"6 23075 31 M DX \n",
"\n",
" Findings Rows Columns \n",
"0 Effusion|Nodule|Pleural_Thickening|Mass 1024 1024 \n",
"1 Infiltration|Nodule 1024 1024 \n",
"2 Consolidation|Mass|Pneumonia|Pneumothorax 1024 1024 \n",
"3 Atelectasis 1024 1024 \n",
"4 Atelectasis|Effusion 1024 1024 \n",
"5 Cardiomegaly|Effusion|Infiltration 1024 1024 \n",
"6 Mass 1024 1024 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mydata"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}