534 lines (533 with data), 23.3 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can read the DICOM metadata - especially \"Window Center\", \"Window Width\", \"Rescale Intercept\" and \"Rescale Slope\" - for data pre-processing.\n",
"Head CT are a center of 40 and a width of 80"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"from multiprocessing import Process\n",
"import concurrent.futures\n",
"import ast\n",
"import csv\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import pydicom\n",
"import re\n",
"import PIL\n",
"from PIL import Image\n",
"from random import randrange\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"# import seaborn as sns\n",
"\n",
"train_csv_path = '../src/training.csv'\n",
"validate_csv_path = '../src/validation.csv'\n",
"test_csv_path = '../src/testing.csv'\n",
"\n",
"tony_data_path = ''\n",
"chris_data_path = ''\n",
"kyle_data_path = '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_1_train_images/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#dicoms to avoid\n",
"bad_actors = []"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ea75e7e82e54a8d9a809ec1701e4a09",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=674258), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID_dfa4e344a.dcm (638, 490)\n",
"ID_12e3b6923.dcm (430, 404)\n",
"ID_cef2af72d.dcm (638, 490)\n",
"ID_046ba342c.dcm (638, 490)\n",
"ID_680b2194c.dcm (666, 512)\n",
"ID_1bb3b44c7.dcm (638, 490)\n",
"ID_567a36143.dcm (436, 436)\n",
"ID_b4adf8739.dcm (462, 462)\n",
"ID_8d0ca7742.dcm (430, 404)\n",
"ID_db48a633d.dcm (666, 512)\n",
"ID_9cdc7295b.dcm (768, 768)\n",
"ID_c964e4096.dcm (666, 512)\n",
"ID_ca9462f49.dcm (462, 462)\n",
"ID_191369dca.dcm (408, 374)\n",
"ID_ac39010dc.dcm (430, 404)\n",
"ID_0e9ac1c5f.dcm (464, 464)\n",
"ID_d9840380c.dcm (464, 464)\n",
"ID_66131f4c9.dcm (408, 374)\n",
"ID_676b0cb59.dcm (768, 768)\n",
"ID_b76b13444.dcm (666, 512)\n",
"ID_898ff55b6.dcm (462, 462)\n",
"ID_37c495912.dcm (462, 462)\n",
"ID_a432727fd.dcm (464, 464)\n",
"ID_1bc5771a7.dcm (638, 490)\n",
"ID_69974dd3e.dcm (408, 374)\n",
"ID_3e31d57d0.dcm (666, 512)\n",
"ID_91c508c7a.dcm (462, 462)\n",
"ID_c35d5c858.dcm (430, 404)\n",
"ID_d6435f3bf.dcm (666, 512)\n",
"ID_b77ba3355.dcm (464, 464)\n",
"ID_a23a8193f.dcm (638, 490)\n",
"ID_9dad2eb09.dcm (666, 512)\n",
"ID_ea0ddbaf9.dcm (408, 374)\n",
"ID_d0c52575a.dcm (464, 464)\n",
"ID_c64131283.dcm (768, 768)\n",
"ID_f0d55b727.dcm (462, 462)\n",
"ID_1690a6499.dcm (436, 436)\n",
"ID_a2e178cc7.dcm (436, 436)\n",
"ID_950a06268.dcm (638, 490)\n",
"ID_cec3997fa.dcm (638, 490)\n",
"ID_3cb1b59bc.dcm (430, 404)\n",
"ID_3c8b72361.dcm (638, 490)\n",
"ID_ae1689e1b.dcm (436, 436)\n",
"ID_394ffb5fd.dcm (408, 374)\n",
"ID_fdbfb2c17.dcm (666, 512)\n",
"ID_94463e98f.dcm (462, 462)\n",
"ID_80a2dbc4a.dcm (768, 768)\n",
"ID_3274f5977.dcm (666, 512)\n",
"ID_f23f8e617.dcm (768, 768)\n",
"ID_8fd6d5047.dcm (768, 768)\n",
"ID_9b297fa83.dcm (408, 374)\n",
"ID_19f266244.dcm (638, 490)\n",
"ID_8caa68ebd.dcm (408, 374)\n",
"ID_079945c27.dcm (666, 512)\n",
"ID_c60e34466.dcm (768, 768)\n",
"ID_d1afb9750.dcm (436, 436)\n",
"ID_1291d1943.dcm (464, 464)\n",
"ID_6b1a86148.dcm (638, 490)\n",
"ID_7e756c43b.dcm (638, 490)\n",
"ID_142f85eb8.dcm (464, 464)\n",
"ID_d7229490a.dcm (462, 462)\n",
"ID_17103c79e.dcm (430, 404)\n",
"ID_8756b0c04.dcm (768, 768)\n",
"ID_28d6a694f.dcm (408, 374)\n",
"ID_8dc299456.dcm (638, 490)\n",
"ID_61c646098.dcm (436, 436)\n",
"ID_9a3bba619.dcm (430, 404)\n",
"ID_de10fdac2.dcm (768, 768)\n",
"ID_b19f52c76.dcm (436, 436)\n",
"ID_25de55880.dcm (462, 462)\n",
"ID_b494c2115.dcm (430, 404)\n",
"ID_a1bb9bc26.dcm (464, 464)\n",
"ID_2b3671dd9.dcm (666, 512)\n",
"ID_f1fe5334e.dcm (464, 464)\n",
"ID_3d5d23058.dcm (464, 464)\n",
"ID_21053fe7e.dcm (430, 404)\n",
"ID_b12bb2b16.dcm (638, 490)\n",
"ID_15b3ba199.dcm (462, 462)\n",
"ID_c2738e8b1.dcm (436, 436)\n",
"ID_0de0ab1d8.dcm (768, 768)\n",
"ID_038f966b9.dcm (408, 374)\n",
"ID_184c541fa.dcm (768, 768)\n",
"ID_5bf2ca43f.dcm (666, 512)\n",
"ID_ae691dd29.dcm (638, 490)\n",
"ID_a3feeadf4.dcm (408, 374)\n",
"ID_c6f2d84be.dcm (464, 464)\n",
"ID_176e4f16d.dcm (638, 490)\n",
"ID_6c19c9f7b.dcm (666, 512)\n",
"ID_291edd834.dcm (768, 768)\n",
"ID_b76de950b.dcm (436, 436)\n",
"ID_6d7a27643.dcm (462, 462)\n",
"ID_19306ecc5.dcm (464, 464)\n",
"ID_6a939bc17.dcm (408, 374)\n",
"ID_8144c7120.dcm (430, 404)\n",
"ID_b8665a653.dcm (436, 436)\n",
"ID_d7777de78.dcm (430, 404)\n",
"ID_1e633cf27.dcm (464, 464)\n",
"ID_cbbb50e6d.dcm (666, 512)\n",
"ID_6dcedd2e1.dcm (430, 404)\n",
"ID_c07d2cb73.dcm (638, 490)\n",
"ID_76f88846f.dcm (768, 768)\n",
"ID_b194d2a23.dcm (462, 462)\n",
"ID_f03370d7c.dcm (638, 490)\n",
"ID_7714ead69.dcm (666, 512)\n",
"ID_53f460f86.dcm (768, 768)\n",
"ID_631f0b556.dcm (638, 490)\n",
"ID_7917d368d.dcm (768, 768)\n",
"ID_11c4f9f91.dcm (462, 462)\n",
"ID_10fe2031e.dcm (408, 374)\n",
"ID_2fd4dda7c.dcm (408, 374)\n",
"ID_ff012ee5b.dcm (666, 512)\n",
"ID_985fb5e49.dcm (638, 490)\n",
"ID_a9ab8569f.dcm (408, 374)\n",
"ID_75e3f7e5a.dcm (638, 490)\n",
"ID_f145c3cf4.dcm (768, 768)\n",
"ID_9da128021.dcm (436, 436)\n",
"ID_155b9c546.dcm (436, 436)\n",
"ID_c51cbe76b.dcm (768, 768)\n",
"ID_fd5c41761.dcm (464, 464)\n",
"ID_d1a1c9a6c.dcm (436, 436)\n",
"ID_445a92ac2.dcm (638, 490)\n",
"ID_2ac7f01ed.dcm (408, 374)\n",
"ID_73dee8958.dcm (768, 768)\n",
"ID_830f46cad.dcm (408, 374)\n",
"ID_dfaa49f5c.dcm (462, 462)\n",
"ID_09aeb0bbd.dcm (462, 462)\n",
"ID_317330708.dcm (430, 404)\n",
"ID_88b0d8b4f.dcm (638, 490)\n",
"ID_3f422852d.dcm (666, 512)\n",
"ID_a3128aa77.dcm (408, 374)\n",
"ID_7a02fdbea.dcm (666, 512)\n",
"ID_4f0317d23.dcm (430, 404)\n",
"ID_57d6a6455.dcm (768, 768)\n",
"ID_56ecdf5c1.dcm (464, 464)\n",
"ID_abcd58e88.dcm (436, 436)\n",
"ID_362423b57.dcm (430, 404)\n",
"ID_66accd2e4.dcm (430, 404)\n",
"ID_97e5a203e.dcm (430, 404)\n",
"ID_798d956d0.dcm (638, 490)\n",
"ID_97cd49666.dcm (408, 374)\n",
"ID_5dbe845c1.dcm (436, 436)\n",
"ID_6f92e4481.dcm (462, 462)\n",
"ID_28c4609b3.dcm (462, 462)\n",
"ID_aef6c6df9.dcm (464, 464)\n",
"ID_3bc141392.dcm (408, 374)\n",
"ID_882cd57de.dcm (638, 490)\n",
"ID_91b9ce430.dcm (464, 464)\n",
"ID_fe7327fab.dcm (436, 436)\n",
"ID_a7e689932.dcm (638, 490)\n",
"ID_d1b2d9ad0.dcm (436, 436)\n",
"ID_75cbdae68.dcm (666, 512)\n",
"ID_3e60e696d.dcm (638, 490)\n",
"ID_4e61fb0b2.dcm (638, 490)\n",
"ID_dd3b5bf4e.dcm (430, 404)\n",
"ID_72dce7784.dcm (638, 490)\n",
"ID_c11582dc9.dcm (436, 436)\n",
"ID_6b15a7649.dcm (430, 404)\n",
"ID_04280250b.dcm (408, 374)\n",
"ID_cb970c6dc.dcm (462, 462)\n",
"ID_ea2861e9a.dcm (666, 512)\n",
"ID_dabc2a818.dcm (464, 464)\n",
"ID_4c9fb82af.dcm (436, 436)\n",
"ID_85900eb84.dcm (408, 374)\n",
"ID_d4ea87a35.dcm (430, 404)\n",
"ID_3ba8a116c.dcm (436, 436)\n",
"ID_8f5d4b696.dcm (430, 404)\n",
"ID_0c4987103.dcm (462, 462)\n",
"ID_0603b315e.dcm (408, 374)\n",
"ID_9ece1bb21.dcm (464, 464)\n",
"ID_c1ff9eb46.dcm (768, 768)\n",
"ID_c6bbec638.dcm (464, 464)\n",
"ID_845f922f4.dcm (436, 436)\n",
"ID_7940bb7d0.dcm (638, 490)\n",
"ID_c4575f13b.dcm (430, 404)\n",
"ID_f188940f9.dcm (638, 490)\n",
"ID_9b68c3f5f.dcm (436, 436)\n",
"ID_75d691728.dcm (638, 490)\n",
"ID_8fde47d9f.dcm (408, 374)\n",
"ID_155249efa.dcm (436, 436)\n",
"ID_6fbc30b5d.dcm (408, 374)\n",
"ID_d3b76ef6e.dcm (638, 490)\n",
"ID_6508563e0.dcm (464, 464)\n",
"ID_ac47ba810.dcm (638, 490)\n",
"ID_7c08b7fb7.dcm (430, 404)\n",
"ID_f4891876d.dcm (430, 404)\n",
"ID_b055aafa9.dcm (408, 374)\n",
"ID_22069463a.dcm (768, 768)\n",
"ID_081f4d071.dcm (408, 374)\n",
"ID_dd083e12a.dcm (638, 490)\n",
"ID_b1cea5abb.dcm (666, 512)\n",
"ID_e4b636907.dcm (666, 512)\n",
"ID_23d0b13b7.dcm (464, 464)\n",
"ID_ff9674e53.dcm (462, 462)\n",
"ID_36ab2e72a.dcm (436, 436)\n",
"ID_5ffae2e26.dcm (768, 768)\n",
"ID_6cc19ac41.dcm (408, 374)\n",
"ID_0b0e59911.dcm (430, 404)\n",
"ID_64b44f180.dcm (408, 374)\n",
"ID_ae7020fd1.dcm (638, 490)\n",
"ID_61d2718d2.dcm (462, 462)\n",
"ID_942e2f95b.dcm (638, 490)\n",
"ID_cade293be.dcm (462, 462)\n",
"ID_ab474037b.dcm (464, 464)\n",
"ID_d3fd5220e.dcm (408, 374)\n",
"ID_55f7bbbf2.dcm (666, 512)\n",
"ID_4e14d0fe8.dcm (666, 512)\n",
"ID_68e45bca7.dcm (768, 768)\n",
"ID_6cb797177.dcm (430, 404)\n",
"ID_7e870621c.dcm (462, 462)\n",
"ID_5ab140176.dcm (768, 768)\n",
"ID_8c5fc9e44.dcm (666, 512)\n",
"ID_3eb407dd8.dcm (464, 464)\n",
"ID_3d7a23dbb.dcm (436, 436)\n",
"ID_c6463f07d.dcm (462, 462)\n",
"ID_18aac96c0.dcm (430, 404)\n",
"ID_f698edc00.dcm (464, 464)\n",
"ID_b966185b8.dcm (462, 462)\n",
"ID_ca4a832a1.dcm (464, 464)\n",
"ID_60a1f0e24.dcm (462, 462)\n",
"ID_10f34fb10.dcm (638, 490)\n",
"ID_def2a0e9f.dcm (638, 490)\n",
"ID_f22730d7b.dcm (638, 490)\n",
"ID_12a0d6d34.dcm (430, 404)\n",
"ID_f4c2157d8.dcm (638, 490)\n",
"ID_0e1861e6d.dcm (638, 490)\n",
"ID_44d57858e.dcm (666, 512)\n",
"ID_be3fb6c17.dcm (436, 436)\n",
"ID_ae7b11865.dcm (430, 404)\n",
"ID_b9938c32c.dcm (436, 436)\n",
"ID_d77fa1286.dcm (462, 462)\n",
"ID_8a35660d5.dcm (430, 404)\n",
"ID_842e85173.dcm (462, 462)\n",
"ID_a880e377e.dcm (462, 462)\n"
]
},
{
"ename": "ValueError",
"evalue": "The length of the pixel data in the dataset (153710 bytes) doesn't match the expected length (524288 bytes). The dataset may be corrupted or there may be an issue with the pixel data handler.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-d37fcb90a2a6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwriterow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdicom\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'corrupted'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpixel_array\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m512\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m512\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdicom\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/py3x/lib/python3.7/site-packages/pydicom/dataset.py\u001b[0m in \u001b[0;36mpixel_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1360\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mPixel\u001b[0m \u001b[0mData\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0mFE0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m00\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0ma\u001b[0m \u001b[0mNumPy\u001b[0m \u001b[0mndarray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1361\u001b[0m \"\"\"\n\u001b[0;32m-> 1362\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_pixel_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1363\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pixel_array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/py3x/lib/python3.7/site-packages/pydicom/dataset.py\u001b[0m in \u001b[0;36mconvert_pixel_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1306\u001b[0m )\n\u001b[1;32m 1307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1308\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mlast_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1309\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1310\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdecompress\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/py3x/lib/python3.7/site-packages/pydicom/dataset.py\u001b[0m in \u001b[0;36mconvert_pixel_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1274\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0;31m# Use the handler to get a 1D numpy array of the pixel data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0marr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhandler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_pixeldata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pixel_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreshape_pixel_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1278\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/py3x/lib/python3.7/site-packages/pydicom/pixel_data_handlers/numpy_handler.py\u001b[0m in \u001b[0;36mget_pixeldata\u001b[0;34m(ds, read_only)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\"The dataset may be corrupted or there may be an issue \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\"with the pixel data handler.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactual_length\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpadded_expected_len\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m )\n\u001b[1;32m 259\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mactual_length\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mpadded_expected_len\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: The length of the pixel data in the dataset (153710 bytes) doesn't match the expected length (524288 bytes). The dataset may be corrupted or there may be an issue with the pixel data handler."
]
}
],
"source": [
"with open('bad_actors.csv', 'w') as csv_file:\n",
" writer = csv.writer(csv_file, delimiter=',')\n",
" writer.writerow(['id', 'cause'])\n",
" for dicom in tqdm(os.listdir(kyle_data_path)):\n",
" try:\n",
" data = pydicom.dcmread(kyle_data_path+dicom)\n",
" except:\n",
" writer.writerow([dicom, 'corrupted'])\n",
" img = np.array(data.pixel_array, dtype=float)\n",
" if img.shape != (512, 512):\n",
" print(dicom, img.shape)\n",
" writer.writerow([dicom,img.shape])\n",
" \n",
"\n",
"\n",
"\n",
"# from multiprocessing.pool import ThreadPool as Pool\n",
"\n",
"# pool_size = 5 \n",
"# # define worker function before a Pool is instantiated\n",
"# def worker(dicom):\n",
"# try:\n",
"# data = pydicom.dcmread(kyle_data_path+dicom)\n",
"# img = np.array(data.pixel_array, dtype=float)\n",
"# except:\n",
"# writer.writerow([dicom, 'corrupted'])\n",
" \n",
"# if img.shape != (512, 512):\n",
"# print(dicom, img.shape)\n",
"# writer.writerow([dicom,img.shape])\n",
"\n",
"\n",
"# pool = Pool(pool_size)\n",
"\n",
"# with open('bad_actors.csv', 'w') as csv_file:\n",
"# writer = csv.writer(csv_file, delimiter=',')\n",
"# writer.writerow(['id', 'cause'])\n",
" \n",
"# for dicom in tqdm(os.listdir(kyle_data_path)):\n",
"# data = pool.apply_async(worker, (dicom,))\n",
"\n",
"# pool.close()\n",
"# pool.join()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"674258\n"
]
}
],
"source": [
"print(len(set(bad_actors)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Helper Functions\n",
"\n",
"def translate_dicom(filename, path=kyle_data_path, apply_window=True):\n",
" \"\"\"\n",
" Transform a medical DICOM file to a standardized pixel based array\n",
" Arguments:\n",
" filename {string}\n",
" path {string} -- file path to data, set in config.ini\n",
" apply_window {bool} -- if True (default) then windowed png of dicom data is returned\n",
" \"\"\"\n",
" \n",
" data = pydicom.dcmread(path + filename)\n",
" \n",
" if apply_window:\n",
" window_center, window_width, intercept, slope = get_windowing(data)\n",
" img = window_image(data.pixel_array, window_center, window_width, intercept, slope)\n",
" return np.array(img, dtype=float)\n",
"\n",
" img = np.array(data.pixel_array, dtype=float)\n",
" standardized_array = np.divide(np.subtract(img,img.mean()),img.std())\n",
" return standardized_array\n",
"\n",
"def window_image(img, window_center, window_width, intercept, slope):\n",
" \"\"\"\n",
" Given a CT scan img apply a windowing to the image\n",
" Arguments:\n",
" img {np.array} -- array of a dicom img processed by pydicom.dcmread()\n",
" window_center,window_width,intercept,slope {floats} -- values provided by dicom file metadata\n",
" \"\"\"\n",
" img = (img * slope + intercept)\n",
" img_min = window_center - window_width // 2\n",
" img_max = window_center + window_width // 2\n",
" img[img < img_min] = img_min\n",
" img[img > img_max] = img_max\n",
" return img \n",
"\n",
"def get_first_of_dicom_field_as_int(x):\n",
" #get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)\n",
" if type(x) == pydicom.multival.MultiValue:\n",
" return int(x[0])\n",
" else:\n",
" return int(x)\n",
"\n",
"def get_windowing(data):\n",
" dicom_fields = [data.WindowCenter,\n",
" data.WindowWidth,\n",
" data.RescaleIntercept,\n",
" data.RescaleSlope]\n",
" return (get_first_of_dicom_field_as_int(x) for x in dicom_fields)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}