{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create a CSV with all the metadata of the DICOM images, plus summary statistics of each image's pixel values (min, max, std, mean), so we can analyze that data more easily.\n",
"\n",
"# Import"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
},
"outputs": [],
"source": [
"import glob\n",
"import pandas as pd\n",
"import pydicom\n",
"import tqdm\n",
"\n",
"import pdb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read loop"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
"_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
},
"outputs": [],
"source": [
"def get_data_dict(img):\n",
"    \"\"\"Collect the non-pixel elements of a DICOM dataset into a flat dict.\n",
"\n",
"    Every element yielded by ``img.iterall()`` is stored under a key derived\n",
"    from its name: spaces become underscores, parentheses are dropped and the\n",
"    result is lower-cased. The element named \"Pixel Data\" is left out. When\n",
"    two elements map to the same key, the one visited last wins.\n",
"\n",
"    Args:\n",
"        img: object exposing ``iterall()``, whose items have ``name`` and\n",
"            ``value`` attributes.\n",
"\n",
"    Returns:\n",
"        dict mapping the normalised element name to the element value.\n",
"    \"\"\"\n",
"    # One translation table does the work of the three chained replacements.\n",
"    normalise = str.maketrans({\" \": \"_\", \"(\": None, \")\": None})\n",
"    return {\n",
"        element.name.translate(normalise).lower(): element.value\n",
"        for element in img.iterall()\n",
"        if element.name != \"Pixel Data\"\n",
"    }\n",
"\n",
"def _to_number(value):\n",
"    \"\"\"Convert a numeric DICOM value to int when it is integral, else to float.\"\"\"\n",
"    number = float(value)\n",
"    return int(number) if number.is_integer() else number\n",
"\n",
"\n",
"def get_list_data(imgs, skip_files=('ID_6431af929.dcm',)):\n",
"    \"\"\"Read every DICOM file in ``imgs`` and return one metadata dict per image.\n",
"\n",
"    Each dict is the output of ``get_data_dict`` for the file, extended with\n",
"    the min, max, standard deviation and mean of the rescaled pixel values\n",
"    (``pixel_array * slope + intercept``) under the keys ``pxl_min``,\n",
"    ``pxl_max``, ``pxl_std`` and ``pxl_mean``.\n",
"\n",
"    Args:\n",
"        imgs: iterable of DICOM file paths (strings).\n",
"        skip_files: a path suffix, or an iterable of path suffixes, to skip.\n",
"            The default keeps skipping 'ID_6431af929.dcm' as before\n",
"            (presumably an unreadable file -- confirm against the dataset).\n",
"\n",
"    Returns:\n",
"        list of dicts, one per file that was read, in input order.\n",
"    \"\"\"\n",
"    # str.endswith only accepts a str or a tuple of str.\n",
"    if isinstance(skip_files, str):\n",
"        skip_files = (skip_files,)\n",
"    skip_suffixes = tuple(skip_files)\n",
"\n",
"    list_data = []\n",
"    for path in tqdm.tqdm(imgs):\n",
"        if path.endswith(skip_suffixes):\n",
"            continue\n",
"        # dcmread is the supported reader; read_file is a deprecated alias.\n",
"        img = pydicom.dcmread(path)\n",
"        img_data = get_data_dict(img)\n",
"\n",
"        # Rescale Intercept/Slope are decimal values: a plain int() would\n",
"        # truncate a fractional slope or intercept. Integral values stay int,\n",
"        # so the pixel dtype is unchanged in that case.\n",
"        intercept = _to_number(img[('0028', '1052')].value)\n",
"        slope = _to_number(img[('0028', '1053')].value)\n",
"        pic = img.pixel_array * slope + intercept\n",
"        img_data['pxl_min'] = pic.min()\n",
"        img_data['pxl_max'] = pic.max()\n",
"        img_data['pxl_std'] = pic.std()\n",
"        img_data['pxl_mean'] = pic.mean()\n",
"\n",
"        list_data.append(img_data)\n",
"    return list_data\n",
"\n",
"def get_df_data(imgs):\n",
"    \"\"\"Return a DataFrame with one row per dict produced by ``get_list_data(imgs)``.\"\"\"\n",
"    return pd.DataFrame(get_list_data(imgs))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_path = \"../input/rsna-intracranial-hemorrhage-detection\"\n",
"\n",
"# glob returns paths in arbitrary order; sort them so the CSV rows come out\n",
"# in the same order on every run.\n",
"train_imgs = sorted(glob.glob(f\"{input_path}/stage_1_train_images/*\"))\n",
"test_imgs = sorted(glob.glob(f\"{input_path}/stage_1_test_images/*\"))\n",
"#test_imgs2 = sorted(glob.glob(f\"{input_path}/stage_2_test_images/*\"))\n",
"\n",
"# One clearly named frame per split instead of rebinding a single variable.\n",
"df_train_meta = get_df_data(train_imgs)\n",
"df_train_meta.to_csv(\"df_dicom_metadata.csv\", index=False)\n",
"\n",
"df_test_meta = get_df_data(test_imgs)\n",
"df_test_meta.to_csv(\"df_dicom_test_metadata.csv\", index=False)\n",
"\n",
"# Stage 2 export is disabled; uncomment these lines (and test_imgs2 above)\n",
"# to produce it.\n",
"#df_test2_meta = get_df_data(test_imgs2)\n",
"#df_test2_meta.to_csv(\"df_dicom_test2_metadata.csv\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}