[53d15f]: / notebooks / DICOM_metadata_to_CSV.ipynb

Download this file

122 lines (121 with data), 3.4 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's create a CSV with all the metadata of the DICOM images so we can do a better analysis of that data.\n",
    "\n",
    "# Import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
   },
   "outputs": [],
   "source": [
    "import glob\n",
    "import pandas as pd\n",
    "import pydicom\n",
    "import tqdm\n",
    "\n",
    "import pdb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
   },
   "outputs": [],
   "source": [
    "def get_data_dict(img):\n",
    "    img_data = {}\n",
    "    for i in img.iterall():\n",
    "        if i.name == \"Pixel Data\":\n",
    "            continue\n",
    "        name = i.name.replace(\" \", \"_\").replace(\"(\", \"\").replace(\")\", \"\").lower()\n",
    "        img_data[name] = i.value\n",
    "    return img_data\n",
    "\n",
    "def get_list_data(imgs, skip_files=(\"ID_6431af929.dcm\",)):\n",
    "    \"\"\"Read each DICOM file and return one metadata dict per file.\n",
    "\n",
    "    Each dict holds the output of ``get_data_dict`` plus four statistics of\n",
    "    the rescaled pixel array (``pixel_array * slope + intercept``), stored\n",
    "    under ``pxl_min``, ``pxl_max``, ``pxl_std`` and ``pxl_mean``.\n",
    "\n",
    "    Args:\n",
    "        imgs: iterable of DICOM file paths (strings).\n",
    "        skip_files: a path suffix, or an iterable of path suffixes; any path\n",
    "            ending in one of them is skipped without being read. The default\n",
    "            excludes ``ID_6431af929.dcm``.\n",
    "\n",
    "    Returns:\n",
    "        list of dicts in the order of ``imgs``, minus the skipped files.\n",
    "    \"\"\"\n",
    "    if isinstance(skip_files, str):\n",
    "        skip_files = (skip_files,)\n",
    "    skip_suffixes = tuple(skip_files)\n",
    "\n",
    "    list_data = []\n",
    "    for path in tqdm.tqdm(imgs):\n",
    "        # str.endswith accepts a tuple, so one call checks every excluded name.\n",
    "        if path.endswith(skip_suffixes):\n",
    "            continue\n",
    "        # dcmread replaces read_file, which is deprecated and removed in pydicom 3.0.\n",
    "        img = pydicom.dcmread(path)\n",
    "        img_data = get_data_dict(img)\n",
    "\n",
    "        # Rescale Intercept (0028,1052) and Rescale Slope (0028,1053) are decimal\n",
    "        # strings and may be fractional, so int() would truncate them. Float\n",
    "        # operands also keep the rescale out of the stored integer pixel dtype.\n",
    "        intercept = float(img[('0028','1052')].value)\n",
    "        slope = float(img[('0028','1053')].value)\n",
    "        pic = img.pixel_array * slope + intercept\n",
    "        img_data['pxl_min'] = pic.min()\n",
    "        img_data['pxl_max'] = pic.max()\n",
    "        img_data['pxl_std'] = pic.std()\n",
    "        img_data['pxl_mean'] = pic.mean()\n",
    "\n",
    "        list_data.append(img_data)\n",
    "    return list_data\n",
    "\n",
    "def get_df_data(imgs):\n",
    "    \"\"\"Return a DataFrame built from ``get_list_data(imgs)``.\n",
    "\n",
    "    Args:\n",
    "        imgs: DICOM file paths, passed straight through to ``get_list_data``.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with one row per dict returned by ``get_list_data``.\n",
    "    \"\"\"\n",
    "    return pd.DataFrame(get_list_data(imgs))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_path = \"../input/rsna-intracranial-hemorrhage-detection\"\n",
    "\n",
    "# The order of glob results depends on the file system; sorting keeps the\n",
    "# CSV row order the same from run to run.\n",
    "train_imgs = sorted(glob.glob(f\"{input_path}/stage_1_train_images/*\"))\n",
    "test_imgs = sorted(glob.glob(f\"{input_path}/stage_1_test_images/*\"))\n",
    "#test_imgs2 = sorted(glob.glob(f\"{input_path}/stage_2_test_images/*\"))\n",
    "\n",
    "# Training images -> df_dicom_metadata.csv\n",
    "df_imgs = get_df_data(train_imgs)\n",
    "df_imgs.to_csv(\"df_dicom_metadata.csv\", index=False)\n",
    "\n",
    "# Stage 1 test images -> df_dicom_test_metadata.csv\n",
    "df_imgs = get_df_data(test_imgs)\n",
    "df_imgs.to_csv(\"df_dicom_test_metadata.csv\", index=False)\n",
    "\n",
    "# Stage 2 test images: disabled; uncomment together with test_imgs2 above.\n",
    "#df_imgs = get_df_data(test_imgs2)\n",
    "#df_imgs.to_csv(\"df_dicom_test2_metadata.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}