{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pathlib import Path\n", "from collections import defaultdict\n", "import pickle" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "stage = \"stage_2\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Train dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(f\"data/{stage}_train_dicom_diags.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0BitsAllocatedBitsStoredColumnsHighBitImageOrientationPatient_0ImageOrientationPatient_1ImageOrientationPatient_2ImageOrientationPatient_3ImageOrientationPatient_4...WindowWidthWindowWidth_0WindowWidth_1fidanyepiduralintraparenchymalintraventricularsubarachnoidsubdural
001616512151.00.00.00.00.927184...80.0NaNNaNID_000012eaf000000
111616512151.00.00.00.00.968148...80.0NaNNaNID_000039fa0000000
221616512151.00.00.00.01.000000...100.0NaNNaNID_00005679d000000
331612512111.00.00.00.00.994522...NaN80.080.0ID_00008ce3c000000
441616512151.00.00.00.01.000000...135.0NaNNaNID_0000950d7000000
\n", "

5 rows × 41 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 BitsAllocated BitsStored Columns HighBit \\\n", "0 0 16 16 512 15 \n", "1 1 16 16 512 15 \n", "2 2 16 16 512 15 \n", "3 3 16 12 512 11 \n", "4 4 16 16 512 15 \n", "\n", " ImageOrientationPatient_0 ImageOrientationPatient_1 \\\n", "0 1.0 0.0 \n", "1 1.0 0.0 \n", "2 1.0 0.0 \n", "3 1.0 0.0 \n", "4 1.0 0.0 \n", "\n", " ImageOrientationPatient_2 ImageOrientationPatient_3 \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 0.0 0.0 \n", "\n", " ImageOrientationPatient_4 ... WindowWidth WindowWidth_0 WindowWidth_1 \\\n", "0 0.927184 ... 80.0 NaN NaN \n", "1 0.968148 ... 80.0 NaN NaN \n", "2 1.000000 ... 100.0 NaN NaN \n", "3 0.994522 ... NaN 80.0 80.0 \n", "4 1.000000 ... 135.0 NaN NaN \n", "\n", " fid any epidural intraparenchymal intraventricular subarachnoid \\\n", "0 ID_000012eaf 0 0 0 0 0 \n", "1 ID_000039fa0 0 0 0 0 0 \n", "2 ID_00005679d 0 0 0 0 0 \n", "3 ID_00008ce3c 0 0 0 0 0 \n", "4 ID_0000950d7 0 0 0 0 0 \n", "\n", " subdural \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "\n", "[5 rows x 41 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "21744" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sort, then group by (order is preserved within groups)\n", "gs = df.sort_values('ImagePositionPatient_2').groupby('SeriesInstanceUID')\n", "len(gs)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ImagePositionPatient_2fid
577964193.542489ID_c45659d3d
229790198.214051ID_4e0bdd2ba
22395202.885613ID_079945c27
746126207.557174ID_fdbfb2c17
253266212.228736ID_55f7bbbf2
\n", "
" ], "text/plain": [ " ImagePositionPatient_2 fid\n", "577964 193.542489 ID_c45659d3d\n", "229790 198.214051 ID_4e0bdd2ba\n", "22395 202.885613 ID_079945c27\n", "746126 207.557174 ID_fdbfb2c17\n", "253266 212.228736 ID_55f7bbbf2" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# see if it worked\n", "gs.get_group('ID_fa19cd5ea9')[['ImagePositionPatient_2', 'fid']].head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "g = gs.get_group('ID_fa19cd5ea9')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "subg = g[['SeriesInstanceUID', 'fid', 'any', 'epidural', \n", " 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SeriesInstanceUIDfidanyepiduralintraparenchymalintraventricularsubarachnoidsubdural
577964ID_fa19cd5ea9ID_c45659d3d000000
229790ID_fa19cd5ea9ID_4e0bdd2ba000000
22395ID_fa19cd5ea9ID_079945c27100001
746126ID_fa19cd5ea9ID_fdbfb2c17100001
253266ID_fa19cd5ea9ID_55f7bbbf2100001
549211ID_fa19cd5ea9ID_ba7080372100001
592856ID_fa19cd5ea9ID_c964e4096100001
183149ID_fa19cd5ea9ID_3e31d57d0100001
306771ID_fa19cd5ea9ID_680b2194c100001
540358ID_fa19cd5ea9ID_b76b13444100001
645217ID_fa19cd5ea9ID_db48a633d100001
270974ID_fa19cd5ea9ID_5bf2ca43f100001
672814ID_fa19cd5ea9ID_e4b636907100001
350834ID_fa19cd5ea9ID_7714ead69100001
749886ID_fa19cd5ea9ID_ff012ee5b100001
523978ID_fa19cd5ea9ID_b1cea5abb100001
464942ID_fa19cd5ea9ID_9dad2eb09100001
229881ID_fa19cd5ea9ID_4e14d0fe8100001
186237ID_fa19cd5ea9ID_3f422852d100001
599624ID_fa19cd5ea9ID_cbbb50e6d100001
347055ID_fa19cd5ea9ID_75cbdae68100001
359450ID_fa19cd5ea9ID_7a02fdbea100001
127205ID_fa19cd5ea9ID_2b3671dd9100001
148587ID_fa19cd5ea9ID_3274f5977000000
413641ID_fa19cd5ea9ID_8c5fc9e44000000
688538ID_fa19cd5ea9ID_ea2861e9a000000
318670ID_fa19cd5ea9ID_6c19c9f7b000000
630472ID_fa19cd5ea9ID_d6435f3bf000000
202656ID_fa19cd5ea9ID_44d57858e000000
\n", "
" ], "text/plain": [ " SeriesInstanceUID fid any epidural intraparenchymal \\\n", "577964 ID_fa19cd5ea9 ID_c45659d3d 0 0 0 \n", "229790 ID_fa19cd5ea9 ID_4e0bdd2ba 0 0 0 \n", "22395 ID_fa19cd5ea9 ID_079945c27 1 0 0 \n", "746126 ID_fa19cd5ea9 ID_fdbfb2c17 1 0 0 \n", "253266 ID_fa19cd5ea9 ID_55f7bbbf2 1 0 0 \n", "549211 ID_fa19cd5ea9 ID_ba7080372 1 0 0 \n", "592856 ID_fa19cd5ea9 ID_c964e4096 1 0 0 \n", "183149 ID_fa19cd5ea9 ID_3e31d57d0 1 0 0 \n", "306771 ID_fa19cd5ea9 ID_680b2194c 1 0 0 \n", "540358 ID_fa19cd5ea9 ID_b76b13444 1 0 0 \n", "645217 ID_fa19cd5ea9 ID_db48a633d 1 0 0 \n", "270974 ID_fa19cd5ea9 ID_5bf2ca43f 1 0 0 \n", "672814 ID_fa19cd5ea9 ID_e4b636907 1 0 0 \n", "350834 ID_fa19cd5ea9 ID_7714ead69 1 0 0 \n", "749886 ID_fa19cd5ea9 ID_ff012ee5b 1 0 0 \n", "523978 ID_fa19cd5ea9 ID_b1cea5abb 1 0 0 \n", "464942 ID_fa19cd5ea9 ID_9dad2eb09 1 0 0 \n", "229881 ID_fa19cd5ea9 ID_4e14d0fe8 1 0 0 \n", "186237 ID_fa19cd5ea9 ID_3f422852d 1 0 0 \n", "599624 ID_fa19cd5ea9 ID_cbbb50e6d 1 0 0 \n", "347055 ID_fa19cd5ea9 ID_75cbdae68 1 0 0 \n", "359450 ID_fa19cd5ea9 ID_7a02fdbea 1 0 0 \n", "127205 ID_fa19cd5ea9 ID_2b3671dd9 1 0 0 \n", "148587 ID_fa19cd5ea9 ID_3274f5977 0 0 0 \n", "413641 ID_fa19cd5ea9 ID_8c5fc9e44 0 0 0 \n", "688538 ID_fa19cd5ea9 ID_ea2861e9a 0 0 0 \n", "318670 ID_fa19cd5ea9 ID_6c19c9f7b 0 0 0 \n", "630472 ID_fa19cd5ea9 ID_d6435f3bf 0 0 0 \n", "202656 ID_fa19cd5ea9 ID_44d57858e 0 0 0 \n", "\n", " intraventricular subarachnoid subdural \n", "577964 0 0 0 \n", "229790 0 0 0 \n", "22395 0 0 1 \n", "746126 0 0 1 \n", "253266 0 0 1 \n", "549211 0 0 1 \n", "592856 0 0 1 \n", "183149 0 0 1 \n", "306771 0 0 1 \n", "540358 0 0 1 \n", "645217 0 0 1 \n", "270974 0 0 1 \n", "672814 0 0 1 \n", "350834 0 0 1 \n", "749886 0 0 1 \n", "523978 0 0 1 \n", "464942 0 0 1 \n", "229881 0 0 1 \n", "186237 0 0 1 \n", "599624 0 0 1 \n", "347055 0 0 1 \n", "359450 0 0 1 \n", "127205 0 0 1 \n", "148587 0 0 0 \n", "413641 0 0 0 \n", "688538 0 0 0 \n", "318670 0 0 0 \n", "630472 0 0 0 \n", "202656 0 0 0 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "subg" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# You can use a btrfs snapshot and rename files by study_id and z-pos through the brain\n", "def rename_train_group(subg):\n", " ix = 0\n", " total = len(subg)\n", " for index, row in subg.iterrows():\n", " cur_fn = row['fid']\n", " new_fn = f\"{row['SeriesInstanceUID']}_{ix:03}_{total:03}_{row['any']}_{row['epidural']}_{row['intraparenchymal']}_{row['intraventricular']}_{row['subarachnoid']}_{row['subdural']}_{cur_fn}\"\n", " ix += 1\n", " Path(f'data/unzip_renamed/{stage}_train_images/{cur_fn}.dcm').rename(f'data/unzip_renamed/{stage}_train_images/{new_fn}.dcm')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def index_group(subg, study_ix_to_fn, fn_to_study_ix):\n", " ix = 0\n", " for index, row in subg.iterrows():\n", " fn = row['SOPInstanceUID']\n", " study = row['SeriesInstanceUID']\n", " study_ix_to_fn[study].append(fn)\n", " fn_to_study_ix[fn] = (study, ix)\n", " ix += 1 " ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "labels = [ 'any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural' ]\n", "\n", "def label_group(subg, fn_to_labels):\n", " for index, row in subg.iterrows():\n", " fn = row['SOPInstanceUID']\n", " fn_to_labels[fn] = [ label for label in labels if row[label] == 1 ]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "train_study_ix_to_fn = defaultdict(list)\n", "train_fn_to_study_ix = {}\n", "train_fn_to_labels = {}\n", "\n", "for name, subg in gs:\n", " #rename_train_group(subg)\n", " index_group(subg, train_study_ix_to_fn, train_fn_to_study_ix)\n", " label_group(subg, train_fn_to_labels)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Do not pickle yet, merge with test\n", "pickle.dump(train_study_ix_to_fn, open(f\"data/{stage}_train_study_ix_to_fn.pickle\", \"wb\" ))\n", "pickle.dump(train_fn_to_study_ix, open(f\"data/{stage}_train_fn_to_study_ix.pickle\", \"wb\" ))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['any', 'subdural']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_fn_to_labels['ID_079945c27']" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "pickle.dump(train_fn_to_labels, open(f\"data/{stage}_train_fn_to_labels.pickle\", 'wb'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test dataset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(f\"data/{stage}_test_dicom.csv\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0BitsAllocatedBitsStoredColumnsHighBitImageOrientationPatient_0ImageOrientationPatient_1ImageOrientationPatient_2ImageOrientationPatient_3ImageOrientationPatient_4...SamplesPerPixelSeriesInstanceUIDStudyIDStudyInstanceUIDWindowCenterWindowCenter_0WindowCenter_1WindowWidthWindowWidth_0WindowWidth_1
001612512111.00.00.00.00.981627...1ID_4d28912ba6NaNID_1f6d1e8aebNaN40.040.0NaN80.080.0
111616512151.00.00.00.00.987688...1ID_acabdeee86NaNID_4a8d7ec19f30.0NaNNaN80.0NaNNaN
221616512151.00.00.00.00.927184...1ID_d00cee7f0cNaNID_a6ca24417230.0NaNNaN80.0NaNNaN
331616512151.00.00.00.00.986286...1ID_a52a0112d5NaNID_fa950a03af30.0NaNNaN80.0NaNNaN
441612512111.00.00.00.01.000000...1ID_f552d3b922NaNID_965d8b3d8eNaN36.036.0NaN80.080.0
\n", "

5 rows × 34 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 BitsAllocated BitsStored Columns HighBit \\\n", "0 0 16 12 512 11 \n", "1 1 16 16 512 15 \n", "2 2 16 16 512 15 \n", "3 3 16 16 512 15 \n", "4 4 16 12 512 11 \n", "\n", " ImageOrientationPatient_0 ImageOrientationPatient_1 \\\n", "0 1.0 0.0 \n", "1 1.0 0.0 \n", "2 1.0 0.0 \n", "3 1.0 0.0 \n", "4 1.0 0.0 \n", "\n", " ImageOrientationPatient_2 ImageOrientationPatient_3 \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 0.0 0.0 \n", "\n", " ImageOrientationPatient_4 ... SamplesPerPixel SeriesInstanceUID \\\n", "0 0.981627 ... 1 ID_4d28912ba6 \n", "1 0.987688 ... 1 ID_acabdeee86 \n", "2 0.927184 ... 1 ID_d00cee7f0c \n", "3 0.986286 ... 1 ID_a52a0112d5 \n", "4 1.000000 ... 1 ID_f552d3b922 \n", "\n", " StudyID StudyInstanceUID WindowCenter WindowCenter_0 WindowCenter_1 \\\n", "0 NaN ID_1f6d1e8aeb NaN 40.0 40.0 \n", "1 NaN ID_4a8d7ec19f 30.0 NaN NaN \n", "2 NaN ID_a6ca244172 30.0 NaN NaN \n", "3 NaN ID_fa950a03af 30.0 NaN NaN \n", "4 NaN ID_965d8b3d8e NaN 36.0 36.0 \n", "\n", " WindowWidth WindowWidth_0 WindowWidth_1 \n", "0 NaN 80.0 80.0 \n", "1 80.0 NaN NaN \n", "2 80.0 NaN NaN \n", "3 80.0 NaN NaN \n", "4 NaN 80.0 80.0 \n", "\n", "[5 rows x 34 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3518" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sort, then group by (order is preserver within groups)\n", "gs = df.sort_values('ImagePositionPatient_2').groupby('SeriesInstanceUID')\n", "len(gs)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "def rename_test_group(subg):\n", " ix = 0\n", " total = len(subg)\n", " for index, row in subg.iterrows():\n", " cur_fn = row['SOPInstanceUID']\n", " new_fn = f\"{row['SeriesInstanceUID']}_{ix:03}_{total:03}_{cur_fn}\"\n", " ix += 1\n", " Path(f'data/unzip_renamed/{stage}_test_images/{cur_fn}.dcm').rename(f'data/unzip_renamed/{stage}_test_images/{new_fn}.dcm')\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "test_study_ix_to_fn = defaultdict(list)\n", "test_fn_to_study_ix = {}\n", "\n", "for name, subg in gs:\n", " #rename_test_group(subg)\n", " index_group(subg, test_study_ix_to_fn, test_fn_to_study_ix)\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "pickle.dump(test_study_ix_to_fn, open(f\"data/{stage}_test_study_ix_to_fn.pickle\", \"wb\" ))\n", "pickle.dump(test_fn_to_study_ix, open(f\"data/{stage}_test_fn_to_study_ix.pickle\", \"wb\" ))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "study_ix_to_fn = { **train_study_ix_to_fn, **test_study_ix_to_fn }\n", "fn_to_study_ix = { **train_fn_to_study_ix, **test_fn_to_study_ix }" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "pickle.dump(study_ix_to_fn, open(f\"data/{stage}_study_ix_to_fn.pickle\", \"wb\" ))\n", "pickle.dump(fn_to_study_ix, open(f\"data/{stage}_fn_to_study_ix.pickle\", \"wb\" ))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }