[637b40]: / notebooks / count_splits.ipynb

Download this file

1 lines (1 with data), 1.4 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count DICOM files per data split\n",
    "\n",
    "Loads the IDs listed for each split from a JSON file and counts how many `.dcm` files under the data directory match each split.\n",
    "\n",
    "**Caveats**\n",
    "\n",
    "- An ID matches when it occurs as a plain substring anywhere in the file path, so a DICOM whose path contains IDs from more than one split is counted once per matching split.\n",
    "- DICOMs whose path contains none of the listed IDs are not counted at all.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup: imports, inputs, and split definition\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from collections import Counter\n",
    "from pathlib import Path\n",
    "\n",
    "# Inputs a reader may need to change.\n",
    "SPLIT_FILE = Path(\"strat_split_2020_11_02_copy.json\")\n",
    "DATA_DIR = Path(\"/opt/pkd-data/akshay_ADPKD_data/data\")\n",
    "\n",
    "# JSON text is UTF-8; do not depend on the platform's locale default.\n",
    "with open(SPLIT_FILE, encoding=\"utf-8\") as json_file:\n",
    "    # Used below as a mapping: split name -> iterable of ID strings.\n",
    "    splits = json.load(json_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Match every DICOM file against every split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Recursively collect every .dcm file below the data directory.\n",
    "all_dicoms = list(DATA_DIR.glob(\"**/*.dcm\"))\n",
    "\n",
    "\n",
    "def match(dicom, id_list):\n",
    "    \"\"\"Return True if any ID in ``id_list`` is a substring of the path ``dicom``.\"\"\"\n",
    "    dicom_path = str(dicom)  # convert once instead of once per ID\n",
    "    return any(i in dicom_path for i in id_list)\n",
    "\n",
    "\n",
    "# One split-name entry per (DICOM, split) pair that matches.\n",
    "split_labels = [\n",
    "    split_name\n",
    "    for d in all_dicoms\n",
    "    for split_name, id_list in splits.items()\n",
    "    if match(d, id_list)\n",
    "]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result: number of DICOMs per split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DICOMS per split= Counter({'train': 5570, 'test': 1312, 'val': 1050})\n"
     ]
    }
   ],
   "source": [
    "split_counts = Counter(split_labels)\n",
    "\n",
    "print(f\"DICOMS per split= {split_counts}\")\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "fc7d54d68351587bf5ae49be1b48c59c87cb138b39f29c19ac034fa649570f02"
  },
  "kernelspec": {
   "display_name": "Python 3.8.5 64-bit ('adpkd_env_cuda_11_2': venv)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}