{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import socket\n",
    "if socket.gethostname() == 'dlm':\n",
    "  %env CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
    "  %env CUDA_VISIBLE_DEVICES=3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import re\n",
    "import collections\n",
    "import functools\n",
    "import requests, zipfile, io\n",
    "import pickle\n",
    "import copy\n",
    "\n",
    "import pandas\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "import sklearn.decomposition\n",
    "import sklearn.metrics\n",
    "import networkx\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "lib_path = 'I:/code'\n",
    "if not os.path.exists(lib_path):\n",
    "  lib_path = '/media/6T/.tianle/.lib'\n",
    "if not os.path.exists(lib_path):\n",
    "  lib_path = '/projects/academic/azhang/tianlema/lib'\n",
    "if os.path.exists(lib_path) and lib_path not in sys.path:\n",
    "  sys.path.append(lib_path)\n",
    "  \n",
    "from dl.models.basic_models import *\n",
    "from dl.utils.visualization.visualization import *\n",
    "from dl.utils.outlier import *\n",
    "from dl.utils.train import *\n",
    "from autoencoder.autoencoder import *\n",
    "from dl.utils.utils import get_overlap_samples, filter_clinical_dict, get_target_variable\n",
    "from dl.utils.utils import get_shuffled_data, target_to_numpy\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "\n",
    "use_gpu = True\n",
    "if use_gpu and torch.cuda.is_available():\n",
    "  device = torch.device('cuda')\n",
    "  print('Using GPU:)')\n",
    "else:\n",
    "  device = torch.device('cpu')\n",
    "  print('Using CPU:(')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# neural net models include nn (mlp), resnet, densenet; another choice is ml (machine learning)\n",
    "# model_type, dense, residual are dependent\n",
    "model_type = 'resnet'\n",
    "dense = False\n",
    "residual = True\n",
    "hidden_dim = [100, 100]\n",
    "train_portion = 0.7\n",
    "val_portion = 0.1\n",
    "test_portion = 0.2\n",
    "num_train_types = -1 # -1 means not used\n",
    "num_val_types = -1\n",
    "num_test_types = -1 # this will almost never be used \n",
    "num_sets = 10\n",
    "num_folds = 10 # no longer used anymore\n",
    "sel_set_idx = 0\n",
    "cv_type = 'instance-shuffle' # or 'group-shuffle'; cross validation shuffle method\n",
    "sel_disease_types = 'all'\n",
    "# The number of total samples and the numbers for each class in selected disease types must >=\n",
    "min_num_samples_per_type_cls = [100, 0]\n",
    "# if 'auto-search', will search for the file first; if not exist, then generate random data split\n",
    "# and write to the file;\n",
    "# if string other than 'auto-search' is provided, assume the string is a proper file name, \n",
    "# and read the file;\n",
    "# if False, will generate a random data split, but not write to file \n",
    "# if True will generate a random data split, and write to file\n",
    "predefined_sample_set_file = 'auto-search' \n",
    "target_variable = 'PFI' # To do: target variable can be a list (partially handled)\n",
    "target_variable_type = 'discrete' # or 'continuous' real numbers\n",
    "target_variable_range = [0, 1]\n",
    "data_type = ['gene', 'methy', 'rppa', 'mirna']\n",
    "normal_transform_feature = True\n",
    "additional_vars = []#['age_at_initial_pathologic_diagnosis', 'gender']\n",
    "additional_var_types = []#['continuous', 'discrete']\n",
    "additional_var_ranges = []#[[0, 100], ['MALE', 'FEMALE']]\n",
    "randomize_labels = False\n",
    "lr = 5e-4\n",
    "weight_decay = 1e-4\n",
    "num_epochs = 1000\n",
    "reduce_every = 500\n",
    "show_results_in_notebook = True"
   ]
  },
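  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since `model_type`, `dense`, and `residual` must be set consistently, the two flags can be derived from `model_type` instead of being set by hand. Below is a minimal sketch under the assumption that `'densenet'` implies dense connections and `'resnet'` implies residual connections; the helper is hypothetical and not part of the original pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical helper (not from the dl library): derive the dense/residual\n",
    "# flags from model_type so the three settings cannot drift apart.\n",
    "def get_connectivity_flags(model_type):\n",
    "  assert model_type in ('nn', 'resnet', 'densenet', 'ml')\n",
    "  return model_type == 'densenet', model_type == 'resnet'\n",
    "\n",
    "dense, residual = get_connectivity_flags(model_type)\n",
    "print(f'model_type={model_type}, dense={dense}, residual={residual}')"
   ]
  },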
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_folder = 'results'\n",
    "data_split_idx_folder = f'{result_folder}/data_split_idx'\n",
    "project_folder = '../../pan-can-atlas'\n",
    "print_stats = True\n",
    "if not os.path.exists(project_folder):\n",
    "  project_folder = 'F:/TCGA/Pan-Cancer-Atlas'\n",
    "filepath = f'{project_folder}/data/processed/combined2.pkl'\n",
    "with open(filepath, 'rb') as f:\n",
    "  data = pickle.load(f)\n",
    "  patient_clinical = data['patient_clinical']\n",
    "  feature_mat_dict = data['feature_mat_dict']\n",
    "  feature_interaction_mat_dict = data['feature_interaction_mat_dict']\n",
    "  feature_id_dict = data['feature_id_dict']\n",
    "  aliquot_id_dict = data['aliquot_id_dict']\n",
    "#   sel_patient_ids = data['sample_id_sel']\n",
    "#   sample_idx_sel_dict = data['sample_idx_sel_dict']\n",
    "#   for k, v in sample_idx_sel_dict.items():\n",
    "#     assert [i[:12] for i in aliquot_id_dict[k][v]] == sel_patient_ids\n",
    "\n",
    "if print_stats:\n",
    "  for k, v in feature_mat_dict.items():\n",
    "    print(f'feature_mat: {k}, max={v.max():.3f}, min={v.min():.3f}, '\n",
    "          f'mean={v.mean():.3f}, {np.mean(v>0):.3f}')  \n",
    "  for k, v in feature_interaction_mat_dict.items():\n",
    "    print(f'feature_interaction_mat: {k}, max={v.max():.3f}, min={v.min():.3f}, '\n",
    "          f'mean={v.mean():.3f}, {np.mean(v>0):.3f}') \n",
    "  for k, v in feature_id_dict.items():\n",
    "    print(k, v.shape, v[0])\n",
    "  for k, v in aliquot_id_dict.items():\n",
    "    print(k, v.shape, v[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# select samples with required clinical variables\n",
    "clinical_dict = filter_clinical_dict(target_variable, target_variable_type=target_variable_type, \n",
    "                                     target_variable_range=target_variable_range, \n",
    "                                     clinical_dict=patient_clinical)\n",
    "if len(additional_vars) > 0:\n",
    "  clinical_dict = filter_clinical_dict(additional_vars, target_variable_type=additional_var_types, \n",
    "                                       target_variable_range=additional_var_ranges, \n",
    "                                       clinical_dict=clinical_dict)\n",
    "\n",
    "# select samples with feature matrix of given type(s)\n",
    "if isinstance(data_type, str):\n",
    "  sample_list = {s[:12] for s in aliquot_id_dict[data_type]}\n",
    "  data_type_str = data_type\n",
    "elif isinstance(data_type, (list, tuple)):\n",
    "  sample_list = get_overlap_samples([aliquot_id_dict[dtype] for dtype in data_type], \n",
    "                                    common_list=None, start=0, end=12, return_common_list=True)\n",
    "  data_type_str = '-'.join(sorted(data_type))\n",
    "else:\n",
    "  raise ValueError(f'data_type must be str or list/tuple, but is {type(data_type)}')\n",
    "sample_list = sample_list.intersection(clinical_dict)\n",
    "\n",
    "# select samples with given disease types\n",
    "sel_disease_type_str = sel_disease_types # will be overwritten if it is a list\n",
    "if isinstance(sel_disease_types, (list, tuple)):\n",
    "  sample_list = [s for s in sample_list if clinical_dict[s]['type'] in sel_disease_types]\n",
    "  sel_disease_type_str = '-'.join(sorted(sel_disease_types))\n",
    "elif isinstance(sel_disease_types, str) and sel_disease_types!='all':\n",
    "  sample_list = [s for s in sample_list if clinical_dict[s]['type'] == sel_disease_types]\n",
    "else:\n",
    "  assert sel_disease_types == 'all'\n",
    " \n",
    "# For classification tasks with given min_num_samples_per_type_cls,\n",
    "# only keep disease types that have a minimal number of samples per type and per class\n",
    "# Reflection: it might be better to use collections.defaultdict(list) to store samples in each type\n",
    "type_cnt = collections.Counter([clinical_dict[s]['type'] for s in sample_list])\n",
    "if sum(min_num_samples_per_type_cls)>0 and (target_variable_type=='discrete' \n",
    "                                            or target_variable_type[0]=='discrete'):\n",
    "  # the number of samples in each disease type >= min_num_samples_per_type_cls[0]\n",
    "  type_cnt = {k: v for k, v in type_cnt.items() if v >= min_num_samples_per_type_cls[0]}\n",
    "  disease_type_cnt = {}\n",
    "  for k in type_cnt:\n",
    "    # collections.Counter can accept generator\n",
    "    cls_cnt = collections.Counter(clinical_dict[s][target_variable] \n",
    "                                  if isinstance(target_variable, str) \n",
    "                                  else clinical_dict[s][target_variable[0]] \n",
    "                                  for s in sample_list if clinical_dict[s]['type']==k)\n",
    "    if all([v >= min_num_samples_per_type_cls[1] for v in cls_cnt.values()]):\n",
    "      # the number of samples in each class >= min_num_samples_per_type_cls[1]\n",
    "      disease_type_cnt[k] = dict(cls_cnt)\n",
    "      print(k, disease_type_cnt[k])\n",
    "  sample_list = [s for s in sample_list if clinical_dict[s]['type'] in disease_type_cnt]\n",
    "sel_patient_ids = sorted(sample_list)\n",
    "print(f'Selected {len(sel_patient_ids)} patients from {len(disease_type_cnt)} disease_types')"
   ]
  },
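  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The reflection comment above suggests storing the samples of each disease type in a `collections.defaultdict(list)`. A minimal sketch of that alternative for the discrete case (the names `samples_per_type`, `tv`, and `kept_types` are illustrative, not from the original code):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch of the defaultdict(list) alternative: group samples by disease type\n",
    "# once, then filter types by total count and per-class count.\n",
    "samples_per_type = collections.defaultdict(list)\n",
    "for s in sample_list:\n",
    "  samples_per_type[clinical_dict[s]['type']].append(s)\n",
    "tv = target_variable if isinstance(target_variable, str) else target_variable[0]\n",
    "kept_types = {}\n",
    "for t, samples in samples_per_type.items():\n",
    "  cls_cnt = collections.Counter(clinical_dict[s][tv] for s in samples)\n",
    "  if (len(samples) >= min_num_samples_per_type_cls[0]\n",
    "      and all(v >= min_num_samples_per_type_cls[1] for v in cls_cnt.values())):\n",
    "    kept_types[t] = dict(cls_cnt)\n",
    "print(f'{len(kept_types)} disease types kept by the sketch, vs {len(disease_type_cnt)} above')"
   ]
  },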
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Split data into training, validation, and test sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predefined_sample_set_filename = (target_variable if isinstance(target_variable,str) \n",
    "                                else '-'.join(target_variable))\n",
    "predefined_sample_set_filename += f'_{cv_type}'\n",
    "if len(additional_vars) > 0:\n",
    "  predefined_sample_set_filename += f\"_{'-'.join(sorted(additional_vars))}\"\n",
    "\n",
    "predefined_sample_set_filename += (f\"_{data_type_str}_{sel_disease_type_str}_\"\n",
    "                                   f\"{'-'.join(map(str, min_num_samples_per_type_cls))}\")\n",
    "predefined_sample_set_filename += f\"_{'-'.join(map(str, [train_portion, val_portion, test_portion]))}\"\n",
    "if cv_type == 'group-shuffle' and num_train_types > 0:\n",
    "  predefined_sample_set_filename += f\"_{'-'.join(map(str, [num_train_types, num_val_types, num_test_types]))}\"\n",
    "predefined_sample_set_filename += f'_{num_sets}sets'\n",
    "res_file = f\"{predefined_sample_set_filename}_{sel_set_idx}_{'-'.join(map(str, hidden_dim))}_{model_type}.pkl\"\n",
    "predefined_sample_set_filename += '.pkl'\n",
    "# This will be overwritten if predefined_sample_set_file == 'auto-search' or filepath, and the file exists\n",
    "predefined_sample_sets = [get_shuffled_data(sel_patient_ids, clinical_dict, cv_type=cv_type, \n",
    "                  instance_portions=[train_portion, val_portion, test_portion], \n",
    "                  group_sizes=[num_train_types, num_val_types, num_test_types],\n",
    "                  group_variable_name='type', seed=None, verbose=False) for i in range(num_sets)]\n",
    "if predefined_sample_set_file == 'auto-search':\n",
    "  if os.path.exists(f'{data_split_idx_folder}/{predefined_sample_set_filename}'):\n",
    "    with open(f'{data_split_idx_folder}/{predefined_sample_set_filename}', 'rb') as f:\n",
    "      print(f'Read predefined_sample_set_file: '\n",
    "            f'{data_split_idx_folder}/{predefined_sample_set_filename}')\n",
    "      tmp = pickle.load(f)\n",
    "      # overwrite calculated predefined_sample_sets\n",
    "      predefined_sample_sets = tmp['predefined_sample_sets']    \n",
    "elif isinstance(predefined_sample_set_file, str): # but not 'auto-search'; assume it's a file name\n",
    "  if os.path.exists(predefined_sample_set_file):\n",
    "    with open(f'{data_split_idx_folder}/{predefined_sample_set_file}', 'rb') as f:\n",
    "      print(f'Read predefined_sample_set_file: {data_split_idx_folder}/{predefined_sample_set_file}')\n",
    "      tmp = pickle.load(f)\n",
    "      predefined_sample_sets = tmp['predefined_sample_sets']\n",
    "  else:\n",
    "    raise ValueError(f'predefined_sample_set_file: {data_split_idx_folder}/{predefined_sample_set_file} does not exist!')\n",
    "\n",
    "if (not os.path.exists(f'{data_split_idx_folder}/{predefined_sample_set_filename}') \n",
    "    and predefined_sample_set_file == 'auto-search') or predefined_sample_set_file is True:\n",
    "  with open(f'{data_split_idx_folder}/{predefined_sample_set_filename}', 'wb') as f:\n",
    "      print(f'Write predefined_sample_set_file: {data_split_idx_folder}/{predefined_sample_set_filename}')\n",
    "      pickle.dump({'predefined_sample_sets': predefined_sample_sets}, f)\n",
    "     \n",
    "sel_patient_ids, idx_splits = predefined_sample_sets[sel_set_idx]\n",
    "train_idx, val_idx, test_idx = idx_splits"
   ]
  },
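  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a worked example of the file-name construction above: with the default configuration (`target_variable='PFI'`, `cv_type='instance-shuffle'`, no additional variables, `data_type=['gene', 'methy', 'rppa', 'mirna']`, all disease types, thresholds `100-0`, portions `0.7-0.1-0.2`, 10 sets), the split file is named `PFI_instance-shuffle_gene-methy-mirna-rppa_all_100-0_0.7-0.1-0.2_10sets.pkl` (data types are sorted alphabetically)."
   ]
  },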
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if isinstance(data_type, str):\n",
    "  sample_lists = [aliquot_id_dict[data_type]]\n",
    "else:\n",
    "  assert isinstance(data_type, (list, tuple))\n",
    "  sample_lists = [aliquot_id_dict[dtype] for dtype in data_type]\n",
    "idx_lists = get_overlap_samples(sample_lists=sample_lists, common_list=sel_patient_ids, \n",
    "                    start=0, end=12, return_common_list=False)\n",
    "sample_idx_sel_dict = {}\n",
    "if isinstance(data_type, str):\n",
    "  sample_idx_sel_dict = {data_type: idx_lists[0]}\n",
    "else:\n",
    "  sample_idx_sel_dict = {dtype: idx_list for dtype, idx_list in zip(data_type, idx_lists)}"
   ]
  },
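  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_overlap_samples` comes from the external `dl.utils.utils` module. A minimal sketch of its assumed behavior: match aliquot barcodes to patients via characters `[start:end]` (the first 12 characters of a TCGA barcode identify the patient) and either return the set of patients common to all lists, or one index list per input list aligned with `common_list`. This is an assumption for orientation, not the library code."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical sketch of get_overlap_samples (assumes one aliquot per patient).\n",
    "def get_overlap_samples_sketch(sample_lists, common_list=None, start=0, end=12,\n",
    "                               return_common_list=False):\n",
    "  keys = [[s[start:end] for s in lst] for lst in sample_lists]\n",
    "  if return_common_list: # patients present in every list\n",
    "    common = set(keys[0])\n",
    "    for k in keys[1:]:\n",
    "      common &= set(k)\n",
    "    return common\n",
    "  idx_lists = []\n",
    "  for k in keys: # one index list per input, aligned with common_list\n",
    "    pos = {pid: i for i, pid in enumerate(k)}\n",
    "    idx_lists.append([pos[pid] for pid in common_list])\n",
    "  return idx_lists\n",
    "\n",
    "idx_lists_check = get_overlap_samples_sketch(sample_lists, common_list=sel_patient_ids)\n",
    "print([len(idx) for idx in idx_lists_check], len(sel_patient_ids))"
   ]
  },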
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if isinstance(data_type, str):\n",
    "  print(f'Only use one data type: {data_type}')\n",
    "  num_data_types = 1\n",
    "  mat = feature_mat_dict[data_type][sample_idx_sel_dict[data_type]]\n",
    "  # Data preprocessing: make each row have mean 0 and sd 1.\n",
    "  x = (mat - mat.mean(axis=1, keepdims=True)) / mat.std(axis=1, keepdims=True)\n",
    "  interaction_mat = feature_interaction_mat_dict[data_type]\n",
    "  interaction_mat = torch.from_numpy(interaction_mat).float().to(device)\n",
    "  # Normalize these interaction mat\n",
    "  interaction_mat = interaction_mat / interaction_mat.norm()\n",
    "else:\n",
    "  mat = []\n",
    "  interaction_mats = []\n",
    "  in_dims = []\n",
    "  num_data_types = len(data_type)\n",
    "  # do not handle the special case of [data_type] to avoid too much code complexity\n",
    "  assert num_data_types > 1 \n",
    "  for dtype in data_type: # multiple data types\n",
    "    m = feature_mat_dict[dtype][sample_idx_sel_dict[dtype]]\n",
    "    #When there are multiple data types, make sure each type is normalized to have mean 0 and std 1\n",
    "    m = (m - m.mean(axis=1, keepdims=True)) / m.std(axis=1, keepdims=True)\n",
    "    mat.append(m)\n",
    "    in_dims.append(m.shape[1])\n",
    "    # For neural network model graph laplacian regularizer\n",
    "    interaction_mat = feature_interaction_mat_dict[dtype]\n",
    "    interaction_mat = torch.from_numpy(interaction_mat).float().to(device)\n",
    "    # Normalize these interaction mat\n",
    "    interaction_mat = interaction_mat / interaction_mat.norm()\n",
    "    interaction_mats.append(interaction_mat)\n",
    "    print(f'{dtype}: {m.shape}; '\n",
    "          f'interaction_mat: mean={interaction_mat.mean().item():2f}, '\n",
    "          f'std={interaction_mat.std().item():2f}, {interaction_mat.shape[0]}')\n",
    "  # Later interaction_mat will be passed to Loss_feature_interaction\n",
    "  interaction_mat = interaction_mats\n",
    "  mat = np.concatenate(mat, axis=1)\n",
    "  # For machine learing methods that use concatenated features without knowing underlying views,\n",
    "  # it might be good to make each row have mean 0 and sd 1.\n",
    "  x = (mat - mat.mean(axis=1, keepdims=True)) / mat.std(axis=1, keepdims=True)\n",
    "\n",
    "if normal_transform_feature:\n",
    "  X = x\n",
    "else:\n",
    "  X = mat"
   ]
  },
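  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The normalized `interaction_mat` is later passed to `Loss_feature_interaction` (from the external library). As an illustration of the idea, a typical feature-interaction regularizer is a graph Laplacian penalty tr(W L W^T) with L = D - A, which encourages interacting features to receive similar first-layer weights. The sketch below is hypothetical, not the library's implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical graph-Laplacian feature-interaction penalty (illustration only).\n",
    "# tr(W L W^T) with L = D - A equals 0.5 * sum_ij A_ij * ||W[:,i] - W[:,j]||^2,\n",
    "# so columns of W belonging to interacting features are pulled together.\n",
    "def laplacian_penalty(W, A):\n",
    "  # W: (hidden_dim, num_features); A: (num_features, num_features), symmetric\n",
    "  L = torch.diag(A.sum(dim=1)) - A\n",
    "  return torch.trace(W @ L @ W.t())\n",
    "\n",
    "W_demo = torch.randn(8, 5, device=device) # demo shapes only\n",
    "A_demo = torch.rand(5, 5, device=device)\n",
    "A_demo = (A_demo + A_demo.t()) / 2 # symmetrize\n",
    "print(f'penalty = {laplacian_penalty(W_demo, A_demo).item():.4f}')"
   ]
  },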
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_targets = get_target_variable(target_variable, clinical_dict, sel_patient_ids)\n",
    "y_true = target_to_numpy(y_targets, target_variable_type, target_variable_range)\n",
    "if len(additional_vars) > 0:\n",
    "  additional_variables = get_target_variable(additional_vars, clinical_dict, sel_patient_ids)\n",
    "  # to do handle additional variables such as age and gender"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### To do: handle multiple inputs, multiple targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sklearn classifiers also accept torch.Tensor\n",
    "X = torch.tensor(X).float().to(device)\n",
    "y_true = torch.tensor(y_true).long().to(device)\n",
    "num_cls = len(torch.unique(y_true))\n",
    "\n",
    "x_train, y_train = X[train_idx], y_true[train_idx]\n",
    "x_val, y_val = X[val_idx], y_true[val_idx]\n",
    "x_test, y_test = X[test_idx], y_true[test_idx]\n",
    "print(x_train.shape, x_val.shape, x_test.shape, y_train.shape, y_val.shape, y_test.shape)\n",
    "\n",
    "label_prob_train = get_label_prob(y_train, verbose=False)\n",
    "label_probs = [label_prob_train]\n",
    "if len(y_val)>0:\n",
    "  label_prob_val = get_label_prob(y_val, verbose=False)\n",
    "  assert len(label_prob_train) == len(label_prob_val)\n",
    "  label_probs.append(label_prob_val)\n",
    "if len(y_test)>0:\n",
    "  label_prob_test = get_label_prob(y_test, verbose=False)\n",
    "  assert len(label_prob_train) == len(label_prob_test)\n",
    "  label_probs.append(label_prob_test)\n",
    "if isinstance(label_probs, torch.Tensor):\n",
    "  print('label distribution:\\n', torch.stack(label_probs, dim=1))\n",
    "else:\n",
    "  print('label distribution:\\n', np.stack(label_probs, axis=1))"
   ]
  },
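  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_label_prob` is imported from the external `dl` library. A plausible minimal equivalent, assuming it returns the empirical class distribution of an integer label vector (the sketch below is an assumption, not the library code):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical equivalent of get_label_prob: empirical class frequencies.\n",
    "def get_label_prob_sketch(y, num_classes=0):\n",
    "  counts = torch.bincount(y.cpu(), minlength=num_classes).float()\n",
    "  return counts / counts.sum()\n",
    "\n",
    "print(get_label_prob_sketch(y_train, num_classes=num_cls))"
   ]
  },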
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Optionally randomize true class labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "if randomize_labels:\n",
    "  print('Randomize class labels!')\n",
    "  y_train = torch.multinomial(label_prob_train, len(y_train), replacement=True)\n",
    "  if len(y_val) > 0:\n",
    "    y_val = torch.multinomial(label_prob_val, len(y_val), replacement=True)\n",
    "  if len(y_test) > 0:\n",
    "    y_test = torch.multinomial(label_prob_test, len(y_test), replacement=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sklearn classifiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n",
    "model_names = ['kNN', 'Naive Bayes', 'SVM', 'Decision Tree', 'Random Forest', 'AdaBoost']\n",
    "split_names = ['train', 'val', 'test']\n",
    "metric_names = ['acc', 'precision', 'recall', 'f1_score', 'adjusted_mutual_info', 'auc', \n",
    "                'average_precision']\n",
    "metric_all = []\n",
    "confusion_mat_all = []\n",
    "loss_his_all = [] # loss_his_all and acc_his_all are empty for sklearn classifiers\n",
    "acc_his_all = []\n",
    "classifiers = [KNeighborsClassifier(5), \n",
    "               GaussianNB(), \n",
    "               sklearn.svm.SVC(kernel=\"linear\", C=0.025),\n",
    "               DecisionTreeClassifier(max_depth=5),\n",
    "               RandomForestClassifier(max_depth=5, n_estimators=10),\n",
    "               AdaBoostClassifier()\n",
    "              ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert train_portion > 0 and val_portion > 0 and test_portion > 0 # Assume there are 3 splits\n",
    "for name, classifier in zip(model_names, classifiers):\n",
    "  print(name)\n",
    "  classifier.fit(x_train, y_train)\n",
    "  metric = []\n",
    "  for x_, y_ in zip([x_train, x_val, x_test], [y_train, y_val, y_test]):\n",
    "    if name == 'SVM':\n",
    "      y_score = classifier.decision_function(x_) # sklearn.svm.SVC does not have predict_proba\n",
    "    else:\n",
    "      y_score = classifier.predict_proba(x_)\n",
    "    metric.append(eval_classification(y_true=y_, y_pred=y_score, \n",
    "                                      average='weighted', verbose=True))\n",
    "  metric_all.append([v[0] for v in metric])\n",
    "  confusion_mat_all.append([v[1] for v in metric])  "
   ]
  },
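  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`eval_classification` is imported from the external `dl` library; judging from `metric_names` and the `v[0]`/`v[1]` unpacking above, it appears to return a `(metrics, confusion_matrix)` pair. A hypothetical minimal sketch for the binary case (`target_variable_range = [0, 1]`), using standard `sklearn.metrics` calls; this is an inference, not the library code:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import sklearn.metrics\n",
    "\n",
    "# Hypothetical sketch of eval_classification; order follows metric_names.\n",
    "def eval_classification_sketch(y_true, y_pred, average='weighted'):\n",
    "  y_true = np.asarray(y_true.cpu() if hasattr(y_true, 'cpu') else y_true)\n",
    "  y_score = np.asarray(y_pred)\n",
    "  y_cls = y_score.argmax(axis=1) if y_score.ndim > 1 else (y_score > 0).astype(int)\n",
    "  pos = y_score[:, 1] if y_score.ndim > 1 else y_score # positive-class score\n",
    "  m = [sklearn.metrics.accuracy_score(y_true, y_cls),\n",
    "       sklearn.metrics.precision_score(y_true, y_cls, average=average),\n",
    "       sklearn.metrics.recall_score(y_true, y_cls, average=average),\n",
    "       sklearn.metrics.f1_score(y_true, y_cls, average=average),\n",
    "       sklearn.metrics.adjusted_mutual_info_score(y_true, y_cls),\n",
    "       sklearn.metrics.roc_auc_score(y_true, pos),\n",
    "       sklearn.metrics.average_precision_score(y_true, pos)]\n",
    "  return m, sklearn.metrics.confusion_matrix(y_true, y_cls)"
   ]
  },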
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(f'{result_folder}/{res_file}', 'wb') as f:\n",
    "  print(f'Write result to file {result_folder}/{res_file}')\n",
    "  pickle.dump({'loss_his_all': loss_his_all,\n",
    "               'acc_his_all': acc_his_all,\n",
    "               'metric_all': metric_all,\n",
    "               'confusion_mat_all': confusion_mat_all,\n",
    "               'model_names': model_names,\n",
    "               'split_names': split_names,\n",
    "               'metric_names': metric_names\n",
    "              }, f)"
   ]
  },
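  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To inspect the saved results later, the pickle can be reloaded and the test-split metrics tabulated with `pandas` (already imported above). A small usage sketch, assuming each entry of `metric_all` holds one metric list per split:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the saved results and show the test-split metrics as a table.\n",
    "with open(f'{result_folder}/{res_file}', 'rb') as f:\n",
    "  res = pickle.load(f)\n",
    "test_col = res['split_names'].index('test')\n",
    "pandas.DataFrame([m[test_col] for m in res['metric_all']],\n",
    "                 index=res['model_names'], columns=res['metric_names'])"
   ]
  },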
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Neural network models that are included in another notebook"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}