Switch to unified view

a b/datasets_csv/Preprocessing.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import os\n",
10
    "from os.path import join\n",
11
    "\n",
12
    "import pandas as pd\n",
13
    "import numpy as np\n",
14
    "\n",
15
    "label_col = 'survival_months'\n",
16
    "n_bins = 4\n",
17
    "eps = 1e-6"
18
   ]
19
  },
20
  {
21
   "cell_type": "code",
22
   "execution_count": 2,
23
   "metadata": {},
24
   "outputs": [],
25
   "source": [
26
    "def add_bins(slide_data):\n",
27
    "    assert 'case_id' in slide_data.columns and 'censorship' in slide_data.columns\n",
28
    "    \n",
29
    "    patients_df = slide_data.drop_duplicates(['case_id']).copy()\n",
30
    "    uncensored_df = patients_df[patients_df['censorship'] < 1]\n",
31
    "    disc_labels, q_bins = pd.qcut(uncensored_df[label_col], q=n_bins, retbins=True, labels=False)\n",
32
    "    q_bins[-1] = slide_data[label_col].max() + eps\n",
33
    "    q_bins[0] = slide_data[label_col].min() - eps\n",
34
    "\n",
35
    "    disc_labels, q_bins = pd.cut(patients_df[label_col], bins=q_bins, retbins=True, labels=False, right=False, include_lowest=True)\n",
36
    "    patients_df.insert(2, 'label', disc_labels.values.astype(int))\n",
37
    "\n",
38
    "    patient_dict = {}\n",
39
    "    slide_data = slide_data.set_index('case_id')\n",
40
    "    for patient in patients_df['case_id']:\n",
41
    "        slide_ids = slide_data.loc[patient, 'slide_id']\n",
42
    "        if isinstance(slide_ids, str):\n",
43
    "            slide_ids = np.array(slide_ids).reshape(-1)\n",
44
    "        else:\n",
45
    "            slide_ids = slide_ids.values\n",
46
    "        patient_dict.update({patient:slide_ids})\n",
47
    "        \n",
48
    "    return q_bins, patient_dict, patients_df"
49
   ]
50
  },
51
  {
52
   "cell_type": "code",
53
   "execution_count": 3,
54
   "metadata": {},
55
   "outputs": [],
56
   "source": [
57
    "slide_data = pd.read_csv('./tcga_gbmlgg_all_clean.csv.zip', compression='zip', header=0, index_col=0, sep=',',  low_memory=False)\n",
58
    "\n",
59
    "n_bins = 4\n",
60
    "eps = 1e-6\n",
61
    "\n",
62
    "### Asserts that 'case_id' is a column, not an index.\n",
63
    "if 'case_id' not in slide_data:\n",
64
    "    slide_data.index = slide_data.index.str[:12]\n",
65
    "    slide_data['case_id'] = slide_data.index\n",
66
    "    slide_data = slide_data.reset_index(drop=True)\n",
67
    "\n",
68
    "q_bins, patients_dict, slide_data = add_bins(slide_data)\n",
69
    "\n",
70
    "slide_data.reset_index(drop=True, inplace=True)\n",
71
    "slide_data = slide_data.assign(slide_id=slide_data['case_id'])\n",
72
    "\n",
73
    "label_dict = {}\n",
74
    "key_count = 0\n",
75
    "for i in range(len(q_bins)-1):\n",
76
    "    for c in [0, 1]:\n",
77
    "        label_dict.update({(i, c):key_count})\n",
78
    "        key_count+=1\n",
79
    "\n",
80
    "for i in slide_data.index:\n",
81
    "    key = slide_data.loc[i, 'label']\n",
82
    "    slide_data.at[i, 'disc_label'] = key\n",
83
    "    censorship = slide_data.loc[i, 'censorship']\n",
84
    "    key = (key, int(censorship))\n",
85
    "    slide_data.at[i, 'label'] = label_dict[key]\n",
86
    "\n",
87
    "bins = q_bins\n",
88
    "num_classes=len(label_dict)\n",
89
    "patients_df = slide_data.drop_duplicates(['case_id'])\n",
90
    "patient_data = {'case_id':patients_df['case_id'].values, 'label':patients_df['label'].values}\n",
91
    "\n",
92
    "new_cols = list(slide_data.columns[-2:]) + list(slide_data.columns[:-2])\n",
93
    "slide_data = slide_data[new_cols]\n",
94
    "metadata = slide_data.columns[:11]"
95
   ]
96
  },
97
  {
98
   "cell_type": "code",
99
   "execution_count": 5,
100
   "metadata": {},
101
   "outputs": [],
102
   "source": [
103
    "from sklearn.pipeline import Pipeline\n",
104
    "from sklearn.decomposition import PCA\n",
105
    "from sklearn.preprocessing import StandardScaler\n",
106
    "\n",
107
    "\n",
108
    "def series_intersection(s1, s2):\n",
109
    "    return pd.Series(list(set(s1) & set(s2)))\n",
110
    "\n",
111
    "genomic_features = slide_data.drop(metadata, axis=1)\n",
112
    "scaler_omic = StandardScaler().fit(genomic_features)"
113
   ]
114
  },
115
  {
116
   "cell_type": "code",
117
   "execution_count": 9,
118
   "metadata": {},
119
   "outputs": [
120
    {
121
     "name": "stderr",
122
     "output_type": "stream",
123
     "text": [
124
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False.\n",
125
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
126
     ]
127
    }
128
   ],
129
   "source": [
130
    "import os\n",
131
    "from os.path import join\n",
132
    "\n",
133
    "import pandas as pd\n",
134
    "import numpy as np\n",
135
    "\n",
136
    "signatures = pd.read_csv('./signatures.csv')\n",
137
    "slide_df = pd.read_csv('./tcga_gbmlgg_all_clean.csv.zip')"
138
   ]
139
  },
140
  {
141
   "cell_type": "code",
142
   "execution_count": 43,
143
   "metadata": {},
144
   "outputs": [
145
    {
146
     "data": {
147
      "text/html": [
148
       "<div>\n",
149
       "<style scoped>\n",
150
       "    .dataframe tbody tr th:only-of-type {\n",
151
       "        vertical-align: middle;\n",
152
       "    }\n",
153
       "\n",
154
       "    .dataframe tbody tr th {\n",
155
       "        vertical-align: top;\n",
156
       "    }\n",
157
       "\n",
158
       "    .dataframe thead th {\n",
159
       "        text-align: right;\n",
160
       "    }\n",
161
       "</style>\n",
162
       "<table border=\"1\" class=\"dataframe\">\n",
163
       "  <thead>\n",
164
       "    <tr style=\"text-align: right;\">\n",
165
       "      <th></th>\n",
166
       "      <th>Unnamed: 0</th>\n",
167
       "      <th>Unnamed: 0.1</th>\n",
168
       "      <th>case_id</th>\n",
169
       "      <th>slide_id</th>\n",
170
       "      <th>site</th>\n",
171
       "      <th>is_female</th>\n",
172
       "      <th>oncotree_code</th>\n",
173
       "      <th>age</th>\n",
174
       "      <th>survival_months</th>\n",
175
       "      <th>censorship</th>\n",
176
       "      <th>...</th>\n",
177
       "      <th>ZSCAN10_rnaseq</th>\n",
178
       "      <th>ZSCAN12_rnaseq</th>\n",
179
       "      <th>ZSCAN20_rnaseq</th>\n",
180
       "      <th>ZSCAN21_rnaseq</th>\n",
181
       "      <th>ZSCAN22_rnaseq</th>\n",
182
       "      <th>ZSCAN2_rnaseq</th>\n",
183
       "      <th>ZSCAN9_rnaseq</th>\n",
184
       "      <th>ZXDA_rnaseq</th>\n",
185
       "      <th>ZXDB_rnaseq</th>\n",
186
       "      <th>ZXDC_rnaseq</th>\n",
187
       "    </tr>\n",
188
       "  </thead>\n",
189
       "  <tbody>\n",
190
       "    <tr>\n",
191
       "      <th>0</th>\n",
192
       "      <td>0</td>\n",
193
       "      <td>0</td>\n",
194
       "      <td>TCGA-02-0047</td>\n",
195
       "      <td>TCGA-02-0047-01Z-00-DX1.4755D138-5842-4159-848...</td>\n",
196
       "      <td>2</td>\n",
197
       "      <td>0.0</td>\n",
198
       "      <td>GBM</td>\n",
199
       "      <td>78.0</td>\n",
200
       "      <td>14.72</td>\n",
201
       "      <td>0.0</td>\n",
202
       "      <td>...</td>\n",
203
       "      <td>-0.1599</td>\n",
204
       "      <td>-0.59540</td>\n",
205
       "      <td>0.0813</td>\n",
206
       "      <td>-1.16960</td>\n",
207
       "      <td>-0.1728</td>\n",
208
       "      <td>-0.1144</td>\n",
209
       "      <td>-0.4155</td>\n",
210
       "      <td>0.4046</td>\n",
211
       "      <td>-0.01680</td>\n",
212
       "      <td>0.3026</td>\n",
213
       "    </tr>\n",
214
       "    <tr>\n",
215
       "      <th>1</th>\n",
216
       "      <td>1</td>\n",
217
       "      <td>1</td>\n",
218
       "      <td>TCGA-06-0125</td>\n",
219
       "      <td>TCGA-06-0125-01Z-00-DX1.8e0915b2-8dc3-4753-806...</td>\n",
220
       "      <td>6</td>\n",
221
       "      <td>1.0</td>\n",
222
       "      <td>GBM</td>\n",
223
       "      <td>63.0</td>\n",
224
       "      <td>47.57</td>\n",
225
       "      <td>0.0</td>\n",
226
       "      <td>...</td>\n",
227
       "      <td>0.4608</td>\n",
228
       "      <td>0.52815</td>\n",
229
       "      <td>1.2580</td>\n",
230
       "      <td>1.41685</td>\n",
231
       "      <td>2.4839</td>\n",
232
       "      <td>-0.2388</td>\n",
233
       "      <td>0.9025</td>\n",
234
       "      <td>0.3242</td>\n",
235
       "      <td>1.01905</td>\n",
236
       "      <td>0.2265</td>\n",
237
       "    </tr>\n",
238
       "    <tr>\n",
239
       "      <th>2</th>\n",
240
       "      <td>2</td>\n",
241
       "      <td>2</td>\n",
242
       "      <td>TCGA-06-0125</td>\n",
243
       "      <td>TCGA-06-0125-01Z-00-DX2.4f9cef92-2bdb-480d-870...</td>\n",
244
       "      <td>6</td>\n",
245
       "      <td>1.0</td>\n",
246
       "      <td>GBM</td>\n",
247
       "      <td>63.0</td>\n",
248
       "      <td>47.57</td>\n",
249
       "      <td>0.0</td>\n",
250
       "      <td>...</td>\n",
251
       "      <td>0.4608</td>\n",
252
       "      <td>0.52815</td>\n",
253
       "      <td>1.2580</td>\n",
254
       "      <td>1.41685</td>\n",
255
       "      <td>2.4839</td>\n",
256
       "      <td>-0.2388</td>\n",
257
       "      <td>0.9025</td>\n",
258
       "      <td>0.3242</td>\n",
259
       "      <td>1.01905</td>\n",
260
       "      <td>0.2265</td>\n",
261
       "    </tr>\n",
262
       "    <tr>\n",
263
       "      <th>3</th>\n",
264
       "      <td>3</td>\n",
265
       "      <td>3</td>\n",
266
       "      <td>TCGA-06-0129</td>\n",
267
       "      <td>TCGA-06-0129-01Z-00-DX1.b7bddf7d-f39e-45e7-a78...</td>\n",
268
       "      <td>6</td>\n",
269
       "      <td>0.0</td>\n",
270
       "      <td>GBM</td>\n",
271
       "      <td>30.0</td>\n",
272
       "      <td>33.64</td>\n",
273
       "      <td>0.0</td>\n",
274
       "      <td>...</td>\n",
275
       "      <td>-0.2960</td>\n",
276
       "      <td>-0.75980</td>\n",
277
       "      <td>1.2706</td>\n",
278
       "      <td>-0.14840</td>\n",
279
       "      <td>1.4803</td>\n",
280
       "      <td>1.5796</td>\n",
281
       "      <td>1.0245</td>\n",
282
       "      <td>1.0492</td>\n",
283
       "      <td>5.78560</td>\n",
284
       "      <td>1.7766</td>\n",
285
       "    </tr>\n",
286
       "    <tr>\n",
287
       "      <th>4</th>\n",
288
       "      <td>4</td>\n",
289
       "      <td>4</td>\n",
290
       "      <td>TCGA-06-0129</td>\n",
291
       "      <td>TCGA-06-0129-01Z-00-DX2.1ea78b46-1dc7-44d8-81b...</td>\n",
292
       "      <td>6</td>\n",
293
       "      <td>0.0</td>\n",
294
       "      <td>GBM</td>\n",
295
       "      <td>30.0</td>\n",
296
       "      <td>33.64</td>\n",
297
       "      <td>0.0</td>\n",
298
       "      <td>...</td>\n",
299
       "      <td>-0.2960</td>\n",
300
       "      <td>-0.75980</td>\n",
301
       "      <td>1.2706</td>\n",
302
       "      <td>-0.14840</td>\n",
303
       "      <td>1.4803</td>\n",
304
       "      <td>1.5796</td>\n",
305
       "      <td>1.0245</td>\n",
306
       "      <td>1.0492</td>\n",
307
       "      <td>5.78560</td>\n",
308
       "      <td>1.7766</td>\n",
309
       "    </tr>\n",
310
       "    <tr>\n",
311
       "      <th>...</th>\n",
312
       "      <td>...</td>\n",
313
       "      <td>...</td>\n",
314
       "      <td>...</td>\n",
315
       "      <td>...</td>\n",
316
       "      <td>...</td>\n",
317
       "      <td>...</td>\n",
318
       "      <td>...</td>\n",
319
       "      <td>...</td>\n",
320
       "      <td>...</td>\n",
321
       "      <td>...</td>\n",
322
       "      <td>...</td>\n",
323
       "      <td>...</td>\n",
324
       "      <td>...</td>\n",
325
       "      <td>...</td>\n",
326
       "      <td>...</td>\n",
327
       "      <td>...</td>\n",
328
       "      <td>...</td>\n",
329
       "      <td>...</td>\n",
330
       "      <td>...</td>\n",
331
       "      <td>...</td>\n",
332
       "      <td>...</td>\n",
333
       "    </tr>\n",
334
       "    <tr>\n",
335
       "      <th>1037</th>\n",
336
       "      <td>1037</td>\n",
337
       "      <td>1037</td>\n",
338
       "      <td>TCGA-WY-A85A</td>\n",
339
       "      <td>TCGA-WY-A85A-01Z-00-DX1.CB302B89-F89A-40FD-A7D...</td>\n",
340
       "      <td>WY</td>\n",
341
       "      <td>0.0</td>\n",
342
       "      <td>ASTR</td>\n",
343
       "      <td>20.0</td>\n",
344
       "      <td>43.36</td>\n",
345
       "      <td>1.0</td>\n",
346
       "      <td>...</td>\n",
347
       "      <td>-0.2997</td>\n",
348
       "      <td>-0.67560</td>\n",
349
       "      <td>0.2714</td>\n",
350
       "      <td>0.36210</td>\n",
351
       "      <td>-0.2401</td>\n",
352
       "      <td>1.4333</td>\n",
353
       "      <td>0.2715</td>\n",
354
       "      <td>-0.5415</td>\n",
355
       "      <td>-0.69620</td>\n",
356
       "      <td>-0.1123</td>\n",
357
       "    </tr>\n",
358
       "    <tr>\n",
359
       "      <th>1038</th>\n",
360
       "      <td>1038</td>\n",
361
       "      <td>1038</td>\n",
362
       "      <td>TCGA-WY-A85B</td>\n",
363
       "      <td>TCGA-WY-A85B-01Z-00-DX1.1E4B796A-A1E3-45F9-807...</td>\n",
364
       "      <td>WY</td>\n",
365
       "      <td>0.0</td>\n",
366
       "      <td>ASTR</td>\n",
367
       "      <td>24.0</td>\n",
368
       "      <td>45.76</td>\n",
369
       "      <td>1.0</td>\n",
370
       "      <td>...</td>\n",
371
       "      <td>-0.0678</td>\n",
372
       "      <td>0.30360</td>\n",
373
       "      <td>0.3361</td>\n",
374
       "      <td>1.21610</td>\n",
375
       "      <td>0.9365</td>\n",
376
       "      <td>1.4954</td>\n",
377
       "      <td>1.4201</td>\n",
378
       "      <td>-0.3525</td>\n",
379
       "      <td>0.52860</td>\n",
380
       "      <td>0.1971</td>\n",
381
       "    </tr>\n",
382
       "    <tr>\n",
383
       "      <th>1039</th>\n",
384
       "      <td>1039</td>\n",
385
       "      <td>1039</td>\n",
386
       "      <td>TCGA-WY-A85C</td>\n",
387
       "      <td>TCGA-WY-A85C-01Z-00-DX1.E0A6429A-91B3-4FFE-9FF...</td>\n",
388
       "      <td>WY</td>\n",
389
       "      <td>0.0</td>\n",
390
       "      <td>ASTR</td>\n",
391
       "      <td>36.0</td>\n",
392
       "      <td>46.85</td>\n",
393
       "      <td>1.0</td>\n",
394
       "      <td>...</td>\n",
395
       "      <td>0.0320</td>\n",
396
       "      <td>-1.01940</td>\n",
397
       "      <td>0.6582</td>\n",
398
       "      <td>2.55740</td>\n",
399
       "      <td>1.4708</td>\n",
400
       "      <td>0.8381</td>\n",
401
       "      <td>2.9481</td>\n",
402
       "      <td>0.1252</td>\n",
403
       "      <td>0.75300</td>\n",
404
       "      <td>0.9603</td>\n",
405
       "    </tr>\n",
406
       "    <tr>\n",
407
       "      <th>1040</th>\n",
408
       "      <td>1040</td>\n",
409
       "      <td>1040</td>\n",
410
       "      <td>TCGA-WY-A85D</td>\n",
411
       "      <td>TCGA-WY-A85D-01Z-00-DX1.FB8C252B-7A88-4B14-B3C...</td>\n",
412
       "      <td>WY</td>\n",
413
       "      <td>0.0</td>\n",
414
       "      <td>OAST</td>\n",
415
       "      <td>60.0</td>\n",
416
       "      <td>37.68</td>\n",
417
       "      <td>1.0</td>\n",
418
       "      <td>...</td>\n",
419
       "      <td>-0.3021</td>\n",
420
       "      <td>-0.34820</td>\n",
421
       "      <td>-0.4824</td>\n",
422
       "      <td>1.57910</td>\n",
423
       "      <td>0.0187</td>\n",
424
       "      <td>-0.7983</td>\n",
425
       "      <td>1.4101</td>\n",
426
       "      <td>-1.0976</td>\n",
427
       "      <td>-1.00950</td>\n",
428
       "      <td>0.5940</td>\n",
429
       "    </tr>\n",
430
       "    <tr>\n",
431
       "      <th>1041</th>\n",
432
       "      <td>1041</td>\n",
433
       "      <td>1041</td>\n",
434
       "      <td>TCGA-WY-A85E</td>\n",
435
       "      <td>TCGA-WY-A85E-01Z-00-DX1.AA7A4C1F-99AA-490D-B6D...</td>\n",
436
       "      <td>WY</td>\n",
437
       "      <td>1.0</td>\n",
438
       "      <td>OAST</td>\n",
439
       "      <td>48.0</td>\n",
440
       "      <td>20.80</td>\n",
441
       "      <td>1.0</td>\n",
442
       "      <td>...</td>\n",
443
       "      <td>-0.2576</td>\n",
444
       "      <td>0.89960</td>\n",
445
       "      <td>-0.7533</td>\n",
446
       "      <td>1.42710</td>\n",
447
       "      <td>-0.6667</td>\n",
448
       "      <td>0.8354</td>\n",
449
       "      <td>1.2988</td>\n",
450
       "      <td>-0.4902</td>\n",
451
       "      <td>-0.42940</td>\n",
452
       "      <td>-2.0717</td>\n",
453
       "    </tr>\n",
454
       "  </tbody>\n",
455
       "</table>\n",
456
       "<p>1042 rows × 2842 columns</p>\n",
457
       "</div>"
458
      ],
459
      "text/plain": [
460
       "      Unnamed: 0  Unnamed: 0.1       case_id  \\\n",
461
       "0              0             0  TCGA-02-0047   \n",
462
       "1              1             1  TCGA-06-0125   \n",
463
       "2              2             2  TCGA-06-0125   \n",
464
       "3              3             3  TCGA-06-0129   \n",
465
       "4              4             4  TCGA-06-0129   \n",
466
       "...          ...           ...           ...   \n",
467
       "1037        1037          1037  TCGA-WY-A85A   \n",
468
       "1038        1038          1038  TCGA-WY-A85B   \n",
469
       "1039        1039          1039  TCGA-WY-A85C   \n",
470
       "1040        1040          1040  TCGA-WY-A85D   \n",
471
       "1041        1041          1041  TCGA-WY-A85E   \n",
472
       "\n",
473
       "                                               slide_id site  is_female  \\\n",
474
       "0     TCGA-02-0047-01Z-00-DX1.4755D138-5842-4159-848...    2        0.0   \n",
475
       "1     TCGA-06-0125-01Z-00-DX1.8e0915b2-8dc3-4753-806...    6        1.0   \n",
476
       "2     TCGA-06-0125-01Z-00-DX2.4f9cef92-2bdb-480d-870...    6        1.0   \n",
477
       "3     TCGA-06-0129-01Z-00-DX1.b7bddf7d-f39e-45e7-a78...    6        0.0   \n",
478
       "4     TCGA-06-0129-01Z-00-DX2.1ea78b46-1dc7-44d8-81b...    6        0.0   \n",
479
       "...                                                 ...  ...        ...   \n",
480
       "1037  TCGA-WY-A85A-01Z-00-DX1.CB302B89-F89A-40FD-A7D...   WY        0.0   \n",
481
       "1038  TCGA-WY-A85B-01Z-00-DX1.1E4B796A-A1E3-45F9-807...   WY        0.0   \n",
482
       "1039  TCGA-WY-A85C-01Z-00-DX1.E0A6429A-91B3-4FFE-9FF...   WY        0.0   \n",
483
       "1040  TCGA-WY-A85D-01Z-00-DX1.FB8C252B-7A88-4B14-B3C...   WY        0.0   \n",
484
       "1041  TCGA-WY-A85E-01Z-00-DX1.AA7A4C1F-99AA-490D-B6D...   WY        1.0   \n",
485
       "\n",
486
       "     oncotree_code   age  survival_months  censorship  ...  ZSCAN10_rnaseq  \\\n",
487
       "0              GBM  78.0            14.72         0.0  ...         -0.1599   \n",
488
       "1              GBM  63.0            47.57         0.0  ...          0.4608   \n",
489
       "2              GBM  63.0            47.57         0.0  ...          0.4608   \n",
490
       "3              GBM  30.0            33.64         0.0  ...         -0.2960   \n",
491
       "4              GBM  30.0            33.64         0.0  ...         -0.2960   \n",
492
       "...            ...   ...              ...         ...  ...             ...   \n",
493
       "1037          ASTR  20.0            43.36         1.0  ...         -0.2997   \n",
494
       "1038          ASTR  24.0            45.76         1.0  ...         -0.0678   \n",
495
       "1039          ASTR  36.0            46.85         1.0  ...          0.0320   \n",
496
       "1040          OAST  60.0            37.68         1.0  ...         -0.3021   \n",
497
       "1041          OAST  48.0            20.80         1.0  ...         -0.2576   \n",
498
       "\n",
499
       "      ZSCAN12_rnaseq  ZSCAN20_rnaseq  ZSCAN21_rnaseq  ZSCAN22_rnaseq  \\\n",
500
       "0           -0.59540          0.0813        -1.16960         -0.1728   \n",
501
       "1            0.52815          1.2580         1.41685          2.4839   \n",
502
       "2            0.52815          1.2580         1.41685          2.4839   \n",
503
       "3           -0.75980          1.2706        -0.14840          1.4803   \n",
504
       "4           -0.75980          1.2706        -0.14840          1.4803   \n",
505
       "...              ...             ...             ...             ...   \n",
506
       "1037        -0.67560          0.2714         0.36210         -0.2401   \n",
507
       "1038         0.30360          0.3361         1.21610          0.9365   \n",
508
       "1039        -1.01940          0.6582         2.55740          1.4708   \n",
509
       "1040        -0.34820         -0.4824         1.57910          0.0187   \n",
510
       "1041         0.89960         -0.7533         1.42710         -0.6667   \n",
511
       "\n",
512
       "      ZSCAN2_rnaseq  ZSCAN9_rnaseq  ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  \n",
513
       "0           -0.1144        -0.4155       0.4046     -0.01680       0.3026  \n",
514
       "1           -0.2388         0.9025       0.3242      1.01905       0.2265  \n",
515
       "2           -0.2388         0.9025       0.3242      1.01905       0.2265  \n",
516
       "3            1.5796         1.0245       1.0492      5.78560       1.7766  \n",
517
       "4            1.5796         1.0245       1.0492      5.78560       1.7766  \n",
518
       "...             ...            ...          ...          ...          ...  \n",
519
       "1037         1.4333         0.2715      -0.5415     -0.69620      -0.1123  \n",
520
       "1038         1.4954         1.4201      -0.3525      0.52860       0.1971  \n",
521
       "1039         0.8381         2.9481       0.1252      0.75300       0.9603  \n",
522
       "1040        -0.7983         1.4101      -1.0976     -1.00950       0.5940  \n",
523
       "1041         0.8354         1.2988      -0.4902     -0.42940      -2.0717  \n",
524
       "\n",
525
       "[1042 rows x 2842 columns]"
526
      ]
527
     },
528
     "execution_count": 43,
529
     "metadata": {},
530
     "output_type": "execute_result"
531
    }
532
   ],
533
   "source": [
534
    "pd.read_csv(fname)"
535
   ]
536
  },
537
  {
538
   "cell_type": "code",
539
   "execution_count": 76,
540
   "metadata": {},
541
   "outputs": [],
542
   "source": [
543
    "omic_from_signatures = []\n",
544
    "for col in signatures.columns:\n",
545
    "    omic = signatures[col].dropna().unique()\n",
546
    "    omic_from_signatures.append(omic)\n",
547
    "\n",
548
    "omic_from_signatures = np.concatenate(omic_from_signatures)"
549
   ]
550
  },
551
  {
552
   "cell_type": "code",
553
   "execution_count": 44,
554
   "metadata": {},
555
   "outputs": [
556
    {
557
     "name": "stderr",
558
     "output_type": "stream",
559
     "text": [
560
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
561
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
562
     ]
563
    }
564
   ],
565
   "source": [
566
    "for fname in os.listdir('./'):\n",
567
    "    if fname.endswith('.csv.zip'):\n",
568
    "        slide_df = pd.read_csv(fname)"
569
   ]
570
  },
571
  {
572
   "cell_type": "code",
573
   "execution_count": 81,
574
   "metadata": {},
575
   "outputs": [
576
    {
577
     "data": {
578
      "text/html": [
579
       "<div>\n",
580
       "<style scoped>\n",
581
       "    .dataframe tbody tr th:only-of-type {\n",
582
       "        vertical-align: middle;\n",
583
       "    }\n",
584
       "\n",
585
       "    .dataframe tbody tr th {\n",
586
       "        vertical-align: top;\n",
587
       "    }\n",
588
       "\n",
589
       "    .dataframe thead th {\n",
590
       "        text-align: right;\n",
591
       "    }\n",
592
       "</style>\n",
593
       "<table border=\"1\" class=\"dataframe\">\n",
594
       "  <thead>\n",
595
       "    <tr style=\"text-align: right;\">\n",
596
       "      <th></th>\n",
597
       "      <th>Unnamed: 0</th>\n",
598
       "      <th>Unnamed: 0.1</th>\n",
599
       "      <th>case_id</th>\n",
600
       "      <th>slide_id</th>\n",
601
       "      <th>site</th>\n",
602
       "      <th>is_female</th>\n",
603
       "      <th>oncotree_code</th>\n",
604
       "      <th>age</th>\n",
605
       "      <th>survival_months</th>\n",
606
       "      <th>censorship</th>\n",
607
       "      <th>...</th>\n",
608
       "      <th>ZSCAN10_rnaseq</th>\n",
609
       "      <th>ZSCAN12_rnaseq</th>\n",
610
       "      <th>ZSCAN20_rnaseq</th>\n",
611
       "      <th>ZSCAN21_rnaseq</th>\n",
612
       "      <th>ZSCAN22_rnaseq</th>\n",
613
       "      <th>ZSCAN2_rnaseq</th>\n",
614
       "      <th>ZSCAN9_rnaseq</th>\n",
615
       "      <th>ZXDA_rnaseq</th>\n",
616
       "      <th>ZXDB_rnaseq</th>\n",
617
       "      <th>ZXDC_rnaseq</th>\n",
618
       "    </tr>\n",
619
       "  </thead>\n",
620
       "  <tbody>\n",
621
       "    <tr>\n",
622
       "      <th>0</th>\n",
623
       "      <td>0</td>\n",
624
       "      <td>0</td>\n",
625
       "      <td>TCGA-05-4249</td>\n",
626
       "      <td>TCGA-05-4249-01Z-00-DX1.9fce0297-cc19-4c04-872...</td>\n",
627
       "      <td>5</td>\n",
628
       "      <td>0.0</td>\n",
629
       "      <td>LUAD</td>\n",
630
       "      <td>67.0</td>\n",
631
       "      <td>50.03</td>\n",
632
       "      <td>1.0</td>\n",
633
       "      <td>...</td>\n",
634
       "      <td>-0.1238</td>\n",
635
       "      <td>0.7530</td>\n",
636
       "      <td>0.6552</td>\n",
637
       "      <td>-1.0013</td>\n",
638
       "      <td>0.2353</td>\n",
639
       "      <td>2.6532</td>\n",
640
       "      <td>1.1103</td>\n",
641
       "      <td>0.6149</td>\n",
642
       "      <td>0.5725</td>\n",
643
       "      <td>0.2889</td>\n",
644
       "    </tr>\n",
645
       "    <tr>\n",
646
       "      <th>1</th>\n",
647
       "      <td>1</td>\n",
648
       "      <td>1</td>\n",
649
       "      <td>TCGA-05-4250</td>\n",
650
       "      <td>TCGA-05-4250-01Z-00-DX1.90f67fdf-dff9-46ca-af7...</td>\n",
651
       "      <td>5</td>\n",
652
       "      <td>1.0</td>\n",
653
       "      <td>LUAD</td>\n",
654
       "      <td>79.0</td>\n",
655
       "      <td>3.98</td>\n",
656
       "      <td>0.0</td>\n",
657
       "      <td>...</td>\n",
658
       "      <td>-0.1238</td>\n",
659
       "      <td>0.4810</td>\n",
660
       "      <td>-0.8255</td>\n",
661
       "      <td>0.2825</td>\n",
662
       "      <td>-1.2502</td>\n",
663
       "      <td>-0.9024</td>\n",
664
       "      <td>-0.1472</td>\n",
665
       "      <td>0.5118</td>\n",
666
       "      <td>-0.1673</td>\n",
667
       "      <td>-0.8006</td>\n",
668
       "    </tr>\n",
669
       "    <tr>\n",
670
       "      <th>2</th>\n",
671
       "      <td>2</td>\n",
672
       "      <td>2</td>\n",
673
       "      <td>TCGA-05-4382</td>\n",
674
       "      <td>TCGA-05-4382-01Z-00-DX1.76b49a4c-dbbb-48b0-b67...</td>\n",
675
       "      <td>5</td>\n",
676
       "      <td>0.0</td>\n",
677
       "      <td>LUAD</td>\n",
678
       "      <td>68.0</td>\n",
679
       "      <td>19.94</td>\n",
680
       "      <td>1.0</td>\n",
681
       "      <td>...</td>\n",
682
       "      <td>0.3265</td>\n",
683
       "      <td>0.4462</td>\n",
684
       "      <td>1.1847</td>\n",
685
       "      <td>0.8765</td>\n",
686
       "      <td>-0.7999</td>\n",
687
       "      <td>1.7566</td>\n",
688
       "      <td>1.1757</td>\n",
689
       "      <td>-0.4399</td>\n",
690
       "      <td>-0.2751</td>\n",
691
       "      <td>-0.4668</td>\n",
692
       "    </tr>\n",
693
       "    <tr>\n",
694
       "      <th>3</th>\n",
695
       "      <td>3</td>\n",
696
       "      <td>3</td>\n",
697
       "      <td>TCGA-05-4384</td>\n",
698
       "      <td>TCGA-05-4384-01Z-00-DX1.CA68BF29-BBE3-4C8E-B48...</td>\n",
699
       "      <td>5</td>\n",
700
       "      <td>0.0</td>\n",
701
       "      <td>LUAD</td>\n",
702
       "      <td>66.0</td>\n",
703
       "      <td>13.99</td>\n",
704
       "      <td>1.0</td>\n",
705
       "      <td>...</td>\n",
706
       "      <td>-0.1238</td>\n",
707
       "      <td>-0.0369</td>\n",
708
       "      <td>0.5766</td>\n",
709
       "      <td>0.0083</td>\n",
710
       "      <td>0.1344</td>\n",
711
       "      <td>0.8299</td>\n",
712
       "      <td>0.6599</td>\n",
713
       "      <td>1.4844</td>\n",
714
       "      <td>0.9748</td>\n",
715
       "      <td>0.7481</td>\n",
716
       "    </tr>\n",
717
       "    <tr>\n",
718
       "      <th>4</th>\n",
719
       "      <td>4</td>\n",
720
       "      <td>4</td>\n",
721
       "      <td>TCGA-05-4390</td>\n",
722
       "      <td>TCGA-05-4390-01Z-00-DX1.858E64DF-DD3E-4F43-B7C...</td>\n",
723
       "      <td>5</td>\n",
724
       "      <td>1.0</td>\n",
725
       "      <td>LUAD</td>\n",
726
       "      <td>58.0</td>\n",
727
       "      <td>36.99</td>\n",
728
       "      <td>1.0</td>\n",
729
       "      <td>...</td>\n",
730
       "      <td>-0.1238</td>\n",
731
       "      <td>0.4751</td>\n",
732
       "      <td>1.2404</td>\n",
733
       "      <td>0.6932</td>\n",
734
       "      <td>-0.2792</td>\n",
735
       "      <td>2.1326</td>\n",
736
       "      <td>0.1621</td>\n",
737
       "      <td>-0.0462</td>\n",
738
       "      <td>1.8418</td>\n",
739
       "      <td>-0.9922</td>\n",
740
       "    </tr>\n",
741
       "    <tr>\n",
742
       "      <th>...</th>\n",
743
       "      <td>...</td>\n",
744
       "      <td>...</td>\n",
745
       "      <td>...</td>\n",
746
       "      <td>...</td>\n",
747
       "      <td>...</td>\n",
748
       "      <td>...</td>\n",
749
       "      <td>...</td>\n",
750
       "      <td>...</td>\n",
751
       "      <td>...</td>\n",
752
       "      <td>...</td>\n",
753
       "      <td>...</td>\n",
754
       "      <td>...</td>\n",
755
       "      <td>...</td>\n",
756
       "      <td>...</td>\n",
757
       "      <td>...</td>\n",
758
       "      <td>...</td>\n",
759
       "      <td>...</td>\n",
760
       "      <td>...</td>\n",
761
       "      <td>...</td>\n",
762
       "      <td>...</td>\n",
763
       "      <td>...</td>\n",
764
       "    </tr>\n",
765
       "    <tr>\n",
766
       "      <th>511</th>\n",
767
       "      <td>511</td>\n",
768
       "      <td>511</td>\n",
769
       "      <td>TCGA-NJ-A55O</td>\n",
770
       "      <td>TCGA-NJ-A55O-01Z-00-DX1.8E23C821-B8BB-4D89-9E3...</td>\n",
771
       "      <td>NJ</td>\n",
772
       "      <td>1.0</td>\n",
773
       "      <td>LUAD</td>\n",
774
       "      <td>56.0</td>\n",
775
       "      <td>0.43</td>\n",
776
       "      <td>1.0</td>\n",
777
       "      <td>...</td>\n",
778
       "      <td>-0.0781</td>\n",
779
       "      <td>-0.2368</td>\n",
780
       "      <td>0.5056</td>\n",
781
       "      <td>-0.2771</td>\n",
782
       "      <td>0.1067</td>\n",
783
       "      <td>-0.0153</td>\n",
784
       "      <td>-0.2546</td>\n",
785
       "      <td>-0.4205</td>\n",
786
       "      <td>-0.3773</td>\n",
787
       "      <td>0.0551</td>\n",
788
       "    </tr>\n",
789
       "    <tr>\n",
790
       "      <th>512</th>\n",
791
       "      <td>512</td>\n",
792
       "      <td>512</td>\n",
793
       "      <td>TCGA-NJ-A55R</td>\n",
794
       "      <td>TCGA-NJ-A55R-01Z-00-DX1.2E2B3642-4E1C-47DB-AF7...</td>\n",
795
       "      <td>NJ</td>\n",
796
       "      <td>0.0</td>\n",
797
       "      <td>LUAD</td>\n",
798
       "      <td>67.0</td>\n",
799
       "      <td>19.81</td>\n",
800
       "      <td>1.0</td>\n",
801
       "      <td>...</td>\n",
802
       "      <td>6.1880</td>\n",
803
       "      <td>0.2405</td>\n",
804
       "      <td>0.0751</td>\n",
805
       "      <td>1.9723</td>\n",
806
       "      <td>0.6093</td>\n",
807
       "      <td>0.6135</td>\n",
808
       "      <td>1.7846</td>\n",
809
       "      <td>0.0588</td>\n",
810
       "      <td>-0.1157</td>\n",
811
       "      <td>1.2831</td>\n",
812
       "    </tr>\n",
813
       "    <tr>\n",
814
       "      <th>513</th>\n",
815
       "      <td>513</td>\n",
816
       "      <td>513</td>\n",
817
       "      <td>TCGA-NJ-A7XG</td>\n",
818
       "      <td>TCGA-NJ-A7XG-01Z-00-DX1.4A876254-653C-410B-A36...</td>\n",
819
       "      <td>NJ</td>\n",
820
       "      <td>0.0</td>\n",
821
       "      <td>LUAD</td>\n",
822
       "      <td>49.0</td>\n",
823
       "      <td>20.27</td>\n",
824
       "      <td>1.0</td>\n",
825
       "      <td>...</td>\n",
826
       "      <td>-0.1238</td>\n",
827
       "      <td>-0.0041</td>\n",
828
       "      <td>-0.8129</td>\n",
829
       "      <td>-0.4409</td>\n",
830
       "      <td>0.6778</td>\n",
831
       "      <td>-0.5506</td>\n",
832
       "      <td>1.4350</td>\n",
833
       "      <td>-1.5823</td>\n",
834
       "      <td>-1.3015</td>\n",
835
       "      <td>0.4371</td>\n",
836
       "    </tr>\n",
837
       "    <tr>\n",
838
       "      <th>514</th>\n",
839
       "      <td>514</td>\n",
840
       "      <td>514</td>\n",
841
       "      <td>TCGA-O1-A52J</td>\n",
842
       "      <td>TCGA-O1-A52J-01Z-00-DX1.26F6ECCA-D614-4950-98E...</td>\n",
843
       "      <td>O1</td>\n",
844
       "      <td>1.0</td>\n",
845
       "      <td>LUAD</td>\n",
846
       "      <td>74.0</td>\n",
847
       "      <td>59.07</td>\n",
848
       "      <td>0.0</td>\n",
849
       "      <td>...</td>\n",
850
       "      <td>-0.1238</td>\n",
851
       "      <td>-0.1263</td>\n",
852
       "      <td>0.8472</td>\n",
853
       "      <td>-0.3943</td>\n",
854
       "      <td>-0.7671</td>\n",
855
       "      <td>-1.1313</td>\n",
856
       "      <td>-0.9671</td>\n",
857
       "      <td>4.2234</td>\n",
858
       "      <td>0.9716</td>\n",
859
       "      <td>0.6699</td>\n",
860
       "    </tr>\n",
861
       "    <tr>\n",
862
       "      <th>515</th>\n",
863
       "      <td>515</td>\n",
864
       "      <td>515</td>\n",
865
       "      <td>TCGA-S2-AA1A</td>\n",
866
       "      <td>TCGA-S2-AA1A-01Z-00-DX1.4B5D5FAE-8305-4D2D-B24...</td>\n",
867
       "      <td>S2</td>\n",
868
       "      <td>1.0</td>\n",
869
       "      <td>LUAD</td>\n",
870
       "      <td>68.0</td>\n",
871
       "      <td>16.85</td>\n",
872
       "      <td>1.0</td>\n",
873
       "      <td>...</td>\n",
874
       "      <td>-0.1238</td>\n",
875
       "      <td>0.5292</td>\n",
876
       "      <td>-0.8343</td>\n",
877
       "      <td>0.7741</td>\n",
878
       "      <td>-0.6405</td>\n",
879
       "      <td>-0.3901</td>\n",
880
       "      <td>0.0245</td>\n",
881
       "      <td>0.5245</td>\n",
882
       "      <td>-0.1738</td>\n",
883
       "      <td>2.4043</td>\n",
884
       "    </tr>\n",
885
       "  </tbody>\n",
886
       "</table>\n",
887
       "<p>516 rows × 3106 columns</p>\n",
888
       "</div>"
889
      ],
890
      "text/plain": [
891
       "     Unnamed: 0  Unnamed: 0.1       case_id  \\\n",
892
       "0             0             0  TCGA-05-4249   \n",
893
       "1             1             1  TCGA-05-4250   \n",
894
       "2             2             2  TCGA-05-4382   \n",
895
       "3             3             3  TCGA-05-4384   \n",
896
       "4             4             4  TCGA-05-4390   \n",
897
       "..          ...           ...           ...   \n",
898
       "511         511           511  TCGA-NJ-A55O   \n",
899
       "512         512           512  TCGA-NJ-A55R   \n",
900
       "513         513           513  TCGA-NJ-A7XG   \n",
901
       "514         514           514  TCGA-O1-A52J   \n",
902
       "515         515           515  TCGA-S2-AA1A   \n",
903
       "\n",
904
       "                                              slide_id site  is_female  \\\n",
905
       "0    TCGA-05-4249-01Z-00-DX1.9fce0297-cc19-4c04-872...    5        0.0   \n",
906
       "1    TCGA-05-4250-01Z-00-DX1.90f67fdf-dff9-46ca-af7...    5        1.0   \n",
907
       "2    TCGA-05-4382-01Z-00-DX1.76b49a4c-dbbb-48b0-b67...    5        0.0   \n",
908
       "3    TCGA-05-4384-01Z-00-DX1.CA68BF29-BBE3-4C8E-B48...    5        0.0   \n",
909
       "4    TCGA-05-4390-01Z-00-DX1.858E64DF-DD3E-4F43-B7C...    5        1.0   \n",
910
       "..                                                 ...  ...        ...   \n",
911
       "511  TCGA-NJ-A55O-01Z-00-DX1.8E23C821-B8BB-4D89-9E3...   NJ        1.0   \n",
912
       "512  TCGA-NJ-A55R-01Z-00-DX1.2E2B3642-4E1C-47DB-AF7...   NJ        0.0   \n",
913
       "513  TCGA-NJ-A7XG-01Z-00-DX1.4A876254-653C-410B-A36...   NJ        0.0   \n",
914
       "514  TCGA-O1-A52J-01Z-00-DX1.26F6ECCA-D614-4950-98E...   O1        1.0   \n",
915
       "515  TCGA-S2-AA1A-01Z-00-DX1.4B5D5FAE-8305-4D2D-B24...   S2        1.0   \n",
916
       "\n",
917
       "    oncotree_code   age  survival_months  censorship  ...  ZSCAN10_rnaseq  \\\n",
918
       "0            LUAD  67.0            50.03         1.0  ...         -0.1238   \n",
919
       "1            LUAD  79.0             3.98         0.0  ...         -0.1238   \n",
920
       "2            LUAD  68.0            19.94         1.0  ...          0.3265   \n",
921
       "3            LUAD  66.0            13.99         1.0  ...         -0.1238   \n",
922
       "4            LUAD  58.0            36.99         1.0  ...         -0.1238   \n",
923
       "..            ...   ...              ...         ...  ...             ...   \n",
924
       "511          LUAD  56.0             0.43         1.0  ...         -0.0781   \n",
925
       "512          LUAD  67.0            19.81         1.0  ...          6.1880   \n",
926
       "513          LUAD  49.0            20.27         1.0  ...         -0.1238   \n",
927
       "514          LUAD  74.0            59.07         0.0  ...         -0.1238   \n",
928
       "515          LUAD  68.0            16.85         1.0  ...         -0.1238   \n",
929
       "\n",
930
       "     ZSCAN12_rnaseq  ZSCAN20_rnaseq  ZSCAN21_rnaseq  ZSCAN22_rnaseq  \\\n",
931
       "0            0.7530          0.6552         -1.0013          0.2353   \n",
932
       "1            0.4810         -0.8255          0.2825         -1.2502   \n",
933
       "2            0.4462          1.1847          0.8765         -0.7999   \n",
934
       "3           -0.0369          0.5766          0.0083          0.1344   \n",
935
       "4            0.4751          1.2404          0.6932         -0.2792   \n",
936
       "..              ...             ...             ...             ...   \n",
937
       "511         -0.2368          0.5056         -0.2771          0.1067   \n",
938
       "512          0.2405          0.0751          1.9723          0.6093   \n",
939
       "513         -0.0041         -0.8129         -0.4409          0.6778   \n",
940
       "514         -0.1263          0.8472         -0.3943         -0.7671   \n",
941
       "515          0.5292         -0.8343          0.7741         -0.6405   \n",
942
       "\n",
943
       "     ZSCAN2_rnaseq  ZSCAN9_rnaseq  ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  \n",
944
       "0           2.6532         1.1103       0.6149       0.5725       0.2889  \n",
945
       "1          -0.9024        -0.1472       0.5118      -0.1673      -0.8006  \n",
946
       "2           1.7566         1.1757      -0.4399      -0.2751      -0.4668  \n",
947
       "3           0.8299         0.6599       1.4844       0.9748       0.7481  \n",
948
       "4           2.1326         0.1621      -0.0462       1.8418      -0.9922  \n",
949
       "..             ...            ...          ...          ...          ...  \n",
950
       "511        -0.0153        -0.2546      -0.4205      -0.3773       0.0551  \n",
951
       "512         0.6135         1.7846       0.0588      -0.1157       1.2831  \n",
952
       "513        -0.5506         1.4350      -1.5823      -1.3015       0.4371  \n",
953
       "514        -1.1313        -0.9671       4.2234       0.9716       0.6699  \n",
954
       "515        -0.3901         0.0245       0.5245      -0.1738       2.4043  \n",
955
       "\n",
956
       "[516 rows x 3106 columns]"
957
      ]
958
     },
959
     "execution_count": 81,
960
     "metadata": {},
961
     "output_type": "execute_result"
962
    }
963
   ],
964
   "source": [
965
    "fname = '../dataset_csv/tcga_luad_all_clean.csv.zip'\n",
966
    "slide_df = pd.read_csv(fname)\n",
967
    "slide_df"
968
   ]
969
  },
970
  {
971
   "cell_type": "code",
972
   "execution_count": 79,
973
   "metadata": {},
974
   "outputs": [],
975
   "source": [
976
    "fname = '../dataset_csv/tcga_luad_all_clean.csv.zip'\n",
977
    "slide_df = pd.read_csv(fname)\n",
978
    "omic_overlap = np.concatenate([omic_from_signatures+mode for mode in ['_mut', '_cnv', '_rnaseq']])\n",
979
    "omic_overlap = sorted(series_intersection(omic_overlap, slide_df.columns))\n",
980
    "slide_df[list(slide_df.columns[:9]) + omic_overlap].to_csv('../dataset_csv_sig/%s' % fname)"
981
   ]
982
  },
983
  {
984
   "cell_type": "code",
985
   "execution_count": 54,
986
   "metadata": {},
987
   "outputs": [
988
    {
989
     "name": "stderr",
990
     "output_type": "stream",
991
     "text": [
992
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
993
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
994
     ]
995
    }
996
   ],
997
   "source": [
998
    "for fname in os.listdir('./'):\n",
999
    "    if fname.endswith('.csv.zip'):\n",
1000
    "        slide_df = pd.read_csv(fname)"
1001
   ]
1002
  },
1003
  {
1004
   "cell_type": "code",
1005
   "execution_count": 55,
1006
   "metadata": {},
1007
   "outputs": [
1008
    {
1009
     "data": {
1010
      "text/plain": [
1011
       "'tcga_luad_all_clean.csv.zip'"
1012
      ]
1013
     },
1014
     "execution_count": 55,
1015
     "metadata": {},
1016
     "output_type": "execute_result"
1017
    }
1018
   ],
1019
   "source": [
1020
    "fname"
1021
   ]
1022
  },
1023
  {
1024
   "cell_type": "code",
1025
   "execution_count": null,
1026
   "metadata": {},
1027
   "outputs": [],
1028
   "source": []
1029
  },
1030
  {
1031
   "cell_type": "code",
1032
   "execution_count": null,
1033
   "metadata": {},
1034
   "outputs": [],
1035
   "source": []
1036
  },
1037
  {
1038
   "cell_type": "code",
1039
   "execution_count": null,
1040
   "metadata": {},
1041
   "outputs": [],
1042
   "source": []
1043
  },
1044
  {
1045
   "cell_type": "code",
1046
   "execution_count": 15,
1047
   "metadata": {},
1048
   "outputs": [
1049
    {
1050
     "name": "stderr",
1051
     "output_type": "stream",
1052
     "text": [
1053
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False.\n",
1054
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
1055
     ]
1056
    }
1057
   ],
1058
   "source": [
1059
    "# Load both inputs first so everything below is derived from freshly read data.\n",
1060
    "signatures = pd.read_csv('./signatures.csv')\n",
1061
    "slide_df = pd.read_csv('./tcga_gbmlgg_all_clean.csv.zip')\n",
1062
    "\n",
1063
    "def series_intersection(s1, s2):\n",
1064
    "    \"\"\"Return the unique values present in both s1 and s2 as a Series (order not guaranteed).\"\"\"\n",
1065
    "    return pd.Series(list(set(s1) & set(s2)))\n",
1066
    "\n",
1067
    "# Pool the unique, non-null entries of every signature column into one array.\n",
1068
    "omic_from_signatures = np.concatenate(\n",
1069
    "    [signatures[col].dropna().unique() for col in signatures.columns]\n",
1070
    ")\n",
1071
    "\n",
1072
    "# Keep only the signature entries that have a matching '<name>_rnaseq' column in slide_df.\n",
1073
    "rnaseq_overlap = np.concatenate([omic_from_signatures+mode for mode in ['_rnaseq']])\n",
1074
    "rnaseq_overlap = sorted(series_intersection(rnaseq_overlap, slide_df.columns))\n",
1075
    "genomics_mut_cnv = list(slide_df.columns[slide_df.columns.str.contains('_mut|_cnv')])"
1076
   ]
1077
  },
1078
  {
1079
   "cell_type": "code",
1080
   "execution_count": 16,
1081
   "metadata": {},
1082
   "outputs": [],
1083
   "source": [
1084
    "_ = slide_df[list(slide_df.columns[:9]) + rnaseq_overlap + genomics_mut_cnv]"
1085
   ]
1086
  },
1087
  {
1088
   "cell_type": "code",
1089
   "execution_count": 17,
1090
   "metadata": {},
1091
   "outputs": [
1092
    {
1093
     "data": {
1094
      "text/html": [
1095
       "<div>\n",
1096
       "<style scoped>\n",
1097
       "    .dataframe tbody tr th:only-of-type {\n",
1098
       "        vertical-align: middle;\n",
1099
       "    }\n",
1100
       "\n",
1101
       "    .dataframe tbody tr th {\n",
1102
       "        vertical-align: top;\n",
1103
       "    }\n",
1104
       "\n",
1105
       "    .dataframe thead th {\n",
1106
       "        text-align: right;\n",
1107
       "    }\n",
1108
       "</style>\n",
1109
       "<table border=\"1\" class=\"dataframe\">\n",
1110
       "  <thead>\n",
1111
       "    <tr style=\"text-align: right;\">\n",
1112
       "      <th></th>\n",
1113
       "      <th>case_id</th>\n",
1114
       "      <th>slide_id</th>\n",
1115
       "      <th>site</th>\n",
1116
       "      <th>is_female</th>\n",
1117
       "      <th>oncotree_code</th>\n",
1118
       "      <th>age</th>\n",
1119
       "      <th>survival_months</th>\n",
1120
       "      <th>censorship</th>\n",
1121
       "      <th>train</th>\n",
1122
       "      <th>AAK1_rnaseq</th>\n",
1123
       "      <th>...</th>\n",
1124
       "      <th>AGAP2_cnv</th>\n",
1125
       "      <th>TSPAN31_cnv</th>\n",
1126
       "      <th>CDK4_cnv</th>\n",
1127
       "      <th>MARCH9_cnv</th>\n",
1128
       "      <th>CYP27B1_cnv</th>\n",
1129
       "      <th>METTL1_cnv</th>\n",
1130
       "      <th>TSFM_cnv</th>\n",
1131
       "      <th>AVIL_cnv</th>\n",
1132
       "      <th>CTDSP2_cnv</th>\n",
1133
       "      <th>RN7SKP65_cnv</th>\n",
1134
       "    </tr>\n",
1135
       "  </thead>\n",
1136
       "  <tbody>\n",
1137
       "    <tr>\n",
1138
       "      <th>0</th>\n",
1139
       "      <td>TCGA-02-0047</td>\n",
1140
       "      <td>TCGA-02-0047-01Z-00-DX1.4755D138-5842-4159-848...</td>\n",
1141
       "      <td>2</td>\n",
1142
       "      <td>0.0</td>\n",
1143
       "      <td>GBM</td>\n",
1144
       "      <td>78.0</td>\n",
1145
       "      <td>14.72</td>\n",
1146
       "      <td>0.0</td>\n",
1147
       "      <td>1.0</td>\n",
1148
       "      <td>1.5517</td>\n",
1149
       "      <td>...</td>\n",
1150
       "      <td>0</td>\n",
1151
       "      <td>0</td>\n",
1152
       "      <td>0</td>\n",
1153
       "      <td>0</td>\n",
1154
       "      <td>0</td>\n",
1155
       "      <td>0</td>\n",
1156
       "      <td>0</td>\n",
1157
       "      <td>0</td>\n",
1158
       "      <td>0</td>\n",
1159
       "      <td>0</td>\n",
1160
       "    </tr>\n",
1161
       "    <tr>\n",
1162
       "      <th>1</th>\n",
1163
       "      <td>TCGA-06-0125</td>\n",
1164
       "      <td>TCGA-06-0125-01Z-00-DX1.8e0915b2-8dc3-4753-806...</td>\n",
1165
       "      <td>6</td>\n",
1166
       "      <td>1.0</td>\n",
1167
       "      <td>GBM</td>\n",
1168
       "      <td>63.0</td>\n",
1169
       "      <td>47.57</td>\n",
1170
       "      <td>0.0</td>\n",
1171
       "      <td>1.0</td>\n",
1172
       "      <td>0.5557</td>\n",
1173
       "      <td>...</td>\n",
1174
       "      <td>0</td>\n",
1175
       "      <td>0</td>\n",
1176
       "      <td>0</td>\n",
1177
       "      <td>0</td>\n",
1178
       "      <td>0</td>\n",
1179
       "      <td>0</td>\n",
1180
       "      <td>0</td>\n",
1181
       "      <td>0</td>\n",
1182
       "      <td>0</td>\n",
1183
       "      <td>0</td>\n",
1184
       "    </tr>\n",
1185
       "    <tr>\n",
1186
       "      <th>2</th>\n",
1187
       "      <td>TCGA-06-0125</td>\n",
1188
       "      <td>TCGA-06-0125-01Z-00-DX2.4f9cef92-2bdb-480d-870...</td>\n",
1189
       "      <td>6</td>\n",
1190
       "      <td>1.0</td>\n",
1191
       "      <td>GBM</td>\n",
1192
       "      <td>63.0</td>\n",
1193
       "      <td>47.57</td>\n",
1194
       "      <td>0.0</td>\n",
1195
       "      <td>1.0</td>\n",
1196
       "      <td>0.5557</td>\n",
1197
       "      <td>...</td>\n",
1198
       "      <td>0</td>\n",
1199
       "      <td>0</td>\n",
1200
       "      <td>0</td>\n",
1201
       "      <td>0</td>\n",
1202
       "      <td>0</td>\n",
1203
       "      <td>0</td>\n",
1204
       "      <td>0</td>\n",
1205
       "      <td>0</td>\n",
1206
       "      <td>0</td>\n",
1207
       "      <td>0</td>\n",
1208
       "    </tr>\n",
1209
       "    <tr>\n",
1210
       "      <th>3</th>\n",
1211
       "      <td>TCGA-06-0129</td>\n",
1212
       "      <td>TCGA-06-0129-01Z-00-DX1.b7bddf7d-f39e-45e7-a78...</td>\n",
1213
       "      <td>6</td>\n",
1214
       "      <td>0.0</td>\n",
1215
       "      <td>GBM</td>\n",
1216
       "      <td>30.0</td>\n",
1217
       "      <td>33.64</td>\n",
1218
       "      <td>0.0</td>\n",
1219
       "      <td>1.0</td>\n",
1220
       "      <td>0.6442</td>\n",
1221
       "      <td>...</td>\n",
1222
       "      <td>2</td>\n",
1223
       "      <td>2</td>\n",
1224
       "      <td>2</td>\n",
1225
       "      <td>2</td>\n",
1226
       "      <td>2</td>\n",
1227
       "      <td>2</td>\n",
1228
       "      <td>2</td>\n",
1229
       "      <td>2</td>\n",
1230
       "      <td>2</td>\n",
1231
       "      <td>2</td>\n",
1232
       "    </tr>\n",
1233
       "    <tr>\n",
1234
       "      <th>4</th>\n",
1235
       "      <td>TCGA-06-0129</td>\n",
1236
       "      <td>TCGA-06-0129-01Z-00-DX2.1ea78b46-1dc7-44d8-81b...</td>\n",
1237
       "      <td>6</td>\n",
1238
       "      <td>0.0</td>\n",
1239
       "      <td>GBM</td>\n",
1240
       "      <td>30.0</td>\n",
1241
       "      <td>33.64</td>\n",
1242
       "      <td>0.0</td>\n",
1243
       "      <td>1.0</td>\n",
1244
       "      <td>0.6442</td>\n",
1245
       "      <td>...</td>\n",
1246
       "      <td>2</td>\n",
1247
       "      <td>2</td>\n",
1248
       "      <td>2</td>\n",
1249
       "      <td>2</td>\n",
1250
       "      <td>2</td>\n",
1251
       "      <td>2</td>\n",
1252
       "      <td>2</td>\n",
1253
       "      <td>2</td>\n",
1254
       "      <td>2</td>\n",
1255
       "      <td>2</td>\n",
1256
       "    </tr>\n",
1257
       "    <tr>\n",
1258
       "      <th>...</th>\n",
1259
       "      <td>...</td>\n",
1260
       "      <td>...</td>\n",
1261
       "      <td>...</td>\n",
1262
       "      <td>...</td>\n",
1263
       "      <td>...</td>\n",
1264
       "      <td>...</td>\n",
1265
       "      <td>...</td>\n",
1266
       "      <td>...</td>\n",
1267
       "      <td>...</td>\n",
1268
       "      <td>...</td>\n",
1269
       "      <td>...</td>\n",
1270
       "      <td>...</td>\n",
1271
       "      <td>...</td>\n",
1272
       "      <td>...</td>\n",
1273
       "      <td>...</td>\n",
1274
       "      <td>...</td>\n",
1275
       "      <td>...</td>\n",
1276
       "      <td>...</td>\n",
1277
       "      <td>...</td>\n",
1278
       "      <td>...</td>\n",
1279
       "      <td>...</td>\n",
1280
       "    </tr>\n",
1281
       "    <tr>\n",
1282
       "      <th>1037</th>\n",
1283
       "      <td>TCGA-WY-A85A</td>\n",
1284
       "      <td>TCGA-WY-A85A-01Z-00-DX1.CB302B89-F89A-40FD-A7D...</td>\n",
1285
       "      <td>WY</td>\n",
1286
       "      <td>0.0</td>\n",
1287
       "      <td>ASTR</td>\n",
1288
       "      <td>20.0</td>\n",
1289
       "      <td>43.36</td>\n",
1290
       "      <td>1.0</td>\n",
1291
       "      <td>1.0</td>\n",
1292
       "      <td>-0.3841</td>\n",
1293
       "      <td>...</td>\n",
1294
       "      <td>0</td>\n",
1295
       "      <td>0</td>\n",
1296
       "      <td>0</td>\n",
1297
       "      <td>0</td>\n",
1298
       "      <td>0</td>\n",
1299
       "      <td>0</td>\n",
1300
       "      <td>0</td>\n",
1301
       "      <td>0</td>\n",
1302
       "      <td>0</td>\n",
1303
       "      <td>0</td>\n",
1304
       "    </tr>\n",
1305
       "    <tr>\n",
1306
       "      <th>1038</th>\n",
1307
       "      <td>TCGA-WY-A85B</td>\n",
1308
       "      <td>TCGA-WY-A85B-01Z-00-DX1.1E4B796A-A1E3-45F9-807...</td>\n",
1309
       "      <td>WY</td>\n",
1310
       "      <td>0.0</td>\n",
1311
       "      <td>ASTR</td>\n",
1312
       "      <td>24.0</td>\n",
1313
       "      <td>45.76</td>\n",
1314
       "      <td>1.0</td>\n",
1315
       "      <td>1.0</td>\n",
1316
       "      <td>-0.4479</td>\n",
1317
       "      <td>...</td>\n",
1318
       "      <td>-1</td>\n",
1319
       "      <td>-1</td>\n",
1320
       "      <td>-1</td>\n",
1321
       "      <td>-1</td>\n",
1322
       "      <td>-1</td>\n",
1323
       "      <td>-1</td>\n",
1324
       "      <td>-1</td>\n",
1325
       "      <td>-1</td>\n",
1326
       "      <td>-1</td>\n",
1327
       "      <td>-1</td>\n",
1328
       "    </tr>\n",
1329
       "    <tr>\n",
1330
       "      <th>1039</th>\n",
1331
       "      <td>TCGA-WY-A85C</td>\n",
1332
       "      <td>TCGA-WY-A85C-01Z-00-DX1.E0A6429A-91B3-4FFE-9FF...</td>\n",
1333
       "      <td>WY</td>\n",
1334
       "      <td>0.0</td>\n",
1335
       "      <td>ASTR</td>\n",
1336
       "      <td>36.0</td>\n",
1337
       "      <td>46.85</td>\n",
1338
       "      <td>1.0</td>\n",
1339
       "      <td>1.0</td>\n",
1340
       "      <td>-0.2472</td>\n",
1341
       "      <td>...</td>\n",
1342
       "      <td>0</td>\n",
1343
       "      <td>0</td>\n",
1344
       "      <td>0</td>\n",
1345
       "      <td>0</td>\n",
1346
       "      <td>0</td>\n",
1347
       "      <td>0</td>\n",
1348
       "      <td>0</td>\n",
1349
       "      <td>0</td>\n",
1350
       "      <td>0</td>\n",
1351
       "      <td>0</td>\n",
1352
       "    </tr>\n",
1353
       "    <tr>\n",
1354
       "      <th>1040</th>\n",
1355
       "      <td>TCGA-WY-A85D</td>\n",
1356
       "      <td>TCGA-WY-A85D-01Z-00-DX1.FB8C252B-7A88-4B14-B3C...</td>\n",
1357
       "      <td>WY</td>\n",
1358
       "      <td>0.0</td>\n",
1359
       "      <td>OAST</td>\n",
1360
       "      <td>60.0</td>\n",
1361
       "      <td>37.68</td>\n",
1362
       "      <td>1.0</td>\n",
1363
       "      <td>1.0</td>\n",
1364
       "      <td>-0.5892</td>\n",
1365
       "      <td>...</td>\n",
1366
       "      <td>0</td>\n",
1367
       "      <td>0</td>\n",
1368
       "      <td>0</td>\n",
1369
       "      <td>0</td>\n",
1370
       "      <td>0</td>\n",
1371
       "      <td>0</td>\n",
1372
       "      <td>0</td>\n",
1373
       "      <td>0</td>\n",
1374
       "      <td>0</td>\n",
1375
       "      <td>0</td>\n",
1376
       "    </tr>\n",
1377
       "    <tr>\n",
1378
       "      <th>1041</th>\n",
1379
       "      <td>TCGA-WY-A85E</td>\n",
1380
       "      <td>TCGA-WY-A85E-01Z-00-DX1.AA7A4C1F-99AA-490D-B6D...</td>\n",
1381
       "      <td>WY</td>\n",
1382
       "      <td>1.0</td>\n",
1383
       "      <td>OAST</td>\n",
1384
       "      <td>48.0</td>\n",
1385
       "      <td>20.80</td>\n",
1386
       "      <td>1.0</td>\n",
1387
       "      <td>1.0</td>\n",
1388
       "      <td>-0.1087</td>\n",
1389
       "      <td>...</td>\n",
1390
       "      <td>0</td>\n",
1391
       "      <td>0</td>\n",
1392
       "      <td>0</td>\n",
1393
       "      <td>0</td>\n",
1394
       "      <td>0</td>\n",
1395
       "      <td>0</td>\n",
1396
       "      <td>0</td>\n",
1397
       "      <td>0</td>\n",
1398
       "      <td>0</td>\n",
1399
       "      <td>0</td>\n",
1400
       "    </tr>\n",
1401
       "  </tbody>\n",
1402
       "</table>\n",
1403
       "<p>1042 rows × 2891 columns</p>\n",
1404
       "</div>"
1405
      ],
1406
      "text/plain": [
1407
       "           case_id                                           slide_id site  \\\n",
1408
       "0     TCGA-02-0047  TCGA-02-0047-01Z-00-DX1.4755D138-5842-4159-848...    2   \n",
1409
       "1     TCGA-06-0125  TCGA-06-0125-01Z-00-DX1.8e0915b2-8dc3-4753-806...    6   \n",
1410
       "2     TCGA-06-0125  TCGA-06-0125-01Z-00-DX2.4f9cef92-2bdb-480d-870...    6   \n",
1411
       "3     TCGA-06-0129  TCGA-06-0129-01Z-00-DX1.b7bddf7d-f39e-45e7-a78...    6   \n",
1412
       "4     TCGA-06-0129  TCGA-06-0129-01Z-00-DX2.1ea78b46-1dc7-44d8-81b...    6   \n",
1413
       "...            ...                                                ...  ...   \n",
1414
       "1037  TCGA-WY-A85A  TCGA-WY-A85A-01Z-00-DX1.CB302B89-F89A-40FD-A7D...   WY   \n",
1415
       "1038  TCGA-WY-A85B  TCGA-WY-A85B-01Z-00-DX1.1E4B796A-A1E3-45F9-807...   WY   \n",
1416
       "1039  TCGA-WY-A85C  TCGA-WY-A85C-01Z-00-DX1.E0A6429A-91B3-4FFE-9FF...   WY   \n",
1417
       "1040  TCGA-WY-A85D  TCGA-WY-A85D-01Z-00-DX1.FB8C252B-7A88-4B14-B3C...   WY   \n",
1418
       "1041  TCGA-WY-A85E  TCGA-WY-A85E-01Z-00-DX1.AA7A4C1F-99AA-490D-B6D...   WY   \n",
1419
       "\n",
1420
       "      is_female oncotree_code   age  survival_months  censorship  train  \\\n",
1421
       "0           0.0           GBM  78.0            14.72         0.0    1.0   \n",
1422
       "1           1.0           GBM  63.0            47.57         0.0    1.0   \n",
1423
       "2           1.0           GBM  63.0            47.57         0.0    1.0   \n",
1424
       "3           0.0           GBM  30.0            33.64         0.0    1.0   \n",
1425
       "4           0.0           GBM  30.0            33.64         0.0    1.0   \n",
1426
       "...         ...           ...   ...              ...         ...    ...   \n",
1427
       "1037        0.0          ASTR  20.0            43.36         1.0    1.0   \n",
1428
       "1038        0.0          ASTR  24.0            45.76         1.0    1.0   \n",
1429
       "1039        0.0          ASTR  36.0            46.85         1.0    1.0   \n",
1430
       "1040        0.0          OAST  60.0            37.68         1.0    1.0   \n",
1431
       "1041        1.0          OAST  48.0            20.80         1.0    1.0   \n",
1432
       "\n",
1433
       "      AAK1_rnaseq  ...  AGAP2_cnv  TSPAN31_cnv  CDK4_cnv  MARCH9_cnv  \\\n",
1434
       "0          1.5517  ...          0            0         0           0   \n",
1435
       "1          0.5557  ...          0            0         0           0   \n",
1436
       "2          0.5557  ...          0            0         0           0   \n",
1437
       "3          0.6442  ...          2            2         2           2   \n",
1438
       "4          0.6442  ...          2            2         2           2   \n",
1439
       "...           ...  ...        ...          ...       ...         ...   \n",
1440
       "1037      -0.3841  ...          0            0         0           0   \n",
1441
       "1038      -0.4479  ...         -1           -1        -1          -1   \n",
1442
       "1039      -0.2472  ...          0            0         0           0   \n",
1443
       "1040      -0.5892  ...          0            0         0           0   \n",
1444
       "1041      -0.1087  ...          0            0         0           0   \n",
1445
       "\n",
1446
       "      CYP27B1_cnv  METTL1_cnv  TSFM_cnv  AVIL_cnv  CTDSP2_cnv  RN7SKP65_cnv  \n",
1447
       "0               0           0         0         0           0             0  \n",
1448
       "1               0           0         0         0           0             0  \n",
1449
       "2               0           0         0         0           0             0  \n",
1450
       "3               2           2         2         2           2             2  \n",
1451
       "4               2           2         2         2           2             2  \n",
1452
       "...           ...         ...       ...       ...         ...           ...  \n",
1453
       "1037            0           0         0         0           0             0  \n",
1454
       "1038           -1          -1        -1        -1          -1            -1  \n",
1455
       "1039            0           0         0         0           0             0  \n",
1456
       "1040            0           0         0         0           0             0  \n",
1457
       "1041            0           0         0         0           0             0  \n",
1458
       "\n",
1459
       "[1042 rows x 2891 columns]"
1460
      ]
1461
     },
1462
     "execution_count": 17,
1463
     "metadata": {},
1464
     "output_type": "execute_result"
1465
    }
1466
   ],
1467
   "source": [
1468
    "_"
1469
   ]
1470
  },
1471
  {
1472
   "cell_type": "code",
1473
   "execution_count": null,
1474
   "metadata": {},
1475
   "outputs": [],
1476
   "source": []
1477
  },
1478
  {
1479
   "cell_type": "code",
1480
   "execution_count": 30,
1481
   "metadata": {},
1482
   "outputs": [],
1483
   "source": [
1484
    "from scipy import stats\n",
1485
    "\n",
1486
    "slide_df = pd.read_csv(fname)\n",
1487
    "rnaseq = slide_df[slide_df.columns[slide_df.columns.str.contains('_rnaseq')]]\n",
1488
    "# Keep the top_k RNA-seq columns with the largest median absolute deviation (MAD).\n",
1489
    "top_k=2000\n",
1490
    "mad = stats.median_abs_deviation(rnaseq, axis=0, nan_policy='omit')  # skip NaNs: argsort places NaN last, which would rank such columns as the largest MAD\n",
1491
    "sort_idx = np.argsort(mad)[-top_k:]\n",
1492
    "rnaseq = rnaseq[rnaseq.columns[sort_idx]]"
1493
   ]
1494
  },
1495
  {
1496
   "cell_type": "code",
1497
   "execution_count": 45,
1498
   "metadata": {},
1499
   "outputs": [
1500
    {
1501
     "data": {
1502
      "text/html": [
1503
       "<div>\n",
1504
       "<style scoped>\n",
1505
       "    .dataframe tbody tr th:only-of-type {\n",
1506
       "        vertical-align: middle;\n",
1507
       "    }\n",
1508
       "\n",
1509
       "    .dataframe tbody tr th {\n",
1510
       "        vertical-align: top;\n",
1511
       "    }\n",
1512
       "\n",
1513
       "    .dataframe thead th {\n",
1514
       "        text-align: right;\n",
1515
       "    }\n",
1516
       "</style>\n",
1517
       "<table border=\"1\" class=\"dataframe\">\n",
1518
       "  <thead>\n",
1519
       "    <tr style=\"text-align: right;\">\n",
1520
       "      <th></th>\n",
1521
       "      <th>CLASRP_rnaseq</th>\n",
1522
       "      <th>NBAS_rnaseq</th>\n",
1523
       "      <th>ARL2BP_rnaseq</th>\n",
1524
       "      <th>TMEM199_rnaseq</th>\n",
1525
       "      <th>TTC37_rnaseq</th>\n",
1526
       "      <th>GTF2I_rnaseq</th>\n",
1527
       "      <th>STYX_rnaseq</th>\n",
1528
       "      <th>TSR3_rnaseq</th>\n",
1529
       "      <th>SEC61A1_rnaseq</th>\n",
1530
       "      <th>TRRAP_rnaseq</th>\n",
1531
       "      <th>...</th>\n",
1532
       "      <th>GET4_rnaseq</th>\n",
1533
       "      <th>BRD9_rnaseq</th>\n",
1534
       "      <th>NSUN2_rnaseq</th>\n",
1535
       "      <th>PYCRL_rnaseq</th>\n",
1536
       "      <th>HGH1_rnaseq</th>\n",
1537
       "      <th>PRUNE_rnaseq</th>\n",
1538
       "      <th>MAF1_rnaseq</th>\n",
1539
       "      <th>CCDC127_rnaseq</th>\n",
1540
       "      <th>EXOC3_rnaseq</th>\n",
1541
       "      <th>PUF60_rnaseq</th>\n",
1542
       "    </tr>\n",
1543
       "  </thead>\n",
1544
       "  <tbody>\n",
1545
       "    <tr>\n",
1546
       "      <th>0</th>\n",
1547
       "      <td>-0.5874</td>\n",
1548
       "      <td>0.8371</td>\n",
1549
       "      <td>0.7587</td>\n",
1550
       "      <td>0.2188</td>\n",
1551
       "      <td>-0.4040</td>\n",
1552
       "      <td>2.3916</td>\n",
1553
       "      <td>-0.7124</td>\n",
1554
       "      <td>-1.0035</td>\n",
1555
       "      <td>0.7356</td>\n",
1556
       "      <td>-0.0249</td>\n",
1557
       "      <td>...</td>\n",
1558
       "      <td>-0.1915</td>\n",
1559
       "      <td>0.3503</td>\n",
1560
       "      <td>-1.1848</td>\n",
1561
       "      <td>-1.4121</td>\n",
1562
       "      <td>-0.2389</td>\n",
1563
       "      <td>5.0110</td>\n",
1564
       "      <td>1.4287</td>\n",
1565
       "      <td>-0.3531</td>\n",
1566
       "      <td>-0.8503</td>\n",
1567
       "      <td>1.2995</td>\n",
1568
       "    </tr>\n",
1569
       "    <tr>\n",
1570
       "      <th>1</th>\n",
1571
       "      <td>0.2811</td>\n",
1572
       "      <td>0.2232</td>\n",
1573
       "      <td>0.9000</td>\n",
1574
       "      <td>3.2327</td>\n",
1575
       "      <td>-0.0096</td>\n",
1576
       "      <td>-0.4464</td>\n",
1577
       "      <td>0.5219</td>\n",
1578
       "      <td>0.3927</td>\n",
1579
       "      <td>-0.3513</td>\n",
1580
       "      <td>-0.7917</td>\n",
1581
       "      <td>...</td>\n",
1582
       "      <td>0.7627</td>\n",
1583
       "      <td>-0.6092</td>\n",
1584
       "      <td>0.1291</td>\n",
1585
       "      <td>1.7400</td>\n",
1586
       "      <td>-0.0250</td>\n",
1587
       "      <td>-0.1531</td>\n",
1588
       "      <td>0.5344</td>\n",
1589
       "      <td>-0.0012</td>\n",
1590
       "      <td>-0.9606</td>\n",
1591
       "      <td>2.0233</td>\n",
1592
       "    </tr>\n",
1593
       "    <tr>\n",
1594
       "      <th>2</th>\n",
1595
       "      <td>1.5665</td>\n",
1596
       "      <td>-0.4726</td>\n",
1597
       "      <td>0.1693</td>\n",
1598
       "      <td>0.9845</td>\n",
1599
       "      <td>-0.6740</td>\n",
1600
       "      <td>-0.3986</td>\n",
1601
       "      <td>-0.2289</td>\n",
1602
       "      <td>-0.2791</td>\n",
1603
       "      <td>-0.0646</td>\n",
1604
       "      <td>-0.4431</td>\n",
1605
       "      <td>...</td>\n",
1606
       "      <td>4.4123</td>\n",
1607
       "      <td>1.4417</td>\n",
1608
       "      <td>-0.5196</td>\n",
1609
       "      <td>-1.3030</td>\n",
1610
       "      <td>-1.1373</td>\n",
1611
       "      <td>4.6041</td>\n",
1612
       "      <td>-1.0135</td>\n",
1613
       "      <td>1.3589</td>\n",
1614
       "      <td>2.6994</td>\n",
1615
       "      <td>-0.3068</td>\n",
1616
       "    </tr>\n",
1617
       "    <tr>\n",
1618
       "      <th>3</th>\n",
1619
       "      <td>0.6169</td>\n",
1620
       "      <td>-0.3266</td>\n",
1621
       "      <td>-0.3082</td>\n",
1622
       "      <td>-0.2220</td>\n",
1623
       "      <td>0.5305</td>\n",
1624
       "      <td>0.5360</td>\n",
1625
       "      <td>0.2785</td>\n",
1626
       "      <td>-1.0873</td>\n",
1627
       "      <td>-1.0712</td>\n",
1628
       "      <td>-0.3184</td>\n",
1629
       "      <td>...</td>\n",
1630
       "      <td>0.7665</td>\n",
1631
       "      <td>-0.3344</td>\n",
1632
       "      <td>0.0695</td>\n",
1633
       "      <td>0.0040</td>\n",
1634
       "      <td>0.2291</td>\n",
1635
       "      <td>3.6034</td>\n",
1636
       "      <td>0.1774</td>\n",
1637
       "      <td>-0.2766</td>\n",
1638
       "      <td>0.5080</td>\n",
1639
       "      <td>0.6178</td>\n",
1640
       "    </tr>\n",
1641
       "    <tr>\n",
1642
       "      <th>4</th>\n",
1643
       "      <td>0.6406</td>\n",
1644
       "      <td>-1.0330</td>\n",
1645
       "      <td>-0.6522</td>\n",
1646
       "      <td>0.1727</td>\n",
1647
       "      <td>-0.7455</td>\n",
1648
       "      <td>-0.6040</td>\n",
1649
       "      <td>0.2553</td>\n",
1650
       "      <td>1.0504</td>\n",
1651
       "      <td>1.0583</td>\n",
1652
       "      <td>-0.2884</td>\n",
1653
       "      <td>...</td>\n",
1654
       "      <td>3.3807</td>\n",
1655
       "      <td>0.3364</td>\n",
1656
       "      <td>-0.2792</td>\n",
1657
       "      <td>4.8566</td>\n",
1658
       "      <td>7.9296</td>\n",
1659
       "      <td>1.6951</td>\n",
1660
       "      <td>5.8943</td>\n",
1661
       "      <td>1.3652</td>\n",
1662
       "      <td>-0.8062</td>\n",
1663
       "      <td>9.2417</td>\n",
1664
       "    </tr>\n",
1665
       "    <tr>\n",
1666
       "      <th>...</th>\n",
1667
       "      <td>...</td>\n",
1668
       "      <td>...</td>\n",
1669
       "      <td>...</td>\n",
1670
       "      <td>...</td>\n",
1671
       "      <td>...</td>\n",
1672
       "      <td>...</td>\n",
1673
       "      <td>...</td>\n",
1674
       "      <td>...</td>\n",
1675
       "      <td>...</td>\n",
1676
       "      <td>...</td>\n",
1677
       "      <td>...</td>\n",
1678
       "      <td>...</td>\n",
1679
       "      <td>...</td>\n",
1680
       "      <td>...</td>\n",
1681
       "      <td>...</td>\n",
1682
       "      <td>...</td>\n",
1683
       "      <td>...</td>\n",
1684
       "      <td>...</td>\n",
1685
       "      <td>...</td>\n",
1686
       "      <td>...</td>\n",
1687
       "      <td>...</td>\n",
1688
       "    </tr>\n",
1689
       "    <tr>\n",
1690
       "      <th>511</th>\n",
1691
       "      <td>0.5640</td>\n",
1692
       "      <td>0.1255</td>\n",
1693
       "      <td>-1.3364</td>\n",
1694
       "      <td>-0.8430</td>\n",
1695
       "      <td>0.4406</td>\n",
1696
       "      <td>-0.9735</td>\n",
1697
       "      <td>-1.4547</td>\n",
1698
       "      <td>-0.1983</td>\n",
1699
       "      <td>-0.5259</td>\n",
1700
       "      <td>-0.2029</td>\n",
1701
       "      <td>...</td>\n",
1702
       "      <td>3.0311</td>\n",
1703
       "      <td>3.4963</td>\n",
1704
       "      <td>2.5079</td>\n",
1705
       "      <td>0.0556</td>\n",
1706
       "      <td>0.5691</td>\n",
1707
       "      <td>0.1104</td>\n",
1708
       "      <td>-0.3776</td>\n",
1709
       "      <td>2.6136</td>\n",
1710
       "      <td>3.4259</td>\n",
1711
       "      <td>-0.8442</td>\n",
1712
       "    </tr>\n",
1713
       "    <tr>\n",
1714
       "      <th>512</th>\n",
1715
       "      <td>1.2336</td>\n",
1716
       "      <td>0.1902</td>\n",
1717
       "      <td>-1.3500</td>\n",
1718
       "      <td>-0.3472</td>\n",
1719
       "      <td>0.4549</td>\n",
1720
       "      <td>-0.6806</td>\n",
1721
       "      <td>-1.1291</td>\n",
1722
       "      <td>1.0677</td>\n",
1723
       "      <td>1.1586</td>\n",
1724
       "      <td>0.1959</td>\n",
1725
       "      <td>...</td>\n",
1726
       "      <td>0.5573</td>\n",
1727
       "      <td>-0.7546</td>\n",
1728
       "      <td>0.8104</td>\n",
1729
       "      <td>0.1239</td>\n",
1730
       "      <td>0.0985</td>\n",
1731
       "      <td>2.9026</td>\n",
1732
       "      <td>0.0173</td>\n",
1733
       "      <td>0.3492</td>\n",
1734
       "      <td>2.5703</td>\n",
1735
       "      <td>1.0690</td>\n",
1736
       "    </tr>\n",
1737
       "    <tr>\n",
1738
       "      <th>513</th>\n",
1739
       "      <td>1.8148</td>\n",
1740
       "      <td>-0.8502</td>\n",
1741
       "      <td>-0.0628</td>\n",
1742
       "      <td>-0.7776</td>\n",
1743
       "      <td>0.6452</td>\n",
1744
       "      <td>-0.4622</td>\n",
1745
       "      <td>-1.2732</td>\n",
1746
       "      <td>1.8145</td>\n",
1747
       "      <td>-0.8767</td>\n",
1748
       "      <td>-0.2980</td>\n",
1749
       "      <td>...</td>\n",
1750
       "      <td>1.4671</td>\n",
1751
       "      <td>2.2343</td>\n",
1752
       "      <td>2.1466</td>\n",
1753
       "      <td>0.7868</td>\n",
1754
       "      <td>0.6893</td>\n",
1755
       "      <td>0.1571</td>\n",
1756
       "      <td>0.9686</td>\n",
1757
       "      <td>3.0870</td>\n",
1758
       "      <td>5.6169</td>\n",
1759
       "      <td>0.5300</td>\n",
1760
       "    </tr>\n",
1761
       "    <tr>\n",
1762
       "      <th>514</th>\n",
1763
       "      <td>0.0569</td>\n",
1764
       "      <td>-0.4511</td>\n",
1765
       "      <td>3.8784</td>\n",
1766
       "      <td>0.2609</td>\n",
1767
       "      <td>0.9393</td>\n",
1768
       "      <td>0.5776</td>\n",
1769
       "      <td>-0.9469</td>\n",
1770
       "      <td>2.9500</td>\n",
1771
       "      <td>-0.9261</td>\n",
1772
       "      <td>2.4218</td>\n",
1773
       "      <td>...</td>\n",
1774
       "      <td>5.0440</td>\n",
1775
       "      <td>0.0862</td>\n",
1776
       "      <td>0.1431</td>\n",
1777
       "      <td>-0.7761</td>\n",
1778
       "      <td>-0.8430</td>\n",
1779
       "      <td>0.2311</td>\n",
1780
       "      <td>-0.5913</td>\n",
1781
       "      <td>1.4958</td>\n",
1782
       "      <td>2.1736</td>\n",
1783
       "      <td>-0.5699</td>\n",
1784
       "    </tr>\n",
1785
       "    <tr>\n",
1786
       "      <th>515</th>\n",
1787
       "      <td>1.9203</td>\n",
1788
       "      <td>-0.0634</td>\n",
1789
       "      <td>-0.7142</td>\n",
1790
       "      <td>-1.3296</td>\n",
1791
       "      <td>0.3966</td>\n",
1792
       "      <td>1.0089</td>\n",
1793
       "      <td>-0.7931</td>\n",
1794
       "      <td>0.8513</td>\n",
1795
       "      <td>0.7651</td>\n",
1796
       "      <td>0.2217</td>\n",
1797
       "      <td>...</td>\n",
1798
       "      <td>2.2952</td>\n",
1799
       "      <td>0.3985</td>\n",
1800
       "      <td>-0.0072</td>\n",
1801
       "      <td>0.0683</td>\n",
1802
       "      <td>-0.8047</td>\n",
1803
       "      <td>-0.2712</td>\n",
1804
       "      <td>-0.5864</td>\n",
1805
       "      <td>-0.2393</td>\n",
1806
       "      <td>1.8585</td>\n",
1807
       "      <td>-1.0489</td>\n",
1808
       "    </tr>\n",
1809
       "  </tbody>\n",
1810
       "</table>\n",
1811
       "<p>516 rows × 2000 columns</p>\n",
1812
       "</div>"
1813
      ],
1814
      "text/plain": [
1815
       "     CLASRP_rnaseq  NBAS_rnaseq  ARL2BP_rnaseq  TMEM199_rnaseq  TTC37_rnaseq  \\\n",
1816
       "0          -0.5874       0.8371         0.7587          0.2188       -0.4040   \n",
1817
       "1           0.2811       0.2232         0.9000          3.2327       -0.0096   \n",
1818
       "2           1.5665      -0.4726         0.1693          0.9845       -0.6740   \n",
1819
       "3           0.6169      -0.3266        -0.3082         -0.2220        0.5305   \n",
1820
       "4           0.6406      -1.0330        -0.6522          0.1727       -0.7455   \n",
1821
       "..             ...          ...            ...             ...           ...   \n",
1822
       "511         0.5640       0.1255        -1.3364         -0.8430        0.4406   \n",
1823
       "512         1.2336       0.1902        -1.3500         -0.3472        0.4549   \n",
1824
       "513         1.8148      -0.8502        -0.0628         -0.7776        0.6452   \n",
1825
       "514         0.0569      -0.4511         3.8784          0.2609        0.9393   \n",
1826
       "515         1.9203      -0.0634        -0.7142         -1.3296        0.3966   \n",
1827
       "\n",
1828
       "     GTF2I_rnaseq  STYX_rnaseq  TSR3_rnaseq  SEC61A1_rnaseq  TRRAP_rnaseq  \\\n",
1829
       "0          2.3916      -0.7124      -1.0035          0.7356       -0.0249   \n",
1830
       "1         -0.4464       0.5219       0.3927         -0.3513       -0.7917   \n",
1831
       "2         -0.3986      -0.2289      -0.2791         -0.0646       -0.4431   \n",
1832
       "3          0.5360       0.2785      -1.0873         -1.0712       -0.3184   \n",
1833
       "4         -0.6040       0.2553       1.0504          1.0583       -0.2884   \n",
1834
       "..            ...          ...          ...             ...           ...   \n",
1835
       "511       -0.9735      -1.4547      -0.1983         -0.5259       -0.2029   \n",
1836
       "512       -0.6806      -1.1291       1.0677          1.1586        0.1959   \n",
1837
       "513       -0.4622      -1.2732       1.8145         -0.8767       -0.2980   \n",
1838
       "514        0.5776      -0.9469       2.9500         -0.9261        2.4218   \n",
1839
       "515        1.0089      -0.7931       0.8513          0.7651        0.2217   \n",
1840
       "\n",
1841
       "     ...  GET4_rnaseq  BRD9_rnaseq  NSUN2_rnaseq  PYCRL_rnaseq  HGH1_rnaseq  \\\n",
1842
       "0    ...      -0.1915       0.3503       -1.1848       -1.4121      -0.2389   \n",
1843
       "1    ...       0.7627      -0.6092        0.1291        1.7400      -0.0250   \n",
1844
       "2    ...       4.4123       1.4417       -0.5196       -1.3030      -1.1373   \n",
1845
       "3    ...       0.7665      -0.3344        0.0695        0.0040       0.2291   \n",
1846
       "4    ...       3.3807       0.3364       -0.2792        4.8566       7.9296   \n",
1847
       "..   ...          ...          ...           ...           ...          ...   \n",
1848
       "511  ...       3.0311       3.4963        2.5079        0.0556       0.5691   \n",
1849
       "512  ...       0.5573      -0.7546        0.8104        0.1239       0.0985   \n",
1850
       "513  ...       1.4671       2.2343        2.1466        0.7868       0.6893   \n",
1851
       "514  ...       5.0440       0.0862        0.1431       -0.7761      -0.8430   \n",
1852
       "515  ...       2.2952       0.3985       -0.0072        0.0683      -0.8047   \n",
1853
       "\n",
1854
       "     PRUNE_rnaseq  MAF1_rnaseq  CCDC127_rnaseq  EXOC3_rnaseq  PUF60_rnaseq  \n",
1855
       "0          5.0110       1.4287         -0.3531       -0.8503        1.2995  \n",
1856
       "1         -0.1531       0.5344         -0.0012       -0.9606        2.0233  \n",
1857
       "2          4.6041      -1.0135          1.3589        2.6994       -0.3068  \n",
1858
       "3          3.6034       0.1774         -0.2766        0.5080        0.6178  \n",
1859
       "4          1.6951       5.8943          1.3652       -0.8062        9.2417  \n",
1860
       "..            ...          ...             ...           ...           ...  \n",
1861
       "511        0.1104      -0.3776          2.6136        3.4259       -0.8442  \n",
1862
       "512        2.9026       0.0173          0.3492        2.5703        1.0690  \n",
1863
       "513        0.1571       0.9686          3.0870        5.6169        0.5300  \n",
1864
       "514        0.2311      -0.5913          1.4958        2.1736       -0.5699  \n",
1865
       "515       -0.2712      -0.5864         -0.2393        1.8585       -1.0489  \n",
1866
       "\n",
1867
       "[516 rows x 2000 columns]"
1868
      ]
1869
     },
1870
     "execution_count": 45,
1871
     "metadata": {},
1872
     "output_type": "execute_result"
1873
    }
1874
   ],
1875
   "source": [
1876
    "rnaseq"
1877
   ]
1878
  },
1879
  {
1880
   "cell_type": "code",
1881
   "execution_count": 51,
1882
   "metadata": {},
1883
   "outputs": [
1884
    {
1885
     "ename": "ModuleNotFoundError",
1886
     "evalue": "No module named 'torch'",
1887
     "output_type": "error",
1888
     "traceback": [
1889
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1890
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
1891
      "\u001b[0;32m<ipython-input-51-eb42ca6e4af3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1892
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'torch'"
1893
     ]
1894
    }
1895
   ],
1896
   "source": [
1897
    "import torch"
1898
   ]
1899
  },
1900
  {
1901
   "cell_type": "code",
1902
   "execution_count": null,
1903
   "metadata": {},
1904
   "outputs": [],
1905
   "source": []
1906
  },
1907
  {
1908
   "cell_type": "code",
1909
   "execution_count": 20,
1910
   "metadata": {},
1911
   "outputs": [
1912
    {
1913
     "data": {
1914
      "text/plain": [
1915
       "Index(['UBE2Q2P2_rnaseq', 'SSX9_rnaseq', 'CXORF67_rnaseq', 'EFCAB8_rnaseq',\n",
1916
       "       'SDR16C6P_rnaseq', 'EFCAB12_rnaseq', 'A1BG_rnaseq', 'A1CF_rnaseq',\n",
1917
       "       'RBFOX1_rnaseq', 'GGACT_rnaseq',\n",
1918
       "       ...\n",
1919
       "       'ZWINT_rnaseq', 'ZXDA_rnaseq', 'ZXDB_rnaseq', 'ZXDC_rnaseq',\n",
1920
       "       'ZYG11A_rnaseq', 'ZYG11B_rnaseq', 'ZYX_rnaseq', 'ZZEF1_rnaseq',\n",
1921
       "       'ZZZ3_rnaseq', 'TPTEP1_rnaseq'],\n",
1922
       "      dtype='object', length=18345)"
1923
      ]
1924
     },
1925
     "execution_count": 20,
1926
     "metadata": {},
1927
     "output_type": "execute_result"
1928
    }
1929
   ],
1930
   "source": [
1931
    "# List the column labels whose name contains '_rnaseq'.\n",
    "# Index the column Index (not the frame) with the mask: a column-length\n",
    "# boolean mask passed to slide_df[...] is treated as a row filter and fails.\n",
    "slide_df.columns[slide_df.columns.str.contains('_rnaseq')]"
1932
   ]
1933
  },
1934
  {
1935
   "cell_type": "code",
1936
   "execution_count": 24,
1937
   "metadata": {},
1938
   "outputs": [],
1939
   "source": [
1940
    "slide_df = pd.read_csv(fname)\n",
1941
    "# Keep only the columns whose name contains '_rnaseq' (all rows retained).\n",
    "slide_df = slide_df.loc[:, slide_df.columns.str.contains('_rnaseq')]"
1942
   ]
1943
  },
1944
  {
1945
   "cell_type": "code",
1946
   "execution_count": 25,
1947
   "metadata": {},
1948
   "outputs": [
1949
    {
1950
     "data": {
1951
      "text/html": [
1952
       "<div>\n",
1953
       "<style scoped>\n",
1954
       "    .dataframe tbody tr th:only-of-type {\n",
1955
       "        vertical-align: middle;\n",
1956
       "    }\n",
1957
       "\n",
1958
       "    .dataframe tbody tr th {\n",
1959
       "        vertical-align: top;\n",
1960
       "    }\n",
1961
       "\n",
1962
       "    .dataframe thead th {\n",
1963
       "        text-align: right;\n",
1964
       "    }\n",
1965
       "</style>\n",
1966
       "<table border=\"1\" class=\"dataframe\">\n",
1967
       "  <thead>\n",
1968
       "    <tr style=\"text-align: right;\">\n",
1969
       "      <th></th>\n",
1970
       "      <th>UBE2Q2P2_rnaseq</th>\n",
1971
       "      <th>SSX9_rnaseq</th>\n",
1972
       "      <th>CXORF67_rnaseq</th>\n",
1973
       "      <th>EFCAB8_rnaseq</th>\n",
1974
       "      <th>SDR16C6P_rnaseq</th>\n",
1975
       "      <th>EFCAB12_rnaseq</th>\n",
1976
       "      <th>A1BG_rnaseq</th>\n",
1977
       "      <th>A1CF_rnaseq</th>\n",
1978
       "      <th>RBFOX1_rnaseq</th>\n",
1979
       "      <th>GGACT_rnaseq</th>\n",
1980
       "      <th>...</th>\n",
1981
       "      <th>ZWINT_rnaseq</th>\n",
1982
       "      <th>ZXDA_rnaseq</th>\n",
1983
       "      <th>ZXDB_rnaseq</th>\n",
1984
       "      <th>ZXDC_rnaseq</th>\n",
1985
       "      <th>ZYG11A_rnaseq</th>\n",
1986
       "      <th>ZYG11B_rnaseq</th>\n",
1987
       "      <th>ZYX_rnaseq</th>\n",
1988
       "      <th>ZZEF1_rnaseq</th>\n",
1989
       "      <th>ZZZ3_rnaseq</th>\n",
1990
       "      <th>TPTEP1_rnaseq</th>\n",
1991
       "    </tr>\n",
1992
       "  </thead>\n",
1993
       "  <tbody>\n",
1994
       "    <tr>\n",
1995
       "      <th>0</th>\n",
1996
       "      <td>-0.3291</td>\n",
1997
       "      <td>-0.1379</td>\n",
1998
       "      <td>-0.1805</td>\n",
1999
       "      <td>-0.0869</td>\n",
2000
       "      <td>-0.3317</td>\n",
2001
       "      <td>-0.1661</td>\n",
2002
       "      <td>-0.1483</td>\n",
2003
       "      <td>-0.1371</td>\n",
2004
       "      <td>-0.2260</td>\n",
2005
       "      <td>-0.5346</td>\n",
2006
       "      <td>...</td>\n",
2007
       "      <td>-0.7082</td>\n",
2008
       "      <td>0.6149</td>\n",
2009
       "      <td>0.5725</td>\n",
2010
       "      <td>0.2889</td>\n",
2011
       "      <td>-0.5255</td>\n",
2012
       "      <td>-0.2205</td>\n",
2013
       "      <td>-0.7847</td>\n",
2014
       "      <td>-0.2296</td>\n",
2015
       "      <td>-0.0897</td>\n",
2016
       "      <td>0.1457</td>\n",
2017
       "    </tr>\n",
2018
       "    <tr>\n",
2019
       "      <th>1</th>\n",
2020
       "      <td>-0.8531</td>\n",
2021
       "      <td>-0.1379</td>\n",
2022
       "      <td>-0.1805</td>\n",
2023
       "      <td>-0.2629</td>\n",
2024
       "      <td>-0.3317</td>\n",
2025
       "      <td>-0.2317</td>\n",
2026
       "      <td>-0.5528</td>\n",
2027
       "      <td>-0.1476</td>\n",
2028
       "      <td>-0.2508</td>\n",
2029
       "      <td>0.6921</td>\n",
2030
       "      <td>...</td>\n",
2031
       "      <td>0.9291</td>\n",
2032
       "      <td>0.5118</td>\n",
2033
       "      <td>-0.1673</td>\n",
2034
       "      <td>-0.8006</td>\n",
2035
       "      <td>-0.4348</td>\n",
2036
       "      <td>-1.7113</td>\n",
2037
       "      <td>0.7466</td>\n",
2038
       "      <td>-0.1563</td>\n",
2039
       "      <td>-0.9102</td>\n",
2040
       "      <td>-0.5005</td>\n",
2041
       "    </tr>\n",
2042
       "    <tr>\n",
2043
       "      <th>2</th>\n",
2044
       "      <td>-0.7262</td>\n",
2045
       "      <td>0.3883</td>\n",
2046
       "      <td>0.4908</td>\n",
2047
       "      <td>-0.0666</td>\n",
2048
       "      <td>-0.3317</td>\n",
2049
       "      <td>-0.3948</td>\n",
2050
       "      <td>0.0021</td>\n",
2051
       "      <td>-0.1476</td>\n",
2052
       "      <td>-0.2508</td>\n",
2053
       "      <td>-0.0800</td>\n",
2054
       "      <td>...</td>\n",
2055
       "      <td>0.2957</td>\n",
2056
       "      <td>-0.4399</td>\n",
2057
       "      <td>-0.2751</td>\n",
2058
       "      <td>-0.4668</td>\n",
2059
       "      <td>0.1222</td>\n",
2060
       "      <td>0.3555</td>\n",
2061
       "      <td>1.4078</td>\n",
2062
       "      <td>-0.1592</td>\n",
2063
       "      <td>-0.2276</td>\n",
2064
       "      <td>-0.3931</td>\n",
2065
       "    </tr>\n",
2066
       "    <tr>\n",
2067
       "      <th>3</th>\n",
2068
       "      <td>-1.0590</td>\n",
2069
       "      <td>-0.1379</td>\n",
2070
       "      <td>-0.1805</td>\n",
2071
       "      <td>-0.0959</td>\n",
2072
       "      <td>-0.3317</td>\n",
2073
       "      <td>-0.3372</td>\n",
2074
       "      <td>-0.1061</td>\n",
2075
       "      <td>-0.1476</td>\n",
2076
       "      <td>-0.2508</td>\n",
2077
       "      <td>-0.5641</td>\n",
2078
       "      <td>...</td>\n",
2079
       "      <td>-0.9962</td>\n",
2080
       "      <td>1.4844</td>\n",
2081
       "      <td>0.9748</td>\n",
2082
       "      <td>0.7481</td>\n",
2083
       "      <td>-0.7049</td>\n",
2084
       "      <td>-0.2617</td>\n",
2085
       "      <td>-0.2934</td>\n",
2086
       "      <td>1.1243</td>\n",
2087
       "      <td>0.0823</td>\n",
2088
       "      <td>0.8831</td>\n",
2089
       "    </tr>\n",
2090
       "    <tr>\n",
2091
       "      <th>4</th>\n",
2092
       "      <td>-0.7257</td>\n",
2093
       "      <td>-0.1379</td>\n",
2094
       "      <td>-0.1805</td>\n",
2095
       "      <td>-0.1756</td>\n",
2096
       "      <td>-0.3317</td>\n",
2097
       "      <td>-0.3778</td>\n",
2098
       "      <td>0.1119</td>\n",
2099
       "      <td>-0.1476</td>\n",
2100
       "      <td>1.2515</td>\n",
2101
       "      <td>-1.0113</td>\n",
2102
       "      <td>...</td>\n",
2103
       "      <td>1.7870</td>\n",
2104
       "      <td>-0.0462</td>\n",
2105
       "      <td>1.8418</td>\n",
2106
       "      <td>-0.9922</td>\n",
2107
       "      <td>-0.7090</td>\n",
2108
       "      <td>-1.0285</td>\n",
2109
       "      <td>0.6567</td>\n",
2110
       "      <td>-1.0377</td>\n",
2111
       "      <td>-1.1277</td>\n",
2112
       "      <td>-0.5026</td>\n",
2113
       "    </tr>\n",
2114
       "    <tr>\n",
2115
       "      <th>...</th>\n",
2116
       "      <td>...</td>\n",
2117
       "      <td>...</td>\n",
2118
       "      <td>...</td>\n",
2119
       "      <td>...</td>\n",
2120
       "      <td>...</td>\n",
2121
       "      <td>...</td>\n",
2122
       "      <td>...</td>\n",
2123
       "      <td>...</td>\n",
2124
       "      <td>...</td>\n",
2125
       "      <td>...</td>\n",
2126
       "      <td>...</td>\n",
2127
       "      <td>...</td>\n",
2128
       "      <td>...</td>\n",
2129
       "      <td>...</td>\n",
2130
       "      <td>...</td>\n",
2131
       "      <td>...</td>\n",
2132
       "      <td>...</td>\n",
2133
       "      <td>...</td>\n",
2134
       "      <td>...</td>\n",
2135
       "      <td>...</td>\n",
2136
       "      <td>...</td>\n",
2137
       "    </tr>\n",
2138
       "    <tr>\n",
2139
       "      <th>511</th>\n",
2140
       "      <td>0.5308</td>\n",
2141
       "      <td>-0.1379</td>\n",
2142
       "      <td>-0.1805</td>\n",
2143
       "      <td>-0.2629</td>\n",
2144
       "      <td>-0.3317</td>\n",
2145
       "      <td>-0.2827</td>\n",
2146
       "      <td>-0.6045</td>\n",
2147
       "      <td>-0.1476</td>\n",
2148
       "      <td>-0.2508</td>\n",
2149
       "      <td>-0.2014</td>\n",
2150
       "      <td>...</td>\n",
2151
       "      <td>-0.5331</td>\n",
2152
       "      <td>-0.4205</td>\n",
2153
       "      <td>-0.3773</td>\n",
2154
       "      <td>0.0551</td>\n",
2155
       "      <td>-0.5660</td>\n",
2156
       "      <td>-0.5123</td>\n",
2157
       "      <td>0.1254</td>\n",
2158
       "      <td>0.2124</td>\n",
2159
       "      <td>-0.6375</td>\n",
2160
       "      <td>1.4712</td>\n",
2161
       "    </tr>\n",
2162
       "    <tr>\n",
2163
       "      <th>512</th>\n",
2164
       "      <td>-0.5021</td>\n",
2165
       "      <td>-0.1379</td>\n",
2166
       "      <td>-0.0120</td>\n",
2167
       "      <td>1.7408</td>\n",
2168
       "      <td>-0.3317</td>\n",
2169
       "      <td>-0.2152</td>\n",
2170
       "      <td>0.7495</td>\n",
2171
       "      <td>1.8708</td>\n",
2172
       "      <td>-0.1178</td>\n",
2173
       "      <td>-1.3502</td>\n",
2174
       "      <td>...</td>\n",
2175
       "      <td>-0.3624</td>\n",
2176
       "      <td>0.0588</td>\n",
2177
       "      <td>-0.1157</td>\n",
2178
       "      <td>1.2831</td>\n",
2179
       "      <td>-0.0555</td>\n",
2180
       "      <td>-0.3620</td>\n",
2181
       "      <td>-0.4242</td>\n",
2182
       "      <td>1.6937</td>\n",
2183
       "      <td>-0.4990</td>\n",
2184
       "      <td>2.2944</td>\n",
2185
       "    </tr>\n",
2186
       "    <tr>\n",
2187
       "      <th>513</th>\n",
2188
       "      <td>5.2714</td>\n",
2189
       "      <td>-0.1379</td>\n",
2190
       "      <td>-0.1805</td>\n",
2191
       "      <td>0.1753</td>\n",
2192
       "      <td>-0.3317</td>\n",
2193
       "      <td>-0.2325</td>\n",
2194
       "      <td>0.5863</td>\n",
2195
       "      <td>-0.1476</td>\n",
2196
       "      <td>-0.0185</td>\n",
2197
       "      <td>-0.3172</td>\n",
2198
       "      <td>...</td>\n",
2199
       "      <td>-0.9598</td>\n",
2200
       "      <td>-1.5823</td>\n",
2201
       "      <td>-1.3015</td>\n",
2202
       "      <td>0.4371</td>\n",
2203
       "      <td>-0.6739</td>\n",
2204
       "      <td>-1.4417</td>\n",
2205
       "      <td>-0.9613</td>\n",
2206
       "      <td>0.4167</td>\n",
2207
       "      <td>-1.4631</td>\n",
2208
       "      <td>-0.5035</td>\n",
2209
       "    </tr>\n",
2210
       "    <tr>\n",
2211
       "      <th>514</th>\n",
2212
       "      <td>0.6290</td>\n",
2213
       "      <td>-0.1379</td>\n",
2214
       "      <td>0.1131</td>\n",
2215
       "      <td>-0.0667</td>\n",
2216
       "      <td>1.5316</td>\n",
2217
       "      <td>-0.3634</td>\n",
2218
       "      <td>0.3730</td>\n",
2219
       "      <td>-0.1476</td>\n",
2220
       "      <td>-0.2361</td>\n",
2221
       "      <td>-1.7106</td>\n",
2222
       "      <td>...</td>\n",
2223
       "      <td>-0.5337</td>\n",
2224
       "      <td>4.2234</td>\n",
2225
       "      <td>0.9716</td>\n",
2226
       "      <td>0.6699</td>\n",
2227
       "      <td>-0.8134</td>\n",
2228
       "      <td>-0.2453</td>\n",
2229
       "      <td>0.2731</td>\n",
2230
       "      <td>0.6346</td>\n",
2231
       "      <td>-1.1963</td>\n",
2232
       "      <td>0.1686</td>\n",
2233
       "    </tr>\n",
2234
       "    <tr>\n",
2235
       "      <th>515</th>\n",
2236
       "      <td>-0.6140</td>\n",
2237
       "      <td>-0.1379</td>\n",
2238
       "      <td>0.0493</td>\n",
2239
       "      <td>0.3641</td>\n",
2240
       "      <td>-0.3317</td>\n",
2241
       "      <td>-0.0722</td>\n",
2242
       "      <td>-0.1809</td>\n",
2243
       "      <td>-0.1263</td>\n",
2244
       "      <td>-0.2508</td>\n",
2245
       "      <td>0.1358</td>\n",
2246
       "      <td>...</td>\n",
2247
       "      <td>-1.0456</td>\n",
2248
       "      <td>0.5245</td>\n",
2249
       "      <td>-0.1738</td>\n",
2250
       "      <td>2.4043</td>\n",
2251
       "      <td>-0.7251</td>\n",
2252
       "      <td>-1.0053</td>\n",
2253
       "      <td>0.7014</td>\n",
2254
       "      <td>0.7755</td>\n",
2255
       "      <td>-1.0308</td>\n",
2256
       "      <td>0.6609</td>\n",
2257
       "    </tr>\n",
2258
       "  </tbody>\n",
2259
       "</table>\n",
2260
       "<p>516 rows × 18345 columns</p>\n",
2261
       "</div>"
2262
      ],
2263
      "text/plain": [
2264
       "     UBE2Q2P2_rnaseq  SSX9_rnaseq  CXORF67_rnaseq  EFCAB8_rnaseq  \\\n",
2265
       "0            -0.3291      -0.1379         -0.1805        -0.0869   \n",
2266
       "1            -0.8531      -0.1379         -0.1805        -0.2629   \n",
2267
       "2            -0.7262       0.3883          0.4908        -0.0666   \n",
2268
       "3            -1.0590      -0.1379         -0.1805        -0.0959   \n",
2269
       "4            -0.7257      -0.1379         -0.1805        -0.1756   \n",
2270
       "..               ...          ...             ...            ...   \n",
2271
       "511           0.5308      -0.1379         -0.1805        -0.2629   \n",
2272
       "512          -0.5021      -0.1379         -0.0120         1.7408   \n",
2273
       "513           5.2714      -0.1379         -0.1805         0.1753   \n",
2274
       "514           0.6290      -0.1379          0.1131        -0.0667   \n",
2275
       "515          -0.6140      -0.1379          0.0493         0.3641   \n",
2276
       "\n",
2277
       "     SDR16C6P_rnaseq  EFCAB12_rnaseq  A1BG_rnaseq  A1CF_rnaseq  RBFOX1_rnaseq  \\\n",
2278
       "0            -0.3317         -0.1661      -0.1483      -0.1371        -0.2260   \n",
2279
       "1            -0.3317         -0.2317      -0.5528      -0.1476        -0.2508   \n",
2280
       "2            -0.3317         -0.3948       0.0021      -0.1476        -0.2508   \n",
2281
       "3            -0.3317         -0.3372      -0.1061      -0.1476        -0.2508   \n",
2282
       "4            -0.3317         -0.3778       0.1119      -0.1476         1.2515   \n",
2283
       "..               ...             ...          ...          ...            ...   \n",
2284
       "511          -0.3317         -0.2827      -0.6045      -0.1476        -0.2508   \n",
2285
       "512          -0.3317         -0.2152       0.7495       1.8708        -0.1178   \n",
2286
       "513          -0.3317         -0.2325       0.5863      -0.1476        -0.0185   \n",
2287
       "514           1.5316         -0.3634       0.3730      -0.1476        -0.2361   \n",
2288
       "515          -0.3317         -0.0722      -0.1809      -0.1263        -0.2508   \n",
2289
       "\n",
2290
       "     GGACT_rnaseq  ...  ZWINT_rnaseq  ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  \\\n",
2291
       "0         -0.5346  ...       -0.7082       0.6149       0.5725       0.2889   \n",
2292
       "1          0.6921  ...        0.9291       0.5118      -0.1673      -0.8006   \n",
2293
       "2         -0.0800  ...        0.2957      -0.4399      -0.2751      -0.4668   \n",
2294
       "3         -0.5641  ...       -0.9962       1.4844       0.9748       0.7481   \n",
2295
       "4         -1.0113  ...        1.7870      -0.0462       1.8418      -0.9922   \n",
2296
       "..            ...  ...           ...          ...          ...          ...   \n",
2297
       "511       -0.2014  ...       -0.5331      -0.4205      -0.3773       0.0551   \n",
2298
       "512       -1.3502  ...       -0.3624       0.0588      -0.1157       1.2831   \n",
2299
       "513       -0.3172  ...       -0.9598      -1.5823      -1.3015       0.4371   \n",
2300
       "514       -1.7106  ...       -0.5337       4.2234       0.9716       0.6699   \n",
2301
       "515        0.1358  ...       -1.0456       0.5245      -0.1738       2.4043   \n",
2302
       "\n",
2303
       "     ZYG11A_rnaseq  ZYG11B_rnaseq  ZYX_rnaseq  ZZEF1_rnaseq  ZZZ3_rnaseq  \\\n",
2304
       "0          -0.5255        -0.2205     -0.7847       -0.2296      -0.0897   \n",
2305
       "1          -0.4348        -1.7113      0.7466       -0.1563      -0.9102   \n",
2306
       "2           0.1222         0.3555      1.4078       -0.1592      -0.2276   \n",
2307
       "3          -0.7049        -0.2617     -0.2934        1.1243       0.0823   \n",
2308
       "4          -0.7090        -1.0285      0.6567       -1.0377      -1.1277   \n",
2309
       "..             ...            ...         ...           ...          ...   \n",
2310
       "511        -0.5660        -0.5123      0.1254        0.2124      -0.6375   \n",
2311
       "512        -0.0555        -0.3620     -0.4242        1.6937      -0.4990   \n",
2312
       "513        -0.6739        -1.4417     -0.9613        0.4167      -1.4631   \n",
2313
       "514        -0.8134        -0.2453      0.2731        0.6346      -1.1963   \n",
2314
       "515        -0.7251        -1.0053      0.7014        0.7755      -1.0308   \n",
2315
       "\n",
2316
       "     TPTEP1_rnaseq  \n",
2317
       "0           0.1457  \n",
2318
       "1          -0.5005  \n",
2319
       "2          -0.3931  \n",
2320
       "3           0.8831  \n",
2321
       "4          -0.5026  \n",
2322
       "..             ...  \n",
2323
       "511         1.4712  \n",
2324
       "512         2.2944  \n",
2325
       "513        -0.5035  \n",
2326
       "514         0.1686  \n",
2327
       "515         0.6609  \n",
2328
       "\n",
2329
       "[516 rows x 18345 columns]"
2330
      ]
2331
     },
2332
     "execution_count": 25,
2333
     "metadata": {},
2334
     "output_type": "execute_result"
2335
    }
2336
   ],
2337
   "source": [
2338
    "slide_df"
2339
   ]
2340
  },
2341
  {
2342
   "cell_type": "code",
2343
   "execution_count": 18,
2344
   "metadata": {},
2345
   "outputs": [
2346
    {
2347
     "name": "stderr",
2348
     "output_type": "stream",
2349
     "text": [
2350
      "<ipython-input-18-1ae2fdcf544f>:14: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n",
2351
      "  return pd.Series(list(set(s1) & set(s2)))\n",
2352
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False.\n",
2353
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n",
2354
      "/home/mahmoodlab/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3071: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.\n",
2355
      "  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
2356
     ]
2357
    }
2358
   ],
2359
   "source": [
2360
    "### Snippet for creating genomic signatures\n",
2361
    "for fname in os.listdir('./'):\n",
2362
    "    if fname.endswith('.csv.zip'):\n",
2363
    "        slide_df = pd.read_csv(fname)\n",
2364
    "        \n",
2365
    "        signatures = pd.read_csv('./signatures.csv')\n",
2366
    "        omic_from_signatures = []\n",
2367
    "        for col in signatures.columns:\n",
2368
    "            omic = signatures[col].dropna().unique()\n",
2369
    "            omic_from_signatures.append(omic)\n",
2370
    "\n",
2371
    "        omic_from_signatures = np.concatenate(omic_from_signatures)\n",
2372
    "\n",
2373
    "        def series_intersection(s1, s2):\n",
2374
    "            return pd.Series(list(set(s1) & set(s2)))\n",
2375
    "\n",
2376
    "        rnaseq_overlap = np.concatenate([omic_from_signatures+mode for mode in ['_rnaseq']])\n",
2377
    "        rnaseq_overlap = sorted(series_intersection(rnaseq_overlap, slide_df.columns))\n",
2378
    "        genomics_mut_cnv = list(slide_df.columns[slide_df.columns.str.contains('_mut|_cnv')])\n",
2379
    "        \n",
2380
    "        slide_df[list(slide_df.columns[:9]) + rnaseq_overlap + genomics_mut_cnv].to_csv('../dataset_csv_mutsigdb/%s' % fname)"
2381
   ]
2382
  },
2383
  {
2384
   "cell_type": "code",
2385
   "execution_count": null,
2386
   "metadata": {},
2387
   "outputs": [],
2388
   "source": []
2389
  },
2390
  {
2391
   "cell_type": "code",
2392
   "execution_count": null,
2393
   "metadata": {},
2394
   "outputs": [],
2395
   "source": []
2396
  },
2397
  {
2398
   "cell_type": "code",
2399
   "execution_count": null,
2400
   "metadata": {},
2401
   "outputs": [],
2402
   "source": []
2403
  },
2404
  {
2405
   "cell_type": "code",
2406
   "execution_count": null,
2407
   "metadata": {},
2408
   "outputs": [],
2409
   "source": []
2410
  },
2411
  {
2412
   "cell_type": "code",
2413
   "execution_count": null,
2414
   "metadata": {},
2415
   "outputs": [],
2416
   "source": []
2417
  },
2418
  {
2419
   "cell_type": "code",
2420
   "execution_count": 36,
2421
   "metadata": {},
2422
   "outputs": [],
2423
   "source": [
2424
    "omic_from_signatures = []\n",
2425
    "for col in signatures.columns:\n",
2426
    "    omic = signatures[col].dropna().unique()\n",
2427
    "    omic_from_signatures.append(omic)\n",
2428
    "\n",
2429
    "omic_from_signatures = np.concatenate(omic_from_signatures)\n"
2430
   ]
2431
  },
2432
  {
2433
   "cell_type": "code",
2434
   "execution_count": 7,
2435
   "metadata": {},
2436
   "outputs": [
2437
    {
2438
     "name": "stdout",
2439
     "output_type": "stream",
2440
     "text": [
2441
      "Tumor Suppressor Genes Embedding Size: 84\n",
2442
      "Oncogenes Embedding Size: 314\n",
2443
      "Protein Kinases Embedding Size: 498\n",
2444
      "Cell Differentiation Markers Embedding Size: 415\n",
2445
      "Transcription Factors Embedding Size: 1396\n",
2446
      "Cytokines and Growth Factors Embedding Size: 428\n"
2447
     ]
2448
    }
2449
   ],
2450
   "source": [
2451
    "\n",
2452
    "def series_intersection(s1, s2):\n",
2453
    "    return pd.Series(list(set(s1) & set(s2)))\n",
2454
    "\n",
2455
    "sig_names = []\n",
2456
    "for col in signatures.columns:\n",
2457
    "    sig = signatures[col].dropna().unique()\n",
2458
    "    sig = np.concatenate([sig+mode for mode in ['_mut', '_cnv', '_rnaseq']])\n",
2459
    "    sig = sorted(series_intersection(sig, genomic_features.columns))\n",
2460
    "    sig_names.append(sig)\n",
2461
    "    print('%s Embedding Size: %d' % (col, len(sig)))\n",
2462
    "sig_sizes = [len(sig) for sig in sig_names]"
2463
   ]
2464
  },
2465
  {
2466
   "cell_type": "code",
2467
   "execution_count": 21,
2468
   "metadata": {},
2469
   "outputs": [
2470
    {
2471
     "data": {
2472
      "text/plain": [
2473
       "['IFNA10_cnv',\n",
2474
       " 'IFNA13_cnv',\n",
2475
       " 'IFNA14_cnv',\n",
2476
       " 'IFNA16_cnv',\n",
2477
       " 'IFNA17_cnv',\n",
2478
       " 'IFNA1_cnv',\n",
2479
       " 'IFNA21_cnv',\n",
2480
       " 'IFNA2_cnv',\n",
2481
       " 'IFNA4_cnv',\n",
2482
       " 'IFNA5_cnv',\n",
2483
       " 'IFNA6_cnv',\n",
2484
       " 'IFNA7_cnv',\n",
2485
       " 'IFNA8_cnv',\n",
2486
       " 'IFNB1_cnv',\n",
2487
       " 'IFNE_cnv',\n",
2488
       " 'IFNW1_cnv',\n",
2489
       " 'PDGFRA_cnv']"
2490
      ]
2491
     },
2492
     "execution_count": 21,
2493
     "metadata": {},
2494
     "output_type": "execute_result"
2495
    }
2496
   ],
2497
   "source": [
2498
    "sig"
2499
   ]
2500
  },
2501
  {
2502
   "cell_type": "code",
2503
   "execution_count": 434,
2504
   "metadata": {},
2505
   "outputs": [
2506
    {
2507
     "data": {
2508
      "text/html": [
2509
       "<div>\n",
2510
       "<style scoped>\n",
2511
       "    .dataframe tbody tr th:only-of-type {\n",
2512
       "        vertical-align: middle;\n",
2513
       "    }\n",
2514
       "\n",
2515
       "    .dataframe tbody tr th {\n",
2516
       "        vertical-align: top;\n",
2517
       "    }\n",
2518
       "\n",
2519
       "    .dataframe thead th {\n",
2520
       "        text-align: right;\n",
2521
       "    }\n",
2522
       "</style>\n",
2523
       "<table border=\"1\" class=\"dataframe\">\n",
2524
       "  <thead>\n",
2525
       "    <tr style=\"text-align: right;\">\n",
2526
       "      <th></th>\n",
2527
       "      <th>NDUFS5_cnv</th>\n",
2528
       "      <th>MACF1_cnv</th>\n",
2529
       "      <th>RNA5SP44_cnv</th>\n",
2530
       "      <th>KIAA0754_cnv</th>\n",
2531
       "      <th>BMP8A_cnv</th>\n",
2532
       "      <th>PABPC4_cnv</th>\n",
2533
       "      <th>SNORA55_cnv</th>\n",
2534
       "      <th>HEYL_cnv</th>\n",
2535
       "      <th>HPCAL4_cnv</th>\n",
2536
       "      <th>NT5C1A_cnv</th>\n",
2537
       "      <th>...</th>\n",
2538
       "      <th>ZWINT_rnaseq</th>\n",
2539
       "      <th>ZXDA_rnaseq</th>\n",
2540
       "      <th>ZXDB_rnaseq</th>\n",
2541
       "      <th>ZXDC_rnaseq</th>\n",
2542
       "      <th>ZYG11A_rnaseq</th>\n",
2543
       "      <th>ZYG11B_rnaseq</th>\n",
2544
       "      <th>ZYX_rnaseq</th>\n",
2545
       "      <th>ZZEF1_rnaseq</th>\n",
2546
       "      <th>ZZZ3_rnaseq</th>\n",
2547
       "      <th>TPTEP1_rnaseq</th>\n",
2548
       "    </tr>\n",
2549
       "  </thead>\n",
2550
       "  <tbody>\n",
2551
       "    <tr>\n",
2552
       "      <th>0</th>\n",
2553
       "      <td>-1</td>\n",
2554
       "      <td>-1</td>\n",
2555
       "      <td>-1</td>\n",
2556
       "      <td>-1</td>\n",
2557
       "      <td>-1</td>\n",
2558
       "      <td>-1</td>\n",
2559
       "      <td>-1</td>\n",
2560
       "      <td>-1</td>\n",
2561
       "      <td>-1</td>\n",
2562
       "      <td>-1</td>\n",
2563
       "      <td>...</td>\n",
2564
       "      <td>-0.8388</td>\n",
2565
       "      <td>4.1375</td>\n",
2566
       "      <td>3.9664</td>\n",
2567
       "      <td>1.8437</td>\n",
2568
       "      <td>-0.3959</td>\n",
2569
       "      <td>-0.2561</td>\n",
2570
       "      <td>-0.2866</td>\n",
2571
       "      <td>1.8770</td>\n",
2572
       "      <td>-0.3179</td>\n",
2573
       "      <td>-0.3633</td>\n",
2574
       "    </tr>\n",
2575
       "    <tr>\n",
2576
       "      <th>1</th>\n",
2577
       "      <td>2</td>\n",
2578
       "      <td>2</td>\n",
2579
       "      <td>2</td>\n",
2580
       "      <td>2</td>\n",
2581
       "      <td>2</td>\n",
2582
       "      <td>2</td>\n",
2583
       "      <td>2</td>\n",
2584
       "      <td>2</td>\n",
2585
       "      <td>2</td>\n",
2586
       "      <td>2</td>\n",
2587
       "      <td>...</td>\n",
2588
       "      <td>-0.1083</td>\n",
2589
       "      <td>0.3393</td>\n",
2590
       "      <td>0.2769</td>\n",
2591
       "      <td>1.7320</td>\n",
2592
       "      <td>-0.0975</td>\n",
2593
       "      <td>2.6955</td>\n",
2594
       "      <td>-0.6741</td>\n",
2595
       "      <td>1.0323</td>\n",
2596
       "      <td>1.2766</td>\n",
2597
       "      <td>-0.3982</td>\n",
2598
       "    </tr>\n",
2599
       "    <tr>\n",
2600
       "      <th>2</th>\n",
2601
       "      <td>0</td>\n",
2602
       "      <td>0</td>\n",
2603
       "      <td>0</td>\n",
2604
       "      <td>0</td>\n",
2605
       "      <td>0</td>\n",
2606
       "      <td>0</td>\n",
2607
       "      <td>0</td>\n",
2608
       "      <td>0</td>\n",
2609
       "      <td>0</td>\n",
2610
       "      <td>0</td>\n",
2611
       "      <td>...</td>\n",
2612
       "      <td>-0.4155</td>\n",
2613
       "      <td>1.6846</td>\n",
2614
       "      <td>0.7711</td>\n",
2615
       "      <td>-0.3061</td>\n",
2616
       "      <td>-0.5016</td>\n",
2617
       "      <td>2.8548</td>\n",
2618
       "      <td>-0.6171</td>\n",
2619
       "      <td>-0.8608</td>\n",
2620
       "      <td>-0.0486</td>\n",
2621
       "      <td>-0.3962</td>\n",
2622
       "    </tr>\n",
2623
       "    <tr>\n",
2624
       "      <th>3</th>\n",
2625
       "      <td>0</td>\n",
2626
       "      <td>0</td>\n",
2627
       "      <td>0</td>\n",
2628
       "      <td>0</td>\n",
2629
       "      <td>0</td>\n",
2630
       "      <td>0</td>\n",
2631
       "      <td>0</td>\n",
2632
       "      <td>0</td>\n",
2633
       "      <td>0</td>\n",
2634
       "      <td>0</td>\n",
2635
       "      <td>...</td>\n",
2636
       "      <td>-0.8143</td>\n",
2637
       "      <td>0.8344</td>\n",
2638
       "      <td>1.5075</td>\n",
2639
       "      <td>3.6068</td>\n",
2640
       "      <td>-0.5004</td>\n",
2641
       "      <td>-0.0747</td>\n",
2642
       "      <td>-0.2185</td>\n",
2643
       "      <td>-0.4379</td>\n",
2644
       "      <td>1.6913</td>\n",
2645
       "      <td>1.7748</td>\n",
2646
       "    </tr>\n",
2647
       "    <tr>\n",
2648
       "      <th>4</th>\n",
2649
       "      <td>0</td>\n",
2650
       "      <td>0</td>\n",
2651
       "      <td>0</td>\n",
2652
       "      <td>0</td>\n",
2653
       "      <td>0</td>\n",
2654
       "      <td>0</td>\n",
2655
       "      <td>0</td>\n",
2656
       "      <td>0</td>\n",
2657
       "      <td>0</td>\n",
2658
       "      <td>0</td>\n",
2659
       "      <td>...</td>\n",
2660
       "      <td>0.0983</td>\n",
2661
       "      <td>-0.7908</td>\n",
2662
       "      <td>-0.0053</td>\n",
2663
       "      <td>-0.0643</td>\n",
2664
       "      <td>-0.3706</td>\n",
2665
       "      <td>0.3870</td>\n",
2666
       "      <td>-0.5589</td>\n",
2667
       "      <td>-0.5979</td>\n",
2668
       "      <td>0.0047</td>\n",
2669
       "      <td>-0.3548</td>\n",
2670
       "    </tr>\n",
2671
       "    <tr>\n",
2672
       "      <th>...</th>\n",
2673
       "      <td>...</td>\n",
2674
       "      <td>...</td>\n",
2675
       "      <td>...</td>\n",
2676
       "      <td>...</td>\n",
2677
       "      <td>...</td>\n",
2678
       "      <td>...</td>\n",
2679
       "      <td>...</td>\n",
2680
       "      <td>...</td>\n",
2681
       "      <td>...</td>\n",
2682
       "      <td>...</td>\n",
2683
       "      <td>...</td>\n",
2684
       "      <td>...</td>\n",
2685
       "      <td>...</td>\n",
2686
       "      <td>...</td>\n",
2687
       "      <td>...</td>\n",
2688
       "      <td>...</td>\n",
2689
       "      <td>...</td>\n",
2690
       "      <td>...</td>\n",
2691
       "      <td>...</td>\n",
2692
       "      <td>...</td>\n",
2693
       "      <td>...</td>\n",
2694
       "    </tr>\n",
2695
       "    <tr>\n",
2696
       "      <th>368</th>\n",
2697
       "      <td>2</td>\n",
2698
       "      <td>2</td>\n",
2699
       "      <td>2</td>\n",
2700
       "      <td>2</td>\n",
2701
       "      <td>2</td>\n",
2702
       "      <td>2</td>\n",
2703
       "      <td>2</td>\n",
2704
       "      <td>2</td>\n",
2705
       "      <td>2</td>\n",
2706
       "      <td>2</td>\n",
2707
       "      <td>...</td>\n",
2708
       "      <td>-0.0291</td>\n",
2709
       "      <td>-0.1058</td>\n",
2710
       "      <td>-0.6721</td>\n",
2711
       "      <td>0.2802</td>\n",
2712
       "      <td>1.9504</td>\n",
2713
       "      <td>-0.8784</td>\n",
2714
       "      <td>0.9506</td>\n",
2715
       "      <td>0.0607</td>\n",
2716
       "      <td>1.1883</td>\n",
2717
       "      <td>-0.3521</td>\n",
2718
       "    </tr>\n",
2719
       "    <tr>\n",
2720
       "      <th>369</th>\n",
2721
       "      <td>0</td>\n",
2722
       "      <td>0</td>\n",
2723
       "      <td>0</td>\n",
2724
       "      <td>0</td>\n",
2725
       "      <td>0</td>\n",
2726
       "      <td>0</td>\n",
2727
       "      <td>0</td>\n",
2728
       "      <td>0</td>\n",
2729
       "      <td>0</td>\n",
2730
       "      <td>0</td>\n",
2731
       "      <td>...</td>\n",
2732
       "      <td>0.0497</td>\n",
2733
       "      <td>0.3673</td>\n",
2734
       "      <td>-0.2208</td>\n",
2735
       "      <td>0.3034</td>\n",
2736
       "      <td>3.2580</td>\n",
2737
       "      <td>-0.2089</td>\n",
2738
       "      <td>1.6053</td>\n",
2739
       "      <td>-0.8746</td>\n",
2740
       "      <td>-0.4491</td>\n",
2741
       "      <td>-0.3450</td>\n",
2742
       "    </tr>\n",
2743
       "    <tr>\n",
2744
       "      <th>370</th>\n",
2745
       "      <td>1</td>\n",
2746
       "      <td>1</td>\n",
2747
       "      <td>1</td>\n",
2748
       "      <td>1</td>\n",
2749
       "      <td>1</td>\n",
2750
       "      <td>1</td>\n",
2751
       "      <td>1</td>\n",
2752
       "      <td>1</td>\n",
2753
       "      <td>1</td>\n",
2754
       "      <td>1</td>\n",
2755
       "      <td>...</td>\n",
2756
       "      <td>0.3822</td>\n",
2757
       "      <td>-0.7003</td>\n",
2758
       "      <td>-0.7661</td>\n",
2759
       "      <td>-1.7035</td>\n",
2760
       "      <td>-0.5423</td>\n",
2761
       "      <td>-0.3488</td>\n",
2762
       "      <td>1.3713</td>\n",
2763
       "      <td>-0.4365</td>\n",
2764
       "      <td>2.3456</td>\n",
2765
       "      <td>-0.3866</td>\n",
2766
       "    </tr>\n",
2767
       "    <tr>\n",
2768
       "      <th>371</th>\n",
2769
       "      <td>0</td>\n",
2770
       "      <td>0</td>\n",
2771
       "      <td>0</td>\n",
2772
       "      <td>0</td>\n",
2773
       "      <td>0</td>\n",
2774
       "      <td>0</td>\n",
2775
       "      <td>0</td>\n",
2776
       "      <td>0</td>\n",
2777
       "      <td>0</td>\n",
2778
       "      <td>0</td>\n",
2779
       "      <td>...</td>\n",
2780
       "      <td>-0.6853</td>\n",
2781
       "      <td>-1.0240</td>\n",
2782
       "      <td>-1.2890</td>\n",
2783
       "      <td>-1.5666</td>\n",
2784
       "      <td>-0.1270</td>\n",
2785
       "      <td>-1.4662</td>\n",
2786
       "      <td>0.3981</td>\n",
2787
       "      <td>-0.5976</td>\n",
2788
       "      <td>-1.3822</td>\n",
2789
       "      <td>-0.4157</td>\n",
2790
       "    </tr>\n",
2791
       "    <tr>\n",
2792
       "      <th>372</th>\n",
2793
       "      <td>0</td>\n",
2794
       "      <td>0</td>\n",
2795
       "      <td>0</td>\n",
2796
       "      <td>0</td>\n",
2797
       "      <td>0</td>\n",
2798
       "      <td>0</td>\n",
2799
       "      <td>0</td>\n",
2800
       "      <td>0</td>\n",
2801
       "      <td>0</td>\n",
2802
       "      <td>0</td>\n",
2803
       "      <td>...</td>\n",
2804
       "      <td>0.0517</td>\n",
2805
       "      <td>-0.3570</td>\n",
2806
       "      <td>-0.4843</td>\n",
2807
       "      <td>-0.3792</td>\n",
2808
       "      <td>-0.1964</td>\n",
2809
       "      <td>0.4200</td>\n",
2810
       "      <td>3.2547</td>\n",
2811
       "      <td>-0.1232</td>\n",
2812
       "      <td>3.4519</td>\n",
2813
       "      <td>-0.1962</td>\n",
2814
       "    </tr>\n",
2815
       "  </tbody>\n",
2816
       "</table>\n",
2817
       "<p>373 rows × 20395 columns</p>\n",
2818
       "</div>"
2819
      ],
2820
      "text/plain": [
2821
       "     NDUFS5_cnv  MACF1_cnv  RNA5SP44_cnv  KIAA0754_cnv  BMP8A_cnv  PABPC4_cnv  \\\n",
2822
       "0            -1         -1            -1            -1         -1          -1   \n",
2823
       "1             2          2             2             2          2           2   \n",
2824
       "2             0          0             0             0          0           0   \n",
2825
       "3             0          0             0             0          0           0   \n",
2826
       "4             0          0             0             0          0           0   \n",
2827
       "..          ...        ...           ...           ...        ...         ...   \n",
2828
       "368           2          2             2             2          2           2   \n",
2829
       "369           0          0             0             0          0           0   \n",
2830
       "370           1          1             1             1          1           1   \n",
2831
       "371           0          0             0             0          0           0   \n",
2832
       "372           0          0             0             0          0           0   \n",
2833
       "\n",
2834
       "     SNORA55_cnv  HEYL_cnv  HPCAL4_cnv  NT5C1A_cnv  ...  ZWINT_rnaseq  \\\n",
2835
       "0             -1        -1          -1          -1  ...       -0.8388   \n",
2836
       "1              2         2           2           2  ...       -0.1083   \n",
2837
       "2              0         0           0           0  ...       -0.4155   \n",
2838
       "3              0         0           0           0  ...       -0.8143   \n",
2839
       "4              0         0           0           0  ...        0.0983   \n",
2840
       "..           ...       ...         ...         ...  ...           ...   \n",
2841
       "368            2         2           2           2  ...       -0.0291   \n",
2842
       "369            0         0           0           0  ...        0.0497   \n",
2843
       "370            1         1           1           1  ...        0.3822   \n",
2844
       "371            0         0           0           0  ...       -0.6853   \n",
2845
       "372            0         0           0           0  ...        0.0517   \n",
2846
       "\n",
2847
       "     ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  ZYG11A_rnaseq  ZYG11B_rnaseq  \\\n",
2848
       "0         4.1375       3.9664       1.8437        -0.3959        -0.2561   \n",
2849
       "1         0.3393       0.2769       1.7320        -0.0975         2.6955   \n",
2850
       "2         1.6846       0.7711      -0.3061        -0.5016         2.8548   \n",
2851
       "3         0.8344       1.5075       3.6068        -0.5004        -0.0747   \n",
2852
       "4        -0.7908      -0.0053      -0.0643        -0.3706         0.3870   \n",
2853
       "..           ...          ...          ...            ...            ...   \n",
2854
       "368      -0.1058      -0.6721       0.2802         1.9504        -0.8784   \n",
2855
       "369       0.3673      -0.2208       0.3034         3.2580        -0.2089   \n",
2856
       "370      -0.7003      -0.7661      -1.7035        -0.5423        -0.3488   \n",
2857
       "371      -1.0240      -1.2890      -1.5666        -0.1270        -1.4662   \n",
2858
       "372      -0.3570      -0.4843      -0.3792        -0.1964         0.4200   \n",
2859
       "\n",
2860
       "     ZYX_rnaseq  ZZEF1_rnaseq  ZZZ3_rnaseq  TPTEP1_rnaseq  \n",
2861
       "0       -0.2866        1.8770      -0.3179        -0.3633  \n",
2862
       "1       -0.6741        1.0323       1.2766        -0.3982  \n",
2863
       "2       -0.6171       -0.8608      -0.0486        -0.3962  \n",
2864
       "3       -0.2185       -0.4379       1.6913         1.7748  \n",
2865
       "4       -0.5589       -0.5979       0.0047        -0.3548  \n",
2866
       "..          ...           ...          ...            ...  \n",
2867
       "368      0.9506        0.0607       1.1883        -0.3521  \n",
2868
       "369      1.6053       -0.8746      -0.4491        -0.3450  \n",
2869
       "370      1.3713       -0.4365       2.3456        -0.3866  \n",
2870
       "371      0.3981       -0.5976      -1.3822        -0.4157  \n",
2871
       "372      3.2547       -0.1232       3.4519        -0.1962  \n",
2872
       "\n",
2873
       "[373 rows x 20395 columns]"
2874
      ]
2875
     },
2876
     "execution_count": 434,
2877
     "metadata": {},
2878
     "output_type": "execute_result"
2879
    }
2880
   ],
2881
   "source": [
2882
    "genomic_features"
2883
   ]
2884
  },
2885
  {
2886
   "cell_type": "code",
2887
   "execution_count": 2,
2888
   "metadata": {},
2889
   "outputs": [],
2890
   "source": [
2891
    "import torch\n",
2892
    "import torch.nn as nn\n",
2893
    "import torch.nn.functional as F\n",
2894
    "import pdb\n",
2895
    "import numpy as np\n",
2896
    "\n",
2897
    "class MIL_Sum_FC_surv(nn.Module):\n",
2898
    "    def __init__(self, size_arg = \"small\", dropout=0.25, n_classes=4):\n",
2899
    "        super(MIL_Sum_FC_surv, self).__init__()\n",
2900
    "\n",
2901
    "        self.size_dict = {\"small\": [1024, 512, 256], \"big\": [1024, 512, 384]}\n",
2902
    "        size = self.size_dict[size_arg]\n",
2903
    "        self.phi = nn.Sequential(*[nn.Linear(size[0], size[1]), nn.ReLU(), nn.Dropout(dropout)])\n",
2904
    "        self.rho = nn.Sequential(*[nn.Linear(size[1], size[2]), nn.ReLU(), nn.Dropout(dropout)])\n",
2905
    "        self.classifier = nn.Linear(size[2], n_classes)\n",
2906
    "\n",
2907
    "    def relocate(self):\n",
2908
    "        device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
2909
    "        if torch.cuda.device_count() >= 1:\n",
2910
    "            device_ids = list(range(torch.cuda.device_count()))\n",
2911
    "            self.phi = nn.DataParallel(self.phi, device_ids=device_ids).to('cuda:0')\n",
2912
    "\n",
2913
    "        self.rho = self.rho.to(device)\n",
2914
    "        self.classifier = self.classifier.to(device)\n",
2915
    "\n",
2916
    "    def forward(self, **kwargs):\n",
2917
    "        h = kwargs['x_path']\n",
2918
    "\n",
2919
    "        h = self.phi(h).sum(axis=0)\n",
2920
    "        h = self.rho(h)\n",
2921
    "        logits  = self.classifier(h).unsqueeze(0)\n",
2922
    "        Y_hat = torch.topk(logits, 1, dim = 1)[1]\n",
2923
    "        hazards = torch.sigmoid(logits)\n",
2924
    "        S = torch.cumprod(1 - hazards, dim=1)\n",
2925
    "        \n",
2926
    "        return hazards, S, Y_hat, None, None\n",
2927
    "\n",
2928
    "from os.path import join\n",
2929
    "from collections import OrderedDict\n",
2930
    "\n",
2931
    "import torch\n",
2932
    "import torch.nn as nn\n",
2933
    "import torch.nn.functional as F\n",
2934
    "import pdb\n",
2935
    "import numpy as np\n",
2936
    "\n",
2937
    "\"\"\"\n",
2938
    "A Modified Implementation of Deep Attention MIL\n",
2939
    "\"\"\"\n",
2940
    "\n",
2941
    "\n",
2942
    "\"\"\"\n",
2943
    "Attention Network without Gating (2 fc layers)\n",
2944
    "args:\n",
2945
    "    L: input feature dimension\n",
2946
    "    D: hidden layer dimension\n",
2947
    "    dropout: whether to use dropout (p = 0.25)\n",
2948
    "    n_classes: number of classes (experimental usage for multiclass MIL)\n",
2949
    "\"\"\"\n",
2950
    "class Attn_Net(nn.Module):\n",
2951
    "\n",
2952
    "    def __init__(self, L = 1024, D = 256, dropout = False, n_classes = 1):\n",
2953
    "        super(Attn_Net, self).__init__()\n",
2954
    "        self.module = [\n",
2955
    "            nn.Linear(L, D),\n",
2956
    "            nn.Tanh()]\n",
2957
    "\n",
2958
    "        if dropout:\n",
2959
    "            self.module.append(nn.Dropout(0.25))\n",
2960
    "\n",
2961
    "        self.module.append(nn.Linear(D, n_classes))\n",
2962
    "        \n",
2963
    "        self.module = nn.Sequential(*self.module)\n",
2964
    "    \n",
2965
    "    def forward(self, x):\n",
2966
    "        return self.module(x), x # N x n_classes\n",
2967
    "\n",
2968
    "\"\"\"\n",
2969
    "Attention Network with Sigmoid Gating (3 fc layers)\n",
2970
    "args:\n",
2971
    "    L: input feature dimension\n",
2972
    "    D: hidden layer dimension\n",
2973
    "    dropout: whether to use dropout (p = 0.25)\n",
2974
    "    n_classes: number of classes (experimental usage for multiclass MIL)\n",
2975
    "\"\"\"\n",
2976
    "class Attn_Net_Gated(nn.Module):\n",
2977
    "\n",
2978
    "    def __init__(self, L = 1024, D = 256, dropout = False, n_classes = 1):\n",
2979
    "        super(Attn_Net_Gated, self).__init__()\n",
2980
    "        self.attention_a = [\n",
2981
    "            nn.Linear(L, D),\n",
2982
    "            nn.Tanh()]\n",
2983
    "        \n",
2984
    "        self.attention_b = [nn.Linear(L, D),\n",
2985
    "                            nn.Sigmoid()]\n",
2986
    "        if dropout:\n",
2987
    "            self.attention_a.append(nn.Dropout(0.25))\n",
2988
    "            self.attention_b.append(nn.Dropout(0.25))\n",
2989
    "\n",
2990
    "        self.attention_a = nn.Sequential(*self.attention_a)\n",
2991
    "        self.attention_b = nn.Sequential(*self.attention_b)\n",
2992
    "        \n",
2993
    "        self.attention_c = nn.Linear(D, n_classes)\n",
2994
    "\n",
2995
    "    def forward(self, x):\n",
2996
    "        a = self.attention_a(x)\n",
2997
    "        b = self.attention_b(x)\n",
2998
    "        A = a.mul(b)\n",
2999
    "        A = self.attention_c(A)  # N x n_classes\n",
3000
    "        return A, x\n",
3001
    "    \n",
3002
    "class MIL_Cluster_FC_surv(nn.Module):\n",
3003
    "    def __init__(self, num_clusters=10, size_arg = \"small\", dropout=0.25, n_classes=4):\n",
3004
    "        super(MIL_Cluster_FC_surv, self).__init__()\n",
3005
    "        self.size_dict = {\"small\": [1024, 512, 256], \"big\": [1024, 512, 384]}\n",
3006
    "        self.num_clusters = num_clusters\n",
3007
    "        \n",
3008
    "        ### Phenotype Learning\n",
3009
    "        size = self.size_dict[size_arg]\n",
3010
    "        phis = []\n",
3011
    "        for phenotype_i in range(num_clusters):\n",
3012
    "            phi = [nn.Linear(size[0], size[1]), nn.ReLU(), nn.Dropout(dropout),\n",
3013
    "                   nn.Linear(size[1], size[1]), nn.ReLU(), nn.Dropout(dropout)]\n",
3014
    "            phis.append(nn.Sequential(*phi))\n",
3015
    "        self.phis = nn.ModuleList(phis)\n",
3016
    "        self.pool1d = nn.AdaptiveAvgPool1d(1)\n",
3017
    "        \n",
3018
    "        \n",
3019
    "        ### WSI Attention MIL Construction\n",
3020
    "        fc = [nn.Linear(size[1], size[1]), nn.ReLU()]\n",
3021
    "        fc.append(nn.Dropout(0.25))\n",
3022
    "        attention_net = Attn_Net_Gated(L=size[1], D=size[2], dropout=dropout, n_classes=1)\n",
3023
    "        fc.append(attention_net)\n",
3024
    "        self.attention_net = nn.Sequential(*fc)\n",
3025
    "\n",
3026
    "        \n",
3027
    "        self.rho = nn.Sequential(*[nn.Linear(size[1], size[2]), nn.ReLU(), nn.Dropout(dropout)])\n",
3028
    "        self.classifier = nn.Linear(size[2], n_classes)\n",
3029
    "\n",
3030
    "    def relocate(self):\n",
3031
    "        device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
3032
    "        if torch.cuda.device_count() >= 1:\n",
3033
    "            device_ids = list(range(torch.cuda.device_count()))\n",
3034
    "            self.phis = nn.DataParallel(self.phi, device_ids=device_ids).to('cuda:0')\n",
3035
    "\n",
3036
    "        self.rho = self.rho.to(device)\n",
3037
    "        self.classifier = self.classifier.to(device)\n",
3038
    "\n",
3039
    "    def forward(self, **kwargs):\n",
3040
    "        x_path = kwargs['x_path']\n",
3041
    "        ### Phenotyping\n",
3042
    "        h_phenotypes = []\n",
3043
    "        from sklearn.cluster import KMeans\n",
3044
    "        kmeans = KMeans(n_clusters=self.num_clusters, random_state=2021).fit(X)\n",
3045
    "        #cluster_ids_x, cluster_centers = kmeans(X=x_path, num_clusters=self.num_clusters, distance='euclidean', device=torch.device('cpu'))\n",
3046
    "        cluster_ids_x = KMeans(n_clusters=10, random_state=2021, max_iter=20).fit_predict(x_path)\n",
3047
    "        for i in range(self.num_clusters):\n",
3048
    "            h_phenotypes_i = self.phis[i](x_path[cluster_ids_x==i])\n",
3049
    "            h_phenotypes.append(self.pool1d(h_phenotypes_i.T.unsqueeze(0)).squeeze(2))\n",
3050
    "        h_phenotypes = torch.stack(h_phenotypes, dim=1).squeeze(0)\n",
3051
    "\n",
3052
    "\n",
3053
    "        ### Attention MIL\n",
3054
    "        A, h = self.attention_net(h_phenotypes)  \n",
3055
    "        A = torch.transpose(A, 1, 0)\n",
3056
    "        if 'attention_only' in kwargs.keys():\n",
3057
    "            if kwargs['attention_only']:\n",
3058
    "                return A\n",
3059
    "        A_raw = A \n",
3060
    "        A = F.softmax(A, dim=1) \n",
3061
    "        h = torch.mm(A, h_phenotypes)\n",
3062
    "\n",
3063
    "        \n",
3064
    "        h = self.rho(h)\n",
3065
    "        logits  = self.classifier(h).unsqueeze(0)\n",
3066
    "        Y_hat = torch.topk(logits, 1, dim = 1)[1]\n",
3067
    "        hazards = torch.sigmoid(logits)\n",
3068
    "        S = torch.cumprod(1 - hazards, dim=1)\n",
3069
    "        \n",
3070
    "        return hazards, S, Y_hat, None, None"
3071
   ]
3072
  },
3073
  {
3074
   "cell_type": "code",
3075
   "execution_count": 15,
3076
   "metadata": {},
3077
   "outputs": [],
3078
   "source": [
3079
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.cuda.FloatTensor)\n"
3080
   ]
3081
  },
3082
  {
3083
   "cell_type": "code",
3084
   "execution_count": 17,
3085
   "metadata": {},
3086
   "outputs": [],
3087
   "source": [
3088
    "from sklearn.cluster import KMeans\n",
3089
    "kmeans = KMeans(n_clusters=10, random_state=2021, max_iter=20).fit_predict(x_path.cpu())"
3090
   ]
3091
  },
3092
  {
3093
   "cell_type": "code",
3094
   "execution_count": 18,
3095
   "metadata": {},
3096
   "outputs": [
3097
    {
3098
     "data": {
3099
      "text/plain": [
3100
       "array([5, 5, 3, 5, 8, 4, 8, 7, 5, 4, 9, 1, 9, 1, 6, 1, 1, 0, 5, 0, 4, 3,\n",
3101
       "       0, 6, 3, 1, 0, 7, 9, 8, 0, 5, 5, 3, 0, 1, 5, 1, 0, 6, 6, 4, 1, 5,\n",
3102
       "       3, 0, 1, 0, 8, 5, 1, 8, 1, 0, 5, 0, 2, 5, 6, 5, 0, 0, 5, 1, 2, 7,\n",
3103
       "       4, 6, 5, 3, 0, 7, 9, 1, 3, 4, 4, 5, 7, 9, 9, 5, 0, 1, 9, 1, 2, 0,\n",
3104
       "       6, 3, 1, 1, 2, 4, 0, 5, 1, 1, 1, 0, 0, 9, 8, 1, 5, 5, 0, 9, 2, 3,\n",
3105
       "       7, 0, 1, 6, 7, 5, 3, 5, 0, 1, 6, 1, 6, 2, 8, 7, 6, 1, 6, 2, 5, 0,\n",
3106
       "       1, 6, 0, 9, 2, 1, 0, 1, 7, 7, 6, 1, 6, 0, 3, 4, 1, 3, 2, 4, 4, 5,\n",
3107
       "       4, 1, 1, 9, 6, 0, 3, 6, 4, 8, 7, 9, 6, 5, 5, 9, 0, 6, 0, 1, 9, 2,\n",
3108
       "       3, 5, 1, 9, 6, 1, 0, 6, 6, 0, 0, 6, 7, 1, 6, 1, 1, 1, 4, 0, 2, 1,\n",
3109
       "       9, 5, 7, 5, 9, 0, 1, 0, 6, 2, 2, 1, 1, 5, 3, 5, 3, 6, 5, 6, 9, 5,\n",
3110
       "       2, 2, 2, 6, 0, 0, 0, 5, 2, 6, 6, 0, 2, 5, 1, 9, 2, 4, 4, 0, 4, 7,\n",
3111
       "       4, 1, 1, 3, 6, 0, 1, 2, 4, 0, 8, 1, 8, 5, 5, 7, 4, 1, 6, 1, 0, 8,\n",
3112
       "       6, 1, 1, 4, 8, 7, 5, 2, 3, 0, 2, 9, 5, 6, 4, 3, 6, 5, 5, 4, 6, 6,\n",
3113
       "       0, 1, 5, 1, 1, 1, 1, 9, 5, 7, 3, 0, 2, 4, 0, 5, 4, 0, 5, 0, 6, 0,\n",
3114
       "       3, 1, 4, 6, 3, 7, 1, 6, 7, 0, 1, 4, 6, 1, 6, 0, 6, 0, 5, 9, 1, 1,\n",
3115
       "       3, 1, 5, 6, 1, 6, 6, 8, 2, 0, 7, 9, 9, 6, 0, 6, 2, 6, 8, 0, 8, 5,\n",
3116
       "       1, 3, 1, 9, 2, 3, 5, 8, 2, 5, 6, 6, 5, 2, 9, 0, 1, 8, 5, 9, 5, 1,\n",
3117
       "       0, 1, 0, 8, 6, 1, 7, 2, 8, 3, 1, 6, 2, 2, 1, 6, 0, 2, 6, 1, 1, 4,\n",
3118
       "       5, 6, 4, 0, 5, 0, 9, 0, 4, 8, 0, 7, 6, 5, 5, 0, 4, 1, 1, 2, 2, 0,\n",
3119
       "       0, 6, 4, 0, 7, 7, 2, 3, 1, 4, 7, 9, 4, 7, 2, 4, 5, 6, 4, 5, 7, 9,\n",
3120
       "       8, 0, 6, 2, 0, 6, 6, 3, 5, 4, 4, 0, 1, 0, 5, 3, 1, 6, 0, 7, 4, 1,\n",
3121
       "       6, 3, 6, 0, 4, 1, 5, 7, 3, 1, 4, 8, 0, 7, 0, 6, 1, 1, 0, 1, 5, 1,\n",
3122
       "       2, 3, 2, 3, 8, 8, 4, 6, 5, 6, 1, 0, 7, 6, 4, 4], dtype=int32)"
3123
      ]
3124
     },
3125
     "execution_count": 18,
3126
     "metadata": {},
3127
     "output_type": "execute_result"
3128
    }
3129
   ],
3130
   "source": [
3131
    "kmeans"
3132
   ]
3133
  },
3134
  {
3135
   "cell_type": "code",
3136
   "execution_count": 2,
3137
   "metadata": {},
3138
   "outputs": [
3139
    {
3140
     "data": {
3141
      "text/plain": [
3142
       "(tensor([[0.9992, 0.0000, 0.0000, 1.0000]], grad_fn=<SigmoidBackward>),\n",
3143
       " tensor([[0.0008, 0.0008, 0.0008, 0.0000]], grad_fn=<CumprodBackward>),\n",
3144
       " tensor([[3]]),\n",
3145
       " None,\n",
3146
       " None)"
3147
      ]
3148
     },
3149
     "execution_count": 2,
3150
     "metadata": {},
3151
     "output_type": "execute_result"
3152
    }
3153
   ],
3154
   "source": [
3155
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.FloatTensor)\n",
3156
    "model = MIL_Sum_FC_surv()\n",
3157
    "model.forward(x_path=x_path)"
3158
   ]
3159
  },
3160
  {
3161
   "cell_type": "code",
3162
   "execution_count": 3,
3163
   "metadata": {},
3164
   "outputs": [
3165
    {
3166
     "data": {
3167
      "text/plain": [
3168
       "(tensor([[4.2595e-07, 1.0000e+00, 0.0000e+00, 7.2488e-12]],\n",
3169
       "        grad_fn=<SigmoidBackward>),\n",
3170
       " tensor([[1.0000, 0.0000, 0.0000, 0.0000]], grad_fn=<CumprodBackward>),\n",
3171
       " tensor([[1]]),\n",
3172
       " None,\n",
3173
       " None)"
3174
      ]
3175
     },
3176
     "execution_count": 3,
3177
     "metadata": {},
3178
     "output_type": "execute_result"
3179
    }
3180
   ],
3181
   "source": [
3182
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.FloatTensor)\n",
3183
    "self = MIL_Cluster_FC_surv()\n",
3184
    "model.forward(x_path=x_path)"
3185
   ]
3186
  },
3187
  {
3188
   "cell_type": "code",
3189
   "execution_count": 7,
3190
   "metadata": {},
3191
   "outputs": [],
3192
   "source": [
3193
    "import os\n",
3194
    "fname = os.path.join('/media/ssd1/pan-cancer/tcga_gbm_20x_features/h5_files/TCGA-02-0001-01Z-00-DX1.83fce43e-42ac-4dcd-b156-2908e75f2e47.h5')"
3195
   ]
3196
  },
3197
  {
3198
   "cell_type": "code",
3199
   "execution_count": 27,
3200
   "metadata": {},
3201
   "outputs": [],
3202
   "source": [
3203
    "import h5py\n",
3204
    "h5 = h5py.File(fname, \"r\")\n",
3205
    "coords = np.array(h5['coords'])"
3206
   ]
3207
  },
3208
  {
3209
   "cell_type": "code",
3210
   "execution_count": null,
3211
   "metadata": {},
3212
   "outputs": [],
3213
   "source": [
3214
    "fm"
3215
   ]
3216
  },
3217
  {
3218
   "cell_type": "code",
3219
   "execution_count": 17,
3220
   "metadata": {},
3221
   "outputs": [
3222
    {
3223
     "data": {
3224
      "text/plain": [
3225
       "array([43121, 29428])"
3226
      ]
3227
     },
3228
     "execution_count": 17,
3229
     "metadata": {},
3230
     "output_type": "execute_result"
3231
    }
3232
   ],
3233
   "source": [
3234
    "np.array(h5['coords'])[0]"
3235
   ]
3236
  },
3237
  {
3238
   "cell_type": "code",
3239
   "execution_count": 19,
3240
   "metadata": {},
3241
   "outputs": [
3242
    {
3243
     "data": {
3244
      "text/plain": [
3245
       "array([43121, 29940])"
3246
      ]
3247
     },
3248
     "execution_count": 19,
3249
     "metadata": {},
3250
     "output_type": "execute_result"
3251
    }
3252
   ],
3253
   "source": [
3254
    "np.array(h5['coords'])[1]"
3255
   ]
3256
  },
3257
  {
3258
   "cell_type": "code",
3259
   "execution_count": 20,
3260
   "metadata": {},
3261
   "outputs": [
3262
    {
3263
     "data": {
3264
      "text/plain": [
3265
       "512"
3266
      ]
3267
     },
3268
     "execution_count": 20,
3269
     "metadata": {},
3270
     "output_type": "execute_result"
3271
    }
3272
   ],
3273
   "source": [
3274
    "np.array(h5['coords'])[1][1] - np.array(h5['coords'])[0][1]"
3275
   ]
3276
  },
3277
  {
3278
   "cell_type": "code",
3279
   "execution_count": 21,
3280
   "metadata": {},
3281
   "outputs": [
3282
    {
3283
     "data": {
3284
      "text/plain": [
3285
       "512"
3286
      ]
3287
     },
3288
     "execution_count": 21,
3289
     "metadata": {},
3290
     "output_type": "execute_result"
3291
    }
3292
   ],
3293
   "source": [
3294
    "np.array(h5['coords'])[2][1] - np.array(h5['coords'])[1][1]"
3295
   ]
3296
  },
3297
  {
3298
   "cell_type": "code",
3299
   "execution_count": 23,
3300
   "metadata": {},
3301
   "outputs": [],
3302
   "source": [
3303
    "import nmslib\n",
3304
    "class Hnsw:\n",
3305
    "\n",
3306
    "    def __init__(self, space='cosinesimil', index_params=None,\n",
3307
    "                 query_params=None, print_progress=True):\n",
3308
    "        self.space = space\n",
3309
    "        self.index_params = index_params\n",
3310
    "        self.query_params = query_params\n",
3311
    "        self.print_progress = print_progress\n",
3312
    "\n",
3313
    "    def fit(self, X):\n",
3314
    "        index_params = self.index_params\n",
3315
    "        if index_params is None:\n",
3316
    "            index_params = {'M': 16, 'post': 0, 'efConstruction': 400}\n",
3317
    "\n",
3318
    "        query_params = self.query_params\n",
3319
    "        if query_params is None:\n",
3320
    "            query_params = {'ef': 90}\n",
3321
    "\n",
3322
    "        # this is the actual nmslib part, hopefully the syntax should\n",
3323
    "        # be pretty readable, the documentation also has a more verbiage\n",
3324
    "        # introduction: https://nmslib.github.io/nmslib/quickstart.html\n",
3325
    "        index = nmslib.init(space=self.space, method='hnsw')\n",
3326
    "        index.addDataPointBatch(X)\n",
3327
    "        index.createIndex(index_params, print_progress=self.print_progress)\n",
3328
    "        index.setQueryTimeParams(query_params)\n",
3329
    "\n",
3330
    "        self.index_ = index\n",
3331
    "        self.index_params_ = index_params\n",
3332
    "        self.query_params_ = query_params\n",
3333
    "        return self\n",
3334
    "\n",
3335
    "    def query(self, vector, topn):\n",
3336
    "        # the knnQuery returns indices and corresponding distance\n",
3337
    "        # we will throw the distance away for now\n",
3338
    "        indices, _ = self.index_.knnQuery(vector, k=topn)\n",
3339
    "        return indices"
3340
   ]
3341
  },
3342
  {
3343
   "cell_type": "code",
3344
   "execution_count": null,
3345
   "metadata": {},
3346
   "outputs": [],
3347
   "source": [
3348
    "x"
3349
   ]
3350
  },
3351
  {
3352
   "cell_type": "code",
3353
   "execution_count": 54,
3354
   "metadata": {},
3355
   "outputs": [
3356
    {
3357
     "data": {
3358
      "text/plain": [
3359
       "array([85, 87, 88, 73, 75, 76, 63, 29], dtype=int32)"
3360
      ]
3361
     },
3362
     "execution_count": 54,
3363
     "metadata": {},
3364
     "output_type": "execute_result"
3365
    }
3366
   ],
3367
   "source": [
3368
    "model = Hnsw(space='l2')\n",
3369
    "model.fit(coords)\n",
3370
    "model.query(coords, topn=8)"
3371
   ]
3372
  },
3373
  {
3374
   "cell_type": "code",
3375
   "execution_count": 59,
3376
   "metadata": {},
3377
   "outputs": [],
3378
   "source": [
3379
    "import networkx as nx\n",
3380
    "G = nx.Graph()\n"
3381
   ]
3382
  },
3383
  {
3384
   "cell_type": "code",
3385
   "execution_count": 56,
3386
   "metadata": {},
3387
   "outputs": [
3388
    {
3389
     "data": {
3390
      "text/plain": [
3391
       "array([43121, 29428])"
3392
      ]
3393
     },
3394
     "execution_count": 56,
3395
     "metadata": {},
3396
     "output_type": "execute_result"
3397
    }
3398
   ],
3399
   "source": [
3400
    "for"
3401
   ]
3402
  },
3403
  {
3404
   "cell_type": "code",
3405
   "execution_count": 52,
3406
   "metadata": {},
3407
   "outputs": [
3408
    {
3409
     "data": {
3410
      "text/plain": [
3411
       "130"
3412
      ]
3413
     },
3414
     "execution_count": 52,
3415
     "metadata": {},
3416
     "output_type": "execute_result"
3417
    }
3418
   ],
3419
   "source": [
3420
    "temp[3]"
3421
   ]
3422
  },
3423
  {
3424
   "cell_type": "code",
3425
   "execution_count": null,
3426
   "metadata": {},
3427
   "outputs": [],
3428
   "source": [
3429
    "model"
3430
   ]
3431
  },
3432
  {
3433
   "cell_type": "code",
3434
   "execution_count": 29,
3435
   "metadata": {},
3436
   "outputs": [
3437
    {
3438
     "data": {
3439
      "text/plain": [
3440
       "array([ 7440, 13280])"
3441
      ]
3442
     },
3443
     "execution_count": 29,
3444
     "metadata": {},
3445
     "output_type": "execute_result"
3446
    }
3447
   ],
3448
   "source": [
3449
    "coords[100]"
3450
   ]
3451
  },
3452
  {
3453
   "cell_type": "code",
3454
   "execution_count": 33,
3455
   "metadata": {},
3456
   "outputs": [],
3457
   "source": [
3458
    "indices = model.query(coords[100], topn =10)"
3459
   ]
3460
  },
3461
  {
3462
   "cell_type": "code",
3463
   "execution_count": 34,
3464
   "metadata": {},
3465
   "outputs": [
3466
    {
3467
     "data": {
3468
      "text/plain": [
3469
       "array([[ 7440, 13280],\n",
3470
       "       [ 7440, 13792],\n",
3471
       "       [ 7952, 13280],\n",
3472
       "       [ 6928, 13792],\n",
3473
       "       [ 7952, 12768],\n",
3474
       "       [ 7952, 13792],\n",
3475
       "       [ 7440, 14304],\n",
3476
       "       [ 8464, 13280],\n",
3477
       "       [ 6928, 14304],\n",
3478
       "       [ 8464, 13792]])"
3479
      ]
3480
     },
3481
     "execution_count": 34,
3482
     "metadata": {},
3483
     "output_type": "execute_result"
3484
    }
3485
   ],
3486
   "source": [
3487
    "coords[indices]"
3488
   ]
3489
  },
3490
  {
3491
   "cell_type": "code",
3492
   "execution_count": 84,
3493
   "metadata": {},
3494
   "outputs": [],
3495
   "source": [
3496
    "def do_KmeansPCA(X=None, y=None, scaler=None, n_clusters=4, n_components=5):\n",
3497
    "    import pandas as pd\n",
3498
    "    import seaborn as sns\n",
3499
    "    from sklearn.datasets import make_blobs\n",
3500
    "    from sklearn import decomposition\n",
3501
    "    from sklearn.decomposition import PCA, TruncatedSVD\n",
3502
    "    from sklearn.preprocessing import StandardScaler, Normalizer\n",
3503
    "    from sklearn.pipeline import make_pipeline\n",
3504
    "    from sklearn.cluster import KMeans\n",
3505
    "    ### Initialize Scaler\n",
3506
    "    if scaler is None: \n",
3507
    "        scaler = StandardScaler()\n",
3508
    "    ### Get Random Data\n",
3509
    "    X, y = make_blobs(n_features=10, n_samples=100, centers=4, random_state=4, cluster_std=7)\n",
3510
    "    ### Scale Data\n",
3511
    "    X = scaler.fit_transform(X)\n",
3512
    "    ### Perform K-Means Clustering\n",
3513
    "    cls = KMeans(n_clusters=n_clusters, init='k-means++', n_jobs=-1, n_init=1)\n",
3514
    "    y_pred = cls.fit_predict(X)\n",
3515
    "    ### Perform PCA\n",
3516
    "    pca = PCA(n_components=n_components)\n",
3517
    "    pc = pca.fit_transform(X)\n",
3518
    "    ### Plot Results\n",
3519
    "    columns = ['PC%d'%c for c in range(1, n_components+1)]\n",
3520
    "    pc_df = pd.DataFrame(data=pc, columns=columns)\n",
3521
    "    pc_df['y_pred'] = y_pred\n",
3522
    "    pc_df['y'] = y\n",
3523
    "    df = pd.DataFrame({'Variance Explained':pca.explained_variance_ratio_, 'Principal Components': columns})\n",
3524
    "    sns.barplot(x='Principal Components',y=\"Variance Explained\", data=df, color=\"c\")\n",
3525
    "    sns.lmplot( x=\"PC1\", y=\"PC2\", data=pc_df, fit_reg=False, \n",
3526
    "      hue='y', legend=True, scatter_kws={\"s\": 80})\n",
3527
    "    sns.lmplot( x=\"PC1\", y=\"PC2\", data=pc_df, fit_reg=False, \n",
3528
    "      hue='y', legend=True, scatter_kws={\"s\": 80})"
3529
   ]
3530
  },
3531
  {
3532
   "cell_type": "code",
3533
   "execution_count": 85,
3534
   "metadata": {},
3535
   "outputs": [
3536
    {
3537
     "data": {
3538
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAX2UlEQVR4nO3de7RedX3n8feHoCDiBSFtEYhBjW2htaABRMVLFUVbREZnCI4dsFbGcVi2dZxWZ2YBYtslVp06Faeg0loGjVcwVZSiXMQLl3ARCIoE5BLBioKCiGDgO3/sfczDyT4n+4Ts85ycvF9rPevs+/N9dk7O59mX32+nqpAkabKtxl2AJGluMiAkSZ0MCElSJwNCktTJgJAkddp63AVsKjvttFMtXrx43GVI0mbl0ksv/VFVLeyaN28CYvHixaxcuXLcZUjSZiXJTVPN8xSTJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqdO8aUk9nZ2XLx93CYO4bdmycZcgaR7zCEKS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdRo0IJIclOTaJKuTvK1j/luSXJPkyiRfSfKkkXlHJLmufR0xZJ2SpPUNFhBJFgAnAi8D9gAOT7LHpMUuB5ZW1dOBTwPvbtd9AnAssB+wL3Bskh2GqlWStL6tB9z2vsDqqroBIMly4BDgmokFqurckeUvBF7bDr8UOLuq7mjXPRs4CPj4gPVuEXZevnzcJQzitmXLxl2CNO8MeYppF+CWkfE17bSpvB744kzWTXJUkpVJVt5+++0Ps1xJ0qghAyId06pzweS1wFLgb2eyblWdXFVLq2rpwoULN7pQSdL6hgyINcBuI+O7ArdOXijJi4H/Cbyiqu6bybqSpOEMGRCXAEuS7J7kkcAyYMXoAkn2Bk6iCYcfjsw6C3hJkh3ai9MvaadJkmbJYBepq2ptkqNp/rAvAE6pqlVJjgdWVtUKmlNK2wOfSgJwc1W9oqruSPJOmpABOH7igrUkaXYMeRcTVXUmcOakaceMDL94mnVPAU4ZrjpJ0nRsSS1J6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoZEJKkTgaEJKnT1lPNSPKW6Vasqvdt+nIkSXPFlAEBPKb9+ZvAPsCKdvxg4KtDFiVJGr8pA6Kq3gGQ5F+BZ1TV3e34ccCnZqU6SdLY9LkGsQi4f2T8fmDxINVIkuaM6U4xTTgVuDjJ6UABhwL/PGhVkqSx22BAVNVfJ/kicEA76XVVdfmwZUmSxq3vba7bAXdV1fuBNUl2H7AmSdIcsMGASHIs8JfA29tJjwD+35BFSZLGr88RxKHAK4B7AKrqVtbdAitJmqf6BMT9VVU0F6hJ8uhhS5IkzQV9AuKTSU4CHp/kDcCXgQ8NW5Ykadz63MX0niQHAnfRtKo+pqrOHrwySdJY9WkHQRsIhoIkbUH63MX075Jcl+SnSe5KcneSu/psPMlBSa5NsjrJ2zrmPy/JZUnWJnn1pHkPJLmifa2YvK4kaVh9jiDeDRxcVd+eyYaTLABOBA4E1gCXJFlRVdeMLHYzcCTw1o5N3FtVe83kPSVJm06fgPi3mYZDa19gdVXdAJBkOXAI8KuAqKob23kPbsT2JUkD6hMQK5N8AjgDuG9iYlV9dgPr7QLcMjK+BthvBrVtm2QlsBZ4V1WdMXmBJEcBRwEsWrRoBpuWJG1In4B4LPBz4CUj0wrYUECkY1r1rAtgUVXdmuTJwDlJrqqq6x+ysaqTgZMBli5d
OpNtS5I2oM9trq/byG2vAXYbGd8VuLXvym2LbarqhiTnAXsD10+7kiRpk5nukaN/UVXvTvL3dHzzr6o3b2DblwBL2o79vg8sA17Tp6gkOwA/r6r7kuwEPIfmYrkkaZZMdwQxcWF65cZsuKrWJjkaOAtYAJxSVauSHA+srKoVSfYBTgd2AA5O8o6q2hP4beCk9uL1VjTXIK6Z4q0kSQOY7pGj/9L+/OjGbryqzgTOnDTtmJHhS2hOPU1e7xvA727s+0qSHr4NXoNIspCmu+89gG0nplfV7w9YlyRpzPp01ncazemm3YF3ADfSXF+QJM1jfQJix6r6CPDLqjq/qv4YeNbAdUmSxqxPO4hftj9vS/IHNLeqrnfdQJI0v/QJiL9K8jjgvwF/T9Nw7s8HrUqSNHZ9Gsp9vh38KfDCYcuRJM0V0zWU62wgN6FHQzlJ0mZsuiOIjWogJ0maH6ZrKPeQBnJJHttMrrsHr0qSNHZ9nii3NMlVwJXA1Um+leSZw5cmSRqnPncxnQK8qaouAEjyXOAfgacPWZgkabz6NJS7eyIcAKrqa4CnmSRpnutzBHFxkpOAj9Pc1XQYcF6SZwBU1WUD1idJGpM+AbFX+/PYSdOfTRMYdtonSfNQn4ZyNo6TpC1Qn7uYTm272pgYf1KSrwxbliRp3PpcpP4acFGSlyd5A3A28HfDliVJGrc+p5hOSrIKOBf4EbB3Vf1g8MokSWPV5xTTH9G0hfhPwD8BZyb5vYHrkiSNWZ+7mF4FPLeqfgh8PMnpwEdZd3eTJGke6nOK6ZWTxi9Osu9wJUmS5oIpTzEl+eTI8AmTZn8eSdK8Nt01iCUjwwdOmrdwgFokSXPIdKeYpnxY0AbmSZuFnZcvH3cJg7ht2bJxl6B5YrqA2C7J3jRHGY9qh9O+HjUbxUmSxme6gLgNeF87/IOR4YlxSdI8Nt0T5eyDSZK2YH262pAkbYEMCElSJwNCktSpT19MSfLaJMe044tsSS1J81+fI4gPAvsDh7fjdwMnDlaRJGlO6NNZ335V9YwklwNU1Z1JHjlwXZKkMetzBPHLJAtoW08nWQg8OGhVkqSx6xMQ/wc4Hfi1JH9N84S5vxm0KknS2PXp7vu0JJcCL6LpZuOVVfXtwSuTNGvsl0pdNhgQSZ4FrKqqE9vxxyTZr6ouGrw6SdLY9DnF9H+Bn42M39NO26AkByW5NsnqJG/rmP+8JJclWZvk1ZPmHZHkuvZ1RJ/3kyRtOn0CIlX1q+69q+pB+h15LKC5HfZlwB7A4Un2mLTYzcCRwMcmrfsE4FhgP2Bf4NgkO/SoVZK0ifQJiBuSvDnJI9rXnwI39FhvX2B1Vd1QVfcDy4FDRheoqhur6krWvyvqpcDZVXVHVd0JnA0c1OM9JUmbSJ+AeCPwbOD7wBqab/VH9VhvF+CWkfE17bQ+eq2b5KgkK5OsvP3223tuWpLUR5+7mH4IbMytAOna3KZct6pOBk4GWLp0qU+5k6RNqM+1hIXAG4DFo8tX1R9vYNU1wG4j47sCt/asaw3wgknrntdzXUnSJtCnq43PARcAXwYemMG2LwGWJNmd5vTUMuA1Pdc9C/ibkQvTLwHePoP3liQ9TH0CYruq+suZbriq1iY5muaP/QLglKpaleR4YGVVrUiyD00r7R2Ag5O8o6r2rKo7kryTJmQAjq+qO2ZagyRp4/UJiM8neXlVnTnTjbfrnDlp2jEjw5fQnD7qWvcU4JSZvqckadPocxfTn9KExL1J7kpyd5K7hi5MkjRefe5iesxsFCJJmlv6nGKivVi8BNh2YlpVfXWooiRJ49fnNtc/oTnNtCtwBfAs4JvA7w9bmiRpnPpeg9gHuKmqXgjsDdhsWZLmuT4B8Yuq+gVAkm2q6jvAbw5bliRp3Ppcg1iT5PHAGcDZSe6kf4toSdJmqs9dTIe2g8clORd4HPClQauSJI3dlAGR5LFVdVf7bIYJV7U/twds2SxJ89h0RxAfA/4QuJSmJ9VM
+vnkwauTJI3NlAFRVX+YJMDzq+rmWaxJkjQHTHsXU/uo0dNnqRZJ0hzS5y6mC5Ps03asJ0nz2s7Ll4+7hEHctmzmz33rExAvBP5zkpuAe2ivQVTV02f8bpKkzUafgHjZ4FVIkuacPu0gbgJI8muMdNYnSZrfNtjVRpJXJLkO+B5wPnAj8MWB65IkjVmfvpjeSdOD63eranfgRcDXB61KkjR2fQLil1X1Y2CrJFtV1bnAXgPXJUkasz4XqX+SZHvgq8BpSX4IrB22LEnSuPU5gjgEuBf4c5pO+q4HDh6yKEnS+E3XWd8HgI9V1TdGJn90+JIkSXPBdEcQ1wHvTXJjkhOSeN1BkrYgUwZEVb2/qvYHnk/Ttfc/Jvl2kmOSPG3WKpQkjcUGr0FU1U1VdUJV7Q28BjgU+PbglUmSxqpPQ7lHJDk4yWk0DeS+C7xq8MokSWM13UXqA4HDgT8ALgaWA0dV1T2zVJskaYymawfxP2ieKvfWqvLxopK0hZnuiXIvnM1CJElzS5+GcpKkLZABIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE6DBkSSg5Jcm2R1krd1zN8mySfa+RclWdxOX5zk3iRXtK9/GLJOSdL6+jxydKMkWQCcCBwIrAEuSbKiqq4ZWez1wJ1V9dQky4ATgMPaeddXlc+gkKQxGfIIYl9gdVXdUFX303T2d8ikZQ5h3VPqPg28KEkGrEmS1NOQAbELcMvI+Jp2WucyVbUW+CmwYztv9ySXJzk/yQED1ilJ6jDYKSag60igei5zG7Coqn6c5JnAGUn2rKq7HrJychRwFMCiRYs2QcmSpAlDHkGsAXYbGd8VuHWqZZJsDTwOuKOq7quqHwNU1aXA9cB6jzmtqpOramlVLV24cOEAH0GStlxDBsQlwJIkuyd5JLAMWDFpmRXAEe3wq4FzqqqSLGwvcpPkycAS4IYBa5UkTTLYKaaqWpvkaOAsYAFwSlWtSnI8sLKqVgAfAU5Nshq4gyZEAJ4HHJ9kLfAA8EYfWiRJs2vIaxBU1ZnAmZOmHTMy/Avg33es9xngM0PWJkmani2pJUmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUqdBAyLJQUmuTbI6yds65m+T5BPt/IuSLB6Z9/Z2+rVJXjpknZKk9Q0WEEkWACcCLwP2AA5PssekxV4P3FlVTwX+N3BCu+4ewDJgT+Ag4IPt9iRJs2TII4h9gdVVdUNV3Q8sBw6ZtMwhwEfb4U8DL0qSdvryqrqvqr4HrG63J0maJVsPuO1dgFtGxtcA+021TFWtTfJTYMd2+oWT1t1l8hskOQo4qh39WZJrN03pD8tOwI9m441y+OGz8TYPh/tiHffFOu6LdebCvnjSVDOGDIh0TKuey/RZl6o6GTh55qUNJ8nKqlo67jrmAvfFOu6LddwX68z1fTHkKaY1wG4j47sCt061TJKtgccBd/RcV5I0oCED4hJgSZLdkzyS5qLziknLrACOaIdfDZxTVdVOX9be5bQ7sAS4eMBaJUmTDHaKqb2mcDRwFrAAOKWqViU5HlhZVSuAjwCnJllNc+SwrF13VZJPAtcAa4H/WlUPDFXrJjanTnmNmftiHffFOu6Ldeb0vkjzhV2SpIeyJbUkqZMBIUnqZEDMQJIHklyR5Ookn0qyXTv9N5IsT3J9kmuSnJnkae28LyX5SZLPj7f6TWum+yLJXkm+mWRVkiuTHDbuz7CpbMS+eFKSS9t1ViV547g/w6ayMf9H2vmPTfL9JB8YX/Wb1kb+vZhY54okk2/q
mX1V5avnC/jZyPBpwFto2mx8E3jjyLy9gAPa4RcBBwOfH3f949wXwNOAJe20JwK3AY8f9+cY0754JLBNO2174EbgieP+HOPYFyPj7wc+Bnxg3J9hnPtidJ258Bqyodx8dwHwdOCFwC+r6h8mZlTVFSPDX0nygtkvb1b12hcj025N8kNgIfCTWatydsxoXwDbMH+P5HvtiyTPBH4d+BIwZxuNPUwz/b2YE+brL+ag2kZ9LwOuAn4HuHS8FY3PxuyLJPvSfIu+ftjqZtdM9kWS3ZJcSdPVzAlVNa8agvbdF0m2At4L/PfZq252zfD/yLZJVia5MMkrZ6XAaRgQM/OoJFcAK4GbadpxbKk2al8k2Rk4FXhdVT04YH2zacb7oqpuqaqnA08Fjkjy6wPXOFtmui/eBJxZVbdsYLnN0cb8H1lUTdcbrwH+LslThixwQzzFNDP3VtVeoxOSrKJpBb6lmfG+SPJY4AvA/6qqC6dabjO00b8X7em2VTTXJj49UH2zaab7Yn/ggCRvorke88gkP6uq9Z4fsxma8e/FxJFkVd2Q5Dxgb8Z4pO0RxMN3DrBNkjdMTEiyT5Lnj7GmcZlyX7TdrZwO/HNVfWpsFc6e6fbFrkke1U7bAXgOMBd6Ih7KlPuiqv5jVS2qqsXAW2l+P+ZDOExlut+LHZJs007bieb34pox1QkYEA9bNbceHAoc2N62tgo4jrZzwSQXAJ+iedbFmszjp+NtYF/8B+B5wJEjt/HtNfXWNm8b2Be/DVyU5FvA+cB7quqqsRU7sA39H9mS9Pi9WNn+XpwLvKuqxhoQdrUhSerkEYQkqZMBIUnqZEBIkjoZEJKkTgaEJKmTAaE5ZaoeMDuWOzPJ4zdi+09MstEN0pLc2N6jPnn69klOmrh1MclXk+y3se8zF6Tpgffl465D42NAaK65t6r2qqrfAe4HHtIVdhpbVdXLq2rGHf1V1a1VNUTL9w/TPDZ3SVXtCRwJrBckm5m9AANiC2ZAaC67AHhqksVJvp3kg8BlwG4T3+RH5n2o/eb+ryOtlJ+a5MtJvpXksiRPaZe/up1/ZJLPpXlmx7VJjp144yRnpHlmw6okR01XZNtfzn40XYg8CE1XCVX1hXb+W9ojoquT/Fk7bXGS7yT5cDv9tCQvTvL1JNel6dCQJMclOTXJOe30N7TTk+Rv23WvSvt8jSQvSHJekk+32z8tSdp5z0xyfvu5zkrTLxbt8ickuTjJd5MckKbl+/HAYe0R3WFta9+JRo6XJ3nMJvp31lw17v7GffkafdH2h0/TT9jngP8CLAYeBJ41styNNN/QFwNrgb3a6Z8EXtsOXwQc2g5vC2zXLn91O+1ImudS7Ag8CrgaWNrOe0L7c2L6jqPvO6nmVwCnT/F5nknTi+ejafoaWkXTv85E3b9L80XtUuAUmucFHAKc0a5/HPCtto6daHp/fSLwKuBsYAFNV9k3AzsDLwB+CuzabvebwHOBRwDfABa22z0MOKUdPg94bzv8cuDLI/vnAyOf5V+A57TD2wNbj/v3xdewLzvr01wz0QMmNEcQH6H5g3hTTd3B3/dqXZ/6lwKL22+3u1TV6QBV9QuA9sv0qLOr6sftvM/S/DFdCbw5yaHtMrsBS4Afb8TneS5NeNwz8h4HACvauq9qp68CvlJVleQqmgCZ8Lmquhe4N8m5wL7tdj9eVQ8A/5bkfGAf4C7g4qpa0273inZbP6Hpavrsdh8soAnHCZ9tf1466b1HfR14X5LTgM9OvIfmLwNCc01XD5gA90yzzn0jww/QfNteLwmmMLmvmUrzgKcXA/tX1c/T9Kq57TTbWAX8XnttZHIX5tPVMVr3gyPjD/LQ/5vr1TiD7T7QbivAqqrafwPrTCy/nqp6V5Iv0BxlXJjkxVX1nWnq0GbOaxCal6rqLmBN2oeuJNlmijuiDkzyhPa6xStpviU/DrizDYffAp61gfe6nuao4x0j5/uXJDkE+CrwyiTbJXk0TUdtF8zw
4xySZNskO9KcQrqk3e5hSRYkWUjTEeLF02zjWmBhkv3b+h6RZM8NvO/dwK+uMyR5SlVdVVUntJ/3t2b4ObSZMSA0n/0RzamiK2nOv/9GxzJfo3mA0RXAZ6pqJc2jL7du13sn0OfZFX/Sbn91e4roQ8CtVXUZ8E80f7wvAj5cVZfP8HNcTPMcjQuBd1bzzIDTgStprk+cA/xFVf1gqg1U1f00zyE4IU1voVcAz97A+54L7DFxkRr4s/ai+LeAe4EvzvBzaDNjb67aYiU5kuai9NHjrmUqSY6juXD/nnHXoi2PRxCSpE4eQUiSOnkEIUnqZEBIkjoZEJKkTgaEJKmTASFJ6vT/AQBQk+iug2T9AAAAAElFTkSuQmCC\n",
3539
      "text/plain": [
3540
       "<Figure size 432x288 with 1 Axes>"
3541
      ]
3542
     },
3543
     "metadata": {
3544
      "needs_background": "light"
3545
     },
3546
     "output_type": "display_data"
3547
    },
3548
    {
3549
     "data": {
3550
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAFgCAYAAABKY1XKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5Rc9XXo+e8+p6rf6ocksIwFFgQcMA+D0xL4ARjHsWXFOJIIHidOrgVJOrAuTrxWxgmJZybc65V1k/FMEhtnxbcTg0iMzTUxKA6QYCcyA36AJB4BFGGDFZIoRkgtqaXurlI9ztnzx6lqVbequqrrdc6p2h8WS+qqrq5ft6Szz2/v32//RFUxxhhjluKEPQBjjDHRZ8HCGGNMVRYsjDHGVGXBwhhjTFUWLIwxxlSVCHsAy7Fx40b9h3/4h7CHYYwx9ZCwB9CIWM0spqamwh6CMcZ0pVgFC2OMMeGwYGGMMaYqCxbGGGOqsmBhjDGmKgsWxhhjqrJgYYwxpioLFsYYY6qK1aa8TpHOenx//xTH5nKMDSZ5x3mr6e9xwx7WvKiPzxjTfhYs2khVuX/PASaf2E/e8/F8xXWEhOswcfV53Di+FpHwNnlGfXzGmPBYsGij+/cc4M6dLzPYm6A/mZx/POf53LnzZQA+sv7ssIYX+fEZY8ITWs1CRPpEZJeI/LOI7BWR/xbWWNohnfWYfGI/g70Jku7CH3vSdRjsTTD5xH7SWc/GZ4yJnDAL3Bngvar6NuByYKOIXBXieFrq+/unyHv+aRfioqTrkPd8ntx/pM0jC0R9fMaYcIWWhtLg8O/ZwofJwv8deyD4sbkcnr/0t+f5ytG5bJtGtFDUx2eMCVeoS2dFxBWR54BDwLdU9akwx9NKY4NJXGfp4rDrCCsHe9o0ooWiPj5jTLhCDRaq6qnq5cBaYIOIXLL4c0RkQkT2iMiew4cPt3+QTfKO81aTcB1ynl/2+Zznk3AdrjpvVZtHFoj6+Iwx4YrEpjxVnQYeAzaWeW5SVcdVdfyMM85o+9iapb/HZeLq85jL5E+7IOc8n1Q2z8TV54W2nyHq4zPGhCu0moWInAHkVHVaRPqB9wF/FNZ42uHG8bUATD6xn5O53IJ9DLddd8H88zY+Y0zUSFBnDuGNRS4D7gFcghnO11T1vy/1mvHxcd2zZ087htdS6azHk/uPcHQuy8rBHq46b1Wk7tijPj5jYirWO1pDCxb16JRgYYzpSrEOFpGoWRhjjIk2CxbGGGOqsmBhjDGmKgsWxhhjqrKus8a0gZ0RYuLOgoUxLWRnhJhOYcHCRFKn3Ik364yQTvl5mPiyYGEipZPuxGs9I+T6t51V8cLfST8PE29W4DaRUrwTTzjCir4kowM9rOhLknCEO3e+zP17DoQ9xJo144yQTvp5mHizYGEio9NO62v0jJBO+3mYeLNgYSKj007ra/SMkE77eZh4s5qFWSDMQmqnndZXekZIuQt+tTNCOu3nYeLNgoUBolFI7bTT+opnhBRXQ5UGjOIZIbddd0HFYNxpPw8TbxYsDNC8JZ6NaPROPIoaOSOkE38eJr4sWJimLPFshkbvxKNIRPjI+rO5/m1nLfuMkE78eZj4smBh5guppTOKUknX4WQux5P7j3DdhWe2dCydelpff49b18+uU38eJn4sWJhIFVIbuRPvRPbzMFFhwcJEspBa7514p7KfhwmbBYuYacXSViukGmOqsWARE61c2mqFVGNMNRYsYqLVS1utkGqMWYqoLl3YjJLx8XHds2dP2MNou3TW4/ovfIeEIxXTRHlf+bvb3t3w3X8661kh1ZjWiHV7YJtZxEA7l7ZaIdUYU441EoyBKC1tNcZ0J5tZtEgzVy1FcWmrMaa7WLBoslasWrKlrcaYsFkaqslacbJZcWnrXCZPzvMXPFdc2jpx9XmRLESnsx47X3qdrz99gJ0vvW4H9RgTUzaz
aKJWNuSL29LWpWZY2965jjeO9DKdyrf9zAxjTH0sWDRRK1ctxa1HUPl9IcrhmSx3fGMvI/0JBnoSbT8zwxhTH0tDNVE7Vi3197hcdd4qxgaTHJ3L8v39U5FL7VSaYR2dy3FkLoMrMJvxGO5LNpyiM8a0h80smqjVq5aicJpdLcrNsHxVpmYzuCKICJ7vM5vNM9yXbOuZGcaY+tjMoolKVy2V0+iqpVYUz1uh3AxrNpNHlflgprDgc5KuQ97zeXL/kXYO1RhTIwsWTdTKVUu1Fs+jkJIqN8PyfEU5FRwEyn6ObSw0JposWDTZjeNr+cR7LyDvKzMnc0ynssyczJH3taFVS8XUTrl9FhCtO/NyMyzXEaTQGkdVERGGehZmQW1joTHRZTWLJmvVqqU4tfxY3PLcFUFV8VXxCwHkDcN9OCUzC9tYaEy0WbBokWY35Itby48bx9eiqvy/3/oBx1I5fFVUwddgnKqnZhh2ZoYx0WfBIibi1vJDCqueehIubxh2AMF1hFzeZ2o2w6GZDOmcx0h/surGwlacDmiMWR4LFjERt9PsigX5oTIF+bHBHo6ns+Tyyu0fvJBr33Jm2XHHZamwMd3AgkWMxKnlx1K72R0RxgZ6mTmZozfhVgxwrT4d0BhTu9CChYicDfwVsAbwgUlV/VxY44mDOLX8aLQg38o+W8aY5QtzZpEHfktVnxGRFcDTIvItVf2XEMcUC3E4za7Rgnw7Twc0xlQX2j4LVX1NVZ8p/H4G2Ae8KazxmOZqdDd7nJYKG9MNIrEpT0TWAVcAT5V5bkJE9ojInsOHD7d7aKZOje5mj9tSYWM6XejBQkSGgK8Dn1TVE4ufV9VJVR1X1fEzzjij/QM0dWtkN3ur+2wZY5Yn1NVQIpIkCBT3quoDYY7FNF8jBfm4LRU2ptOFuRpKgC8B+1T1j8Mah2m9egvycVoqbEynE9Wli4gte2ORdwNPAC8QLJ0F+D1VfaTSa8bHx3XPnj3tGJ6JkHTWi/xSYWNqEOsdpKHNLFT1O8T8h2faIw5LhY3pdKEXuI0xxkSfBQtjjDFVWbAwxhhTlTUSNMaYcrIpePUJSB2FgZWw7mroGQh7VKGxYGGMMaVU4dl74XufBz8HvgeOC04S3vkbcMXHoAtb41uwMMaYUs/eC4//39AzBMn+U497ueBxgLf/UjhjC5EFi5iI6mlxUR2X6RDtTgVlU8GMomcI3EUdj91k8Pj3Pg+XbO26lJQFi4iL6mlxUR2X6RBhpYJefSJ4v9IZRSk3CfmT8Op34C3vb/77R5gFi4iL6mlxUR2X6RBhpYJSR4PAtBTfg9SR5r93xNnS2Qir9bS4dLbKX+4uGZfpELWmgrKp5r/3wMpgBrMUx4WB7ut2bMEiwoqnxS2+IBclXYe85/Pk/vbe5bR7XOmsx86XXufrTx9g50uvWxDqdMVU0OJAUeQmwc8HqaBmW3d1kOrycuWf93LgJGDdu5v/3hFnaagIi+ppce0aVyvrIlaYj7AwU0E9A0FNpJgCKw1YXg6yc3DNp7quuA0WLCItqqfFtWtcraiLWGE+BsJOBV3xseDX730+KGbPF9cTQaAoPt9lLFhEWOlpceVSPmGdFteOcdVaF7n+bWcta0ZghfkYKE0FlUtFFVNBb3o7/PDR5i+rFQmK55dsDVJdqSNBYFr37q6cURRZzSLCGj3HOs7jakVdxArzMVFMBWVnT68deDnIzMLa9XD3JviH2+Gx/xH8OvkeeObLwbLbZo3jLe+Hy38h+LWLAwXYzCLyonpaXKvH1Yq6SDEAlc4oSiVdh5O5HE/uP2LnZ4RtqVTQuVcHRXDbYd1WFiwirpFzrOM8rlbURaK6YMCUUSkV9Ka3BzMK22HddhYsYiKqp8W1alytqItEdcGAWUIxFVT0w0dth3VIrGZhIqkVdZHSAFROWAsGzDLYDuvQWLAwkXXj+Fo+8d4LyPvKzMkc06ksMydz5H2tqy4S1QUDZhnCXlbbxSwNZSKr
FXWRqC4YMDWqdVltF+6wbjXRZi0za4Px8XHds2dP2MMwHSCd9SK1YMAswzNfrr7DOpqroWK929NmFqYrRXXBgKmB7bAOhQUL03LWh8k0le2wDoUFC9My1ofJtNTiZbWmpSxYREwn3YVbHyZjOocFi4jotLvwVjUCNMaEw/ZZRETxLjzhCCv6kowO9LCiL0nCEe7c+TL37zkQ9hCXJaoHNxlj6mPBIgI6sRtqM/sw2Ul5xoTP0lAR0IndUJvRh6nTUnPGxJkFiwjoxG6ozWgEaAVyY6LD0lAR0IndUBvtw9SJqbm2yqaCDq3PfTX4NZsKe0Qm5mxmEQHl7sJ9VWYzeTxfUZS+hBu7bqiN9GHqxNRcW6jCs/cGu5v9XMnu5mRw+twVHws2tRmzTBYsIqB4Fx6kXFxmTnpMzWZQBV99QFg5mOTv/vnHscrTN9IIsBNTc23x7L2n+ibZKXKmiSxYRETxLvv/+eYPODKXwUEQAddxWD3Uw1BvIrZ5+nr6MHViaq7lsqlgRmGnyJkWsJpFRIgI17/tLIb6ErxxpJ83jPTxxtF+zj9jiJWDvfQk3K7K09tBRXV49Ykg9VSudTcEj/v5oJ+SMctkwSJCvr9/Ct9XxgZ6GBvoYbgviVNyd91NG9nsoKI62ClypoUsDRUhlqdfyA4qWiY7Rc60kAWLCLE8/UKtOCmvo9kpcqaFQg0WInIX8CHgkKpeEuZYoqAZG9k6kR1UVKOegWB5bLVT5KJc3M6mgtpL6mgwU1p3dbTH20XCnllsB74A/FXI44iEhUtoF25GK+bpb7vuArurNpXF9RQ52x8SeaEGC1V9XETWhTmGqIlbnr6Tzt/oCHE9Rc72h0SeqC5dUG35AIJg8VClNJSITAATAOecc85P/du//Vv7BheidNaLdJ7emvyZpsmmYPI9weynUq3Fz8PEY9EOeNXF+h9E2GmoqlR1EpgEGB8fDzeytVHU8/TW5M80TXF/SOmMopSbDFJqr37HjlENke2zMMtmTf5MU9n+kFiwYGGWrfQUPJ8sqcQLzCafJJV4AZ9sV20eNE1g+0NiIeyls18F3gOsFpEDwO+r6pfCHJOp7thcjrzvM5P8Hid6voXiofgIDtLrMpz9GfKpn+qazYOmQbY/JBbCXg31C2G+v6nP2GCS/MAu5noeRejF4dQmQSXP8Z5H6B3wWDl4aYijNLHRCftDuoClocyyXXHOICcHdoL2IovuN4QEaC8nB3Zy+Tn2j9vU6IqPwTW/Hax6ysxAejr41c9He39IF4n8aigTPS8cfYbhfocTKRdl4V4pVVB1GRkIPu+agWtCG6eJkbjuD+kiFizMsk1nphnoEfrcXg7PZvBLmsKKwJkrenESaaYz0+EN0sRTz4Atj40oCxZm2UZ7R3Edl5G+HkYGepjL5Ml7SsIVBnsTOAKz2SyjvaNhD9UY0yQWLMyyrV+znoSTIOfnSDpJVvQt/GuU83MknATr16wPaYTGmGazArc5TTqf5vEDj/ONH32Dxw88TjqfXvB8f6KfbRdvI51Lk/NzC57L+TnSuTTbLt5Gf6LCjlxjTOzYzMLMU1V2vLKD7Xu3k/fzeOrhikvCSbDt4m1sPn/zfL+nzedvBmD73u1k8hl89XHEIeEkmLhsYv55Y0xnCL2R4HKMj4/rnj17wh5Gx3rw5QeZfH6S/mQ/Saek31NhtjBx2QRbLtiy4DXpfJrdB3cznZlmtHeU9WvW24yiHnaOQzeIdSNBCxYGCC76H33oo7iOuyBQFOX8HJ7vcd+H7rNg0Ex2jkM3ifUfpNUsDAC7D+4m7+fLBgqApJMk7+fZfXB3m0fW4YrnODgJ6B2G/rHgVycRPP7svWGP0BjAahYtEcfUzHRmGk+X7vzpq297J5opmwpmFItbXEDwcc9Q8PwlWy0lZUJnwaKJllMgjprR3lFcWbrzpyOO7Z1opmrnODhusJN55x/AeddYHcOEyoJFE+14Zcd8gbgv0Tf/eM7PMfn8JMBp
BeKoWLx3YjHbO9EClc5xUIX0UZg9BOrB81+FHz5idQwTKqtZNEk6n2b73u2nrSSCIN/fn+xn+97tp+1ZiArbO1FBNgU/fBSe+2rwazbVvK9d6RyH9FGYfT0ICOJC34jVMUzobGbRJMUCcemMolTSSZLJZ9h9cDfXrI1mcz3bO1GiHauUyp3joH4wo5DCfZwQ1C7A6hgmVBYslqlS8boTCsQiwpYLtrDx3I2xK9A3XXGVUs/QwpqClwseh6BLaiPKneOQmYGgl28QOIbWnAocYOdRm9BYsKhRteL1yr6VHVMg7k/01zT7ieOqr5q0c5VS8ZyG730+CAKZmSBIiASBYmDl6a+x86hbwzZGLsmCRY2qFa9vuvimrikQx3nVV02qrVJq5t394nMc9j8GL34dhs5cOKMoZedRN5dtjKyJFbhrUEvx+t6X7uUXL/zFrigQFwOn67gM9Qwx0jvCUM8QruMy+fwkO17ZEfYQG1NplVKpZt/dF89xeO//ERS0K72/nUfdfLYxsiYWLGpQ6+7mNw6+kYnLJvB8j9nsLCcyJ5jNzuL5XscUiOO+6qsmlVYplWrV3X2xjpGdDQJDqeJ51O/8DUuPNEutKcdmroKLKUtD1aDm4nV2uuMLxJ2w6quqcquUSrX67n5xHWM+LZKw86ibrZ0pxyYTkc8AU6r6ucLHfwC8rqqfb8X7WbCowXJ3N9daII6jTlj1VVW5VUpFxbv7az7Vurt7O4+6fcJIOTbPl4AHgM+JiAN8FNjQqjezYFED2918Ste0BYnC3b2dR916YaYcG6Sqr4rIERG5AngD8KyqtiyqWbCoQXF38+Tzk5Ck4lkPnZJqWkrXBE67u+8OYaccG/eXwDZgDXBXK9/ICtw12nz+5o4vXtei69qCFO/uL/+F4FcLFJ0l/gsKHgQ2AuuBR1v5RjazqJHtbj7F2oKYjhKFlGOdVDUrIt8GplWrFBMbZCflmbo1uoM7nfX4/v4pjs3lGBtM8o7zVtPfUyV/bEyrZFOtTjk2fWdfobD9DHCjqr7c7K9fymYWpm71rvpSVe7fc4DJJ/aT93w8X3EdIeE6TFx9HjeOr433DnATTzFbUCAibwUeAh5sdaAACxYmBPfvOcCdO19msDdBf7JksYDnc+fO4O/8R9afXfPXsxmK6Uaq+i/Aee16PwsWpq3SWY/JJ/Yz2Jsg6S5cX5F0HQZ7E0w+sZ/r33ZW1Qu+zVCMaR9bDWXa6vv7p8h7/mmBoijpOuQ9nyf3V18uXpyhJBxhRV+S0YEeVvQlSTjCnTtf5v49B5o9fGO6VtWZhYgMA2eo6o8WPX6Zqj7fspGZjnRsLofnL72owvOVo3PZJT+n2gxl1PH57r1/y3X/dhZ9q1cyeOWVOP3dtWotMqz1d0dYMliIyEeAPwUOiUgS2KaquwtPbwfe3trhmU4zNpjEdZZODbmOsHKwZ8nPKc5QSmseAKgy/tL3uPbZR8HLc/DZBP19PUgiwaqbb2Jk61ZLTbXLclt/W1CJtGozi98DfkpVXxORDcBfi8jvqeoDtGAZmOl87zhvNQnXIVchFZXzfBKuw1XnLd1eodIMZfyl7/HTex4ik+glm+wnN9DL0EAPmssx9edfBGD0hhua882YpdV62qCdJ1HRutsfHgCuA1YBR4Bvv/qHP9twC1wR2Qh8DnCBv1TVP6z2mmrBwlXV1wBUdZeIXAc8JCJrCc5+NGZZ+ntcJq4+b341VGnAyHk+qWye2667oGpxu9wMJZnLcO2zj5JJ9OK7CUSVhBN8fUkmcQYGOHLX3Qxv2mQpqVZbzmmDLz7Q+iNsY2bd7Q8L8HHgU0CS4FqdB3Lrbn/4s8A9r/7hz9Z1DRYRF/gz4GeAA8BuEflGYXVVRdUK3DMi8hPFDwqB4z3AzwEX1zNQY24cX8sn3nsBeV+ZOZljOpVl5mSOvK/cdt0F3Di+turXKJ2hFP3Ef/4Ax/fw3QSKIsBg76mgI8kkms+T2rWrFd+WKVVs/V2u
3xIEj/t5eOWf7DyJ8j4O/J8EAeIEcLTwa77w+Mcb+NobgFdUdb+qZoH7CK7pS6o2s7iVRekmVZ0pTGE+Uu9ITXcTET6y/myuf9tZPLn/CEfnsqwc7OGq81bVvD+i3Axl4OQsjvooiu/DGcO9OIvTF55H/tixFnxXZoFaW3//+/dje55EqxRST58CZoFFDavIFR7/1LrbH/5anSmpNwH/UfLxAeDKai+qFizmCFrfvrLo8auAJ5czOtMcjbbYiJL+HpfrLjyz7tcXZyCTT+znZC7HYekjj6AaBIqVA2WK5K5LYmys7vc0Naq19TcS5/MkWuU6gtRTpeMmc0A/QZbnkTq+frkCUNWUVrVg8acERe7F0oXnrq8+LtMMqsqOV3awfe928n4eTz1ccUk4CbZdvI3N52/uulU+i2cox46ezcqXHmGgvwe35/RAobkckkgwsKFl58OYolpbf59zJfywyvUuoudJtNAqaqgnA6vr/PoHgNIWCWuBH1d7UbWaxbpyeylUdQ+wbjmjK0dENorID0TkFRG5vdGv18l2vLKDyecncR2XoZ4hRnpHGOoZwnVcJp+fZMcrO8IeYmiKM5St7zyfc279NUin0dzC2bvmcvipFKtuvsmK2+1Qa+vv8993KqiUE/3zJFrhCEFtYikeMFXn198NXCAi54pID8EJe9+o9qJq0av8QcuBhv7F1VuR70bpfJrte7fTn+yfP3DIV5+53Nz8Ead3vXgXG8/dGNuUVLOMbN0KwJG77sbPZMDzwHWRRILVt94y/3xLddJ+gUa+l1paf4uEe4RtNH2bINWU5PSaBSWPP1bPF1fVvIjcRnD+hQvcpap7q71uyRblIvJVYKeq/sWix38FeL+q/m/1DLbwNd4B3KGqHyh8/LuFb+R/VHpNN7UoL61NHJg5wN/96O8Y7h1GVTmeOc7UySlK/+wU5aM/+VF+e/1vd106qhw/nSa1axf5Y8dIjI0xsGFD62cUnbRfoJnfS7XW3wveK78wqMTt57a0mr+Jdbc/vI1g1dPiIncSGAI+8+of/uz2Zg6ummozi08CD4rIx4CnC4+NAz3Algbfu6aKvIhMABMA55xzToNvGX3lahMn8yc5njmOrz6qytTJKZxCBjFYIiqoKg/vf5i3jL2FLRc0+kcTf05/P0PXXtveN611E1ocNPN7qdb6246wLeeewq+fIsjiuASppxzwmZLn26amw48Km/EuKXy4V1V3NvzGIjcCH1DVXy18/MvABlX9RKXXdMPM4sGXH2Ty+ckFKafZ7Cyvzb4WLBxRH0Hm009FijLaO8rKvpXc96H7uj4d1XbZFEy+J7gbrlTQ9fMw8Vj0L4Cl34vjBnUHPx983DMU3PnH5XuJlmVPjwrLaN9DUMyeAh5rxg7uelTrDdUH3AKcD7wAfElVqxVealVXRb6TlatNAAwkBxBH8H1/PkjIor93gjCXm8MVl90Hd9d1KJFpQHETWifsFyh+L7k0zL0epImKRGDwDcH3E4fvJeYKgaGe5bFNVy0NdQ/BtOcJ4IPARQSpqWaYr8gD/0lQkf/FJn3tWNp9cDd5P09fYuG6AkccVvet5vXU6xVfm3ASOOJwPHOcQ6lDdY+hk/ZxtFWtm9DauV+g3uJ06iicPA6ZGRAHnJJFk6owexB6V3Tb3oeuVy1YvFVVLwUQkS8BTeuTUG9FvpNNZ6ZPSy8VjfaNciJ7gtnc7GnPJZwErgQboBStK1jUs4/DAkuJpTahqR9ceHMpOPZqcBFvZfqm0eJ0zyCcPBG8ZvHniQBO8HzPUOu+BxM51YLFfBW+cHFv6pur6iNEZIoVBaO9o/MX/XJW9KxgNjeLg4NbuDCVfr5qUOw+c6D6rujFF/rXZl+bT4GVzmxyfo7J5ycB5gvnYW0QjPTxqeU2oalC+ijMHgI0+Pj5/wV7H2ztKp9OKrSbyKgWLN4mIicKvxegv/CxAKqqwy0dXZdZv2Y9CSdBzs8tqFkUCYIjDsX/Si/IqoqP
z3DP8JLBotyF3sHhYOogY31jrJAVCz4/6SQhCdv3bp/fx1HcIFhLYGmGWo9PDXWmU9yEVrpfIH0UZl9nvq654o3QN9Lai/Zyur1Wmt1k56B3GLIzgLMwoKkGM6Xe4aDwbVrrjpHTWpRzx/GGCtwichfwIeCQql5S7fOLlgwWqhqR27bu0J/oZ9vF24ILbpIFASPn5xARVvauxHEcpjPT+Hqq46qIMNY7xmBikPVr1ld8j3IX+tnsLCgcO3mMhCQY7Rtd8JqkkySTz7D74G7Wr1lftghf/LzFgaUZF/Di8amDvYkFhx3lPJ87d76MqtIz9nT4rVBKN6HlUjBzEJCgT8LgmiBVBbVftOvRjEL7wEroGw7GNXsoCA5KIeYJDL0hmEV1VwuO9rpjpGKLcu4Y+SxwD3ccr/eYiO3AF4C/Ws6Lqh6ratpr8/mbgeCCm8ln8NXHEYeEk+DXL/t1VJW/eOEvOHvF2WS97PyFscftIZPPcNMlN1W8GFdabZXXfOGa5jB1corh3mEcWdgJxlef6cx0xSJ8UTGw7HptF0dPHm34Al7t+NTB3gSf2/UVVqx5jMGegbbMdCoq3S/w3c/Bni8FheCeoaBQXKpVq6OaUWhfdzW4PZAcgP4xyJQsne0tWTrbXS042q3YonyWhQ0Fk4XHIbjoL5uqPi4i65b7uq4KFnEoyIoIWy7YwsZzN5Ydq6oiImzfuz1IS+HMb8qbuGxiPtiUU+lCn5AEgiAi821EVvQsTEc54jDaO7pkEb7IV5+d/76TXQd3NZyqqnh8aoHr5kn1/yMr/MGaZjpt0TMAY+uCi23vEpnaVqyOqrXb61KzgsUptb6S76F7W3C0T5B6qtqinDtGvtZoSmo5uiJYxLFja3+iv+xeiWrBZCmVLvQDyQGQYCUVcNrn5PwcCSfB+jXr2X1w95JF+KLv/fh7rOhd0fAFvNLxqUUnEz9A8aiUMS1NobV170kzLtr1qLXba7VZQS19nUyrtLpFeV26Ili0uyDbDpWCyVIqrbYq7uM4nD4MLFxhlfNzpHNpJi6boD/RX7UIn/Nz5P08SSdZ9nmofgH302nmnnoKb3qaNbPQ5/cTdJg5nS9zgJJwKwf7YgqtrZp10V6ucoX20vesdVZgLV5/1oAAAB7mSURBVDjC1OoW5XXp+GBRKU8PIaYpQrLUhX60bxTP9ziaOYqqciJzYr5WUpreqlaET+fSvPOsd/LUwaeWHEu5C7iqcvyBBzhy191oPg+ex5mOw++eyPLty9/Pc29992lLTTU/gCSD2kUlxRRaWzXrol2PZs4KqvV1Mq3Q6hblden4YFFrQbYbWmRUu9An3SS3r7+dNUNrlkxvLVWEn7hsgpV9K9nz+tI9vMpdwI8/8ABTf/5FnIEBnL5Tf16jmuJn9jwMwHMXX31qzJ6Plz2PsVV9eJrDkfIznWIKre3CSuXYrCDuWtqivNBN/D3AahE5APy+qn6p2us6PljUWpBte5oiJNUu9LXUb6rVTdL5dNVU1eILuJ9Oc+Suu3EGBpBFxeyx4eACd91z3+TJN7+dk05yfp/FJ667mOTIrzH5QuWZTjGF1nZhX7RtVhBPdxxPFZbHLtmivN7itqr+Qj2v6/hgUW1XNISUpmix0ry/OzrK4JVX4vT3N1QgX6xS3aSWVNXiC/jcU0+h+fyCGUWpseEBVojPHWdnOHjRxawc7OGq81bR3+OiuhZk6QAYqk64aHfSgU7xELkW5R0fLGopyIaWpmiBcnn/4klxq26+iZGtWxGRugrkUPvy41pmMKW86elgrEsQ3+PiIZ93/dTahY83MQCaRTrpQKc4CTbcbeeOka+xqEV5O5fLlur4YFHPXW6cVcr7ay7H1J9/EYDRG25Y9tdd7vLj5V7A3dFRcKssNXVdEmNjFZ+uNwCaJVifqXAFgSES/fNqOvwoKuo9/Gjxha70Ljeq+yzq4afT/OvP34i47ml5fwgChnoe5/7N/cs+YrTcoUywMOA2svy4
lWM3dar1QKebHoH/fNpSVNXF+iLT8TML6J40RbW8vyST+JkMqV27lnXkaDuWHzv9/ay6+ab5WVFpwNBcDj+VYvWtt1igaKdqfaacRNA76n9eC4meaKSorLbSMl0RLIo6PU1RS94fzyN/7Niyvm67lh+PbN0KwJG77sbPZBbUW1bfesv881FWaWFBLFXrM5U+CiePwdCZC9uahJGistpKy3VVsOh0zcj7l9Ou5cciwugNNzC8aROpXbvIHztGYmyMgQ0bIn/BrXVhQaxUO9Bp9hDgwOKbiFZ21a2kQ2srl95z6Wktyl/4+AuNtig/m6Dj7BrAByZV9XPVXmfBooMMXnklkkiguVzFvL8kEgxs2LCsr9vu5cdOf/+y0mRR0KqFBaFaqmVJZgZQcGThiXnqB+dc+HnIZ+GVf4K3Xt/acTbjDI+IufSeSyu2KL/0nks/C9zzwsdfqLfgnAd+S1WfEZEVwNMi8i1V/ZelXuQs9aSpLp1P8/iBx/nGj77B4wceJ52v1Pur9Yp5fz+VQnMLN34W8/6rbr5p2XfppcuPy+m05cfLtdSGQkkmcQYGgtRaOry/G3UptizJzgYBo1Q+G6R6Bt9wqv166ihM/QCOHwjO8khNwSP/Ozzz5SBN1CrF2kq5IjwEj/v5YGNkfBRblOeBE8DRwq/5wuMfr/cLq+prqvpM4fczwD7gTdVeZzOLOkW1k20r8v7dtvx4uVq1sCASKrUsEYI0VfFAp9RRmD0YBA6nEDx8gnMxWp0GasYZHhFSSD1VbVF+6T2Xfq0JKal1wBXA0s3csGBRt6h2sm1V3n+5m+y6SasWFkRCpZYlb3o73L2p0D3XhbnXg0BRvEHSwtF6/aPBhbqVaaCw2sG3TltalIvIEPB14JOqeqLa51uwqEMcOtk2O+/fLcuP69GqhQWRUq5lSbGrLhoEh+KMonhO91AhReU6rTkVsCisdvCt0/IW5SKSJAgU96rqA7W8xoJFHbq5k22rlh/H4RTDSlq1sCDyiimqb38mCA4eC8/p7l956nNbmQYKsx18a7S0RbkE+fEvAftU9Y9rfZ0FizpYJ9vmiWrtZzm6dkNhMUXVNwyPfCqoTxTP6V585nir00CddbJfS1uUA+8Cfhl4QUSeKzz2e6q6ZErLgkUdurWTbStEtfazXJHfUNjKnc3nvw/6RpZuC9LqNFDY7eCb6IWPv5AqLI9dskV5vcVtVf0OdbQesWBRh27rZNsqcaj91CqyGwrbsbM5SmmgTmgHH7AW5Z3AlpI2plifePK1J5nJzrCqv3x6Io61n8htKGzXzubOSgOFrrDhbvul91x6WovyRpfL1suCRZ1sKenyLa5PzGRnOJE9wfHscVb3rWa07/S0ndV+GtDOnc0dlAaKkkJgiESLcgsWdYrqUtJ0Ps13//O7PHPoGQTh8jMv591venckZjmL6xOKMpubRRAOpw8DnBYwrPbTgGpdY91k85e0dk4ayCxiwaJBUelkq6o8+PKD3PnsnUxnpymeU/KVfV9hrG+MT1z+CTZfEN7KonL1icHkYDAeDYLC1MkphnuHcQoraaz206AO29lswmXBokPseGUHf/r0nzKTn8HFRZwgKCjKsZPH+JNn/gQkvJVF5famOOKwum81h9OHcTQIEKlciqGeIav9NEPUdjbbWROxZsGiA6Tzae568S5m87NBoCg93hTBdVxmc7Pc/eLdoa0sqrQ3ZaR3BICpk1N4vsdMdgbAaj/NEJWdzXbWRN32XXjRaS3KL3ppX6P9oPqAx4FeghjwN6r6+9VeZ8GiA+w+uJu53BxA2TSTFP6bzc2GtrKo0t4UEWG0b5Th3mGOpI/wwXM/yJVvvDL02k9HiMqS1g49a6KV9l14UcUW5fsuvOizwD0XvbSv3la+GeC9qjpbaPvxHRH5e1V9cqkXWbDoANOZafL+0t0BFCXv50NbWVRtb4qnHit6VnDbFbdZkGimsJe0duBZE21SbFE+y8KGgsnC4wDb6/nC
GhQ0Z0u+XhKoGngsWHSA0d5REs7Sf5SCkHASoa0ssr0pIQl7SWsYK7JirpB6qtqifN+FF32t3pSUiLjA08D5wJ+pqrUo7wbr16xnMDnIdCZYBbU4FaWF/4aSQ6GuLLK9KSEKa0mrrciqR8tblKuqB1wuIqPAgyJyiaq+uNRrLFh0gP5EPzdfcjN/sudPgtVQeqrIrSieH6R4brrkplDv3Nu1N8VPp5l76im86Wnc0VEGr7yy85r4xUXUVmTFQ8tblBep6rSIPAZsBCxYdIPN529GVU/ts/CDFKQgp/ZZROTOvVV7U1SV4w88wJG77kbz+QXN/FbdfBMjW7dGvoNtx4nKiqx4aXWL8jOAXCFQ9APvA/6o2ussWHQIEWHrW7bywfM+yHf/87s8e+hZAK448wre9aZ3dUUt4PgDD8y3CS894lRzOab+/IsAjN5wQ1jD605RWZEVL61uUf5G4J5C3cIBvqaqD1V7kWgrD1JvsvHxcd2zZ0/YwzAR5KfT/OvP34i4bsUDiNTzOPdv7reUVLst2GeRX7giq7v2WdT8Te678KJtVGlRftFL+7Y3c3DVhDKzEJEbgTuAi4ANqmoRwDRk7qmn0Hx+wYyilCST+JkMqV27otUVthuEvSIrnqxFecGLwFbgf4b0/qaKuBWJvenpoEax5Cd55I8da8+AzOmsyWDNChvutu+78KLTWpQ3uoO7XqEEC1XdB+V3G5twxbVI7I6Ogltl1Y3rkhgba8+AjGmCQmCwFuW1EJEJYALgnHPOCXk0nS+uReLBK69EEgk0l6tYs5BEgoENG0IYXZ2s8Z6JkJYFCxH5R2BNmac+rap/W+vXUdVJYBKCAneThmfK8NNpjtx1N87AwGkXXEkmcQYGOHLX3Qxv2hS5lJTT38+qm29i6s+/iPT1odnsqVlRTw968iSrb70lcuMuyxrvmQhqWbBQ1fe16mub1oh7kXh4yxZSTz/N8b97KLjgqgYXVRFGrv8Qw1vCac++bNZ4z0SQE/YATHTEvUh84sEHSe3aTc+b30zyrLNIrllD8qyz6Hnzm0nt2s2JBx8Me4jV1dp4LxtKjdN0sVCChYhsEZEDwDuAh0Xk0TDGYRaKc5G4NIXm9PbirliBOzqKu2IFTm/vfArNT1dqtxMRxcZ75XY7Q/C4nw+WoBrTRqEEC1V9UFXXqmqvqr5BVT8QxjjMQoNXXgmOQ/7YMbzpabyZGdT355+PcpG4mEIr1lrU9/FmZua/D1wXzedJ7doV8kirsMZ7JqIivxrKnK4VeyBUlROPPII/O4t39GiQ63ccBHBXr8YZHETT6bqKxO3Ys1FMoalqECCmpoIG/YW6hQAyOEju6NGmvm/TWeM9E1EWLGKklXsgiktmE2NjOMkk+SNHQBX1ffKHDuGOjXHmJ3+Tka1bIzHexYopNG96Gu/wYdRxFnxtVUVnZsjsewmiXOe2xnsmoixYRFClO/FW7YFYvGTWHRvDGRnBn5ubv1t3ensZ3rRpWRf3du7ZKKbQvKmp0wIFBE151HGYe+IJ/PQno7uE1hrvmYiyYBEhS92Jr/ylj3H0y19uyR6IYr6f3h5mczN4vofruAwODeJIUNbyZmeXtWS2WXs2ak1hOf39DL373Rz7yleQxKK/1qrg+yTPOAP1/cgu/Z0X9lGoxpRhwSJClroTP/yFPwMguabcPsfG9kB4x6aZO3mCQ/mDKBqcxivBWRir+lcz1ju67CWzje7ZqCeF1ffWt+IMr0BTafD9BfssEmecgTs2hnf8eGSX/s6zxnsmgixYRES1O3FJJslPTZE480zEqbCIrc49ELvS++jLzyCJRDCTKFyDVZWp1GEAht3EspbMNrpno54Uljs2ijs8grPmjfMpNFwXZ3Dw1M8sokt/y7LGeyZCbFNeRCxe+rmY9PQABBfBSuq4EKbzaSYT38N3HVx/4XMigiMOx2YPg+ssa8lsI3s2ak1hLd4zUewPhect2GdRDBRRXvprTNRZsIiIanfi
zuAgQNDzqIx6L4S7D+4mnfD5/nvOoCfj43gL22+5PvRmfI5svXpZtZDSxn7LHW/VwJlMlt0zUewP5adSp72v5nL4qRSrbr4pusVtYyLMgkVEVLsTF8fBGRlB87mmXginM9N46vHc+pU8/r4zcDyl56RHXypPz0kPx1O+ed0Ir7/nrcv6uktduP1MhvyRI/RfcTlzTz2Fn07jp9PMPPYY0zt2MPfkkxWDzLwKKayRrVtZfestqOfhzc7iHT+ONzuLeh6rb71lWUt/jTGnWM0iImppse2OjLDqv/wyR//6y/iZzIKib70XwtHeUVxxQYTnNqxi7+VjrPvRLANzHqlBl1d/YohjpHhf3/Lz/MXxHLnrbvxMBs3n8U+exD9xAnd4mNSTTzG3azdaSCc5AwPgefiZDF4hELijo+WX61ZIYYkIozfcwPCmTaR27SJ/7BiJsTEGNmywGYUxDbAzuCNk+utfny/qlgaM4sxh9a23MHrDDfjpdNMuhOl8mo8+9FFcxyXpnB6kcn4Oz/e470P30Z+o7z2K4z3xzW8x+/89hjs6htPbC0D+2DHyhw4BkDzzTNyxMdT3yfzoR+D7JM4887SgYOdpm5iKdV95S0NFSK0pFKe/n6Frr2V082aGrr22oQtmf6KfbRdvI51Lk/MXpn5yfo50Ls22i7fVHSiK4x3YsIH0c8+RWLlqPlCo7+NNTQWzI9clf+QI6vuI45BcvRog2GS3qD+V1R6MaT9LQ0VIWCmUzedvBmD73u1k8hl89XHEIeEkmLhsYv75RpTbd+HPzQVbOoppJt/Hn5sLVjIVZhO5Q4fIHzmC09PTcMrNVGAn8pkaWLCIoOLMoV1EhC0XbGHjuRvZfXA305lpRntHWb9mfdUZRTqfruk1ZVd7eV6wca5IdcHnuGNjILDiZ95P74U/abWHZrMT+cwyWLAw8/oT/Vyz9pqaPjeVS3Hns3fy6KvBUSRJJ0nCSZBwEmy7eBubz9+8oDBdXO2lhdkDnoe/eBmwyOkrwhJJBt9xVbTbc8SVnchnlsGChVkWVWXHKzv4/DOf51jmGILM7/pe3bea3kQvk89PArDlglPtXQc2bMBPpci99lpQ5VMNWojn86gq4gYrsor7ScA20bVUrSfyXbLVUlIGsAK3WaYdr+zgi//8RY5nj5OQYCbhiIMgHE4fZi47R3+yn+17t5POn9phPfP3fx/0ePJ9tDCDENcFxwkCRi5HYtWqBbutrZDdQnYin1kmCxamZul8mu17t88vACxNMxVnGFMnp3DFJe/n2X1wN3CqfUdi9eqgt1XhnAz1vCBYFGYVJFzbRNcudiKfWSZLQ5ma7T64m7yfr/i8IKgqqVwKX32mM9PAqZVQ7tAQTk8PbslZGcVGf97sLCMf/jDJtWutkN0OdiKfWSYLFqZmxdYgrlS+yChKXvMkJMFo7yhw+koocRzcFSsWvE5USa5dy+jmxpfpmhrYiXxmmSwNZWpWbA0ymBxEJJhFLCYIKCScBOvXrAca60BrWqR4Il92NggMpYon8r3zN6y4bebZzMLUbP2a9SScBJ56rO5bzeH0YRw9dYSpFv4TkQW7vmvpe2WrnkIQ1xP5bBNhKCxYmJoVW4NMPj/JQHKAMziDqZNT+OoHYUKVlX0r+fXLfn3Bru9iB9pqfa+sRtFmcTuRzzYRhsqChVmW0tYgCSfB6r7V8z2lPrDuA/zXC38VfeYFju/92wVnZi/uQNuMjrmmSeJyIp9tIgyVdZ01dVnc5mP8DeNkv/H3Vc/MbmbHXNNFsimYfE+QIqtUkPfzMPFYNGdFgVhPe2xmYeqyuDVIaXv1pc7MbnffK9MhipsIkxVuLNxkUHd59TvxmCXFkK2GMg2r98xsY2pmmwhDZ8HCNKzeM7ONqZltIgydpaFMw8q2Hz/tk8qfmR03fjrN3FNP4U1PLyjgmxazTYShs2BhGtYNm+5UleMPPFC1gF9kQaXJipsIi6uhSgNGcRPhNZ+KcnE79ixY
mIZ1w6a74w88UFMBf7lBxSxDXDcRdghbOmuaonQ1VKVNd6M33BDiCOvnp9P868/fiLhuxWConse5f3M/Jx55pGN/DpGRTcVjE+HpYn2XYDML0xSdvOmu3PnhpSSZxM9kmH3iOzWtChvetMlSUo2IyybCDmPBwjSFiDB6ww0Mb9rUcZvuai3gp555uqagktq1y/aamNixYGGaqhM33dVawAe6ZlWY6T62z8KYKkoL+OUUC/j9b397x68KM93LgoUxVRS75vqp1GkBo/Ss8BVXX11TUInzqjDTvSwNZVpqccPB9WvWz59zESe1FPBFxFqxm45lS2dNS6gqO17Zwfa928n7+fnjWBNOgm0Xb2Pz+Ztjud+gWtdc22dhlhDrP/hQgoWIfBa4HsgCPwJuUtXpaq+zYBEfD778IJPPT9Kf7CfpnLrDzvk50rk0E5dNsOWCLSGOsLWsFbspw4LFst9U5P3ATlXNi8gfAajq71R7nQWLeEjn03z0oY/iOu6CQFGU83N4vsd9H7ovlikpY+oU62ARSoFbVb+pqvnCh08Ca8MYh2mN3Qd3k/fzZQMFQNJJkvfz7D64u80jM8bUKwoF7puB/1XpSRGZACYAzjnnnHaNqaO0u6nddGYaT5feb+Crz3SmaubRGBMRLQsWIvKPwJoyT31aVf+28DmfBvLAvZW+jqpOApMQpKFaMNSOFVaxdbR3FFeW3m/giMNo72jT39sY0xotCxaq+r6lnheRjwMfAn5a47QkK0Zq7ZTabOvXrCfhJMj5uYo1i4STYP2a9U1/b2NMa4RSsxCRjcDvAB9W1VQYY+h0YR512p/oZ9vF20jn0uT8hRvUiquhtl28zYrbxsRIWDu4vwCsAL4lIs+JyBdDGkfHCvuo083nb2bisgk832M2O8uJzAlms7N4vsfEZRNsPn9zS97XGNMaoRS4VfX8MN63m4R91KmIsOWCLWw8d2NH7OA2pttFYTWUaYGoHHXan+jnmrXXtPQ9jDGtZ40EO1StnVKtqZ0xphYWLDpUrZ1SrQWFMaYWlobqYJ181Kkxpr2s62wXsKZ2xkRCrHtD2cyiC3TiUafGmPaymoUxxpiqLFgYY4ypyoKFMcaYqixYGGOMqcqChTHGmKosWBhjjKnKls6aWGr36X/GdDsLFiZWwjr9z5huZ8HCxEpYp/8Z0+2sZmFiI8zT/4zpdhYsTE38dJqZxx5jescOZh57LJQLctin/xnTzSwNZZYUpRpB2Kf/GdPNLFiYJUWpRhCV0/+M6UaWhjIVRa1GYKf/GRMeCxZdbqlaRNRqBHb6nzHhsTRUl6qlFhHFGoGd/mdMOCxYdKlaahHuqlWRqxGICKM33MDwpk12+p8xbWTBogvVWot481//1XyNoFwqqt4aQTNaddjpf8a0lwWLLlSsRZTOKEpJMomfyXDyhRdYdfNN8zOQ0oAxXyP41V+p+cIfpWW4xpjlsWDRhZZTi6hcI3AZ2LCeI3/118FjNVz4o7QM1xizPBYsutBy9itUqhHkfvxjjnzprmDGUTKTqHThrzX1Nbxpk9UejIkgWzrbherZr1CsEYxu3szAhg0c/fK9y9p/EbVluMaY5bFg0YUa3a9Qz4U/istwjTG1szRUl2pkv0I9F35r1WFMvFmw6FKN7Feo58Jfmvpq5jJcY0x7WLDocvXsV6jnwl9MfS21DHf1rbdYcduYiLKahVm2emseI1u3svrWW1DPw5udxTt+HG92FvU8a9VhTMSJqoY9hpqNj4/rnj17wh6GobENdn46ba06TDeK9Y5TCxamIXbhN6ZmsQ4WVrMwDbEeTcZ0B6tZGGOMqcqChTHGmKosWBhjjKkqlGAhIp8RkedF5DkR+aaInBXGOIwxxtQmrJnFZ1X1MlW9HHgI+L9CGocxxpgahBIsVPVEyYeDQHzW7xpjTBcKbemsiPwB8F+A48B1S3zeBDABcM4557RncMYYYxZo2aY8EflHYE2Zpz6tqn9b8nm/C/Sp6u9X
+5q2Kc8YE2O2Ka8cVX1fjZ/6FeBhoGqwMMYYE46wVkNdUPLhh4GXwhiHMcaY2oRVs/hDEflJwAf+DbglpHEYY4ypQawaCYrIYYLgsthqYKrNw6lHXMYJ8RlrXMYJ8RlrXMYJ8RnrauAlVd0Y9kDqFatgUYmI7FHV8bDHUU1cxgnxGWtcxgnxGWtcxgnxGWtcxrkUa/dhjDGmKgsWxhhjquqUYDEZ9gBqFJdxQnzGGpdxQnzGGpdxQnzGGpdxVtQRNQtjjDGt1SkzC2OMMS1kwcIYY0xVHRMs4nJGhoh8VkReKoz1QREZDXtMlYjIjSKyV0R8EYncsj8R2SgiPxCRV0Tk9rDHU4mI3CUih0TkxbDHshQROVtEvi0i+wp/7r8Z9pjKEZE+EdklIv9cGOd/C3tMSxERV0SeFZGHwh5LIzomWBCfMzK+BVyiqpcBPwR+N+TxLOVFYCvweNgDWUxEXODPgA8CbwV+QUTeGu6oKtoOxGEzVh74LVW9CLgK+K8R/ZlmgPeq6tuAy4GNInJVyGNaym8C+8IeRKM6JljE5YwMVf2mquYLHz4JrA1zPEtR1X2q+oOwx1HBBuAVVd2vqlngPuDnQh5TWar6OHA07HFUo6qvqeozhd/PEFzg3hTuqE6ngdnCh8nC/5H89y4ia4GfBf4y7LE0qmOCBQRnZIjIfwAfI7ozi1I3A38f9iBi6k3Af5R8fIAIXtjiSkTWAVcAT4U7kvIKqZ3ngEPAt1Q1kuME/hT4bYI+eLEWq2AhIv8oIi+W+f/nAFT106p6NnAvcFtUx1n4nE8TTPvvDWuchXFUHWtElTsbIJJ3l3EjIkPA14FPLpqxR4aqeoWU81pgg4hcEvaYFhORDwGHVPXpsMfSDKGdlFePuJyRUW2cIvJx4EPAT2vIG12W8TONmgPA2SUfrwV+HNJYOoaIJAkCxb2q+kDY46lGVadF5DGCmlDUFhC8C/iwiGwC+oBhEfmyqv5SyOOqS6xmFkuJyxkZIrIR+B3gw6qaCns8MbYbuEBEzhWRHuCjwDdCHlOsiYgAXwL2qeofhz2eSkTkjOIqQhHpB95HBP+9q+rvqupaVV1H8PdzZ1wDBXRQsCA4I+NFEXkeeD/BCoQo+gKwAvhWYZnvF8MeUCUiskVEDgDvAB4WkUfDHlNRYZHAbcCjBIXYr6nq3nBHVZ6IfBX4PvCTInJARH4l7DFV8C7gl4H3Fv5uPle4K46aNwLfLvxb301Qs4j1stQ4sHYfxhhjquqkmYUxxpgWsWBhjDGmKgsWxhhjqrJgYYwxpioLFsYYY6qyYGG6goh4haWgL4rI/SIyUHh8jYjcJyI/EpF/EZFHROQthef+QUSm494t1JhmsGBhukVaVS9X1UuALHBLYRPag8BjqvoTqvpW4PeANxRe81mCfQfGdD0LFqYbPQGcD1wH5FR1fmOkqj6nqk8Ufv9PwEw4QzQmWixYmK4iIgmCMzBeAC4BOqLJmzGtZsHCdIv+QkvrPcC/E/RAMsbUKFZdZ41pQLrQ0nqeiOwFfj6k8RgTKzazMN1sJ9ArIr9WfEBE1ovItSGOyZhIsmBhulbhLJEtwM8Uls7uBe6gcC6GiDwB3A/8dKFb7AdCG6wxIbOus8YYY6qymYUxxpiqLFgYY4ypyoKFMcaYqixYGGOMqcqChTHGmKosWBhjjKnKgoUxxpiq/n9jPPDBLiPgsQAAAABJRU5ErkJggg==\n",
3551
      "text/plain": [
3552
       "<Figure size 402.375x360 with 1 Axes>"
3553
      ]
3554
     },
3555
     "metadata": {
3556
      "needs_background": "light"
3557
     },
3558
     "output_type": "display_data"
3559
    },
3560
    {
3561
     "data": {
3562
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAFgCAYAAABKY1XKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5Rc9XXo+e8+p6rf6ocksIwFFgQcMA+D0xL4ARjHsWXFOJIIHidOrgVJOrAuTrxWxgmJZybc65V1k/FMEhtnxbcTg0iMzTUxKA6QYCcyA36AJB4BFGGDFZIoRkgtqaXurlI9ztnzx6lqVbequqrrdc6p2h8WS+qqrq5ft6Szz2/v32//RFUxxhhjluKEPQBjjDHRZ8HCGGNMVRYsjDHGVGXBwhhjTFUWLIwxxlSVCHsAy7Fx40b9h3/4h7CHYYwx9ZCwB9CIWM0spqamwh6CMcZ0pVgFC2OMMeGwYGGMMaYqCxbGGGOqsmBhjDGmKgsWxhhjqrJgYYwxpioLFsYYY6qK1aa8TpHOenx//xTH5nKMDSZ5x3mr6e9xwx7WvKiPzxjTfhYs2khVuX/PASaf2E/e8/F8xXWEhOswcfV53Di+FpHwNnlGfXzGmPBYsGij+/cc4M6dLzPYm6A/mZx/POf53LnzZQA+sv7ssIYX+fEZY8ITWs1CRPpEZJeI/LOI7BWR/xbWWNohnfWYfGI/g70Jku7CH3vSdRjsTTD5xH7SWc/GZ4yJnDAL3Bngvar6NuByYKOIXBXieFrq+/unyHv+aRfioqTrkPd8ntx/pM0jC0R9fMaYcIWWhtLg8O/ZwofJwv8deyD4sbkcnr/0t+f5ytG5bJtGtFDUx2eMCVeoS2dFxBWR54BDwLdU9akwx9NKY4NJXGfp4rDrCCsHe9o0ooWiPj5jTLhCDRaq6qnq5cBaYIOIXLL4c0RkQkT2iMiew4cPt3+QTfKO81aTcB1ynl/2+Zznk3AdrjpvVZtHFoj6+Iwx4YrEpjxVnQYeAzaWeW5SVcdVdfyMM85o+9iapb/HZeLq85jL5E+7IOc8n1Q2z8TV54W2nyHq4zPGhCu0moWInAHkVHVaRPqB9wF/FNZ42uHG8bUATD6xn5O53IJ9DLddd8H88zY+Y0zUSFBnDuGNRS4D7gFcghnO11T1vy/1mvHxcd2zZ087htdS6azHk/uPcHQuy8rBHq46b1Wk7tijPj5jYirWO1pDCxb16JRgYYzpSrEOFpGoWRhjjIk2CxbGGGOqsmBhjDGmKgsWxhhjqrKus8a0gZ0RYuLOgoUxLWRnhJhOYcHCRFKn3Ik364yQTvl5mPiyYGEipZPuxGs9I+T6t51V8cLfST8PE29W4DaRUrwTTzjCir4kowM9rOhLknCEO3e+zP17DoQ9xJo144yQTvp5mHizYGEio9NO62v0jJBO+3mYeLNgYSKj007ra/SMkE77eZh4s5qFWSDMQmqnndZXekZIuQt+tTNCOu3nYeLNgoUBolFI7bTT+opnhBRXQ5UGjOIZIbddd0HFYNxpPw8TbxYsDNC8JZ6NaPROPIoaOSOkE38eJr4sWJimLPFshkbvxKNIRPjI+rO5/m1nLfuMkE78eZj4smBh5guppTOKUknX4WQux5P7j3DdhWe2dCydelpff49b18+uU38eJn4sWJhIFVIbuRPvRPbzMFFhwcJEspBa7514p7KfhwmbBYuYacXSViukGmOqsWARE61c2mqFVGNMNRYsYqLVS1utkGqMWYqoLl3YjJLx8XHds2dP2MNou3TW4/ovfIeEIxXTRHlf+bvb3t3w3X8661kh1ZjWiHV7YJtZxEA7l7ZaIdUYU441EoyBKC1tNcZ0J5tZtEgzVy1FcWmrMaa7WLBoslasWrKlrcaYsFkaqslacbJZcWnrXCZPzvMXPFdc2jpx9XmRLESnsx47X3qdrz99gJ0vvW4H9RgTUzaz
aKJWNuSL29LWpWZY2965jjeO9DKdyrf9zAxjTH0sWDRRK1ctxa1HUPl9IcrhmSx3fGMvI/0JBnoSbT8zwxhTH0tDNVE7Vi3197hcdd4qxgaTHJ3L8v39U5FL7VSaYR2dy3FkLoMrMJvxGO5LNpyiM8a0h80smqjVq5aicJpdLcrNsHxVpmYzuCKICJ7vM5vNM9yXbOuZGcaY+tjMoolKVy2V0+iqpVYUz1uh3AxrNpNHlflgprDgc5KuQ97zeXL/kXYO1RhTIwsWTdTKVUu1Fs+jkJIqN8PyfEU5FRwEyn6ObSw0JposWDTZjeNr+cR7LyDvKzMnc0ynssyczJH3taFVS8XUTrl9FhCtO/NyMyzXEaTQGkdVERGGehZmQW1joTHRZTWLJmvVqqU4tfxY3PLcFUFV8VXxCwHkDcN9OCUzC9tYaEy0WbBokWY35Itby48bx9eiqvy/3/oBx1I5fFVUwddgnKqnZhh2ZoYx0WfBIibi1vJDCqueehIubxh2AMF1hFzeZ2o2w6GZDOmcx0h/surGwlacDmiMWR4LFjERt9PsigX5oTIF+bHBHo6ns+Tyyu0fvJBr33Jm2XHHZamwMd3AgkWMxKnlx1K72R0RxgZ6mTmZozfhVgxwrT4d0BhTu9CChYicDfwVsAbwgUlV/VxY44mDOLX8aLQg38o+W8aY5QtzZpEHfktVnxGRFcDTIvItVf2XEMcUC3E4za7Rgnw7Twc0xlQX2j4LVX1NVZ8p/H4G2Ae8KazxmOZqdDd7nJYKG9MNIrEpT0TWAVcAT5V5bkJE9ojInsOHD7d7aKZOje5mj9tSYWM6XejBQkSGgK8Dn1TVE4ufV9VJVR1X1fEzzjij/QM0dWtkN3ur+2wZY5Yn1NVQIpIkCBT3quoDYY7FNF8jBfm4LRU2ptOFuRpKgC8B+1T1j8Mah2m9egvycVoqbEynE9Wli4gte2ORdwNPAC8QLJ0F+D1VfaTSa8bHx3XPnj3tGJ6JkHTWi/xSYWNqEOsdpKHNLFT1O8T8h2faIw5LhY3pdKEXuI0xxkSfBQtjjDFVWbAwxhhTlTUSNMaYcrIpePUJSB2FgZWw7mroGQh7VKGxYGGMMaVU4dl74XufBz8HvgeOC04S3vkbcMXHoAtb41uwMMaYUs/eC4//39AzBMn+U497ueBxgLf/UjhjC5EFi5iI6mlxUR2X6RDtTgVlU8GMomcI3EUdj91k8Pj3Pg+XbO26lJQFi4iL6mlxUR2X6RBhpYJefSJ4v9IZRSk3CfmT8Op34C3vb/77R5gFi4iL6mlxUR2X6RBhpYJSR4PAtBTfg9SR5r93xNnS2Qir9bS4dLbKX+4uGZfpELWmgrKp5r/3wMpgBrMUx4WB7ut2bMEiwoqnxS2+IBclXYe85/Pk/vbe5bR7XOmsx86XXufrTx9g50uvWxDqdMVU0OJAUeQmwc8HqaBmW3d1kOrycuWf93LgJGDdu5v/3hFnaagIi+ppce0aVyvrIlaYj7AwU0E9A0FNpJgCKw1YXg6yc3DNp7quuA0WLCItqqfFtWtcraiLWGE+BsJOBV3xseDX730+KGbPF9cTQaAoPt9lLFhEWOlpceVSPmGdFteOcdVaF7n+bWcta0ZghfkYKE0FlUtFFVNBb3o7/PDR5i+rFQmK55dsDVJdqSNBYFr37q6cURRZzSLCGj3HOs7jakVdxArzMVFMBWVnT68deDnIzMLa9XD3JviH2+Gx/xH8OvkeeObLwbLbZo3jLe+Hy38h+LWLAwXYzCLyonpaXKvH1Yq6SDEAlc4oSiVdh5O5HE/uP2LnZ4RtqVTQuVcHRXDbYd1WFiwirpFzrOM8rlbURaK6YMCUUSkV9Ka3BzMK22HddhYsYiKqp8W1alytqItEdcGAWUIxFVT0w0dth3VIrGZhIqkVdZHSAFROWAsGzDLYDuvQWLAwkXXj+Fo+8d4LyPvKzMkc06ksMydz5H2tqy4S1QUDZhnCXlbbxSwNZSKr
FXWRqC4YMDWqdVltF+6wbjXRZi0za4Px8XHds2dP2MMwHSCd9SK1YMAswzNfrr7DOpqroWK929NmFqYrRXXBgKmB7bAOhQUL03LWh8k0le2wDoUFC9My1ofJtNTiZbWmpSxYREwn3YVbHyZjOocFi4jotLvwVjUCNMaEw/ZZRETxLjzhCCv6kowO9LCiL0nCEe7c+TL37zkQ9hCXJaoHNxlj6mPBIgI6sRtqM/sw2Ul5xoTP0lAR0IndUJvRh6nTUnPGxJkFiwjoxG6ozWgEaAVyY6LD0lAR0IndUBvtw9SJqbm2yqaCDq3PfTX4NZsKe0Qm5mxmEQHl7sJ9VWYzeTxfUZS+hBu7bqiN9GHqxNRcW6jCs/cGu5v9XMnu5mRw+twVHws2tRmzTBYsIqB4Fx6kXFxmTnpMzWZQBV99QFg5mOTv/vnHscrTN9IIsBNTc23x7L2n+ibZKXKmiSxYRETxLvv/+eYPODKXwUEQAddxWD3Uw1BvIrZ5+nr6MHViaq7lsqlgRmGnyJkWsJpFRIgI17/tLIb6ErxxpJ83jPTxxtF+zj9jiJWDvfQk3K7K09tBRXV49Ykg9VSudTcEj/v5oJ+SMctkwSJCvr9/Ct9XxgZ6GBvoYbgviVNyd91NG9nsoKI62ClypoUsDRUhlqdfyA4qWiY7Rc60kAWLCLE8/UKtOCmvo9kpcqaFQg0WInIX8CHgkKpeEuZYoqAZG9k6kR1UVKOegWB5bLVT5KJc3M6mgtpL6mgwU1p3dbTH20XCnllsB74A/FXI44iEhUtoF25GK+bpb7vuArurNpXF9RQ52x8SeaEGC1V9XETWhTmGqIlbnr6Tzt/oCHE9Rc72h0SeqC5dUG35AIJg8VClNJSITAATAOecc85P/du//Vv7BheidNaLdJ7emvyZpsmmYPI9weynUq3Fz8PEY9EOeNXF+h9E2GmoqlR1EpgEGB8fDzeytVHU8/TW5M80TXF/SOmMopSbDFJqr37HjlENke2zMMtmTf5MU9n+kFiwYGGWrfQUPJ8sqcQLzCafJJV4AZ9sV20eNE1g+0NiIeyls18F3gOsFpEDwO+r6pfCHJOp7thcjrzvM5P8Hid6voXiofgIDtLrMpz9GfKpn+qazYOmQbY/JBbCXg31C2G+v6nP2GCS/MAu5noeRejF4dQmQSXP8Z5H6B3wWDl4aYijNLHRCftDuoClocyyXXHOICcHdoL2IovuN4QEaC8nB3Zy+Tn2j9vU6IqPwTW/Hax6ysxAejr41c9He39IF4n8aigTPS8cfYbhfocTKRdl4V4pVVB1GRkIPu+agWtCG6eJkbjuD+kiFizMsk1nphnoEfrcXg7PZvBLmsKKwJkrenESaaYz0+EN0sRTz4Atj40oCxZm2UZ7R3Edl5G+HkYGepjL5Ml7SsIVBnsTOAKz2SyjvaNhD9UY0yQWLMyyrV+znoSTIOfnSDpJVvQt/GuU83MknATr16wPaYTGmGazArc5TTqf5vEDj/ONH32Dxw88TjqfXvB8f6KfbRdvI51Lk/NzC57L+TnSuTTbLt5Gf6LCjlxjTOzYzMLMU1V2vLKD7Xu3k/fzeOrhikvCSbDt4m1sPn/zfL+nzedvBmD73u1k8hl89XHEIeEkmLhsYv55Y0xnCL2R4HKMj4/rnj17wh5Gx3rw5QeZfH6S/mQ/Saek31NhtjBx2QRbLtiy4DXpfJrdB3cznZlmtHeU9WvW24yiHnaOQzeIdSNBCxYGCC76H33oo7iOuyBQFOX8HJ7vcd+H7rNg0Ex2jkM3ifUfpNUsDAC7D+4m7+fLBgqApJMk7+fZfXB3m0fW4YrnODgJ6B2G/rHgVycRPP7svWGP0BjAahYtEcfUzHRmGk+X7vzpq297J5opmwpmFItbXEDwcc9Q8PwlWy0lZUJnwaKJllMgjprR3lFcWbrzpyOO7Z1opmrnODhusJN55x/AeddYHcOEyoJFE+14Zcd8gbgv0Tf/eM7PMfn8JMBp
BeKoWLx3YjHbO9EClc5xUIX0UZg9BOrB81+FHz5idQwTKqtZNEk6n2b73u2nrSSCIN/fn+xn+97tp+1ZiArbO1FBNgU/fBSe+2rwazbVvK9d6RyH9FGYfT0ICOJC34jVMUzobGbRJMUCcemMolTSSZLJZ9h9cDfXrI1mcz3bO1GiHauUyp3joH4wo5DCfZwQ1C7A6hgmVBYslqlS8boTCsQiwpYLtrDx3I2xK9A3XXGVUs/QwpqClwseh6BLaiPKneOQmYGgl28QOIbWnAocYOdRm9BYsKhRteL1yr6VHVMg7k/01zT7ieOqr5q0c5VS8ZyG730+CAKZmSBIiASBYmDl6a+x86hbwzZGLsmCRY2qFa9vuvimrikQx3nVV02qrVJq5t394nMc9j8GL34dhs5cOKMoZedRN5dtjKyJFbhrUEvx+t6X7uUXL/zFrigQFwOn67gM9Qwx0jvCUM8QruMy+fwkO17ZEfYQG1NplVKpZt/dF89xeO//ERS0K72/nUfdfLYxsiYWLGpQ6+7mNw6+kYnLJvB8j9nsLCcyJ5jNzuL5XscUiOO+6qsmlVYplWrV3X2xjpGdDQJDqeJ51O/8DUuPNEutKcdmroKLKUtD1aDm4nV2uuMLxJ2w6quqcquUSrX67n5xHWM+LZKw86ibrZ0pxyYTkc8AU6r6ucLHfwC8rqqfb8X7WbCowXJ3N9daII6jTlj1VVW5VUpFxbv7az7Vurt7O4+6fcJIOTbPl4AHgM+JiAN8FNjQqjezYFED2918Ste0BYnC3b2dR916YaYcG6Sqr4rIERG5AngD8KyqtiyqWbCoQXF38+Tzk5Ck4lkPnZJqWkrXBE67u+8OYaccG/eXwDZgDXBXK9/ICtw12nz+5o4vXtei69qCFO/uL/+F4FcLFJ0l/gsKHgQ2AuuBR1v5RjazqJHtbj7F2oKYjhKFlGOdVDUrIt8GplWrFBMbZCflmbo1uoM7nfX4/v4pjs3lGBtM8o7zVtPfUyV/bEyrZFOtTjk2fWdfobD9DHCjqr7c7K9fymYWpm71rvpSVe7fc4DJJ/aT93w8X3EdIeE6TFx9HjeOr433DnATTzFbUCAibwUeAh5sdaAACxYmBPfvOcCdO19msDdBf7JksYDnc+fO4O/8R9afXfPXsxmK6Uaq+i/Aee16PwsWpq3SWY/JJ/Yz2Jsg6S5cX5F0HQZ7E0w+sZ/r33ZW1Qu+zVCMaR9bDWXa6vv7p8h7/mmBoijpOuQ9nyf3V18uXpyhJBxhRV+S0YEeVvQlSTjCnTtf5v49B5o9fGO6VtWZhYgMA2eo6o8WPX6Zqj7fspGZjnRsLofnL72owvOVo3PZJT+n2gxl1PH57r1/y3X/dhZ9q1cyeOWVOP3dtWotMqz1d0dYMliIyEeAPwUOiUgS2KaquwtPbwfe3trhmU4zNpjEdZZODbmOsHKwZ8nPKc5QSmseAKgy/tL3uPbZR8HLc/DZBP19PUgiwaqbb2Jk61ZLTbXLclt/W1CJtGozi98DfkpVXxORDcBfi8jvqeoDtGAZmOl87zhvNQnXIVchFZXzfBKuw1XnLd1eodIMZfyl7/HTex4ik+glm+wnN9DL0EAPmssx9edfBGD0hhua882YpdV62qCdJ1HRutsfHgCuA1YBR4Bvv/qHP9twC1wR2Qh8DnCBv1TVP6z2mmrBwlXV1wBUdZeIXAc8JCJrCc5+NGZZ+ntcJq4+b341VGnAyHk+qWye2667oGpxu9wMJZnLcO2zj5JJ9OK7CUSVhBN8fUkmcQYGOHLX3Qxv2mQpqVZbzmmDLz7Q+iNsY2bd7Q8L8HHgU0CS4FqdB3Lrbn/4s8A9r/7hz9Z1DRYRF/gz4GeAA8BuEflGYXVVRdUK3DMi8hPFDwqB4z3AzwEX1zNQY24cX8sn3nsBeV+ZOZljOpVl5mSOvK/cdt0F3Di+turXKJ2hFP3Ef/4Ax/fw3QSKIsBg76mgI8kkms+T2rWrFd+WKVVs/V2u
3xIEj/t5eOWf7DyJ8j4O/J8EAeIEcLTwa77w+Mcb+NobgFdUdb+qZoH7CK7pS6o2s7iVRekmVZ0pTGE+Uu9ITXcTET6y/myuf9tZPLn/CEfnsqwc7OGq81bVvD+i3Axl4OQsjvooiu/DGcO9OIvTF55H/tixFnxXZoFaW3//+/dje55EqxRST58CZoFFDavIFR7/1LrbH/5anSmpNwH/UfLxAeDKai+qFizmCFrfvrLo8auAJ5czOtMcjbbYiJL+HpfrLjyz7tcXZyCTT+znZC7HYekjj6AaBIqVA2WK5K5LYmys7vc0Naq19TcS5/MkWuU6gtRTpeMmc0A/QZbnkTq+frkCUNWUVrVg8acERe7F0oXnrq8+LtMMqsqOV3awfe928n4eTz1ccUk4CbZdvI3N52/uulU+i2cox46ezcqXHmGgvwe35/RAobkckkgwsKFl58OYolpbf59zJfywyvUuoudJtNAqaqgnA6vr/PoHgNIWCWuBH1d7UbWaxbpyeylUdQ+wbjmjK0dENorID0TkFRG5vdGv18l2vLKDyecncR2XoZ4hRnpHGOoZwnVcJp+fZMcrO8IeYmiKM5St7zyfc279NUin0dzC2bvmcvipFKtuvsmK2+1Qa+vv8993KqiUE/3zJFrhCEFtYikeMFXn198NXCAi54pID8EJe9+o9qJq0av8QcuBhv7F1VuR70bpfJrte7fTn+yfP3DIV5+53Nz8Ead3vXgXG8/dGNuUVLOMbN0KwJG77sbPZMDzwHWRRILVt94y/3xLddJ+gUa+l1paf4uEe4RtNH2bINWU5PSaBSWPP1bPF1fVvIjcRnD+hQvcpap7q71uyRblIvJVYKeq/sWix38FeL+q/m/1DLbwNd4B3KGqHyh8/LuFb+R/VHpNN7UoL61NHJg5wN/96O8Y7h1GVTmeOc7UySlK/+wU5aM/+VF+e/1vd106qhw/nSa1axf5Y8dIjI0xsGFD62cUnbRfoJnfS7XW3wveK78wqMTt57a0mr+Jdbc/vI1g1dPiIncSGAI+8+of/uz2Zg6ummozi08CD4rIx4CnC4+NAz3Algbfu6aKvIhMABMA55xzToNvGX3lahMn8yc5njmOrz6qytTJKZxCBjFYIiqoKg/vf5i3jL2FLRc0+kcTf05/P0PXXtveN611E1ocNPN7qdb6246wLeeewq+fIsjiuASppxzwmZLn26amw48Km/EuKXy4V1V3NvzGIjcCH1DVXy18/MvABlX9RKXXdMPM4sGXH2Ty+ckFKafZ7Cyvzb4WLBxRH0Hm009FijLaO8rKvpXc96H7uj4d1XbZFEy+J7gbrlTQ9fMw8Vj0L4Cl34vjBnUHPx983DMU3PnH5XuJlmVPjwrLaN9DUMyeAh5rxg7uelTrDdUH3AKcD7wAfElVqxVealVXRb6TlatNAAwkBxBH8H1/PkjIor93gjCXm8MVl90Hd9d1KJFpQHETWifsFyh+L7k0zL0epImKRGDwDcH3E4fvJeYKgaGe5bFNVy0NdQ/BtOcJ4IPARQSpqWaYr8gD/0lQkf/FJn3tWNp9cDd5P09fYuG6AkccVvet5vXU6xVfm3ASOOJwPHOcQ6lDdY+hk/ZxtFWtm9DauV+g3uJ06iicPA6ZGRAHnJJFk6owexB6V3Tb3oeuVy1YvFVVLwUQkS8BTeuTUG9FvpNNZ6ZPSy8VjfaNciJ7gtnc7GnPJZwErgQboBStK1jUs4/DAkuJpTahqR9ceHMpOPZqcBFvZfqm0eJ0zyCcPBG8ZvHniQBO8HzPUOu+BxM51YLFfBW+cHFv6pur6iNEZIoVBaO9o/MX/XJW9KxgNjeLg4NbuDCVfr5qUOw+c6D6rujFF/rXZl+bT4GVzmxyfo7J5ycB5gvnYW0QjPTxqeU2oalC+ijMHgI0+Pj5/wV7H2ztKp9OKrSbyKgWLN4mIicKvxegv/CxAKqqwy0dXZdZv2Y9CSdBzs8tqFkUCYIjDsX/Si/IqoqP
z3DP8JLBotyF3sHhYOogY31jrJAVCz4/6SQhCdv3bp/fx1HcIFhLYGmGWo9PDXWmU9yEVrpfIH0UZl9nvq654o3QN9Lai/Zyur1Wmt1k56B3GLIzgLMwoKkGM6Xe4aDwbVrrjpHTWpRzx/GGCtwichfwIeCQql5S7fOLlgwWqhqR27bu0J/oZ9vF24ILbpIFASPn5xARVvauxHEcpjPT+Hqq46qIMNY7xmBikPVr1ld8j3IX+tnsLCgcO3mMhCQY7Rtd8JqkkySTz7D74G7Wr1lftghf/LzFgaUZF/Di8amDvYkFhx3lPJ87d76MqtIz9nT4rVBKN6HlUjBzEJCgT8LgmiBVBbVftOvRjEL7wEroGw7GNXsoCA5KIeYJDL0hmEV1VwuO9rpjpGKLcu4Y+SxwD3ccr/eYiO3AF4C/Ws6Lqh6ratpr8/mbgeCCm8ln8NXHEYeEk+DXL/t1VJW/eOEvOHvF2WS97PyFscftIZPPcNMlN1W8GFdabZXXfOGa5jB1corh3mEcWdgJxlef6cx0xSJ8UTGw7HptF0dPHm34Al7t+NTB3gSf2/UVVqx5jMGegbbMdCoq3S/w3c/Bni8FheCeoaBQXKpVq6OaUWhfdzW4PZAcgP4xyJQsne0tWTrbXS042q3YonyWhQ0Fk4XHIbjoL5uqPi4i65b7uq4KFnEoyIoIWy7YwsZzN5Ydq6oiImzfuz1IS+HMb8qbuGxiPtiUU+lCn5AEgiAi821EVvQsTEc54jDaO7pkEb7IV5+d/76TXQd3NZyqqnh8aoHr5kn1/yMr/MGaZjpt0TMAY+uCi23vEpnaVqyOqrXb61KzgsUptb6S76F7W3C0T5B6qtqinDtGvtZoSmo5uiJYxLFja3+iv+xeiWrBZCmVLvQDyQGQYCUVcNrn5PwcCSfB+jXr2X1w95JF+KLv/fh7rOhd0fAFvNLxqUUnEz9A8aiUMS1NobV170kzLtr1qLXba7VZQS19nUyrtLpFeV26Ili0uyDbDpWCyVIqrbYq7uM4nD4MLFxhlfNzpHNpJi6boD/RX7UIn/Nz5P08SSdZ9nmofgH302nmnnoKb3qaNbPQ5/cTdJg5nS9zgJJwKwf7YgqtrZp10V6ucoX20vesdVZgLV5/1oAAAB7mSURBVDjC1OoW5XXp+GBRKU8PIaYpQrLUhX60bxTP9ziaOYqqciJzYr5WUpreqlaET+fSvPOsd/LUwaeWHEu5C7iqcvyBBzhy191oPg+ex5mOw++eyPLty9/Pc29992lLTTU/gCSD2kUlxRRaWzXrol2PZs4KqvV1Mq3Q6hblden4YFFrQbYbWmRUu9An3SS3r7+dNUNrlkxvLVWEn7hsgpV9K9nz+tI9vMpdwI8/8ABTf/5FnIEBnL5Tf16jmuJn9jwMwHMXX31qzJ6Plz2PsVV9eJrDkfIznWIKre3CSuXYrCDuWtqivNBN/D3AahE5APy+qn6p2us6PljUWpBte5oiJNUu9LXUb6rVTdL5dNVU1eILuJ9Oc+Suu3EGBpBFxeyx4eACd91z3+TJN7+dk05yfp/FJ667mOTIrzH5QuWZTjGF1nZhX7RtVhBPdxxPFZbHLtmivN7itqr+Qj2v6/hgUW1XNISUpmix0ry/OzrK4JVX4vT3N1QgX6xS3aSWVNXiC/jcU0+h+fyCGUWpseEBVojPHWdnOHjRxawc7OGq81bR3+OiuhZk6QAYqk64aHfSgU7xELkW5R0fLGopyIaWpmiBcnn/4klxq26+iZGtWxGRugrkUPvy41pmMKW86elgrEsQ3+PiIZ93/dTahY83MQCaRTrpQKc4CTbcbeeOka+xqEV5O5fLlur4YFHPXW6cVcr7ay7H1J9/EYDRG25Y9tdd7vLj5V7A3dFRcKssNXVdEmNjFZ+uNwCaJVifqXAFgSES/fNqOvwoKuo9/Gjxha70Ljeq+yzq4afT/OvP34i47ml5fwgChnoe5/7N/cs+YrTcoUywMOA2svy4
lWM3dar1QKebHoH/fNpSVNXF+iLT8TML6J40RbW8vyST+JkMqV27lnXkaDuWHzv9/ay6+ab5WVFpwNBcDj+VYvWtt1igaKdqfaacRNA76n9eC4meaKSorLbSMl0RLIo6PU1RS94fzyN/7Niyvm67lh+PbN0KwJG77sbPZBbUW1bfesv881FWaWFBLFXrM5U+CiePwdCZC9uahJGistpKy3VVsOh0zcj7l9Ou5cciwugNNzC8aROpXbvIHztGYmyMgQ0bIn/BrXVhQaxUO9Bp9hDgwOKbiFZ21a2kQ2srl95z6Wktyl/4+AuNtig/m6Dj7BrAByZV9XPVXmfBooMMXnklkkiguVzFvL8kEgxs2LCsr9vu5cdOf/+y0mRR0KqFBaFaqmVJZgZQcGThiXnqB+dc+HnIZ+GVf4K3Xt/acTbjDI+IufSeSyu2KL/0nks/C9zzwsdfqLfgnAd+S1WfEZEVwNMi8i1V/ZelXuQs9aSpLp1P8/iBx/nGj77B4wceJ52v1Pur9Yp5fz+VQnMLN34W8/6rbr5p2XfppcuPy+m05cfLtdSGQkkmcQYGgtRaOry/G3UptizJzgYBo1Q+G6R6Bt9wqv166ihM/QCOHwjO8khNwSP/Ozzz5SBN1CrF2kq5IjwEj/v5YGNkfBRblOeBE8DRwq/5wuMfr/cLq+prqvpM4fczwD7gTdVeZzOLOkW1k20r8v7dtvx4uVq1sCASKrUsEYI0VfFAp9RRmD0YBA6nEDx8gnMxWp0GasYZHhFSSD1VbVF+6T2Xfq0JKal1wBXA0s3csGBRt6h2sm1V3n+5m+y6SasWFkRCpZYlb3o73L2p0D3XhbnXg0BRvEHSwtF6/aPBhbqVaaCw2sG3TltalIvIEPB14JOqeqLa51uwqEMcOtk2O+/fLcuP69GqhQWRUq5lSbGrLhoEh+KMonhO91AhReU6rTkVsCisdvCt0/IW5SKSJAgU96rqA7W8xoJFHbq5k22rlh/H4RTDSlq1sCDyiimqb38mCA4eC8/p7l956nNbmQYKsx18a7S0RbkE+fEvAftU9Y9rfZ0FizpYJ9vmiWrtZzm6dkNhMUXVNwyPfCqoTxTP6V585nir00CddbJfS1uUA+8Cfhl4QUSeKzz2e6q6ZErLgkUdurWTbStEtfazXJHfUNjKnc3nvw/6RpZuC9LqNFDY7eCb6IWPv5AqLI9dskV5vcVtVf0OdbQesWBRh27rZNsqcaj91CqyGwrbsbM5SmmgTmgHH7AW5Z3AlpI2plifePK1J5nJzrCqv3x6Io61n8htKGzXzubOSgOFrrDhbvul91x6WovyRpfL1suCRZ1sKenyLa5PzGRnOJE9wfHscVb3rWa07/S0ndV+GtDOnc0dlAaKkkJgiESLcgsWdYrqUtJ0Ps13//O7PHPoGQTh8jMv591venckZjmL6xOKMpubRRAOpw8DnBYwrPbTgGpdY91k85e0dk4ayCxiwaJBUelkq6o8+PKD3PnsnUxnpymeU/KVfV9hrG+MT1z+CTZfEN7KonL1icHkYDAeDYLC1MkphnuHcQoraaz206AO29lswmXBokPseGUHf/r0nzKTn8HFRZwgKCjKsZPH+JNn/gQkvJVF5famOOKwum81h9OHcTQIEKlciqGeIav9NEPUdjbbWROxZsGiA6Tzae568S5m87NBoCg93hTBdVxmc7Pc/eLdoa0sqrQ3ZaR3BICpk1N4vsdMdgbAaj/NEJWdzXbWRN32XXjRaS3KL3ppX6P9oPqAx4FeghjwN6r6+9VeZ8GiA+w+uJu53BxA2TSTFP6bzc2GtrKo0t4UEWG0b5Th3mGOpI/wwXM/yJVvvDL02k9HiMqS1g49a6KV9l14UcUW5fsuvOizwD0XvbSv3la+GeC9qjpbaPvxHRH5e1V9cqkXWbDoANOZafL+0t0BFCXv50NbWVRtb4qnHit6VnDbFbdZkGimsJe0duBZE21SbFE+y8KGgsnC4wDb6/nC
GhQ0Z0u+XhKoGngsWHSA0d5REs7Sf5SCkHASoa0ssr0pIQl7SWsYK7JirpB6qtqifN+FF32t3pSUiLjA08D5wJ+pqrUo7wbr16xnMDnIdCZYBbU4FaWF/4aSQ6GuLLK9KSEKa0mrrciqR8tblKuqB1wuIqPAgyJyiaq+uNRrLFh0gP5EPzdfcjN/sudPgtVQeqrIrSieH6R4brrkplDv3Nu1N8VPp5l76im86Wnc0VEGr7yy85r4xUXUVmTFQ8tblBep6rSIPAZsBCxYdIPN529GVU/ts/CDFKQgp/ZZROTOvVV7U1SV4w88wJG77kbz+QXN/FbdfBMjW7dGvoNtx4nKiqx4aXWL8jOAXCFQ9APvA/6o2ussWHQIEWHrW7bywfM+yHf/87s8e+hZAK448wre9aZ3dUUt4PgDD8y3CS894lRzOab+/IsAjN5wQ1jD605RWZEVL61uUf5G4J5C3cIBvqaqD1V7kWgrD1JvsvHxcd2zZ0/YwzAR5KfT/OvP34i4bsUDiNTzOPdv7reUVLst2GeRX7giq7v2WdT8Te678KJtVGlRftFL+7Y3c3DVhDKzEJEbgTuAi4ANqmoRwDRk7qmn0Hx+wYyilCST+JkMqV27otUVthuEvSIrnqxFecGLwFbgf4b0/qaKuBWJvenpoEax5Cd55I8da8+AzOmsyWDNChvutu+78KLTWpQ3uoO7XqEEC1XdB+V3G5twxbVI7I6Ogltl1Y3rkhgba8+AjGmCQmCwFuW1EJEJYALgnHPOCXk0nS+uReLBK69EEgk0l6tYs5BEgoENG0IYXZ2s8Z6JkJYFCxH5R2BNmac+rap/W+vXUdVJYBKCAneThmfK8NNpjtx1N87AwGkXXEkmcQYGOHLX3Qxv2hS5lJTT38+qm29i6s+/iPT1odnsqVlRTw968iSrb70lcuMuyxrvmQhqWbBQ1fe16mub1oh7kXh4yxZSTz/N8b97KLjgqgYXVRFGrv8Qw1vCac++bNZ4z0SQE/YATHTEvUh84sEHSe3aTc+b30zyrLNIrllD8qyz6Hnzm0nt2s2JBx8Me4jV1dp4LxtKjdN0sVCChYhsEZEDwDuAh0Xk0TDGYRaKc5G4NIXm9PbirliBOzqKu2IFTm/vfArNT1dqtxMRxcZ75XY7Q/C4nw+WoBrTRqEEC1V9UFXXqmqvqr5BVT8QxjjMQoNXXgmOQ/7YMbzpabyZGdT355+PcpG4mEIr1lrU9/FmZua/D1wXzedJ7doV8kirsMZ7JqIivxrKnK4VeyBUlROPPII/O4t39GiQ63ccBHBXr8YZHETT6bqKxO3Ys1FMoalqECCmpoIG/YW6hQAyOEju6NGmvm/TWeM9E1EWLGKklXsgiktmE2NjOMkk+SNHQBX1ffKHDuGOjXHmJ3+Tka1bIzHexYopNG96Gu/wYdRxFnxtVUVnZsjsewmiXOe2xnsmoixYRFClO/FW7YFYvGTWHRvDGRnBn5ubv1t3ensZ3rRpWRf3du7ZKKbQvKmp0wIFBE151HGYe+IJ/PQno7uE1hrvmYiyYBEhS92Jr/ylj3H0y19uyR6IYr6f3h5mczN4vofruAwODeJIUNbyZmeXtWS2WXs2ak1hOf39DL373Rz7yleQxKK/1qrg+yTPOAP1/cgu/Z0X9lGoxpRhwSJClroTP/yFPwMguabcPsfG9kB4x6aZO3mCQ/mDKBqcxivBWRir+lcz1ju67CWzje7ZqCeF1ffWt+IMr0BTafD9BfssEmecgTs2hnf8eGSX/s6zxnsmgixYRES1O3FJJslPTZE480zEqbCIrc49ELvS++jLzyCJRDCTKFyDVZWp1GEAht3EspbMNrpno54Uljs2ijs8grPmjfMpNFwXZ3Dw1M8sokt/y7LGeyZCbFNeRCxe+rmY9PQABBfBSuq4EKbzaSYT38N3HVx/4XMigiMOx2YPg+ssa8lsI3s2ak1hLd4zUewPhect2GdRDBRRXvprTNRZsIiIanfi
zuAgQNDzqIx6L4S7D+4mnfD5/nvOoCfj43gL22+5PvRmfI5svXpZtZDSxn7LHW/VwJlMlt0zUewP5adSp72v5nL4qRSrbr4pusVtYyLMgkVEVLsTF8fBGRlB87mmXginM9N46vHc+pU8/r4zcDyl56RHXypPz0kPx1O+ed0Ir7/nrcv6uktduP1MhvyRI/RfcTlzTz2Fn07jp9PMPPYY0zt2MPfkkxWDzLwKKayRrVtZfestqOfhzc7iHT+ONzuLeh6rb71lWUt/jTGnWM0iImppse2OjLDqv/wyR//6y/iZzIKib70XwtHeUVxxQYTnNqxi7+VjrPvRLANzHqlBl1d/YohjpHhf3/Lz/MXxHLnrbvxMBs3n8U+exD9xAnd4mNSTTzG3azdaSCc5AwPgefiZDF4hELijo+WX61ZIYYkIozfcwPCmTaR27SJ/7BiJsTEGNmywGYUxDbAzuCNk+utfny/qlgaM4sxh9a23MHrDDfjpdNMuhOl8mo8+9FFcxyXpnB6kcn4Oz/e470P30Z+o7z2K4z3xzW8x+/89hjs6htPbC0D+2DHyhw4BkDzzTNyxMdT3yfzoR+D7JM4887SgYOdpm5iKdV95S0NFSK0pFKe/n6Frr2V082aGrr22oQtmf6KfbRdvI51Lk/MXpn5yfo50Ls22i7fVHSiK4x3YsIH0c8+RWLlqPlCo7+NNTQWzI9clf+QI6vuI45BcvRog2GS3qD+V1R6MaT9LQ0VIWCmUzedvBmD73u1k8hl89XHEIeEkmLhsYv75RpTbd+HPzQVbOoppJt/Hn5sLVjIVZhO5Q4fIHzmC09PTcMrNVGAn8pkaWLCIoOLMoV1EhC0XbGHjuRvZfXA305lpRntHWb9mfdUZRTqfruk1ZVd7eV6wca5IdcHnuGNjILDiZ95P74U/abWHZrMT+cwyWLAw8/oT/Vyz9pqaPjeVS3Hns3fy6KvBUSRJJ0nCSZBwEmy7eBubz9+8oDBdXO2lhdkDnoe/eBmwyOkrwhJJBt9xVbTbc8SVnchnlsGChVkWVWXHKzv4/DOf51jmGILM7/pe3bea3kQvk89PArDlglPtXQc2bMBPpci99lpQ5VMNWojn86gq4gYrsor7ScA20bVUrSfyXbLVUlIGsAK3WaYdr+zgi//8RY5nj5OQYCbhiIMgHE4fZi47R3+yn+17t5POn9phPfP3fx/0ePJ9tDCDENcFxwkCRi5HYtWqBbutrZDdQnYin1kmCxamZul8mu17t88vACxNMxVnGFMnp3DFJe/n2X1wN3CqfUdi9eqgt1XhnAz1vCBYFGYVJFzbRNcudiKfWSZLQ5ma7T64m7yfr/i8IKgqqVwKX32mM9PAqZVQ7tAQTk8PbslZGcVGf97sLCMf/jDJtWutkN0OdiKfWSYLFqZmxdYgrlS+yChKXvMkJMFo7yhw+koocRzcFSsWvE5USa5dy+jmxpfpmhrYiXxmmSwNZWpWbA0ymBxEJJhFLCYIKCScBOvXrAca60BrWqR4Il92NggMpYon8r3zN6y4bebZzMLUbP2a9SScBJ56rO5bzeH0YRw9dYSpFv4TkQW7vmvpe2WrnkIQ1xP5bBNhKCxYmJoVW4NMPj/JQHKAMziDqZNT+OoHYUKVlX0r+fXLfn3Bru9iB9pqfa+sRtFmcTuRzzYRhsqChVmW0tYgCSfB6r7V8z2lPrDuA/zXC38VfeYFju/92wVnZi/uQNuMjrmmSeJyIp9tIgyVdZ01dVnc5mP8DeNkv/H3Vc/MbmbHXNNFsimYfE+QIqtUkPfzMPFYNGdFgVhPe2xmYeqyuDVIaXv1pc7MbnffK9MhipsIkxVuLNxkUHd59TvxmCXFkK2GMg2r98xsY2pmmwhDZ8HCNKzeM7ONqZltIgydpaFMw8q2Hz/tk8qfmR03fjrN3FNP4U1PLyjgmxazTYShs2BhGtYNm+5UleMPPFC1gF9kQaXJipsIi6uhSgNGcRPhNZ+KcnE79ixY
mIZ1w6a74w88UFMBf7lBxSxDXDcRdghbOmuaonQ1VKVNd6M33BDiCOvnp9P868/fiLhuxWConse5f3M/Jx55pGN/DpGRTcVjE+HpYn2XYDML0xSdvOmu3PnhpSSZxM9kmH3iOzWtChvetMlSUo2IyybCDmPBwjSFiDB6ww0Mb9rUcZvuai3gp555uqagktq1y/aamNixYGGaqhM33dVawAe6ZlWY6T62z8KYKkoL+OUUC/j9b397x68KM93LgoUxVRS75vqp1GkBo/Ss8BVXX11TUInzqjDTvSwNZVpqccPB9WvWz59zESe1FPBFxFqxm45lS2dNS6gqO17Zwfa928n7+fnjWBNOgm0Xb2Pz+Ztjud+gWtdc22dhlhDrP/hQgoWIfBa4HsgCPwJuUtXpaq+zYBEfD778IJPPT9Kf7CfpnLrDzvk50rk0E5dNsOWCLSGOsLWsFbspw4LFst9U5P3ATlXNi8gfAajq71R7nQWLeEjn03z0oY/iOu6CQFGU83N4vsd9H7ovlikpY+oU62ARSoFbVb+pqvnCh08Ca8MYh2mN3Qd3k/fzZQMFQNJJkvfz7D64u80jM8bUKwoF7puB/1XpSRGZACYAzjnnnHaNqaO0u6nddGYaT5feb+Crz3SmaubRGBMRLQsWIvKPwJoyT31aVf+28DmfBvLAvZW+jqpOApMQpKFaMNSOFVaxdbR3FFeW3m/giMNo72jT39sY0xotCxaq+r6lnheRjwMfAn5a47QkK0Zq7ZTabOvXrCfhJMj5uYo1i4STYP2a9U1/b2NMa4RSsxCRjcDvAB9W1VQYY+h0YR512p/oZ9vF20jn0uT8hRvUiquhtl28zYrbxsRIWDu4vwCsAL4lIs+JyBdDGkfHCvuo083nb2bisgk832M2O8uJzAlms7N4vsfEZRNsPn9zS97XGNMaoRS4VfX8MN63m4R91KmIsOWCLWw8d2NH7OA2pttFYTWUaYGoHHXan+jnmrXXtPQ9jDGtZ40EO1StnVKtqZ0xphYWLDpUrZ1SrQWFMaYWlobqYJ181Kkxpr2s62wXsKZ2xkRCrHtD2cyiC3TiUafGmPaymoUxxpiqLFgYY4ypyoKFMcaYqixYGGOMqcqChTHGmKosWBhjjKnKls6aWGr36X/GdDsLFiZWwjr9z5huZ8HCxEpYp/8Z0+2sZmFiI8zT/4zpdhYsTE38dJqZxx5jescOZh57LJQLctin/xnTzSwNZZYUpRpB2Kf/GdPNLFiYJUWpRhCV0/+M6UaWhjIVRa1GYKf/GRMeCxZdbqlaRNRqBHb6nzHhsTRUl6qlFhHFGoGd/mdMOCxYdKlaahHuqlWRqxGICKM33MDwpk12+p8xbWTBogvVWot481//1XyNoFwqqt4aQTNaddjpf8a0lwWLLlSsRZTOKEpJMomfyXDyhRdYdfNN8zOQ0oAxXyP41V+p+cIfpWW4xpjlsWDRhZZTi6hcI3AZ2LCeI3/118FjNVz4o7QM1xizPBYsutBy9itUqhHkfvxjjnzprmDGUTKTqHThrzX1Nbxpk9UejIkgWzrbherZr1CsEYxu3szAhg0c/fK9y9p/EbVluMaY5bFg0YUa3a9Qz4U/istwjTG1szRUl2pkv0I9F35r1WFMvFmw6FKN7Feo58Jfmvpq5jJcY0x7WLDocvXsV6jnwl9MfS21DHf1rbdYcduYiLKahVm2emseI1u3svrWW1DPw5udxTt+HG92FvU8a9VhTMSJqoY9hpqNj4/rnj17wh6GobENdn46ba06TDeK9Y5TCxamIXbhN6ZmsQ4WVrMwDbEeTcZ0B6tZGGOMqcqChTHGmKosWBhjjKkqlGAhIp8RkedF5DkR+aaInBXGOIwxxtQmrJnFZ1X1MlW9HHgI+L9CGocxxpgahBIsVPVEyYeDQHzW7xpjTBcKbemsiPwB8F+A48B1S3zeBDABcM4557RncMYYYxZo2aY8EflHYE2Zpz6tqn9b8nm/C/Sp6u9X
+5q2Kc8YE2O2Ka8cVX1fjZ/6FeBhoGqwMMYYE46wVkNdUPLhh4GXwhiHMcaY2oRVs/hDEflJwAf+DbglpHEYY4ypQawaCYrIYYLgsthqYKrNw6lHXMYJ8RlrXMYJ8RlrXMYJ8RnrauAlVd0Y9kDqFatgUYmI7FHV8bDHUU1cxgnxGWtcxgnxGWtcxgnxGWtcxrkUa/dhjDGmKgsWxhhjquqUYDEZ9gBqFJdxQnzGGpdxQnzGGpdxQnzGGpdxVtQRNQtjjDGt1SkzC2OMMS1kwcIYY0xVHRMs4nJGhoh8VkReKoz1QREZDXtMlYjIjSKyV0R8EYncsj8R2SgiPxCRV0Tk9rDHU4mI3CUih0TkxbDHshQROVtEvi0i+wp/7r8Z9pjKEZE+EdklIv9cGOd/C3tMSxERV0SeFZGHwh5LIzomWBCfMzK+BVyiqpcBPwR+N+TxLOVFYCvweNgDWUxEXODPgA8CbwV+QUTeGu6oKtoOxGEzVh74LVW9CLgK+K8R/ZlmgPeq6tuAy4GNInJVyGNaym8C+8IeRKM6JljE5YwMVf2mquYLHz4JrA1zPEtR1X2q+oOwx1HBBuAVVd2vqlngPuDnQh5TWar6OHA07HFUo6qvqeozhd/PEFzg3hTuqE6ngdnCh8nC/5H89y4ia4GfBf4y7LE0qmOCBQRnZIjIfwAfI7ozi1I3A38f9iBi6k3Af5R8fIAIXtjiSkTWAVcAT4U7kvIKqZ3ngEPAt1Q1kuME/hT4bYI+eLEWq2AhIv8oIi+W+f/nAFT106p6NnAvcFtUx1n4nE8TTPvvDWuchXFUHWtElTsbIJJ3l3EjIkPA14FPLpqxR4aqeoWU81pgg4hcEvaYFhORDwGHVPXpsMfSDKGdlFePuJyRUW2cIvJx4EPAT2vIG12W8TONmgPA2SUfrwV+HNJYOoaIJAkCxb2q+kDY46lGVadF5DGCmlDUFhC8C/iwiGwC+oBhEfmyqv5SyOOqS6xmFkuJyxkZIrIR+B3gw6qaCns8MbYbuEBEzhWRHuCjwDdCHlOsiYgAXwL2qeofhz2eSkTkjOIqQhHpB95HBP+9q+rvqupaVV1H8PdzZ1wDBXRQsCA4I+NFEXkeeD/BCoQo+gKwAvhWYZnvF8MeUCUiskVEDgDvAB4WkUfDHlNRYZHAbcCjBIXYr6nq3nBHVZ6IfBX4PvCTInJARH4l7DFV8C7gl4H3Fv5uPle4K46aNwLfLvxb301Qs4j1stQ4sHYfxhhjquqkmYUxxpgWsWBhjDGmKgsWxhhjqrJgYYwxpioLFsYYY6qyYGG6goh4haWgL4rI/SIyUHh8jYjcJyI/EpF/EZFHROQthef+QUSm494t1JhmsGBhukVaVS9X1UuALHBLYRPag8BjqvoTqvpW4PeANxRe81mCfQfGdD0LFqYbPQGcD1wH5FR1fmOkqj6nqk8Ufv9PwEw4QzQmWixYmK4iIgmCMzBeAC4BOqLJmzGtZsHCdIv+QkvrPcC/E/RAMsbUKFZdZ41pQLrQ0nqeiOwFfj6k8RgTKzazMN1sJ9ArIr9WfEBE1ovItSGOyZhIsmBhulbhLJEtwM8Uls7uBe6gcC6GiDwB3A/8dKFb7AdCG6wxIbOus8YYY6qymYUxxpiqLFgYY4ypyoKFMcaYqixYGGOMqcqChTHGmKosWBhjjKnKgoUxxpiq/n9jPPDBLiPgsQAAAABJRU5ErkJggg==\n",
3563
      "text/plain": [
3564
       "<Figure size 402.375x360 with 1 Axes>"
3565
      ]
3566
     },
3567
     "metadata": {
3568
      "needs_background": "light"
3569
     },
3570
     "output_type": "display_data"
3571
    }
3572
   ],
3573
   "source": [
3574
    "do_KmeansPCA()"
3575
   ]
3576
  },
3577
  {
3578
   "cell_type": "code",
3579
   "execution_count": 76,
3580
   "metadata": {},
3581
   "outputs": [
3582
    {
3583
     "data": {
3584
      "text/plain": [
3585
       "(tensor([[1.0000, 0.0000, 1.0000, 0.9998]], grad_fn=<SigmoidBackward>),\n",
3586
       " tensor([[0., 0., 0., 0.]], grad_fn=<CumprodBackward>),\n",
3587
       " tensor([[2]]),\n",
3588
       " None,\n",
3589
       " None)"
3590
      ]
3591
     },
3592
     "execution_count": 76,
3593
     "metadata": {},
3594
     "output_type": "execute_result"
3595
    }
3596
   ],
3597
   "source": [
3598
    "model.forward(x_path=x_path)"
3599
   ]
3600
  },
3601
  {
3602
   "cell_type": "code",
3603
   "execution_count": 69,
3604
   "metadata": {},
3605
   "outputs": [
3606
    {
3607
     "ename": "SyntaxError",
3608
     "evalue": "invalid syntax (<ipython-input-69-c543913fa78f>, line 1)",
3609
     "output_type": "error",
3610
     "traceback": [
3611
      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-69-c543913fa78f>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    import ..models\u001b[0m\n\u001b[0m           ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
3612
     ]
3613
    }
3614
   ],
3615
   "source": [
3616
    "import ..models"
3617
   ]
3618
  },
3619
  {
3620
   "cell_type": "code",
3621
   "execution_count": 63,
3622
   "metadata": {},
3623
   "outputs": [],
3624
   "source": [
3625
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.FloatTensor)"
3626
   ]
3627
  },
3628
  {
3629
   "cell_type": "code",
3630
   "execution_count": 65,
3631
   "metadata": {},
3632
   "outputs": [
3633
    {
3634
     "ename": "NameError",
3635
     "evalue": "name 'MultiheadAttention' is not defined",
3636
     "output_type": "error",
3637
     "traceback": [
3638
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
3639
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
3640
      "\u001b[0;32m<ipython-input-65-f85a99af33ee>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMM_CoAttn_Transformer_Surv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0momic_sizes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msig_sizes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
3641
      "\u001b[0;32m<ipython-input-62-9e5f322e30a0>\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, omic_sizes, n_classes, model_size_wsi, model_size_omic, dropout)\u001b[0m\n\u001b[1;32m     28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     29\u001b[0m         \u001b[0;31m### Multihead Attention\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoattn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultiheadAttention\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membed_dim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_heads\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     32\u001b[0m         \u001b[0;31m### Transformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
3642
      "\u001b[0;31mNameError\u001b[0m: name 'MultiheadAttention' is not defined"
3643
     ]
3644
    }
3645
   ],
3646
   "source": [
3647
    "self = MM_CoAttn_Transformer_Surv(omic_sizes=sig_sizes)"
3648
   ]
3649
  },
3650
  {
3651
   "cell_type": "code",
3652
   "execution_count": 52,
3653
   "metadata": {},
3654
   "outputs": [
3655
    {
3656
     "ename": "NameError",
3657
     "evalue": "name 'sig_size' is not defined",
3658
     "output_type": "error",
3659
     "traceback": [
3660
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
3661
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
3662
      "\u001b[0;32m<ipython-input-52-097a03ed0c40>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMM_CoAttn_Surv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig_sizes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msig_sizes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mx_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m500\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1024\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0msig_feats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msize\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msig_sizes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mx_path\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mattention_net\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
3663
      "\u001b[0;32m<ipython-input-43-4469ba9e1eea>\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sig_sizes, n_classes, model_size_wsi, model_size_omic, dropout)\u001b[0m\n\u001b[1;32m     19\u001b[0m         \u001b[0mhidden\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize_dict_omic\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodel_size_omic\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m         \u001b[0msig_networks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m         \u001b[0;32mfor\u001b[0m \u001b[0minput_dim\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msig_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m             \u001b[0mfc_omic\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mSNN_Block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_dim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhidden\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
3664
      "\u001b[0;31mNameError\u001b[0m: name 'sig_size' is not defined"
3665
     ]
3666
    }
3667
   ],
3668
   "source": [
3669
    "self = MM_CoAttn_Surv(sig_sizes=sig_sizes)\n",
3670
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.FloatTensor)\n",
3671
    "sig_feats = [torch.randint(10, size=(size,)).type(torch.FloatTensor) for size in sig_sizes]\n",
3672
    "\n",
3673
    "x_path = self.attention_net(x_path).unsqueeze(1)\n",
3674
    "x_omic = torch.stack([self.sig_networks[idx].forward(sig_feat) for idx, sig_feat in enumerate(sig_feats)]).unsqueeze(1)\n",
3675
    "\n",
3676
    "out, attention_weights = self.coattn(x_omic, x_path, x_path)\n",
3677
    "out = self.transformer(out)\n",
3678
    "out = self.conv(out.squeeze(1).T.unsqueeze(0))\n",
3679
    "#out = self.classifier(out.squeeze(0).squeeze(1))"
3680
   ]
3681
  },
3682
  {
3683
   "cell_type": "code",
3684
   "execution_count": 471,
3685
   "metadata": {},
3686
   "outputs": [
3687
    {
3688
     "data": {
3689
      "text/plain": [
3690
       "torch.Size([1, 256, 1])"
3691
      ]
3692
     },
3693
     "execution_count": 471,
3694
     "metadata": {},
3695
     "output_type": "execute_result"
3696
    }
3697
   ],
3698
   "source": [
3699
    "out.shape"
3700
   ]
3701
  },
3702
  {
3703
   "cell_type": "code",
3704
   "execution_count": 472,
3705
   "metadata": {},
3706
   "outputs": [
3707
    {
3708
     "data": {
3709
      "text/plain": [
3710
       "tensor([[[ 0.5998,  1.9873, -1.1435,  ..., -0.0048,  0.2963,  1.1112]],\n",
3711
       "\n",
3712
       "        [[-0.4201, -0.1456,  0.2057,  ..., -0.2175,  0.4188,  0.4702]],\n",
3713
       "\n",
3714
       "        [[ 1.0294,  3.1634,  0.4595,  ...,  1.2059,  0.5845,  1.4114]],\n",
3715
       "\n",
3716
       "        [[-1.1435, -1.1435, -1.1435,  ...,  0.1951, -0.4378,  0.2051]],\n",
3717
       "\n",
3718
       "        [[ 0.9948,  1.1596,  2.1419,  ..., -0.1225,  1.3597, -0.3037]],\n",
3719
       "\n",
3720
       "        [[ 0.4019, -1.1435, -0.1522,  ..., -0.2058,  0.0351, -1.1435]]],\n",
3721
       "       grad_fn=<UnsqueezeBackward0>)"
3722
      ]
3723
     },
3724
     "execution_count": 472,
3725
     "metadata": {},
3726
     "output_type": "execute_result"
3727
    }
3728
   ],
3729
   "source": [
3730
    "x_omic"
3731
   ]
3732
  },
3733
  {
3734
   "cell_type": "code",
3735
   "execution_count": 474,
3736
   "metadata": {},
3737
   "outputs": [],
3738
   "source": [
3739
    "self = MM_CoAttn_Surv(sig_sizes=sig_sizes)\n",
3740
    "x_path = torch.randint(10, size=(500, 1024)).type(torch.FloatTensor)\n",
3741
    "sig_feats = [torch.randint(10, size=(size,)).type(torch.FloatTensor) for size in sig_sizes]\n",
3742
    "\n",
3743
    "x_path = self.attention_net(x_path).unsqueeze(1)\n",
3744
    "x_omic = torch.stack([self.sig_networks[idx].forward(sig_feat) for idx, sig_feat in enumerate(sig_feats)]).unsqueeze(1)\n",
3745
    "out, attention_weights = self.coattn(x_omic, x_path, x_path)\n",
3746
    "\n",
3747
    "out = self.transformer(out)\n"
3748
   ]
3749
  },
3750
  {
3751
   "cell_type": "code",
3752
   "execution_count": 491,
3753
   "metadata": {},
3754
   "outputs": [
3755
    {
3756
     "data": {
3757
      "text/plain": [
3758
       "torch.Size([1536])"
3759
      ]
3760
     },
3761
     "execution_count": 491,
3762
     "metadata": {},
3763
     "output_type": "execute_result"
3764
    }
3765
   ],
3766
   "source": [
3767
    "torch.cat([self.sig_networks[idx].forward(sig_feat) for idx, sig_feat in enumerate(sig_feats)]).shape"
3768
   ]
3769
  },
3770
  {
3771
   "cell_type": "code",
3772
   "execution_count": 484,
3773
   "metadata": {},
3774
   "outputs": [
3775
    {
3776
     "data": {
3777
      "text/plain": [
3778
       "torch.Size([6, 1, 512])"
3779
      ]
3780
     },
3781
     "execution_count": 484,
3782
     "metadata": {},
3783
     "output_type": "execute_result"
3784
    }
3785
   ],
3786
   "source": [
3787
    "torch.cat([out, out], axis=2).shape"
3788
   ]
3789
  },
3790
  {
3791
   "cell_type": "code",
3792
   "execution_count": 455,
3793
   "metadata": {},
3794
   "outputs": [
3795
    {
3796
     "data": {
3797
      "text/plain": [
3798
       "torch.Size([6, 1, 256])"
3799
      ]
3800
     },
3801
     "execution_count": 455,
3802
     "metadata": {},
3803
     "output_type": "execute_result"
3804
    }
3805
   ],
3806
   "source": [
3807
    "out.shape"
3808
   ]
3809
  },
3810
  {
3811
   "cell_type": "code",
3812
   "execution_count": 452,
3813
   "metadata": {},
3814
   "outputs": [
3815
    {
3816
     "data": {
3817
      "text/plain": [
3818
       "torch.Size([6, 1, 256])"
3819
      ]
3820
     },
3821
     "execution_count": 452,
3822
     "metadata": {},
3823
     "output_type": "execute_result"
3824
    }
3825
   ],
3826
   "source": [
3827
    "out.shape"
3828
   ]
3829
  },
3830
  {
3831
   "cell_type": "code",
3832
   "execution_count": null,
3833
   "metadata": {},
3834
   "outputs": [],
3835
   "source": []
3836
  },
3837
  {
3838
   "cell_type": "code",
3839
   "execution_count": 423,
3840
   "metadata": {},
3841
   "outputs": [
3842
    {
3843
     "data": {
3844
      "text/plain": [
3845
       "torch.Size([1, 8, 6, 500])"
3846
      ]
3847
     },
3848
     "execution_count": 423,
3849
     "metadata": {},
3850
     "output_type": "execute_result"
3851
    }
3852
   ],
3853
   "source": [
3854
    "attention_weights.shape"
3855
   ]
3856
  },
3857
  {
3858
   "cell_type": "code",
3859
   "execution_count": 415,
3860
   "metadata": {},
3861
   "outputs": [
3862
    {
3863
     "data": {
3864
      "text/plain": [
3865
       "tensor([[[0.0018, 0.0020, 0.0012,  ..., 0.0016, 0.0025, 0.0031],\n",
3866
       "         [0.0026, 0.0015, 0.0016,  ..., 0.0021, 0.0021, 0.0016],\n",
3867
       "         [0.0019, 0.0014, 0.0011,  ..., 0.0020, 0.0013, 0.0025],\n",
3868
       "         [0.0016, 0.0013, 0.0023,  ..., 0.0009, 0.0015, 0.0027],\n",
3869
       "         [0.0015, 0.0013, 0.0023,  ..., 0.0026, 0.0019, 0.0026],\n",
3870
       "         [0.0013, 0.0019, 0.0025,  ..., 0.0022, 0.0020, 0.0021]]],\n",
3871
       "       grad_fn=<DivBackward0>)"
3872
      ]
3873
     },
3874
     "execution_count": 415,
3875
     "metadata": {},
3876
     "output_type": "execute_result"
3877
    }
3878
   ],
3879
   "source": [
3880
    "attention_weights_0"
3881
   ]
3882
  },
3883
  {
3884
   "cell_type": "code",
3885
   "execution_count": 416,
3886
   "metadata": {},
3887
   "outputs": [
3888
    {
3889
     "data": {
3890
      "text/plain": [
3891
       "tensor([[[0.0018, 0.0020, 0.0012,  ..., 0.0016, 0.0025, 0.0031],\n",
3892
       "         [0.0026, 0.0015, 0.0016,  ..., 0.0021, 0.0021, 0.0016],\n",
3893
       "         [0.0019, 0.0014, 0.0011,  ..., 0.0020, 0.0013, 0.0025],\n",
3894
       "         [0.0016, 0.0013, 0.0023,  ..., 0.0009, 0.0015, 0.0027],\n",
3895
       "         [0.0015, 0.0013, 0.0023,  ..., 0.0026, 0.0019, 0.0026],\n",
3896
       "         [0.0013, 0.0019, 0.0025,  ..., 0.0022, 0.0020, 0.0021]]],\n",
3897
       "       grad_fn=<DivBackward0>)"
3898
      ]
3899
     },
3900
     "execution_count": 416,
3901
     "metadata": {},
3902
     "output_type": "execute_result"
3903
    }
3904
   ],
3905
   "source": [
3906
    "softmax(attention_weights_1, dim=-1).sum(axis=1) / 8"
3907
   ]
3908
  },
3909
  {
3910
   "cell_type": "code",
3911
   "execution_count": 411,
3912
   "metadata": {},
3913
   "outputs": [
3914
    {
3915
     "data": {
3916
      "text/plain": [
3917
       "torch.Size([1, 1, 6, 500])"
3918
      ]
3919
     },
3920
     "execution_count": 411,
3921
     "metadata": {},
3922
     "output_type": "execute_result"
3923
    }
3924
   ],
3925
   "source": [
3926
    "softmax(attention_weights_1, dim=-1).shape"
3927
   ]
3928
  },
3929
  {
3930
   "cell_type": "code",
3931
   "execution_count": 339,
3932
   "metadata": {},
3933
   "outputs": [
3934
    {
3935
     "data": {
3936
      "text/plain": [
3937
       "tensor(1.0000, grad_fn=<SumBackward0>)"
3938
      ]
3939
     },
3940
     "execution_count": 339,
3941
     "metadata": {},
3942
     "output_type": "execute_result"
3943
    }
3944
   ],
3945
   "source": [
3946
    "attention_weights_0[0][0].sum()"
3947
   ]
3948
  },
3949
  {
3950
   "cell_type": "code",
3951
   "execution_count": 396,
3952
   "metadata": {},
3953
   "outputs": [],
3954
   "source": [
3955
    "test = softmax(attention_weights_2, dim=-1)"
3956
   ]
3957
  },
3958
  {
3959
   "cell_type": "code",
3960
   "execution_count": 402,
3961
   "metadata": {},
3962
   "outputs": [
3963
    {
3964
     "data": {
3965
      "text/plain": [
3966
       "tensor([0.0024, 0.0030, 0.0019, 0.0018, 0.0038, 0.0015, 0.0020, 0.0016, 0.0015,\n",
3967
       "        0.0019, 0.0015, 0.0035, 0.0026, 0.0017, 0.0014, 0.0013, 0.0023, 0.0020,\n",
3968
       "        0.0017, 0.0010], grad_fn=<SliceBackward>)"
3969
      ]
3970
     },
3971
     "execution_count": 402,
3972
     "metadata": {},
3973
     "output_type": "execute_result"
3974
    }
3975
   ],
3976
   "source": [
3977
    "attention_weights_0[0][0][:20]"
3978
   ]
3979
  },
3980
  {
3981
   "cell_type": "code",
3982
   "execution_count": 404,
3983
   "metadata": {},
3984
   "outputs": [
3985
    {
3986
     "data": {
3987
      "text/plain": [
3988
       "tensor([0.0028, 0.0033, 0.0019, 0.0013, 0.0042, 0.0016, 0.0024, 0.0018, 0.0019,\n",
3989
       "        0.0024, 0.0016, 0.0033, 0.0022, 0.0014, 0.0016, 0.0013, 0.0023, 0.0021,\n",
3990
       "        0.0013, 0.0013], grad_fn=<SliceBackward>)"
3991
      ]
3992
     },
3993
     "execution_count": 404,
3994
     "metadata": {},
3995
     "output_type": "execute_result"
3996
    }
3997
   ],
3998
   "source": [
3999
    "test[0][0][:20]"
4000
   ]
4001
  },
4002
  {
4003
   "cell_type": "code",
4004
   "execution_count": 366,
4005
   "metadata": {},
4006
   "outputs": [
4007
    {
4008
     "data": {
4009
      "text/plain": [
4010
       "tensor([[[False, False, False,  ..., False, False, False],\n",
4011
       "         [False, False, False,  ..., False, False, False],\n",
4012
       "         [False, False, False,  ..., False, False, False],\n",
4013
       "         [False, False, False,  ..., False, False, False],\n",
4014
       "         [False, False, False,  ..., False, False, False],\n",
4015
       "         [False, False, False,  ..., False, False, False]]])"
4016
      ]
4017
     },
4018
     "execution_count": 366,
4019
     "metadata": {},
4020
     "output_type": "execute_result"
4021
    }
4022
   ],
4023
   "source": [
4024
    "torch.eq(attention_weights_0, test)"
4025
   ]
4026
  },
4027
  {
4028
   "cell_type": "code",
4029
   "execution_count": 320,
4030
   "metadata": {},
4031
   "outputs": [
4032
    {
4033
     "data": {
4034
      "text/plain": [
4035
       "torch.Size([1, 8, 6, 500])"
4036
      ]
4037
     },
4038
     "execution_count": 320,
4039
     "metadata": {},
4040
     "output_type": "execute_result"
4041
    }
4042
   ],
4043
   "source": [
4044
    "attention_weights_1.shape"
4045
   ]
4046
  },
4047
  {
4048
   "cell_type": "code",
4049
   "execution_count": 318,
4050
   "metadata": {},
4051
   "outputs": [
4052
    {
4053
     "data": {
4054
      "text/plain": [
4055
       "torch.Size([1, 6, 500])"
4056
      ]
4057
     },
4058
     "execution_count": 318,
4059
     "metadata": {},
4060
     "output_type": "execute_result"
4061
    }
4062
   ],
4063
   "source": [
4064
    "attention_weights_2.shape"
4065
   ]
4066
  },
4067
  {
4068
   "cell_type": "code",
4069
   "execution_count": 282,
4070
   "metadata": {},
4071
   "outputs": [],
4072
   "source": [
4073
    "out = self.classifier(out.squeeze(0).squeeze(1))"
4074
   ]
4075
  },
4076
  {
4077
   "cell_type": "code",
4078
   "execution_count": 284,
4079
   "metadata": {},
4080
   "outputs": [
4081
    {
4082
     "data": {
4083
      "text/plain": [
4084
       "tensor([ 0.2832,  0.1548, -0.0972, -0.2801], grad_fn=<AddBackward0>)"
4085
      ]
4086
     },
4087
     "execution_count": 284,
4088
     "metadata": {},
4089
     "output_type": "execute_result"
4090
    }
4091
   ],
4092
   "source": [
4093
    "out"
4094
   ]
4095
  },
4096
  {
4097
   "cell_type": "code",
4098
   "execution_count": 269,
4099
   "metadata": {},
4100
   "outputs": [
4101
    {
4102
     "data": {
4103
      "text/plain": [
4104
       "tensor([[0.0018, 0.0019, 0.0019,  ..., 0.0019, 0.0022, 0.0018],\n",
4105
       "        [0.0020, 0.0020, 0.0021,  ..., 0.0021, 0.0020, 0.0020],\n",
4106
       "        [0.0019, 0.0022, 0.0021,  ..., 0.0019, 0.0019, 0.0020],\n",
4107
       "        [0.0021, 0.0022, 0.0019,  ..., 0.0018, 0.0020, 0.0021],\n",
4108
       "        [0.0019, 0.0019, 0.0020,  ..., 0.0020, 0.0018, 0.0019],\n",
4109
       "        [0.0021, 0.0021, 0.0019,  ..., 0.0019, 0.0021, 0.0021]],\n",
4110
       "       grad_fn=<SelectBackward>)"
4111
      ]
4112
     },
4113
     "execution_count": 269,
4114
     "metadata": {},
4115
     "output_type": "execute_result"
4116
    }
4117
   ],
4118
   "source": [
4119
    "attention_weights[0]"
4120
   ]
4121
  },
4122
  {
4123
   "cell_type": "code",
4124
   "execution_count": 241,
4125
   "metadata": {},
4126
   "outputs": [
4127
    {
4128
     "data": {
4129
      "text/plain": [
4130
       "(tensor([[[-0.0504,  0.0757, -0.0366,  ..., -0.0275, -0.0294,  0.1300]],\n",
4131
       " \n",
4132
       "         [[-0.0500,  0.0762, -0.0352,  ..., -0.0253, -0.0289,  0.1311]],\n",
4133
       " \n",
4134
       "         [[-0.0497,  0.0772, -0.0321,  ..., -0.0246, -0.0288,  0.1301]],\n",
4135
       " \n",
4136
       "         [[-0.0491,  0.0794, -0.0337,  ..., -0.0260, -0.0278,  0.1281]],\n",
4137
       " \n",
4138
       "         [[-0.0483,  0.0781, -0.0343,  ..., -0.0246, -0.0301,  0.1321]],\n",
4139
       " \n",
4140
       "         [[-0.0499,  0.0768, -0.0305,  ..., -0.0257, -0.0280,  0.1321]]],\n",
4141
       "        grad_fn=<AddBackward0>),\n",
4142
       " tensor([[[0.0019, 0.0019, 0.0019,  ..., 0.0020, 0.0021, 0.0021],\n",
4143
       "          [0.0017, 0.0020, 0.0020,  ..., 0.0019, 0.0019, 0.0018],\n",
4144
       "          [0.0019, 0.0018, 0.0019,  ..., 0.0019, 0.0019, 0.0021],\n",
4145
       "          [0.0020, 0.0020, 0.0019,  ..., 0.0020, 0.0021, 0.0019],\n",
4146
       "          [0.0017, 0.0023, 0.0021,  ..., 0.0019, 0.0020, 0.0020],\n",
4147
       "          [0.0021, 0.0021, 0.0020,  ..., 0.0021, 0.0021, 0.0020]]],\n",
4148
       "        grad_fn=<DivBackward0>))"
4149
      ]
4150
     },
4151
     "execution_count": 241,
4152
     "metadata": {},
4153
     "output_type": "execute_result"
4154
    }
4155
   ],
4156
   "source": [
4157
    "self.coattn(x_omic, x_path, x_path)"
4158
   ]
4159
  },
4160
  {
4161
   "cell_type": "code",
4162
   "execution_count": null,
4163
   "metadata": {},
4164
   "outputs": [],
4165
   "source": [
4166
    "h"
4167
   ]
4168
  },
4169
  {
4170
   "cell_type": "code",
4171
   "execution_count": 208,
4172
   "metadata": {},
4173
   "outputs": [],
4174
   "source": [
4175
    "sig_feats = [torch.randn(size) for size in sig_sizes]\n",
4176
    "x_omic = torch.stack([self.sig_networks[idx].forward(sig_feat) for idx, sig_feat in enumerate(sig_feats)])\n"
4177
   ]
4178
  },
4179
  {
4180
   "cell_type": "code",
4181
   "execution_count": 204,
4182
   "metadata": {},
4183
   "outputs": [],
4184
   "source": []
4185
  },
4186
  {
4187
   "cell_type": "code",
4188
   "execution_count": 206,
4189
   "metadata": {},
4190
   "outputs": [
4191
    {
4192
     "data": {
4193
      "text/plain": [
4194
       "torch.Size([6, 256])"
4195
      ]
4196
     },
4197
     "execution_count": 206,
4198
     "metadata": {},
4199
     "output_type": "execute_result"
4200
    }
4201
   ],
4202
   "source": [
4203
    "x_omic.shape"
4204
   ]
4205
  },
4206
  {
4207
   "cell_type": "code",
4208
   "execution_count": 166,
4209
   "metadata": {},
4210
   "outputs": [
4211
    {
4212
     "ename": "NameError",
4213
     "evalue": "name 'sig1' is not defined",
4214
     "output_type": "error",
4215
     "traceback": [
4216
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
4217
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
4218
      "\u001b[0;32m<ipython-input-166-aea4cb4c555c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msig1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msig2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msig3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msig4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msig5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msig6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
4219
      "\u001b[0;31mNameError\u001b[0m: name 'sig1' is not defined"
4220
     ]
4221
    }
4222
   ],
4223
   "source": [
4224
    "sig1, sig2, sig3, sig4, sig5, sig6 = torch.randn()"
4225
   ]
4226
  },
4227
  {
4228
   "cell_type": "code",
4229
   "execution_count": 158,
4230
   "metadata": {},
4231
   "outputs": [],
4232
   "source": [
4233
    "src = torch.rand(6, 1, 256)\n",
4234
    "out = transformer(src)\n",
4235
    "out = out.squeeze(1).T.unsqueeze(0)"
4236
   ]
4237
  },
4238
  {
4239
   "cell_type": "code",
4240
   "execution_count": 163,
4241
   "metadata": {},
4242
   "outputs": [],
4243
   "source": [
4244
    "conv = nn.Conv1d(in_channels=256, out_channels=256, kernel_size=4, stride=4)"
4245
   ]
4246
  },
4247
  {
4248
   "cell_type": "code",
4249
   "execution_count": 164,
4250
   "metadata": {},
4251
   "outputs": [
4252
    {
4253
     "data": {
4254
      "text/plain": [
4255
       "torch.Size([1, 256, 6])"
4256
      ]
4257
     },
4258
     "execution_count": 164,
4259
     "metadata": {},
4260
     "output_type": "execute_result"
4261
    }
4262
   ],
4263
   "source": [
4264
    "out.shape"
4265
   ]
4266
  },
4267
  {
4268
   "cell_type": "code",
4269
   "execution_count": 165,
4270
   "metadata": {},
4271
   "outputs": [
4272
    {
4273
     "data": {
4274
      "text/plain": [
4275
       "torch.Size([1, 256, 1])"
4276
      ]
4277
     },
4278
     "execution_count": 165,
4279
     "metadata": {},
4280
     "output_type": "execute_result"
4281
    }
4282
   ],
4283
   "source": [
4284
    "conv(out).shape"
4285
   ]
4286
  },
4287
  {
4288
   "cell_type": "code",
4289
   "execution_count": 112,
4290
   "metadata": {},
4291
   "outputs": [
4292
    {
4293
     "data": {
4294
      "text/plain": [
4295
       "torch.Size([1536])"
4296
      ]
4297
     },
4298
     "execution_count": 112,
4299
     "metadata": {},
4300
     "output_type": "execute_result"
4301
    }
4302
   ],
4303
   "source": [
4304
    "x.reshape(-1).shape"
4305
   ]
4306
  },
4307
  {
4308
   "cell_type": "code",
4309
   "execution_count": 106,
4310
   "metadata": {},
4311
   "outputs": [
4312
    {
4313
     "data": {
4314
      "text/plain": [
4315
       "3072"
4316
      ]
4317
     },
4318
     "execution_count": 106,
4319
     "metadata": {},
4320
     "output_type": "execute_result"
4321
    }
4322
   ],
4323
   "source": [
4324
    "256 * 12"
4325
   ]
4326
  },
4327
  {
4328
   "cell_type": "code",
4329
   "execution_count": 88,
4330
   "metadata": {},
4331
   "outputs": [],
4332
   "source": [
4333
    "net = Attn_Net_Gated()\n",
4334
    "wsi_feats = torch.randn(500, 1, 256)\n",
4335
    "sig_feats = torch.randn(6, 1, 256)"
4336
   ]
4337
  },
4338
  {
4339
   "cell_type": "code",
4340
   "execution_count": 89,
4341
   "metadata": {},
4342
   "outputs": [],
4343
   "source": [
4344
    "multihead_attn = nn.MultiheadAttention(embed_dim=256, num_heads=8)"
4345
   ]
4346
  },
4347
  {
4348
   "cell_type": "code",
4349
   "execution_count": 90,
4350
   "metadata": {},
4351
   "outputs": [],
4352
   "source": [
4353
    "out, coattn_weights = multihead_attn(sig_feats, wsi_feats, wsi_feats)"
4354
   ]
4355
  },
4356
  {
4357
   "cell_type": "code",
4358
   "execution_count": 96,
4359
   "metadata": {},
4360
   "outputs": [],
4361
   "source": [
4362
    "cotton = DenseCoAttn(dim1=256, dim2=256, num_attn=8, num_none=3, dropout=0.3)"
4363
   ]
4364
  },
4365
  {
4366
   "cell_type": "code",
4367
   "execution_count": 100,
4368
   "metadata": {},
4369
   "outputs": [],
4370
   "source": [
4371
    "from math import sqrt\n",
4372
    "wsi_feats = torch.randn(1, 500, 256)\n",
4373
    "sig_feats = torch.randn(1, 6, 256)\n",
4374
    "_ = cotton(wsi_feats, sig_feats)"
4375
   ]
4376
  },
4377
  {
4378
   "cell_type": "code",
4379
   "execution_count": 103,
4380
   "metadata": {},
4381
   "outputs": [
4382
    {
4383
     "data": {
4384
      "text/plain": [
4385
       "torch.Size([1, 6, 256])"
4386
      ]
4387
     },
4388
     "execution_count": 103,
4389
     "metadata": {},
4390
     "output_type": "execute_result"
4391
    }
4392
   ],
4393
   "source": [
4394
    "_[0].shape"
4395
   ]
4396
  },
4397
  {
4398
   "cell_type": "code",
4399
   "execution_count": 104,
4400
   "metadata": {},
4401
   "outputs": [
4402
    {
4403
     "data": {
4404
      "text/plain": [
4405
       "torch.Size([1, 500, 256])"
4406
      ]
4407
     },
4408
     "execution_count": 104,
4409
     "metadata": {},
4410
     "output_type": "execute_result"
4411
    }
4412
   ],
4413
   "source": [
4414
    "_[1].shape"
4415
   ]
4416
  },
4417
  {
4418
   "cell_type": "code",
4419
   "execution_count": 94,
4420
   "metadata": {},
4421
   "outputs": [],
4422
   "source": [
4423
    "\n",
4424
    "import torch\n",
4425
    "import torch.nn as nn\n",
4426
    "import torch.nn.functional as F\n",
4427
    "\n",
4428
    "\n",
4429
    "def qkv_attention(query, key, value, mask=None, dropout=None):\n",
4430
    "\td_k = query.size(-1)\n",
4431
    "\tscores = torch.matmul(query, key.transpose(-2,-1)) / sqrt(d_k)\n",
4432
    "\tif mask is not None:\n",
4433
    "\t\tscores.data.masked_fill_(mask.eq(0), -65504.0)\n",
4434
    "\t\n",
4435
    "\tp_attn = F.softmax(scores, dim=-1)\n",
4436
    "\tif dropout is not None:\n",
4437
    "\t\tp_attn = dropout(p_attn)\n",
4438
    "\n",
4439
    "\treturn torch.matmul(p_attn, value), p_attn\n",
4440
    "\n",
4441
    "\n",
4442
    "class DenseCoAttn(nn.Module):\n",
4443
    "\n",
4444
    "\tdef __init__(self, dim1, dim2, num_attn, num_none, dropout, is_multi_head=False):\n",
4445
    "\t\tsuper(DenseCoAttn, self).__init__()\n",
4446
    "\t\tdim = min(dim1, dim2)\n",
4447
    "\t\tself.linears = nn.ModuleList([nn.Linear(dim1, dim, bias=False),\n",
4448
    "\t\t\t\t\t\t\t\t\t  nn.Linear(dim2, dim, bias=False)])\n",
4449
    "\t\tself.nones = nn.ParameterList([nn.Parameter(nn.init.xavier_uniform_(torch.empty(num_none, dim1))),\n",
4450
    "\t\t\t\t\t\t\t\t\t   nn.Parameter(nn.init.xavier_uniform_(torch.empty(num_none, dim2)))])\n",
4451
    "\t\tself.d_k = dim // num_attn\n",
4452
    "\t\tself.h = num_attn\n",
4453
    "\t\tself.num_none = num_none\n",
4454
    "\t\tself.is_multi_head = is_multi_head\n",
4455
    "\t\tself.attn = None\n",
4456
    "\t\tself.dropouts = nn.ModuleList([nn.Dropout(p=dropout) for _ in range(2)])\n",
4457
    "\n",
4458
    "\tdef forward(self, value1, value2, mask1=None, mask2=None):\n",
4459
    "\t\tbatch = value1.size(0)\n",
4460
    "\t\tdim1, dim2 = value1.size(-1), value2.size(-1)\n",
4461
    "\t\tvalue1 = torch.cat([self.nones[0].unsqueeze(0).expand(batch, self.num_none, dim1), value1], dim=1)\n",
4462
    "\t\tvalue2 = torch.cat([self.nones[1].unsqueeze(0).expand(batch, self.num_none, dim2), value2], dim=1)\n",
4463
    "\t\tnone_mask = value1.new_ones((batch, self.num_none))\n",
4464
    "\n",
4465
    "\t\tif mask1 is not None:\n",
4466
    "\t\t\tmask1 = torch.cat([none_mask, mask1], dim=1)\n",
4467
    "\t\t\tmask1 = mask1.unsqueeze(1).unsqueeze(2)\n",
4468
    "\t\tif mask2 is not None:\n",
4469
    "\t\t\tmask2 = torch.cat([none_mask, mask2], dim=1)\n",
4470
    "\t\t\tmask2 = mask2.unsqueeze(1).unsqueeze(2)\n",
4471
    "\n",
4472
    "\t\tquery1, query2 = [l(x).view(batch, -1, self.h, self.d_k).transpose(1, 2) \n",
4473
    "\t\t\tfor l, x in zip(self.linears, (value1, value2))]\n",
4474
    "\n",
4475
    "\t\tif self.is_multi_head:\n",
4476
    "\t\t\tweighted1, attn1 = qkv_attention(query2, query1, query1, mask=mask1, dropout=self.dropouts[0])\n",
4477
    "\t\t\tweighted1 = weighted1.transpose(1, 2).contiguous()[:, self.num_none:, :]\n",
4478
    "\t\t\tweighted2, attn2 = qkv_attention(query1, query2, query2, mask=mask2, dropout=self.dropouts[1])\n",
4479
    "\t\t\tweighted2 = weighted2.transpose(1, 2).contiguous()[:, self.num_none:, :]\n",
4480
    "\t\telse:\n",
4481
    "\t\t\tweighted1, attn1 = qkv_attention(query2, query1, value1.unsqueeze(1), mask=mask1, \n",
4482
    "\t\t\t\tdropout=self.dropouts[0])\n",
4483
    "\t\t\tweighted1 = weighted1.mean(dim=1)[:, self.num_none:, :]\n",
4484
    "\t\t\tweighted2, attn2 = qkv_attention(query1, query2, value2.unsqueeze(1), mask=mask2, \n",
4485
    "\t\t\t\tdropout=self.dropouts[1])\n",
4486
    "\t\t\tweighted2 = weighted2.mean(dim=1)[:, self.num_none:, :]\n",
4487
    "\t\tself.attn = [attn1[:,:,self.num_none:,self.num_none:], attn2[:,:,self.num_none:,self.num_none:]]\n",
4488
    "\n",
4489
    "\t\treturn weighted1, weighted2\n"
4490
   ]
4491
  },
4492
  {
4493
   "cell_type": "code",
4494
   "execution_count": null,
4495
   "metadata": {},
4496
   "outputs": [],
4497
   "source": []
4498
  },
4499
  {
4500
   "cell_type": "code",
4501
   "execution_count": null,
4502
   "metadata": {},
4503
   "outputs": [],
4504
   "source": []
4505
  },
4506
  {
4507
   "cell_type": "code",
4508
   "execution_count": null,
4509
   "metadata": {},
4510
   "outputs": [],
4511
   "source": []
4512
  },
4513
  {
4514
   "cell_type": "code",
4515
   "execution_count": 417,
4516
   "metadata": {},
4517
   "outputs": [],
4518
   "source": [
4519
    "from torch.nn.functional import *\n",
4520
    "\n",
4521
    "def multi_head_attention_forward(\n",
4522
    "    query: Tensor,\n",
4523
    "    key: Tensor,\n",
4524
    "    value: Tensor,\n",
4525
    "    embed_dim_to_check: int,\n",
4526
    "    num_heads: int,\n",
4527
    "    in_proj_weight: Tensor,\n",
4528
    "    in_proj_bias: Tensor,\n",
4529
    "    bias_k: Optional[Tensor],\n",
4530
    "    bias_v: Optional[Tensor],\n",
4531
    "    add_zero_attn: bool,\n",
4532
    "    dropout_p: float,\n",
4533
    "    out_proj_weight: Tensor,\n",
4534
    "    out_proj_bias: Tensor,\n",
4535
    "    training: bool = True,\n",
4536
    "    key_padding_mask: Optional[Tensor] = None,\n",
4537
    "    need_weights: bool = True,\n",
4538
    "    need_raw: bool = True,\n",
4539
    "    attn_mask: Optional[Tensor] = None,\n",
4540
    "    use_separate_proj_weight: bool = False,\n",
4541
    "    q_proj_weight: Optional[Tensor] = None,\n",
4542
    "    k_proj_weight: Optional[Tensor] = None,\n",
4543
    "    v_proj_weight: Optional[Tensor] = None,\n",
4544
    "    static_k: Optional[Tensor] = None,\n",
4545
    "    static_v: Optional[Tensor] = None,\n",
4546
    ") -> Tuple[Tensor, Optional[Tensor]]:\n",
4547
    "    r\"\"\"\n",
4548
    "    Args:\n",
4549
    "        query, key, value: map a query and a set of key-value pairs to an output.\n",
4550
    "            See \"Attention Is All You Need\" for more details.\n",
4551
    "        embed_dim_to_check: total dimension of the model.\n",
4552
    "        num_heads: parallel attention heads.\n",
4553
    "        in_proj_weight, in_proj_bias: input projection weight and bias.\n",
4554
    "        bias_k, bias_v: bias of the key and value sequences to be added at dim=0.\n",
4555
    "        add_zero_attn: add a new batch of zeros to the key and\n",
4556
    "                       value sequences at dim=1.\n",
4557
    "        dropout_p: probability of an element to be zeroed.\n",
4558
    "        out_proj_weight, out_proj_bias: the output projection weight and bias.\n",
4559
    "        training: apply dropout if is ``True``.\n",
4560
    "        key_padding_mask: if provided, specified padding elements in the key will\n",
4561
    "            be ignored by the attention. This is a binary mask. When the value is True,\n",
4562
    "            the corresponding value on the attention layer will be filled with -inf.\n",
4563
    "        need_weights: output attn_output_weights.\n",
4564
    "        attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all\n",
4565
    "            the batches while a 3D mask allows to specify a different mask for the entries of each batch.\n",
4566
    "        use_separate_proj_weight: the function accepts the proj. weights for query, key,\n",
4567
    "            and value in different forms. If false, in_proj_weight will be used, which is\n",
4568
    "            a combination of q_proj_weight, k_proj_weight, v_proj_weight.\n",
4569
    "        q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias.\n",
4570
    "        static_k, static_v: static key and value used for attention operators.\n",
4571
    "    Shape:\n",
4572
    "        Inputs:\n",
4573
    "        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\n",
4574
    "          the embedding dimension.\n",
4575
    "        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\n",
4576
    "          the embedding dimension.\n",
4577
    "        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\n",
4578
    "          the embedding dimension.\n",
4579
    "        - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length.\n",
4580
    "          If a ByteTensor is provided, the non-zero positions will be ignored while the zero positions\n",
4581
    "          will be unchanged. If a BoolTensor is provided, the positions with the\n",
4582
    "          value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged.\n",
4583
    "        - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\n",
4584
    "          3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,\n",
4585
    "          S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked\n",
4586
    "          positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend\n",
4587
    "          while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True``\n",
4588
    "          are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor\n",
4589
    "          is provided, it will be added to the attention weight.\n",
4590
    "        - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\n",
4591
    "          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\n",
4592
    "        - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\n",
4593
    "          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\n",
4594
    "        Outputs:\n",
4595
    "        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\n",
4596
    "          E is the embedding dimension.\n",
4597
    "        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\n",
4598
    "          L is the target sequence length, S is the source sequence length.\n",
4599
    "    \"\"\"\n",
4600
    "    tens_ops = (query, key, value, in_proj_weight, in_proj_bias, bias_k, bias_v, out_proj_weight, out_proj_bias)\n",
4601
    "    if has_torch_function(tens_ops):\n",
4602
    "        return handle_torch_function(\n",
4603
    "            multi_head_attention_forward,\n",
4604
    "            tens_ops,\n",
4605
    "            query,\n",
4606
    "            key,\n",
4607
    "            value,\n",
4608
    "            embed_dim_to_check,\n",
4609
    "            num_heads,\n",
4610
    "            in_proj_weight,\n",
4611
    "            in_proj_bias,\n",
4612
    "            bias_k,\n",
4613
    "            bias_v,\n",
4614
    "            add_zero_attn,\n",
4615
    "            dropout_p,\n",
4616
    "            out_proj_weight,\n",
4617
    "            out_proj_bias,\n",
4618
    "            training=training,\n",
4619
    "            key_padding_mask=key_padding_mask,\n",
4620
    "            need_weights=need_weights,\n",
4621
    "            need_raw=need_raw,\n",
4622
    "            attn_mask=attn_mask,\n",
4623
    "            use_separate_proj_weight=use_separate_proj_weight,\n",
4624
    "            q_proj_weight=q_proj_weight,\n",
4625
    "            k_proj_weight=k_proj_weight,\n",
4626
    "            v_proj_weight=v_proj_weight,\n",
4627
    "            static_k=static_k,\n",
4628
    "            static_v=static_v,\n",
4629
    "        )\n",
4630
    "    tgt_len, bsz, embed_dim = query.size()\n",
4631
    "    assert embed_dim == embed_dim_to_check\n",
4632
    "    # allow MHA to have different sizes for the feature dimension\n",
4633
    "    assert key.size(0) == value.size(0) and key.size(1) == value.size(1)\n",
4634
    "\n",
4635
    "    head_dim = embed_dim // num_heads\n",
4636
    "    assert head_dim * num_heads == embed_dim, \"embed_dim must be divisible by num_heads\"\n",
4637
    "    scaling = float(head_dim) ** -0.5\n",
4638
    "\n",
4639
    "    if not use_separate_proj_weight:\n",
4640
    "        if (query is key or torch.equal(query, key)) and (key is value or torch.equal(key, value)):\n",
4641
    "            # self-attention\n",
4642
    "            q, k, v = linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1)\n",
4643
    "\n",
4644
    "        elif key is value or torch.equal(key, value):\n",
4645
    "            # encoder-decoder attention\n",
4646
    "            # This is inline in_proj function with in_proj_weight and in_proj_bias\n",
4647
    "            _b = in_proj_bias\n",
4648
    "            _start = 0\n",
4649
    "            _end = embed_dim\n",
4650
    "            _w = in_proj_weight[_start:_end, :]\n",
4651
    "            if _b is not None:\n",
4652
    "                _b = _b[_start:_end]\n",
4653
    "            q = linear(query, _w, _b)\n",
4654
    "\n",
4655
    "            if key is None:\n",
4656
    "                assert value is None\n",
4657
    "                k = None\n",
4658
    "                v = None\n",
4659
    "            else:\n",
4660
    "\n",
4661
    "                # This is inline in_proj function with in_proj_weight and in_proj_bias\n",
4662
    "                _b = in_proj_bias\n",
4663
    "                _start = embed_dim\n",
4664
    "                _end = None\n",
4665
    "                _w = in_proj_weight[_start:, :]\n",
4666
    "                if _b is not None:\n",
4667
    "                    _b = _b[_start:]\n",
4668
    "                k, v = linear(key, _w, _b).chunk(2, dim=-1)\n",
4669
    "\n",
4670
    "        else:\n",
4671
    "            # This is inline in_proj function with in_proj_weight and in_proj_bias\n",
4672
    "            _b = in_proj_bias\n",
4673
    "            _start = 0\n",
4674
    "            _end = embed_dim\n",
4675
    "            _w = in_proj_weight[_start:_end, :]\n",
4676
    "            if _b is not None:\n",
4677
    "                _b = _b[_start:_end]\n",
4678
    "            q = linear(query, _w, _b)\n",
4679
    "\n",
4680
    "            # This is inline in_proj function with in_proj_weight and in_proj_bias\n",
4681
    "            _b = in_proj_bias\n",
4682
    "            _start = embed_dim\n",
4683
    "            _end = embed_dim * 2\n",
4684
    "            _w = in_proj_weight[_start:_end, :]\n",
4685
    "            if _b is not None:\n",
4686
    "                _b = _b[_start:_end]\n",
4687
    "            k = linear(key, _w, _b)\n",
4688
    "\n",
4689
    "            # This is inline in_proj function with in_proj_weight and in_proj_bias\n",
4690
    "            _b = in_proj_bias\n",
4691
    "            _start = embed_dim * 2\n",
4692
    "            _end = None\n",
4693
    "            _w = in_proj_weight[_start:, :]\n",
4694
    "            if _b is not None:\n",
4695
    "                _b = _b[_start:]\n",
4696
    "            v = linear(value, _w, _b)\n",
4697
    "    else:\n",
4698
    "        q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight)\n",
4699
    "        len1, len2 = q_proj_weight_non_opt.size()\n",
4700
    "        assert len1 == embed_dim and len2 == query.size(-1)\n",
4701
    "\n",
4702
    "        k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight)\n",
4703
    "        len1, len2 = k_proj_weight_non_opt.size()\n",
4704
    "        assert len1 == embed_dim and len2 == key.size(-1)\n",
4705
    "\n",
4706
    "        v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight)\n",
4707
    "        len1, len2 = v_proj_weight_non_opt.size()\n",
4708
    "        assert len1 == embed_dim and len2 == value.size(-1)\n",
4709
    "\n",
4710
    "        if in_proj_bias is not None:\n",
4711
    "            q = linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim])\n",
4712
    "            k = linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim : (embed_dim * 2)])\n",
4713
    "            v = linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2) :])\n",
4714
    "        else:\n",
4715
    "            q = linear(query, q_proj_weight_non_opt, in_proj_bias)\n",
4716
    "            k = linear(key, k_proj_weight_non_opt, in_proj_bias)\n",
4717
    "            v = linear(value, v_proj_weight_non_opt, in_proj_bias)\n",
4718
    "    q = q * scaling\n",
4719
    "\n",
4720
    "    if attn_mask is not None:\n",
4721
    "        assert (\n",
4722
    "            attn_mask.dtype == torch.float32\n",
4723
    "            or attn_mask.dtype == torch.float64\n",
4724
    "            or attn_mask.dtype == torch.float16\n",
4725
    "            or attn_mask.dtype == torch.uint8\n",
4726
    "            or attn_mask.dtype == torch.bool\n",
4727
    "        ), \"Only float, byte, and bool types are supported for attn_mask, not {}\".format(attn_mask.dtype)\n",
4728
    "        if attn_mask.dtype == torch.uint8:\n",
4729
    "            warnings.warn(\"Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.\")\n",
4730
    "            attn_mask = attn_mask.to(torch.bool)\n",
4731
    "\n",
4732
    "        if attn_mask.dim() == 2:\n",
4733
    "            attn_mask = attn_mask.unsqueeze(0)\n",
4734
    "            if list(attn_mask.size()) != [1, query.size(0), key.size(0)]:\n",
4735
    "                raise RuntimeError(\"The size of the 2D attn_mask is not correct.\")\n",
4736
    "        elif attn_mask.dim() == 3:\n",
4737
    "            if list(attn_mask.size()) != [bsz * num_heads, query.size(0), key.size(0)]:\n",
4738
    "                raise RuntimeError(\"The size of the 3D attn_mask is not correct.\")\n",
4739
    "        else:\n",
4740
    "            raise RuntimeError(\"attn_mask's dimension {} is not supported\".format(attn_mask.dim()))\n",
4741
    "        # attn_mask's dim is 3 now.\n",
4742
    "\n",
4743
    "    # convert ByteTensor key_padding_mask to bool\n",
4744
    "    if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8:\n",
4745
    "        warnings.warn(\n",
4746
    "            \"Byte tensor for key_padding_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.\"\n",
4747
    "        )\n",
4748
    "        key_padding_mask = key_padding_mask.to(torch.bool)\n",
4749
    "\n",
4750
    "    if bias_k is not None and bias_v is not None:\n",
4751
    "        if static_k is None and static_v is None:\n",
4752
    "            k = torch.cat([k, bias_k.repeat(1, bsz, 1)])\n",
4753
    "            v = torch.cat([v, bias_v.repeat(1, bsz, 1)])\n",
4754
    "            if attn_mask is not None:\n",
4755
    "                attn_mask = pad(attn_mask, (0, 1))\n",
4756
    "            if key_padding_mask is not None:\n",
4757
    "                key_padding_mask = pad(key_padding_mask, (0, 1))\n",
4758
    "        else:\n",
4759
    "            assert static_k is None, \"bias cannot be added to static key.\"\n",
4760
    "            assert static_v is None, \"bias cannot be added to static value.\"\n",
4761
    "    else:\n",
4762
    "        assert bias_k is None\n",
4763
    "        assert bias_v is None\n",
4764
    "\n",
4765
    "    q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)\n",
4766
    "    if k is not None:\n",
4767
    "        k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\n",
4768
    "    if v is not None:\n",
4769
    "        v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\n",
4770
    "\n",
4771
    "    if static_k is not None:\n",
4772
    "        assert static_k.size(0) == bsz * num_heads\n",
4773
    "        assert static_k.size(2) == head_dim\n",
4774
    "        k = static_k\n",
4775
    "\n",
4776
    "    if static_v is not None:\n",
4777
    "        assert static_v.size(0) == bsz * num_heads\n",
4778
    "        assert static_v.size(2) == head_dim\n",
4779
    "        v = static_v\n",
4780
    "\n",
4781
    "    src_len = k.size(1)\n",
4782
    "\n",
4783
    "    if key_padding_mask is not None:\n",
4784
    "        assert key_padding_mask.size(0) == bsz\n",
4785
    "        assert key_padding_mask.size(1) == src_len\n",
4786
    "\n",
4787
    "    if add_zero_attn:\n",
4788
    "        src_len += 1\n",
4789
    "        k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1)\n",
4790
    "        v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1)\n",
4791
    "        if attn_mask is not None:\n",
4792
    "            attn_mask = pad(attn_mask, (0, 1))\n",
4793
    "        if key_padding_mask is not None:\n",
4794
    "            key_padding_mask = pad(key_padding_mask, (0, 1))\n",
4795
    "\n",
4796
    "    attn_output_weights = torch.bmm(q, k.transpose(1, 2))\n",
4797
    "    assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len]\n",
4798
    "\n",
4799
    "    if attn_mask is not None:\n",
4800
    "        if attn_mask.dtype == torch.bool:\n",
4801
    "            attn_output_weights.masked_fill_(attn_mask, float(\"-inf\"))\n",
4802
    "        else:\n",
4803
    "            attn_output_weights += attn_mask\n",
4804
    "\n",
4805
    "    if key_padding_mask is not None:\n",
4806
    "        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\n",
4807
    "        attn_output_weights = attn_output_weights.masked_fill(\n",
4808
    "            key_padding_mask.unsqueeze(1).unsqueeze(2),\n",
4809
    "            float(\"-inf\"),\n",
4810
    "        )\n",
4811
    "        attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len)\n",
4812
    "    \n",
4813
    "    attn_output_weights_raw = attn_output_weights\n",
4814
    "    attn_output_weights = softmax(attn_output_weights, dim=-1)\n",
4815
    "    attn_output_weights = dropout(attn_output_weights, p=dropout_p, training=training)\n",
4816
    "\n",
4817
    "    attn_output = torch.bmm(attn_output_weights, v)\n",
4818
    "    assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]\n",
4819
    "    attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)\n",
4820
    "    attn_output = linear(attn_output, out_proj_weight, out_proj_bias)\n",
4821
    "    \n",
4822
    "    if need_weights:\n",
4823
    "        if need_raw:\n",
4824
    "            \n",
4825
    "            attn_output_weights_raw = attn_output_weights_raw.view(bsz, num_heads, tgt_len, src_len)\n",
4826
    "            return attn_output,attn_output_weights_raw\n",
4827
    "            \n",
4828
    "            #attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\n",
4829
    "            #return attn_output, attn_output_weights.sum(dim=1) / num_heads, attn_output_weights_raw, attn_output_weights_raw.sum(dim=1) / num_heads\n",
4830
    "        else:\n",
4831
    "            # average attention weights over heads\n",
4832
    "            attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\n",
4833
    "            return attn_output, attn_output_weights.sum(dim=1) / num_heads\n",
4834
    "    else:\n",
4835
    "        return attn_output, None\n"
4836
   ]
4837
  },
4838
  {
4839
   "cell_type": "code",
4840
   "execution_count": 418,
4841
   "metadata": {},
4842
   "outputs": [],
4843
   "source": [
4844
    "import torch\n",
4845
    "from torch import Tensor\n",
4846
    "from torch.nn.modules.linear import _LinearWithBias\n",
4847
    "from torch.nn.init import xavier_uniform_\n",
4848
    "from torch.nn.init import constant_\n",
4849
    "from torch.nn.init import xavier_normal_\n",
4850
    "from torch.nn.parameter import Parameter\n",
4851
    "from torch.nn import Module\n",
4852
    "\n",
4853
    "class MultiheadAttention(Module):\n",
4854
    "    r\"\"\"Allows the model to jointly attend to information\n",
4855
    "    from different representation subspaces.\n",
4856
    "    See reference: Attention Is All You Need\n",
4857
    "\n",
4858
    "    .. math::\n",
4859
    "        \\text{MultiHead}(Q, K, V) = \\text{Concat}(head_1,\\dots,head_h)W^O\n",
4860
    "        \\text{where} head_i = \\text{Attention}(QW_i^Q, KW_i^K, VW_i^V)\n",
4861
    "\n",
4862
    "    Args:\n",
4863
    "        embed_dim: total dimension of the model.\n",
4864
    "        num_heads: parallel attention heads.\n",
4865
    "        dropout: a Dropout layer on attn_output_weights. Default: 0.0.\n",
4866
    "        bias: add bias as module parameter. Default: True.\n",
4867
    "        add_bias_kv: add bias to the key and value sequences at dim=0.\n",
4868
    "        add_zero_attn: add a new batch of zeros to the key and\n",
4869
    "                       value sequences at dim=1.\n",
4870
    "        kdim: total number of features in key. Default: None.\n",
4871
    "        vdim: total number of features in value. Default: None.\n",
4872
    "\n",
4873
    "        Note: if kdim and vdim are None, they will be set to embed_dim such that\n",
4874
    "        query, key, and value have the same number of features.\n",
4875
    "\n",
4876
    "    Examples::\n",
4877
    "\n",
4878
    "        >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)\n",
4879
    "        >>> attn_output, attn_output_weights = multihead_attn(query, key, value)\n",
4880
    "    \"\"\"\n",
4881
    "    bias_k: Optional[torch.Tensor]\n",
4882
    "    bias_v: Optional[torch.Tensor]\n",
4883
    "\n",
4884
    "    def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None):\n",
4885
    "        super(MultiheadAttention, self).__init__()\n",
4886
    "        self.embed_dim = embed_dim\n",
4887
    "        self.kdim = kdim if kdim is not None else embed_dim\n",
4888
    "        self.vdim = vdim if vdim is not None else embed_dim\n",
4889
    "        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim\n",
4890
    "\n",
4891
    "        self.num_heads = num_heads\n",
4892
    "        self.dropout = dropout\n",
4893
    "        self.head_dim = embed_dim // num_heads\n",
4894
    "        assert self.head_dim * num_heads == self.embed_dim, \"embed_dim must be divisible by num_heads\"\n",
4895
    "\n",
4896
    "        if self._qkv_same_embed_dim is False:\n",
4897
    "            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))\n",
4898
    "            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))\n",
4899
    "            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))\n",
4900
    "            self.register_parameter('in_proj_weight', None)\n",
4901
    "        else:\n",
4902
    "            self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))\n",
4903
    "            self.register_parameter('q_proj_weight', None)\n",
4904
    "            self.register_parameter('k_proj_weight', None)\n",
4905
    "            self.register_parameter('v_proj_weight', None)\n",
4906
    "\n",
4907
    "        if bias:\n",
4908
    "            self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))\n",
4909
    "        else:\n",
4910
    "            self.register_parameter('in_proj_bias', None)\n",
4911
    "        self.out_proj = _LinearWithBias(embed_dim, embed_dim)\n",
4912
    "\n",
4913
    "        if add_bias_kv:\n",
4914
    "            self.bias_k = Parameter(torch.empty(1, 1, embed_dim))\n",
4915
    "            self.bias_v = Parameter(torch.empty(1, 1, embed_dim))\n",
4916
    "        else:\n",
4917
    "            self.bias_k = self.bias_v = None\n",
4918
    "\n",
4919
    "        self.add_zero_attn = add_zero_attn\n",
4920
    "\n",
4921
    "        self._reset_parameters()\n",
4922
    "\n",
4923
    "    def _reset_parameters(self):\n",
4924
    "        if self._qkv_same_embed_dim:\n",
4925
    "            xavier_uniform_(self.in_proj_weight)\n",
4926
    "        else:\n",
4927
    "            xavier_uniform_(self.q_proj_weight)\n",
4928
    "            xavier_uniform_(self.k_proj_weight)\n",
4929
    "            xavier_uniform_(self.v_proj_weight)\n",
4930
    "\n",
4931
    "        if self.in_proj_bias is not None:\n",
4932
    "            constant_(self.in_proj_bias, 0.)\n",
4933
    "            constant_(self.out_proj.bias, 0.)\n",
4934
    "        if self.bias_k is not None:\n",
4935
    "            xavier_normal_(self.bias_k)\n",
4936
    "        if self.bias_v is not None:\n",
4937
    "            xavier_normal_(self.bias_v)\n",
4938
    "\n",
4939
    "    def __setstate__(self, state):\n",
4940
    "        # Support loading old MultiheadAttention checkpoints generated by v1.1.0\n",
4941
    "        if '_qkv_same_embed_dim' not in state:\n",
4942
    "            state['_qkv_same_embed_dim'] = True\n",
4943
    "\n",
4944
    "        super(MultiheadAttention, self).__setstate__(state)\n",
4945
    "\n",
4946
    "    def forward(self, query, key, value, key_padding_mask=None,\n",
4947
    "                need_weights=True, need_raw=True, attn_mask=None):\n",
4948
    "        # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]]\n",
4949
    "        r\"\"\"\n",
4950
    "    Args:\n",
4951
    "        query, key, value: map a query and a set of key-value pairs to an output.\n",
4952
    "            See \"Attention Is All You Need\" for more details.\n",
4953
    "        key_padding_mask: if provided, specified padding elements in the key will\n",
4954
    "            be ignored by the attention. When given a binary mask and a value is True,\n",
4955
    "            the corresponding value on the attention layer will be ignored. When given\n",
4956
    "            a byte mask and a value is non-zero, the corresponding value on the attention\n",
4957
    "            layer will be ignored\n",
4958
    "        need_weights: output attn_output_weights.\n",
4959
    "        attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all\n",
4960
    "            the batches while a 3D mask allows to specify a different mask for the entries of each batch.\n",
4961
    "\n",
4962
    "    Shape:\n",
4963
    "        - Inputs:\n",
4964
    "        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\n",
4965
    "          the embedding dimension.\n",
4966
    "        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\n",
4967
    "          the embedding dimension.\n",
4968
    "        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\n",
4969
    "          the embedding dimension.\n",
4970
    "        - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length.\n",
4971
    "          If a ByteTensor is provided, the non-zero positions will be ignored while the position\n",
4972
    "          with the zero positions will be unchanged. If a BoolTensor is provided, the positions with the\n",
4973
    "          value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged.\n",
4974
    "        - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\n",
4975
    "          3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,\n",
4976
    "          S is the source sequence length. attn_mask ensure that position i is allowed to attend the unmasked\n",
4977
    "          positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend\n",
4978
    "          while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True``\n",
4979
    "          is not allowed to attend while ``False`` values will be unchanged. If a FloatTensor\n",
4980
    "          is provided, it will be added to the attention weight.\n",
4981
    "\n",
4982
    "        - Outputs:\n",
4983
    "        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\n",
4984
    "          E is the embedding dimension.\n",
4985
    "        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\n",
4986
    "          L is the target sequence length, S is the source sequence length.\n",
4987
    "        \"\"\"\n",
4988
    "        if not self._qkv_same_embed_dim:\n",
4989
    "            return multi_head_attention_forward(\n",
4990
    "                query, key, value, self.embed_dim, self.num_heads,\n",
4991
    "                self.in_proj_weight, self.in_proj_bias,\n",
4992
    "                self.bias_k, self.bias_v, self.add_zero_attn,\n",
4993
    "                self.dropout, self.out_proj.weight, self.out_proj.bias,\n",
4994
    "                training=self.training,\n",
4995
    "                key_padding_mask=key_padding_mask, need_weights=need_weights, need_raw=need_raw,\n",
4996
    "                attn_mask=attn_mask, use_separate_proj_weight=True,\n",
4997
    "                q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,\n",
4998
    "                v_proj_weight=self.v_proj_weight)\n",
4999
    "        else:\n",
5000
    "            return multi_head_attention_forward(\n",
5001
    "                query, key, value, self.embed_dim, self.num_heads,\n",
5002
    "                self.in_proj_weight, self.in_proj_bias,\n",
5003
    "                self.bias_k, self.bias_v, self.add_zero_attn,\n",
5004
    "                self.dropout, self.out_proj.weight, self.out_proj.bias,\n",
5005
    "                training=self.training,\n",
5006
    "                key_padding_mask=key_padding_mask, need_weights=need_weights, need_raw=need_raw,\n",
5007
    "                attn_mask=attn_mask)"
5008
   ]
5009
  },
5010
  {
5011
   "cell_type": "code",
5012
   "execution_count": null,
5013
   "metadata": {},
5014
   "outputs": [],
5015
   "source": []
5016
  },
5017
  {
5018
   "cell_type": "code",
5019
   "execution_count": null,
5020
   "metadata": {},
5021
   "outputs": [],
5022
   "source": []
5023
  },
5024
  {
5025
   "cell_type": "code",
5026
   "execution_count": null,
5027
   "metadata": {},
5028
   "outputs": [],
5029
   "source": []
5030
  },
5031
  {
5032
   "cell_type": "code",
5033
   "execution_count": null,
5034
   "metadata": {},
5035
   "outputs": [],
5036
   "source": []
5037
  },
5038
  {
5039
   "cell_type": "code",
5040
   "execution_count": null,
5041
   "metadata": {},
5042
   "outputs": [],
5043
   "source": []
5044
  },
5045
  {
5046
   "cell_type": "code",
5047
   "execution_count": null,
5048
   "metadata": {},
5049
   "outputs": [],
5050
   "source": []
5051
  },
5052
  {
5053
   "cell_type": "code",
5054
   "execution_count": null,
5055
   "metadata": {},
5056
   "outputs": [],
5057
   "source": []
5058
  },
5059
  {
5060
   "cell_type": "code",
5061
   "execution_count": null,
5062
   "metadata": {},
5063
   "outputs": [],
5064
   "source": []
5065
  },
5066
  {
5067
   "cell_type": "code",
5068
   "execution_count": null,
5069
   "metadata": {},
5070
   "outputs": [],
5071
   "source": []
5072
  },
5073
  {
5074
   "cell_type": "code",
5075
   "execution_count": null,
5076
   "metadata": {},
5077
   "outputs": [],
5078
   "source": []
5079
  },
5080
  {
5081
   "cell_type": "code",
5082
   "execution_count": null,
5083
   "metadata": {},
5084
   "outputs": [],
5085
   "source": []
5086
  },
5087
  {
5088
   "cell_type": "code",
5089
   "execution_count": null,
5090
   "metadata": {},
5091
   "outputs": [],
5092
   "source": []
5093
  },
5094
  {
5095
   "cell_type": "code",
5096
   "execution_count": 104,
5097
   "metadata": {},
5098
   "outputs": [
5099
    {
5100
     "ename": "ModuleNotFoundError",
5101
     "evalue": "No module named 'torch'",
5102
     "output_type": "error",
5103
     "traceback": [
5104
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
5105
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
5106
      "\u001b[0;32m<ipython-input-104-6bb47b25d46a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
5107
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'torch'"
5108
     ]
5109
    }
5110
   ],
5111
   "source": [
5112
    "import math\n",
5113
    "\n",
5114
    "import torch\n",
5115
    "from torch import nn\n",
5116
    "\n",
5117
    "############\n",
5118
    "# Omic Model\n",
5119
    "############\n",
5120
    "def init_max_weights(module):\n",
    "    \"\"\"Re-initialise every ``nn.Linear`` found in ``module``, in place.\n",
    "\n",
    "    Weights are drawn from a normal distribution with mean 0 and\n",
    "    std = 1 / sqrt(weight.size(1)); biases are zeroed. Layers built with\n",
    "    ``bias=False`` have ``bias is None``, so the bias step is skipped for them.\n",
    "\n",
    "    Args:\n",
    "        module: an ``nn.Module``; it and all of its sub-modules are visited.\n",
    "    \"\"\"\n",
    "    for m in module.modules():\n",
    "        # Exact type match (not isinstance), so nn.Linear subclasses stay untouched.\n",
    "        if type(m) is nn.Linear:\n",
    "            stdv = 1. / math.sqrt(m.weight.size(1))\n",
    "            m.weight.data.normal_(0, stdv)\n",
    "            if m.bias is not None:\n",
    "                m.bias.data.zero_()\n",
5126
    "\n",
5127
    "def SNN_Block(dim1, dim2, dropout=0.25):\n",
    "    \"\"\"Build one block: Linear(dim1 -> dim2), then ELU, then AlphaDropout.\n",
    "\n",
    "    Args:\n",
    "        dim1: input feature size of the linear layer.\n",
    "        dim2: output feature size of the linear layer.\n",
    "        dropout: drop probability passed to ``nn.AlphaDropout``.\n",
    "\n",
    "    Returns:\n",
    "        An ``nn.Sequential`` holding the three layers in that order.\n",
    "    \"\"\"\n",
    "    layers = [\n",
    "        nn.Linear(dim1, dim2),\n",
    "        nn.ELU(),\n",
    "        nn.AlphaDropout(p=dropout, inplace=False),\n",
    "    ]\n",
    "    return nn.Sequential(*layers)\n",
5132
    "\n",
5133
    "class MaxNet(nn.Module):\n",
    "    \"\"\"Feed-forward network over omic features with an optional metadata input.\n",
    "\n",
    "    A stack of four ``SNN_Block`` layers feeds a linear classifier that emits\n",
    "    ``n_classes`` logits; ``forward`` turns them into hazards and survival values.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim: int, meta_dim: int=0, model_size_omic: str='small', n_classes: int=4):\n",
    "        \"\"\"\n",
    "        Args:\n",
    "            input_dim: size of the omic input fed to the first block.\n",
    "            meta_dim: size of the metadata appended before the classifier (0 disables it).\n",
    "            model_size_omic: key into ``size_dict_omic`` ('small' or 'big') choosing hidden widths.\n",
    "            n_classes: number of classifier outputs.\n",
    "        \"\"\"\n",
    "        super(MaxNet, self).__init__()\n",
    "        self.meta_dim = meta_dim\n",
    "        self.n_classes = n_classes\n",
    "        self.size_dict_omic = {'small': [256, 256, 256, 256], 'big': [1024, 1024, 1024, 256]}\n",
    "        \n",
    "        ### Constructing Genomic SNN\n",
    "        hidden = self.size_dict_omic[model_size_omic]\n",
    "        fc_omic = [SNN_Block(dim1=input_dim, dim2=hidden[0])]\n",
    "        # Chain consecutive hidden widths: hidden[0]->hidden[1], hidden[1]->hidden[2], ...\n",
    "        for d_in, d_out in zip(hidden[:-1], hidden[1:]):\n",
    "            fc_omic.append(SNN_Block(dim1=d_in, dim2=d_out, dropout=0.25))\n",
    "        self.fc_omic = nn.Sequential(*fc_omic)\n",
    "        self.classifier = nn.Linear(hidden[-1]+self.meta_dim, n_classes)\n",
    "        init_max_weights(self)\n",
    "\n",
    "    def forward(self, **kwargs):\n",
    "        \"\"\"Run the network.\n",
    "\n",
    "        Keyword Args:\n",
    "            x_omic: tensor fed to the omic SNN stack.\n",
    "            meta: tensor concatenated to the SNN features; read only when\n",
    "                ``meta_dim`` is non-zero, so it may be omitted otherwise.\n",
    "\n",
    "        Returns:\n",
    "            ``(hazards, S, Y_hat, None, None)`` where ``hazards`` is the sigmoid of the\n",
    "            logits, ``S`` is the cumulative product of ``1 - hazards`` along dim 1 and\n",
    "            ``Y_hat`` is the index of the largest logit along dim 1.\n",
    "\n",
    "        Raises:\n",
    "            KeyError: if ``x_omic`` is missing, or ``meta`` is missing while ``meta_dim`` is non-zero.\n",
    "        \"\"\"\n",
    "        x = kwargs['x_omic']\n",
    "        features = self.fc_omic(x)\n",
    "\n",
    "        if self.meta_dim:\n",
    "            # Looked up only here so callers without metadata need not pass `meta`.\n",
    "            meta = kwargs['meta']\n",
    "            axis_dim = 1 if len(meta.shape) > 1 else 0\n",
    "            features = torch.cat((features, meta), axis_dim)\n",
    "\n",
    "        # unsqueeze(0) adds a leading dimension before the ops along dim 1 below.\n",
    "        logits = self.classifier(features).unsqueeze(0)\n",
    "        Y_hat = torch.topk(logits, 1, dim=1)[1]\n",
    "        hazards = torch.sigmoid(logits)\n",
    "        S = torch.cumprod(1 - hazards, dim=1)\n",
    "        return hazards, S, Y_hat, None, None\n",
    "\n",
    "    def relocate(self):\n",
    "        \"\"\"Move sub-modules to CUDA when available (else CPU); with more than one\n",
    "        GPU the omic stack is wrapped in ``nn.DataParallel`` on 'cuda:0'.\"\"\"\n",
    "        device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "        if torch.cuda.device_count() > 1:\n",
    "            device_ids = list(range(torch.cuda.device_count()))\n",
    "            self.fc_omic = nn.DataParallel(self.fc_omic, device_ids=device_ids).to('cuda:0')\n",
    "        else:\n",
    "            self.fc_omic = self.fc_omic.to(device)\n",
    "\n",
    "        self.classifier = self.classifier.to(device)"
5175
   ]
5176
  },
5177
  {
5178
   "cell_type": "code",
5179
   "execution_count": null,
5180
   "metadata": {},
5181
   "outputs": [],
5182
   "source": []
5183
  },
5184
  {
5185
   "cell_type": "code",
5186
   "execution_count": 88,
5187
   "metadata": {},
5188
   "outputs": [
5189
    {
5190
     "data": {
5191
      "text/html": [
5192
       "<div>\n",
5193
       "<style scoped>\n",
5194
       "    .dataframe tbody tr th:only-of-type {\n",
5195
       "        vertical-align: middle;\n",
5196
       "    }\n",
5197
       "\n",
5198
       "    .dataframe tbody tr th {\n",
5199
       "        vertical-align: top;\n",
5200
       "    }\n",
5201
       "\n",
5202
       "    .dataframe thead th {\n",
5203
       "        text-align: right;\n",
5204
       "    }\n",
5205
       "</style>\n",
5206
       "<table border=\"1\" class=\"dataframe\">\n",
5207
       "  <thead>\n",
5208
       "    <tr style=\"text-align: right;\">\n",
5209
       "      <th></th>\n",
5210
       "      <th>CXCL14_rnaseq</th>\n",
5211
       "      <th>FGF1_rnaseq</th>\n",
5212
       "      <th>IFNA8_cnv</th>\n",
5213
       "      <th>ADM_rnaseq</th>\n",
5214
       "      <th>LTBP2_rnaseq</th>\n",
5215
       "      <th>CCL28_rnaseq</th>\n",
5216
       "      <th>IFNA7_rnaseq</th>\n",
5217
       "      <th>GH2_rnaseq</th>\n",
5218
       "      <th>AIMP1_rnaseq</th>\n",
5219
       "      <th>DEFB1_rnaseq</th>\n",
5220
       "      <th>...</th>\n",
5221
       "      <th>NPPB_rnaseq</th>\n",
5222
       "      <th>CCL27_rnaseq</th>\n",
5223
       "      <th>FASLG_rnaseq</th>\n",
5224
       "      <th>FGF20_cnv</th>\n",
5225
       "      <th>FAM3C_rnaseq</th>\n",
5226
       "      <th>IL18_rnaseq</th>\n",
5227
       "      <th>GDF10_rnaseq</th>\n",
5228
       "      <th>MYDGF_rnaseq</th>\n",
5229
       "      <th>IL10_rnaseq</th>\n",
5230
       "      <th>IFNW1_rnaseq</th>\n",
5231
       "    </tr>\n",
5232
       "  </thead>\n",
5233
       "  <tbody>\n",
5234
       "    <tr>\n",
5235
       "      <th>0</th>\n",
5236
       "      <td>-0.1170</td>\n",
5237
       "      <td>-0.2221</td>\n",
5238
       "      <td>1</td>\n",
5239
       "      <td>-0.5126</td>\n",
5240
       "      <td>-0.3289</td>\n",
5241
       "      <td>-0.7331</td>\n",
5242
       "      <td>-0.1244</td>\n",
5243
       "      <td>-0.1693</td>\n",
5244
       "      <td>0.5942</td>\n",
5245
       "      <td>-0.4707</td>\n",
5246
       "      <td>...</td>\n",
5247
       "      <td>-0.2276</td>\n",
5248
       "      <td>1.2033</td>\n",
5249
       "      <td>0.9826</td>\n",
5250
       "      <td>-1</td>\n",
5251
       "      <td>-0.6161</td>\n",
5252
       "      <td>-0.5643</td>\n",
5253
       "      <td>-0.2165</td>\n",
5254
       "      <td>-0.2836</td>\n",
5255
       "      <td>0.9991</td>\n",
5256
       "      <td>-0.3899</td>\n",
5257
       "    </tr>\n",
5258
       "    <tr>\n",
5259
       "      <th>1</th>\n",
5260
       "      <td>-0.2330</td>\n",
5261
       "      <td>-0.4343</td>\n",
5262
       "      <td>-1</td>\n",
5263
       "      <td>-0.2381</td>\n",
5264
       "      <td>-0.4799</td>\n",
5265
       "      <td>-0.0520</td>\n",
5266
       "      <td>-0.1244</td>\n",
5267
       "      <td>-0.1693</td>\n",
5268
       "      <td>1.1854</td>\n",
5269
       "      <td>-0.4820</td>\n",
5270
       "      <td>...</td>\n",
5271
       "      <td>-0.2276</td>\n",
5272
       "      <td>-0.2946</td>\n",
5273
       "      <td>-0.5443</td>\n",
5274
       "      <td>-1</td>\n",
5275
       "      <td>-0.3499</td>\n",
5276
       "      <td>-0.7958</td>\n",
5277
       "      <td>-0.3140</td>\n",
5278
       "      <td>-0.3359</td>\n",
5279
       "      <td>-0.4865</td>\n",
5280
       "      <td>-0.3899</td>\n",
5281
       "    </tr>\n",
5282
       "    <tr>\n",
5283
       "      <th>2</th>\n",
5284
       "      <td>-0.1384</td>\n",
5285
       "      <td>-0.1597</td>\n",
5286
       "      <td>-1</td>\n",
5287
       "      <td>-0.1521</td>\n",
5288
       "      <td>-0.3348</td>\n",
5289
       "      <td>-0.5310</td>\n",
5290
       "      <td>-0.1244</td>\n",
5291
       "      <td>-0.1693</td>\n",
5292
       "      <td>0.3889</td>\n",
5293
       "      <td>-0.3607</td>\n",
5294
       "      <td>...</td>\n",
5295
       "      <td>3.4177</td>\n",
5296
       "      <td>-0.2946</td>\n",
5297
       "      <td>-0.5320</td>\n",
5298
       "      <td>0</td>\n",
5299
       "      <td>0.4581</td>\n",
5300
       "      <td>-0.6179</td>\n",
5301
       "      <td>-0.2107</td>\n",
5302
       "      <td>0.2751</td>\n",
5303
       "      <td>-0.5108</td>\n",
5304
       "      <td>1.0629</td>\n",
5305
       "    </tr>\n",
5306
       "    <tr>\n",
5307
       "      <th>3</th>\n",
5308
       "      <td>-0.1624</td>\n",
5309
       "      <td>-0.3463</td>\n",
5310
       "      <td>-1</td>\n",
5311
       "      <td>0.0272</td>\n",
5312
       "      <td>-0.7623</td>\n",
5313
       "      <td>0.8196</td>\n",
5314
       "      <td>-0.1244</td>\n",
5315
       "      <td>-0.1693</td>\n",
5316
       "      <td>-0.0416</td>\n",
5317
       "      <td>0.1661</td>\n",
5318
       "      <td>...</td>\n",
5319
       "      <td>-0.2276</td>\n",
5320
       "      <td>-0.1020</td>\n",
5321
       "      <td>-0.4682</td>\n",
5322
       "      <td>-1</td>\n",
5323
       "      <td>-0.4391</td>\n",
5324
       "      <td>-0.7275</td>\n",
5325
       "      <td>-0.2876</td>\n",
5326
       "      <td>-0.4696</td>\n",
5327
       "      <td>-0.6248</td>\n",
5328
       "      <td>-0.3899</td>\n",
5329
       "    </tr>\n",
5330
       "    <tr>\n",
5331
       "      <th>4</th>\n",
5332
       "      <td>-0.2346</td>\n",
5333
       "      <td>-0.4090</td>\n",
5334
       "      <td>-1</td>\n",
5335
       "      <td>-0.2078</td>\n",
5336
       "      <td>0.5702</td>\n",
5337
       "      <td>-0.4219</td>\n",
5338
       "      <td>-0.1244</td>\n",
5339
       "      <td>0.5257</td>\n",
5340
       "      <td>-0.9790</td>\n",
5341
       "      <td>0.3938</td>\n",
5342
       "      <td>...</td>\n",
5343
       "      <td>-0.2276</td>\n",
5344
       "      <td>-0.1035</td>\n",
5345
       "      <td>-0.4688</td>\n",
5346
       "      <td>-1</td>\n",
5347
       "      <td>1.2596</td>\n",
5348
       "      <td>-0.5807</td>\n",
5349
       "      <td>0.4108</td>\n",
5350
       "      <td>0.1801</td>\n",
5351
       "      <td>-0.6086</td>\n",
5352
       "      <td>-0.3899</td>\n",
5353
       "    </tr>\n",
5354
       "    <tr>\n",
5355
       "      <th>...</th>\n",
5356
       "      <td>...</td>\n",
5357
       "      <td>...</td>\n",
5358
       "      <td>...</td>\n",
5359
       "      <td>...</td>\n",
5360
       "      <td>...</td>\n",
5361
       "      <td>...</td>\n",
5362
       "      <td>...</td>\n",
5363
       "      <td>...</td>\n",
5364
       "      <td>...</td>\n",
5365
       "      <td>...</td>\n",
5366
       "      <td>...</td>\n",
5367
       "      <td>...</td>\n",
5368
       "      <td>...</td>\n",
5369
       "      <td>...</td>\n",
5370
       "      <td>...</td>\n",
5371
       "      <td>...</td>\n",
5372
       "      <td>...</td>\n",
5373
       "      <td>...</td>\n",
5374
       "      <td>...</td>\n",
5375
       "      <td>...</td>\n",
5376
       "      <td>...</td>\n",
5377
       "    </tr>\n",
5378
       "    <tr>\n",
5379
       "      <th>368</th>\n",
5380
       "      <td>-0.2417</td>\n",
5381
       "      <td>10.1423</td>\n",
5382
       "      <td>-1</td>\n",
5383
       "      <td>-0.5456</td>\n",
5384
       "      <td>0.8742</td>\n",
5385
       "      <td>-0.1822</td>\n",
5386
       "      <td>-0.1244</td>\n",
5387
       "      <td>-0.1693</td>\n",
5388
       "      <td>-1.2395</td>\n",
5389
       "      <td>-0.5125</td>\n",
5390
       "      <td>...</td>\n",
5391
       "      <td>-0.2276</td>\n",
5392
       "      <td>-0.2946</td>\n",
5393
       "      <td>0.0777</td>\n",
5394
       "      <td>0</td>\n",
5395
       "      <td>-0.8242</td>\n",
5396
       "      <td>-0.6727</td>\n",
5397
       "      <td>0.1938</td>\n",
5398
       "      <td>0.9210</td>\n",
5399
       "      <td>0.4479</td>\n",
5400
       "      <td>-0.3899</td>\n",
5401
       "    </tr>\n",
5402
       "    <tr>\n",
5403
       "      <th>369</th>\n",
5404
       "      <td>-0.2412</td>\n",
5405
       "      <td>1.3253</td>\n",
5406
       "      <td>1</td>\n",
5407
       "      <td>-0.5680</td>\n",
5408
       "      <td>1.0719</td>\n",
5409
       "      <td>-0.1707</td>\n",
5410
       "      <td>-0.1244</td>\n",
5411
       "      <td>-0.1693</td>\n",
5412
       "      <td>-1.6694</td>\n",
5413
       "      <td>-0.4528</td>\n",
5414
       "      <td>...</td>\n",
5415
       "      <td>0.5679</td>\n",
5416
       "      <td>-0.2661</td>\n",
5417
       "      <td>1.0215</td>\n",
5418
       "      <td>-2</td>\n",
5419
       "      <td>-0.5327</td>\n",
5420
       "      <td>0.3335</td>\n",
5421
       "      <td>-0.1730</td>\n",
5422
       "      <td>0.0147</td>\n",
5423
       "      <td>0.6012</td>\n",
5424
       "      <td>2.2526</td>\n",
5425
       "    </tr>\n",
5426
       "    <tr>\n",
5427
       "      <th>370</th>\n",
5428
       "      <td>-0.2396</td>\n",
5429
       "      <td>0.0435</td>\n",
5430
       "      <td>0</td>\n",
5431
       "      <td>-0.3610</td>\n",
5432
       "      <td>3.1965</td>\n",
5433
       "      <td>1.3670</td>\n",
5434
       "      <td>-0.1244</td>\n",
5435
       "      <td>-0.1693</td>\n",
5436
       "      <td>0.4439</td>\n",
5437
       "      <td>-0.5099</td>\n",
5438
       "      <td>...</td>\n",
5439
       "      <td>-0.2276</td>\n",
5440
       "      <td>-0.2289</td>\n",
5441
       "      <td>0.0521</td>\n",
5442
       "      <td>-1</td>\n",
5443
       "      <td>1.0317</td>\n",
5444
       "      <td>-0.1473</td>\n",
5445
       "      <td>-0.1517</td>\n",
5446
       "      <td>0.9384</td>\n",
5447
       "      <td>-0.3165</td>\n",
5448
       "      <td>0.6239</td>\n",
5449
       "    </tr>\n",
5450
       "    <tr>\n",
5451
       "      <th>371</th>\n",
5452
       "      <td>-0.2393</td>\n",
5453
       "      <td>-0.4475</td>\n",
5454
       "      <td>0</td>\n",
5455
       "      <td>0.4772</td>\n",
5456
       "      <td>2.9612</td>\n",
5457
       "      <td>-0.7799</td>\n",
5458
       "      <td>-0.1244</td>\n",
5459
       "      <td>-0.1693</td>\n",
5460
       "      <td>0.5778</td>\n",
5461
       "      <td>1.7607</td>\n",
5462
       "      <td>...</td>\n",
5463
       "      <td>-0.2276</td>\n",
5464
       "      <td>9.4098</td>\n",
5465
       "      <td>-0.5443</td>\n",
5466
       "      <td>0</td>\n",
5467
       "      <td>0.2992</td>\n",
5468
       "      <td>-0.5451</td>\n",
5469
       "      <td>-0.2456</td>\n",
5470
       "      <td>0.8898</td>\n",
5471
       "      <td>-0.5781</td>\n",
5472
       "      <td>-0.3899</td>\n",
5473
       "    </tr>\n",
5474
       "    <tr>\n",
5475
       "      <th>372</th>\n",
5476
       "      <td>-0.1936</td>\n",
5477
       "      <td>-0.2281</td>\n",
5478
       "      <td>0</td>\n",
5479
       "      <td>-0.4124</td>\n",
5480
       "      <td>-0.1873</td>\n",
5481
       "      <td>-0.1200</td>\n",
5482
       "      <td>-0.1244</td>\n",
5483
       "      <td>-0.0326</td>\n",
5484
       "      <td>-0.8786</td>\n",
5485
       "      <td>-0.3912</td>\n",
5486
       "      <td>...</td>\n",
5487
       "      <td>-0.2276</td>\n",
5488
       "      <td>-0.2570</td>\n",
5489
       "      <td>-0.3810</td>\n",
5490
       "      <td>-1</td>\n",
5491
       "      <td>-0.6399</td>\n",
5492
       "      <td>-0.9128</td>\n",
5493
       "      <td>0.3367</td>\n",
5494
       "      <td>-0.4686</td>\n",
5495
       "      <td>0.8995</td>\n",
5496
       "      <td>1.3522</td>\n",
5497
       "    </tr>\n",
5498
       "  </tbody>\n",
5499
       "</table>\n",
5500
       "<p>373 rows × 347 columns</p>\n",
5501
       "</div>"
5502
      ],
5503
      "text/plain": [
5504
       "     CXCL14_rnaseq  FGF1_rnaseq  IFNA8_cnv  ADM_rnaseq  LTBP2_rnaseq  \\\n",
5505
       "0          -0.1170      -0.2221          1     -0.5126       -0.3289   \n",
5506
       "1          -0.2330      -0.4343         -1     -0.2381       -0.4799   \n",
5507
       "2          -0.1384      -0.1597         -1     -0.1521       -0.3348   \n",
5508
       "3          -0.1624      -0.3463         -1      0.0272       -0.7623   \n",
5509
       "4          -0.2346      -0.4090         -1     -0.2078        0.5702   \n",
5510
       "..             ...          ...        ...         ...           ...   \n",
5511
       "368        -0.2417      10.1423         -1     -0.5456        0.8742   \n",
5512
       "369        -0.2412       1.3253          1     -0.5680        1.0719   \n",
5513
       "370        -0.2396       0.0435          0     -0.3610        3.1965   \n",
5514
       "371        -0.2393      -0.4475          0      0.4772        2.9612   \n",
5515
       "372        -0.1936      -0.2281          0     -0.4124       -0.1873   \n",
5516
       "\n",
5517
       "     CCL28_rnaseq  IFNA7_rnaseq  GH2_rnaseq  AIMP1_rnaseq  DEFB1_rnaseq  ...  \\\n",
5518
       "0         -0.7331       -0.1244     -0.1693        0.5942       -0.4707  ...   \n",
5519
       "1         -0.0520       -0.1244     -0.1693        1.1854       -0.4820  ...   \n",
5520
       "2         -0.5310       -0.1244     -0.1693        0.3889       -0.3607  ...   \n",
5521
       "3          0.8196       -0.1244     -0.1693       -0.0416        0.1661  ...   \n",
5522
       "4         -0.4219       -0.1244      0.5257       -0.9790        0.3938  ...   \n",
5523
       "..            ...           ...         ...           ...           ...  ...   \n",
5524
       "368       -0.1822       -0.1244     -0.1693       -1.2395       -0.5125  ...   \n",
5525
       "369       -0.1707       -0.1244     -0.1693       -1.6694       -0.4528  ...   \n",
5526
       "370        1.3670       -0.1244     -0.1693        0.4439       -0.5099  ...   \n",
5527
       "371       -0.7799       -0.1244     -0.1693        0.5778        1.7607  ...   \n",
5528
       "372       -0.1200       -0.1244     -0.0326       -0.8786       -0.3912  ...   \n",
5529
       "\n",
5530
       "     NPPB_rnaseq  CCL27_rnaseq  FASLG_rnaseq  FGF20_cnv  FAM3C_rnaseq  \\\n",
5531
       "0        -0.2276        1.2033        0.9826         -1       -0.6161   \n",
5532
       "1        -0.2276       -0.2946       -0.5443         -1       -0.3499   \n",
5533
       "2         3.4177       -0.2946       -0.5320          0        0.4581   \n",
5534
       "3        -0.2276       -0.1020       -0.4682         -1       -0.4391   \n",
5535
       "4        -0.2276       -0.1035       -0.4688         -1        1.2596   \n",
5536
       "..           ...           ...           ...        ...           ...   \n",
5537
       "368      -0.2276       -0.2946        0.0777          0       -0.8242   \n",
5538
       "369       0.5679       -0.2661        1.0215         -2       -0.5327   \n",
5539
       "370      -0.2276       -0.2289        0.0521         -1        1.0317   \n",
5540
       "371      -0.2276        9.4098       -0.5443          0        0.2992   \n",
5541
       "372      -0.2276       -0.2570       -0.3810         -1       -0.6399   \n",
5542
       "\n",
5543
       "     IL18_rnaseq  GDF10_rnaseq  MYDGF_rnaseq  IL10_rnaseq  IFNW1_rnaseq  \n",
5544
       "0        -0.5643       -0.2165       -0.2836       0.9991       -0.3899  \n",
5545
       "1        -0.7958       -0.3140       -0.3359      -0.4865       -0.3899  \n",
5546
       "2        -0.6179       -0.2107        0.2751      -0.5108        1.0629  \n",
5547
       "3        -0.7275       -0.2876       -0.4696      -0.6248       -0.3899  \n",
5548
       "4        -0.5807        0.4108        0.1801      -0.6086       -0.3899  \n",
5549
       "..           ...           ...           ...          ...           ...  \n",
5550
       "368      -0.6727        0.1938        0.9210       0.4479       -0.3899  \n",
5551
       "369       0.3335       -0.1730        0.0147       0.6012        2.2526  \n",
5552
       "370      -0.1473       -0.1517        0.9384      -0.3165        0.6239  \n",
5553
       "371      -0.5451       -0.2456        0.8898      -0.5781       -0.3899  \n",
5554
       "372      -0.9128        0.3367       -0.4686       0.8995        1.3522  \n",
5555
       "\n",
5556
       "[373 rows x 347 columns]"
5557
      ]
5558
     },
5559
     "execution_count": 88,
5560
     "metadata": {},
5561
     "output_type": "execute_result"
5562
    }
5563
   ],
5564
   "source": [
5565
    "# Select the genomic_features columns whose names also occur in `sig`.\n",
    "genomic_features[series_intersection(sig, genomic_features.columns)]"
5568
   ]
5569
  },
5570
  {
5571
   "cell_type": "code",
5572
   "execution_count": 84,
5573
   "metadata": {},
5574
   "outputs": [],
5575
   "source": [
5576
    "def series_intersection(s1, s2):\n",
    "    \"\"\"Return the distinct values present in both ``s1`` and ``s2`` as a ``pd.Series``.\n",
    "\n",
    "    Values are returned in order of first appearance in ``s1``, so the result is\n",
    "    the same on every run (plain set iteration order can differ between\n",
    "    interpreter sessions for strings).\n",
    "\n",
    "    Args:\n",
    "        s1: iterable of hashable values; determines the output order.\n",
    "        s2: iterable of hashable values to intersect with.\n",
    "    \"\"\"\n",
    "    common = set(s1) & set(s2)\n",
    "    # dict.fromkeys drops repeats while keeping first-seen order.\n",
    "    return pd.Series(list(dict.fromkeys(v for v in s1 if v in common)))\n"
5578
   ]
5579
  },
5580
  {
5581
   "cell_type": "code",
5582
   "execution_count": 68,
5583
   "metadata": {},
5584
   "outputs": [
5585
    {
5586
     "data": {
5587
      "text/html": [
5588
       "<div>\n",
5589
       "<style scoped>\n",
5590
       "    .dataframe tbody tr th:only-of-type {\n",
5591
       "        vertical-align: middle;\n",
5592
       "    }\n",
5593
       "\n",
5594
       "    .dataframe tbody tr th {\n",
5595
       "        vertical-align: top;\n",
5596
       "    }\n",
5597
       "\n",
5598
       "    .dataframe thead th {\n",
5599
       "        text-align: right;\n",
5600
       "    }\n",
5601
       "</style>\n",
5602
       "<table border=\"1\" class=\"dataframe\">\n",
5603
       "  <thead>\n",
5604
       "    <tr style=\"text-align: right;\">\n",
5605
       "      <th></th>\n",
5606
       "      <th>NDUFS5_cnv</th>\n",
5607
       "      <th>MACF1_cnv</th>\n",
5608
       "      <th>RNA5SP44_cnv</th>\n",
5609
       "      <th>KIAA0754_cnv</th>\n",
5610
       "      <th>BMP8A_cnv</th>\n",
5611
       "      <th>PABPC4_cnv</th>\n",
5612
       "      <th>SNORA55_cnv</th>\n",
5613
       "      <th>HEYL_cnv</th>\n",
5614
       "      <th>HPCAL4_cnv</th>\n",
5615
       "      <th>NT5C1A_cnv</th>\n",
5616
       "      <th>...</th>\n",
5617
       "      <th>ZWINT_rnaseq</th>\n",
5618
       "      <th>ZXDA_rnaseq</th>\n",
5619
       "      <th>ZXDB_rnaseq</th>\n",
5620
       "      <th>ZXDC_rnaseq</th>\n",
5621
       "      <th>ZYG11A_rnaseq</th>\n",
5622
       "      <th>ZYG11B_rnaseq</th>\n",
5623
       "      <th>ZYX_rnaseq</th>\n",
5624
       "      <th>ZZEF1_rnaseq</th>\n",
5625
       "      <th>ZZZ3_rnaseq</th>\n",
5626
       "      <th>TPTEP1_rnaseq</th>\n",
5627
       "    </tr>\n",
5628
       "  </thead>\n",
5629
       "  <tbody>\n",
5630
       "    <tr>\n",
5631
       "      <th>0</th>\n",
5632
       "      <td>-1</td>\n",
5633
       "      <td>-1</td>\n",
5634
       "      <td>-1</td>\n",
5635
       "      <td>-1</td>\n",
5636
       "      <td>-1</td>\n",
5637
       "      <td>-1</td>\n",
5638
       "      <td>-1</td>\n",
5639
       "      <td>-1</td>\n",
5640
       "      <td>-1</td>\n",
5641
       "      <td>-1</td>\n",
5642
       "      <td>...</td>\n",
5643
       "      <td>-0.8388</td>\n",
5644
       "      <td>4.1375</td>\n",
5645
       "      <td>3.9664</td>\n",
5646
       "      <td>1.8437</td>\n",
5647
       "      <td>-0.3959</td>\n",
5648
       "      <td>-0.2561</td>\n",
5649
       "      <td>-0.2866</td>\n",
5650
       "      <td>1.8770</td>\n",
5651
       "      <td>-0.3179</td>\n",
5652
       "      <td>-0.3633</td>\n",
5653
       "    </tr>\n",
5654
       "    <tr>\n",
5655
       "      <th>1</th>\n",
5656
       "      <td>2</td>\n",
5657
       "      <td>2</td>\n",
5658
       "      <td>2</td>\n",
5659
       "      <td>2</td>\n",
5660
       "      <td>2</td>\n",
5661
       "      <td>2</td>\n",
5662
       "      <td>2</td>\n",
5663
       "      <td>2</td>\n",
5664
       "      <td>2</td>\n",
5665
       "      <td>2</td>\n",
5666
       "      <td>...</td>\n",
5667
       "      <td>-0.1083</td>\n",
5668
       "      <td>0.3393</td>\n",
5669
       "      <td>0.2769</td>\n",
5670
       "      <td>1.7320</td>\n",
5671
       "      <td>-0.0975</td>\n",
5672
       "      <td>2.6955</td>\n",
5673
       "      <td>-0.6741</td>\n",
5674
       "      <td>1.0323</td>\n",
5675
       "      <td>1.2766</td>\n",
5676
       "      <td>-0.3982</td>\n",
5677
       "    </tr>\n",
5678
       "    <tr>\n",
5679
       "      <th>2</th>\n",
5680
       "      <td>0</td>\n",
5681
       "      <td>0</td>\n",
5682
       "      <td>0</td>\n",
5683
       "      <td>0</td>\n",
5684
       "      <td>0</td>\n",
5685
       "      <td>0</td>\n",
5686
       "      <td>0</td>\n",
5687
       "      <td>0</td>\n",
5688
       "      <td>0</td>\n",
5689
       "      <td>0</td>\n",
5690
       "      <td>...</td>\n",
5691
       "      <td>-0.4155</td>\n",
5692
       "      <td>1.6846</td>\n",
5693
       "      <td>0.7711</td>\n",
5694
       "      <td>-0.3061</td>\n",
5695
       "      <td>-0.5016</td>\n",
5696
       "      <td>2.8548</td>\n",
5697
       "      <td>-0.6171</td>\n",
5698
       "      <td>-0.8608</td>\n",
5699
       "      <td>-0.0486</td>\n",
5700
       "      <td>-0.3962</td>\n",
5701
       "    </tr>\n",
5702
       "    <tr>\n",
5703
       "      <th>3</th>\n",
5704
       "      <td>0</td>\n",
5705
       "      <td>0</td>\n",
5706
       "      <td>0</td>\n",
5707
       "      <td>0</td>\n",
5708
       "      <td>0</td>\n",
5709
       "      <td>0</td>\n",
5710
       "      <td>0</td>\n",
5711
       "      <td>0</td>\n",
5712
       "      <td>0</td>\n",
5713
       "      <td>0</td>\n",
5714
       "      <td>...</td>\n",
5715
       "      <td>-0.8143</td>\n",
5716
       "      <td>0.8344</td>\n",
5717
       "      <td>1.5075</td>\n",
5718
       "      <td>3.6068</td>\n",
5719
       "      <td>-0.5004</td>\n",
5720
       "      <td>-0.0747</td>\n",
5721
       "      <td>-0.2185</td>\n",
5722
       "      <td>-0.4379</td>\n",
5723
       "      <td>1.6913</td>\n",
5724
       "      <td>1.7748</td>\n",
5725
       "    </tr>\n",
5726
       "    <tr>\n",
5727
       "      <th>4</th>\n",
5728
       "      <td>0</td>\n",
5729
       "      <td>0</td>\n",
5730
       "      <td>0</td>\n",
5731
       "      <td>0</td>\n",
5732
       "      <td>0</td>\n",
5733
       "      <td>0</td>\n",
5734
       "      <td>0</td>\n",
5735
       "      <td>0</td>\n",
5736
       "      <td>0</td>\n",
5737
       "      <td>0</td>\n",
5738
       "      <td>...</td>\n",
5739
       "      <td>0.0983</td>\n",
5740
       "      <td>-0.7908</td>\n",
5741
       "      <td>-0.0053</td>\n",
5742
       "      <td>-0.0643</td>\n",
5743
       "      <td>-0.3706</td>\n",
5744
       "      <td>0.3870</td>\n",
5745
       "      <td>-0.5589</td>\n",
5746
       "      <td>-0.5979</td>\n",
5747
       "      <td>0.0047</td>\n",
5748
       "      <td>-0.3548</td>\n",
5749
       "    </tr>\n",
5750
       "    <tr>\n",
5751
       "      <th>...</th>\n",
5752
       "      <td>...</td>\n",
5753
       "      <td>...</td>\n",
5754
       "      <td>...</td>\n",
5755
       "      <td>...</td>\n",
5756
       "      <td>...</td>\n",
5757
       "      <td>...</td>\n",
5758
       "      <td>...</td>\n",
5759
       "      <td>...</td>\n",
5760
       "      <td>...</td>\n",
5761
       "      <td>...</td>\n",
5762
       "      <td>...</td>\n",
5763
       "      <td>...</td>\n",
5764
       "      <td>...</td>\n",
5765
       "      <td>...</td>\n",
5766
       "      <td>...</td>\n",
5767
       "      <td>...</td>\n",
5768
       "      <td>...</td>\n",
5769
       "      <td>...</td>\n",
5770
       "      <td>...</td>\n",
5771
       "      <td>...</td>\n",
5772
       "      <td>...</td>\n",
5773
       "    </tr>\n",
5774
       "    <tr>\n",
5775
       "      <th>368</th>\n",
5776
       "      <td>2</td>\n",
5777
       "      <td>2</td>\n",
5778
       "      <td>2</td>\n",
5779
       "      <td>2</td>\n",
5780
       "      <td>2</td>\n",
5781
       "      <td>2</td>\n",
5782
       "      <td>2</td>\n",
5783
       "      <td>2</td>\n",
5784
       "      <td>2</td>\n",
5785
       "      <td>2</td>\n",
5786
       "      <td>...</td>\n",
5787
       "      <td>-0.0291</td>\n",
5788
       "      <td>-0.1058</td>\n",
5789
       "      <td>-0.6721</td>\n",
5790
       "      <td>0.2802</td>\n",
5791
       "      <td>1.9504</td>\n",
5792
       "      <td>-0.8784</td>\n",
5793
       "      <td>0.9506</td>\n",
5794
       "      <td>0.0607</td>\n",
5795
       "      <td>1.1883</td>\n",
5796
       "      <td>-0.3521</td>\n",
5797
       "    </tr>\n",
5798
       "    <tr>\n",
5799
       "      <th>369</th>\n",
5800
       "      <td>0</td>\n",
5801
       "      <td>0</td>\n",
5802
       "      <td>0</td>\n",
5803
       "      <td>0</td>\n",
5804
       "      <td>0</td>\n",
5805
       "      <td>0</td>\n",
5806
       "      <td>0</td>\n",
5807
       "      <td>0</td>\n",
5808
       "      <td>0</td>\n",
5809
       "      <td>0</td>\n",
5810
       "      <td>...</td>\n",
5811
       "      <td>0.0497</td>\n",
5812
       "      <td>0.3673</td>\n",
5813
       "      <td>-0.2208</td>\n",
5814
       "      <td>0.3034</td>\n",
5815
       "      <td>3.2580</td>\n",
5816
       "      <td>-0.2089</td>\n",
5817
       "      <td>1.6053</td>\n",
5818
       "      <td>-0.8746</td>\n",
5819
       "      <td>-0.4491</td>\n",
5820
       "      <td>-0.3450</td>\n",
5821
       "    </tr>\n",
5822
       "    <tr>\n",
5823
       "      <th>370</th>\n",
5824
       "      <td>1</td>\n",
5825
       "      <td>1</td>\n",
5826
       "      <td>1</td>\n",
5827
       "      <td>1</td>\n",
5828
       "      <td>1</td>\n",
5829
       "      <td>1</td>\n",
5830
       "      <td>1</td>\n",
5831
       "      <td>1</td>\n",
5832
       "      <td>1</td>\n",
5833
       "      <td>1</td>\n",
5834
       "      <td>...</td>\n",
5835
       "      <td>0.3822</td>\n",
5836
       "      <td>-0.7003</td>\n",
5837
       "      <td>-0.7661</td>\n",
5838
       "      <td>-1.7035</td>\n",
5839
       "      <td>-0.5423</td>\n",
5840
       "      <td>-0.3488</td>\n",
5841
       "      <td>1.3713</td>\n",
5842
       "      <td>-0.4365</td>\n",
5843
       "      <td>2.3456</td>\n",
5844
       "      <td>-0.3866</td>\n",
5845
       "    </tr>\n",
5846
       "    <tr>\n",
5847
       "      <th>371</th>\n",
5848
       "      <td>0</td>\n",
5849
       "      <td>0</td>\n",
5850
       "      <td>0</td>\n",
5851
       "      <td>0</td>\n",
5852
       "      <td>0</td>\n",
5853
       "      <td>0</td>\n",
5854
       "      <td>0</td>\n",
5855
       "      <td>0</td>\n",
5856
       "      <td>0</td>\n",
5857
       "      <td>0</td>\n",
5858
       "      <td>...</td>\n",
5859
       "      <td>-0.6853</td>\n",
5860
       "      <td>-1.0240</td>\n",
5861
       "      <td>-1.2890</td>\n",
5862
       "      <td>-1.5666</td>\n",
5863
       "      <td>-0.1270</td>\n",
5864
       "      <td>-1.4662</td>\n",
5865
       "      <td>0.3981</td>\n",
5866
       "      <td>-0.5976</td>\n",
5867
       "      <td>-1.3822</td>\n",
5868
       "      <td>-0.4157</td>\n",
5869
       "    </tr>\n",
5870
       "    <tr>\n",
5871
       "      <th>372</th>\n",
5872
       "      <td>0</td>\n",
5873
       "      <td>0</td>\n",
5874
       "      <td>0</td>\n",
5875
       "      <td>0</td>\n",
5876
       "      <td>0</td>\n",
5877
       "      <td>0</td>\n",
5878
       "      <td>0</td>\n",
5879
       "      <td>0</td>\n",
5880
       "      <td>0</td>\n",
5881
       "      <td>0</td>\n",
5882
       "      <td>...</td>\n",
5883
       "      <td>0.0517</td>\n",
5884
       "      <td>-0.3570</td>\n",
5885
       "      <td>-0.4843</td>\n",
5886
       "      <td>-0.3792</td>\n",
5887
       "      <td>-0.1964</td>\n",
5888
       "      <td>0.4200</td>\n",
5889
       "      <td>3.2547</td>\n",
5890
       "      <td>-0.1232</td>\n",
5891
       "      <td>3.4519</td>\n",
5892
       "      <td>-0.1962</td>\n",
5893
       "    </tr>\n",
5894
       "  </tbody>\n",
5895
       "</table>\n",
5896
       "<p>373 rows × 20395 columns</p>\n",
5897
       "</div>"
5898
      ],
5899
      "text/plain": [
5900
       "     NDUFS5_cnv  MACF1_cnv  RNA5SP44_cnv  KIAA0754_cnv  BMP8A_cnv  PABPC4_cnv  \\\n",
5901
       "0            -1         -1            -1            -1         -1          -1   \n",
5902
       "1             2          2             2             2          2           2   \n",
5903
       "2             0          0             0             0          0           0   \n",
5904
       "3             0          0             0             0          0           0   \n",
5905
       "4             0          0             0             0          0           0   \n",
5906
       "..          ...        ...           ...           ...        ...         ...   \n",
5907
       "368           2          2             2             2          2           2   \n",
5908
       "369           0          0             0             0          0           0   \n",
5909
       "370           1          1             1             1          1           1   \n",
5910
       "371           0          0             0             0          0           0   \n",
5911
       "372           0          0             0             0          0           0   \n",
5912
       "\n",
5913
       "     SNORA55_cnv  HEYL_cnv  HPCAL4_cnv  NT5C1A_cnv  ...  ZWINT_rnaseq  \\\n",
5914
       "0             -1        -1          -1          -1  ...       -0.8388   \n",
5915
       "1              2         2           2           2  ...       -0.1083   \n",
5916
       "2              0         0           0           0  ...       -0.4155   \n",
5917
       "3              0         0           0           0  ...       -0.8143   \n",
5918
       "4              0         0           0           0  ...        0.0983   \n",
5919
       "..           ...       ...         ...         ...  ...           ...   \n",
5920
       "368            2         2           2           2  ...       -0.0291   \n",
5921
       "369            0         0           0           0  ...        0.0497   \n",
5922
       "370            1         1           1           1  ...        0.3822   \n",
5923
       "371            0         0           0           0  ...       -0.6853   \n",
5924
       "372            0         0           0           0  ...        0.0517   \n",
5925
       "\n",
5926
       "     ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  ZYG11A_rnaseq  ZYG11B_rnaseq  \\\n",
5927
       "0         4.1375       3.9664       1.8437        -0.3959        -0.2561   \n",
5928
       "1         0.3393       0.2769       1.7320        -0.0975         2.6955   \n",
5929
       "2         1.6846       0.7711      -0.3061        -0.5016         2.8548   \n",
5930
       "3         0.8344       1.5075       3.6068        -0.5004        -0.0747   \n",
5931
       "4        -0.7908      -0.0053      -0.0643        -0.3706         0.3870   \n",
5932
       "..           ...          ...          ...            ...            ...   \n",
5933
       "368      -0.1058      -0.6721       0.2802         1.9504        -0.8784   \n",
5934
       "369       0.3673      -0.2208       0.3034         3.2580        -0.2089   \n",
5935
       "370      -0.7003      -0.7661      -1.7035        -0.5423        -0.3488   \n",
5936
       "371      -1.0240      -1.2890      -1.5666        -0.1270        -1.4662   \n",
5937
       "372      -0.3570      -0.4843      -0.3792        -0.1964         0.4200   \n",
5938
       "\n",
5939
       "     ZYX_rnaseq  ZZEF1_rnaseq  ZZZ3_rnaseq  TPTEP1_rnaseq  \n",
5940
       "0       -0.2866        1.8770      -0.3179        -0.3633  \n",
5941
       "1       -0.6741        1.0323       1.2766        -0.3982  \n",
5942
       "2       -0.6171       -0.8608      -0.0486        -0.3962  \n",
5943
       "3       -0.2185       -0.4379       1.6913         1.7748  \n",
5944
       "4       -0.5589       -0.5979       0.0047        -0.3548  \n",
5945
       "..          ...           ...          ...            ...  \n",
5946
       "368      0.9506        0.0607       1.1883        -0.3521  \n",
5947
       "369      1.6053       -0.8746      -0.4491        -0.3450  \n",
5948
       "370      1.3713       -0.4365       2.3456        -0.3866  \n",
5949
       "371      0.3981       -0.5976      -1.3822        -0.4157  \n",
5950
       "372      3.2547       -0.1232       3.4519        -0.1962  \n",
5951
       "\n",
5952
       "[373 rows x 20395 columns]"
5953
      ]
5954
     },
5955
     "execution_count": 68,
5956
     "metadata": {},
5957
     "output_type": "execute_result"
5958
    }
5959
   ],
5960
   "source": [
5961
    "genomic_features"
5962
   ]
5963
  },
5964
  {
5965
   "cell_type": "code",
5966
   "execution_count": 11,
5967
   "metadata": {},
5968
   "outputs": [],
5969
   "source": [
5970
    "if 'case_id' not in slide_data:\n",
5971
    "    slide_data.index = slide_data.index.str[:12]\n",
5972
    "    slide_data['case_id'] = slide_data.index\n",
5973
    "    slide_data = slide_data.reset_index(drop=True)"
5974
   ]
5975
  },
5976
  {
5977
   "cell_type": "code",
5978
   "execution_count": 14,
5979
   "metadata": {},
5980
   "outputs": [],
5981
   "source": [
5982
    "new_cols = list(slide_data.columns[-2:]) + list(slide_data.columns[:-2])\n",
5983
    "slide_data = slide_data[new_cols]"
5984
   ]
5985
  },
5986
  {
5987
   "cell_type": "code",
5988
   "execution_count": 15,
5989
   "metadata": {},
5990
   "outputs": [
5991
    {
5992
     "data": {
5993
      "text/html": [
5994
       "<div>\n",
5995
       "<style scoped>\n",
5996
       "    .dataframe tbody tr th:only-of-type {\n",
5997
       "        vertical-align: middle;\n",
5998
       "    }\n",
5999
       "\n",
6000
       "    .dataframe tbody tr th {\n",
6001
       "        vertical-align: top;\n",
6002
       "    }\n",
6003
       "\n",
6004
       "    .dataframe thead th {\n",
6005
       "        text-align: right;\n",
6006
       "    }\n",
6007
       "</style>\n",
6008
       "<table border=\"1\" class=\"dataframe\">\n",
6009
       "  <thead>\n",
6010
       "    <tr style=\"text-align: right;\">\n",
6011
       "      <th></th>\n",
6012
       "      <th>ZZZ3_rnaseq</th>\n",
6013
       "      <th>TPTEP1_rnaseq</th>\n",
6014
       "      <th>slide_id</th>\n",
6015
       "      <th>site</th>\n",
6016
       "      <th>is_female</th>\n",
6017
       "      <th>oncotree_code</th>\n",
6018
       "      <th>age</th>\n",
6019
       "      <th>survival_months</th>\n",
6020
       "      <th>censorship</th>\n",
6021
       "      <th>train</th>\n",
6022
       "      <th>...</th>\n",
6023
       "      <th>ZW10_rnaseq</th>\n",
6024
       "      <th>ZWILCH_rnaseq</th>\n",
6025
       "      <th>ZWINT_rnaseq</th>\n",
6026
       "      <th>ZXDA_rnaseq</th>\n",
6027
       "      <th>ZXDB_rnaseq</th>\n",
6028
       "      <th>ZXDC_rnaseq</th>\n",
6029
       "      <th>ZYG11A_rnaseq</th>\n",
6030
       "      <th>ZYG11B_rnaseq</th>\n",
6031
       "      <th>ZYX_rnaseq</th>\n",
6032
       "      <th>ZZEF1_rnaseq</th>\n",
6033
       "    </tr>\n",
6034
       "    <tr>\n",
6035
       "      <th>case_id</th>\n",
6036
       "      <th></th>\n",
6037
       "      <th></th>\n",
6038
       "      <th></th>\n",
6039
       "      <th></th>\n",
6040
       "      <th></th>\n",
6041
       "      <th></th>\n",
6042
       "      <th></th>\n",
6043
       "      <th></th>\n",
6044
       "      <th></th>\n",
6045
       "      <th></th>\n",
6046
       "      <th></th>\n",
6047
       "      <th></th>\n",
6048
       "      <th></th>\n",
6049
       "      <th></th>\n",
6050
       "      <th></th>\n",
6051
       "      <th></th>\n",
6052
       "      <th></th>\n",
6053
       "      <th></th>\n",
6054
       "      <th></th>\n",
6055
       "      <th></th>\n",
6056
       "      <th></th>\n",
6057
       "    </tr>\n",
6058
       "  </thead>\n",
6059
       "  <tbody>\n",
6060
       "    <tr>\n",
6061
       "      <th>TCGA-2F-A9KO</th>\n",
6062
       "      <td>-0.3179</td>\n",
6063
       "      <td>-0.3633</td>\n",
6064
       "      <td>TCGA-2F-A9KO-01Z-00-DX1.195576CF-B739-4BD9-B15...</td>\n",
6065
       "      <td>2F</td>\n",
6066
       "      <td>0</td>\n",
6067
       "      <td>BLCA</td>\n",
6068
       "      <td>63</td>\n",
6069
       "      <td>24.11</td>\n",
6070
       "      <td>0</td>\n",
6071
       "      <td>1.0</td>\n",
6072
       "      <td>...</td>\n",
6073
       "      <td>-0.7172</td>\n",
6074
       "      <td>0.7409</td>\n",
6075
       "      <td>-0.8388</td>\n",
6076
       "      <td>4.1375</td>\n",
6077
       "      <td>3.9664</td>\n",
6078
       "      <td>1.8437</td>\n",
6079
       "      <td>-0.3959</td>\n",
6080
       "      <td>-0.2561</td>\n",
6081
       "      <td>-0.2866</td>\n",
6082
       "      <td>1.8770</td>\n",
6083
       "    </tr>\n",
6084
       "    <tr>\n",
6085
       "      <th>TCGA-2F-A9KP</th>\n",
6086
       "      <td>1.2766</td>\n",
6087
       "      <td>-0.3982</td>\n",
6088
       "      <td>TCGA-2F-A9KP-01Z-00-DX1.3CDF534E-958F-4467-AA7...</td>\n",
6089
       "      <td>2F</td>\n",
6090
       "      <td>0</td>\n",
6091
       "      <td>BLCA</td>\n",
6092
       "      <td>66</td>\n",
6093
       "      <td>11.96</td>\n",
6094
       "      <td>0</td>\n",
6095
       "      <td>1.0</td>\n",
6096
       "      <td>...</td>\n",
6097
       "      <td>0.6373</td>\n",
6098
       "      <td>0.8559</td>\n",
6099
       "      <td>-0.1083</td>\n",
6100
       "      <td>0.3393</td>\n",
6101
       "      <td>0.2769</td>\n",
6102
       "      <td>1.7320</td>\n",
6103
       "      <td>-0.0975</td>\n",
6104
       "      <td>2.6955</td>\n",
6105
       "      <td>-0.6741</td>\n",
6106
       "      <td>1.0323</td>\n",
6107
       "    </tr>\n",
6108
       "    <tr>\n",
6109
       "      <th>TCGA-2F-A9KP</th>\n",
6110
       "      <td>1.2766</td>\n",
6111
       "      <td>-0.3982</td>\n",
6112
       "      <td>TCGA-2F-A9KP-01Z-00-DX2.718C82A3-252B-498E-BFB...</td>\n",
6113
       "      <td>2F</td>\n",
6114
       "      <td>0</td>\n",
6115
       "      <td>BLCA</td>\n",
6116
       "      <td>66</td>\n",
6117
       "      <td>11.96</td>\n",
6118
       "      <td>0</td>\n",
6119
       "      <td>1.0</td>\n",
6120
       "      <td>...</td>\n",
6121
       "      <td>0.6373</td>\n",
6122
       "      <td>0.8559</td>\n",
6123
       "      <td>-0.1083</td>\n",
6124
       "      <td>0.3393</td>\n",
6125
       "      <td>0.2769</td>\n",
6126
       "      <td>1.7320</td>\n",
6127
       "      <td>-0.0975</td>\n",
6128
       "      <td>2.6955</td>\n",
6129
       "      <td>-0.6741</td>\n",
6130
       "      <td>1.0323</td>\n",
6131
       "    </tr>\n",
6132
       "    <tr>\n",
6133
       "      <th>TCGA-2F-A9KQ</th>\n",
6134
       "      <td>-0.0486</td>\n",
6135
       "      <td>-0.3962</td>\n",
6136
       "      <td>TCGA-2F-A9KQ-01Z-00-DX1.1C8CB2DD-5CC6-4E99-A0F...</td>\n",
6137
       "      <td>2F</td>\n",
6138
       "      <td>0</td>\n",
6139
       "      <td>BLCA</td>\n",
6140
       "      <td>69</td>\n",
6141
       "      <td>94.81</td>\n",
6142
       "      <td>1</td>\n",
6143
       "      <td>1.0</td>\n",
6144
       "      <td>...</td>\n",
6145
       "      <td>-0.5676</td>\n",
6146
       "      <td>-0.0621</td>\n",
6147
       "      <td>-0.4155</td>\n",
6148
       "      <td>1.6846</td>\n",
6149
       "      <td>0.7711</td>\n",
6150
       "      <td>-0.3061</td>\n",
6151
       "      <td>-0.5016</td>\n",
6152
       "      <td>2.8548</td>\n",
6153
       "      <td>-0.6171</td>\n",
6154
       "      <td>-0.8608</td>\n",
6155
       "    </tr>\n",
6156
       "    <tr>\n",
6157
       "      <th>TCGA-2F-A9KR</th>\n",
6158
       "      <td>1.6913</td>\n",
6159
       "      <td>1.7748</td>\n",
6160
       "      <td>TCGA-2F-A9KR-01Z-00-DX1.D6A4BD2D-18F3-4FA6-827...</td>\n",
6161
       "      <td>2F</td>\n",
6162
       "      <td>1</td>\n",
6163
       "      <td>BLCA</td>\n",
6164
       "      <td>59</td>\n",
6165
       "      <td>104.57</td>\n",
6166
       "      <td>0</td>\n",
6167
       "      <td>1.0</td>\n",
6168
       "      <td>...</td>\n",
6169
       "      <td>-1.3825</td>\n",
6170
       "      <td>0.3550</td>\n",
6171
       "      <td>-0.8143</td>\n",
6172
       "      <td>0.8344</td>\n",
6173
       "      <td>1.5075</td>\n",
6174
       "      <td>3.6068</td>\n",
6175
       "      <td>-0.5004</td>\n",
6176
       "      <td>-0.0747</td>\n",
6177
       "      <td>-0.2185</td>\n",
6178
       "      <td>-0.4379</td>\n",
6179
       "    </tr>\n",
6180
       "    <tr>\n",
6181
       "      <th>...</th>\n",
6182
       "      <td>...</td>\n",
6183
       "      <td>...</td>\n",
6184
       "      <td>...</td>\n",
6185
       "      <td>...</td>\n",
6186
       "      <td>...</td>\n",
6187
       "      <td>...</td>\n",
6188
       "      <td>...</td>\n",
6189
       "      <td>...</td>\n",
6190
       "      <td>...</td>\n",
6191
       "      <td>...</td>\n",
6192
       "      <td>...</td>\n",
6193
       "      <td>...</td>\n",
6194
       "      <td>...</td>\n",
6195
       "      <td>...</td>\n",
6196
       "      <td>...</td>\n",
6197
       "      <td>...</td>\n",
6198
       "      <td>...</td>\n",
6199
       "      <td>...</td>\n",
6200
       "      <td>...</td>\n",
6201
       "      <td>...</td>\n",
6202
       "      <td>...</td>\n",
6203
       "    </tr>\n",
6204
       "    <tr>\n",
6205
       "      <th>TCGA-ZF-AA54</th>\n",
6206
       "      <td>1.1883</td>\n",
6207
       "      <td>-0.3521</td>\n",
6208
       "      <td>TCGA-ZF-AA54-01Z-00-DX1.9118BB51-333A-4257-A79...</td>\n",
6209
       "      <td>ZF</td>\n",
6210
       "      <td>0</td>\n",
6211
       "      <td>BLCA</td>\n",
6212
       "      <td>71</td>\n",
6213
       "      <td>19.38</td>\n",
6214
       "      <td>0</td>\n",
6215
       "      <td>1.0</td>\n",
6216
       "      <td>...</td>\n",
6217
       "      <td>-0.0898</td>\n",
6218
       "      <td>2.1092</td>\n",
6219
       "      <td>-0.0291</td>\n",
6220
       "      <td>-0.1058</td>\n",
6221
       "      <td>-0.6721</td>\n",
6222
       "      <td>0.2802</td>\n",
6223
       "      <td>1.9504</td>\n",
6224
       "      <td>-0.8784</td>\n",
6225
       "      <td>0.9506</td>\n",
6226
       "      <td>0.0607</td>\n",
6227
       "    </tr>\n",
6228
       "    <tr>\n",
6229
       "      <th>TCGA-ZF-AA58</th>\n",
6230
       "      <td>-0.4491</td>\n",
6231
       "      <td>-0.3450</td>\n",
6232
       "      <td>TCGA-ZF-AA58-01Z-00-DX1.85C3611E-11FA-4AAE-B88...</td>\n",
6233
       "      <td>ZF</td>\n",
6234
       "      <td>1</td>\n",
6235
       "      <td>BLCA</td>\n",
6236
       "      <td>61</td>\n",
6237
       "      <td>54.17</td>\n",
6238
       "      <td>1</td>\n",
6239
       "      <td>1.0</td>\n",
6240
       "      <td>...</td>\n",
6241
       "      <td>-0.2075</td>\n",
6242
       "      <td>-0.0617</td>\n",
6243
       "      <td>0.0497</td>\n",
6244
       "      <td>0.3673</td>\n",
6245
       "      <td>-0.2208</td>\n",
6246
       "      <td>0.3034</td>\n",
6247
       "      <td>3.2580</td>\n",
6248
       "      <td>-0.2089</td>\n",
6249
       "      <td>1.6053</td>\n",
6250
       "      <td>-0.8746</td>\n",
6251
       "    </tr>\n",
6252
       "    <tr>\n",
6253
       "      <th>TCGA-ZF-AA5H</th>\n",
6254
       "      <td>2.3456</td>\n",
6255
       "      <td>-0.3866</td>\n",
6256
       "      <td>TCGA-ZF-AA5H-01Z-00-DX1.2B5DF00E-E0FD-4C58-A82...</td>\n",
6257
       "      <td>ZF</td>\n",
6258
       "      <td>1</td>\n",
6259
       "      <td>BLCA</td>\n",
6260
       "      <td>60</td>\n",
6261
       "      <td>29.47</td>\n",
6262
       "      <td>1</td>\n",
6263
       "      <td>1.0</td>\n",
6264
       "      <td>...</td>\n",
6265
       "      <td>-1.4118</td>\n",
6266
       "      <td>-0.1236</td>\n",
6267
       "      <td>0.3822</td>\n",
6268
       "      <td>-0.7003</td>\n",
6269
       "      <td>-0.7661</td>\n",
6270
       "      <td>-1.7035</td>\n",
6271
       "      <td>-0.5423</td>\n",
6272
       "      <td>-0.3488</td>\n",
6273
       "      <td>1.3713</td>\n",
6274
       "      <td>-0.4365</td>\n",
6275
       "    </tr>\n",
6276
       "    <tr>\n",
6277
       "      <th>TCGA-ZF-AA5N</th>\n",
6278
       "      <td>-1.3822</td>\n",
6279
       "      <td>-0.4157</td>\n",
6280
       "      <td>TCGA-ZF-AA5N-01Z-00-DX1.A207E3EE-CC7D-4267-A77...</td>\n",
6281
       "      <td>ZF</td>\n",
6282
       "      <td>1</td>\n",
6283
       "      <td>BLCA</td>\n",
6284
       "      <td>62</td>\n",
6285
       "      <td>5.52</td>\n",
6286
       "      <td>0</td>\n",
6287
       "      <td>1.0</td>\n",
6288
       "      <td>...</td>\n",
6289
       "      <td>-0.1733</td>\n",
6290
       "      <td>-0.2397</td>\n",
6291
       "      <td>-0.6853</td>\n",
6292
       "      <td>-1.0240</td>\n",
6293
       "      <td>-1.2890</td>\n",
6294
       "      <td>-1.5666</td>\n",
6295
       "      <td>-0.1270</td>\n",
6296
       "      <td>-1.4662</td>\n",
6297
       "      <td>0.3981</td>\n",
6298
       "      <td>-0.5976</td>\n",
6299
       "    </tr>\n",
6300
       "    <tr>\n",
6301
       "      <th>TCGA-ZF-AA5P</th>\n",
6302
       "      <td>3.4519</td>\n",
6303
       "      <td>-0.1962</td>\n",
6304
       "      <td>TCGA-ZF-AA5P-01Z-00-DX1.B91697A2-A186-4E67-A81...</td>\n",
6305
       "      <td>ZF</td>\n",
6306
       "      <td>0</td>\n",
6307
       "      <td>BLCA</td>\n",
6308
       "      <td>65</td>\n",
6309
       "      <td>12.22</td>\n",
6310
       "      <td>1</td>\n",
6311
       "      <td>1.0</td>\n",
6312
       "      <td>...</td>\n",
6313
       "      <td>-1.1056</td>\n",
6314
       "      <td>-0.6634</td>\n",
6315
       "      <td>0.0517</td>\n",
6316
       "      <td>-0.3570</td>\n",
6317
       "      <td>-0.4843</td>\n",
6318
       "      <td>-0.3792</td>\n",
6319
       "      <td>-0.1964</td>\n",
6320
       "      <td>0.4200</td>\n",
6321
       "      <td>3.2547</td>\n",
6322
       "      <td>-0.1232</td>\n",
6323
       "    </tr>\n",
6324
       "  </tbody>\n",
6325
       "</table>\n",
6326
       "<p>437 rows × 20403 columns</p>\n",
6327
       "</div>"
6328
      ],
6329
      "text/plain": [
6330
       "              ZZZ3_rnaseq  TPTEP1_rnaseq  \\\n",
6331
       "case_id                                    \n",
6332
       "TCGA-2F-A9KO      -0.3179        -0.3633   \n",
6333
       "TCGA-2F-A9KP       1.2766        -0.3982   \n",
6334
       "TCGA-2F-A9KP       1.2766        -0.3982   \n",
6335
       "TCGA-2F-A9KQ      -0.0486        -0.3962   \n",
6336
       "TCGA-2F-A9KR       1.6913         1.7748   \n",
6337
       "...                   ...            ...   \n",
6338
       "TCGA-ZF-AA54       1.1883        -0.3521   \n",
6339
       "TCGA-ZF-AA58      -0.4491        -0.3450   \n",
6340
       "TCGA-ZF-AA5H       2.3456        -0.3866   \n",
6341
       "TCGA-ZF-AA5N      -1.3822        -0.4157   \n",
6342
       "TCGA-ZF-AA5P       3.4519        -0.1962   \n",
6343
       "\n",
6344
       "                                                       slide_id site  \\\n",
6345
       "case_id                                                                \n",
6346
       "TCGA-2F-A9KO  TCGA-2F-A9KO-01Z-00-DX1.195576CF-B739-4BD9-B15...   2F   \n",
6347
       "TCGA-2F-A9KP  TCGA-2F-A9KP-01Z-00-DX1.3CDF534E-958F-4467-AA7...   2F   \n",
6348
       "TCGA-2F-A9KP  TCGA-2F-A9KP-01Z-00-DX2.718C82A3-252B-498E-BFB...   2F   \n",
6349
       "TCGA-2F-A9KQ  TCGA-2F-A9KQ-01Z-00-DX1.1C8CB2DD-5CC6-4E99-A0F...   2F   \n",
6350
       "TCGA-2F-A9KR  TCGA-2F-A9KR-01Z-00-DX1.D6A4BD2D-18F3-4FA6-827...   2F   \n",
6351
       "...                                                         ...  ...   \n",
6352
       "TCGA-ZF-AA54  TCGA-ZF-AA54-01Z-00-DX1.9118BB51-333A-4257-A79...   ZF   \n",
6353
       "TCGA-ZF-AA58  TCGA-ZF-AA58-01Z-00-DX1.85C3611E-11FA-4AAE-B88...   ZF   \n",
6354
       "TCGA-ZF-AA5H  TCGA-ZF-AA5H-01Z-00-DX1.2B5DF00E-E0FD-4C58-A82...   ZF   \n",
6355
       "TCGA-ZF-AA5N  TCGA-ZF-AA5N-01Z-00-DX1.A207E3EE-CC7D-4267-A77...   ZF   \n",
6356
       "TCGA-ZF-AA5P  TCGA-ZF-AA5P-01Z-00-DX1.B91697A2-A186-4E67-A81...   ZF   \n",
6357
       "\n",
6358
       "              is_female oncotree_code  age  survival_months  censorship  \\\n",
6359
       "case_id                                                                   \n",
6360
       "TCGA-2F-A9KO          0          BLCA   63            24.11           0   \n",
6361
       "TCGA-2F-A9KP          0          BLCA   66            11.96           0   \n",
6362
       "TCGA-2F-A9KP          0          BLCA   66            11.96           0   \n",
6363
       "TCGA-2F-A9KQ          0          BLCA   69            94.81           1   \n",
6364
       "TCGA-2F-A9KR          1          BLCA   59           104.57           0   \n",
6365
       "...                 ...           ...  ...              ...         ...   \n",
6366
       "TCGA-ZF-AA54          0          BLCA   71            19.38           0   \n",
6367
       "TCGA-ZF-AA58          1          BLCA   61            54.17           1   \n",
6368
       "TCGA-ZF-AA5H          1          BLCA   60            29.47           1   \n",
6369
       "TCGA-ZF-AA5N          1          BLCA   62             5.52           0   \n",
6370
       "TCGA-ZF-AA5P          0          BLCA   65            12.22           1   \n",
6371
       "\n",
6372
       "              train  ...  ZW10_rnaseq  ZWILCH_rnaseq  ZWINT_rnaseq  \\\n",
6373
       "case_id              ...                                             \n",
6374
       "TCGA-2F-A9KO    1.0  ...      -0.7172         0.7409       -0.8388   \n",
6375
       "TCGA-2F-A9KP    1.0  ...       0.6373         0.8559       -0.1083   \n",
6376
       "TCGA-2F-A9KP    1.0  ...       0.6373         0.8559       -0.1083   \n",
6377
       "TCGA-2F-A9KQ    1.0  ...      -0.5676        -0.0621       -0.4155   \n",
6378
       "TCGA-2F-A9KR    1.0  ...      -1.3825         0.3550       -0.8143   \n",
6379
       "...             ...  ...          ...            ...           ...   \n",
6380
       "TCGA-ZF-AA54    1.0  ...      -0.0898         2.1092       -0.0291   \n",
6381
       "TCGA-ZF-AA58    1.0  ...      -0.2075        -0.0617        0.0497   \n",
6382
       "TCGA-ZF-AA5H    1.0  ...      -1.4118        -0.1236        0.3822   \n",
6383
       "TCGA-ZF-AA5N    1.0  ...      -0.1733        -0.2397       -0.6853   \n",
6384
       "TCGA-ZF-AA5P    1.0  ...      -1.1056        -0.6634        0.0517   \n",
6385
       "\n",
6386
       "              ZXDA_rnaseq  ZXDB_rnaseq  ZXDC_rnaseq  ZYG11A_rnaseq  \\\n",
6387
       "case_id                                                              \n",
6388
       "TCGA-2F-A9KO       4.1375       3.9664       1.8437        -0.3959   \n",
6389
       "TCGA-2F-A9KP       0.3393       0.2769       1.7320        -0.0975   \n",
6390
       "TCGA-2F-A9KP       0.3393       0.2769       1.7320        -0.0975   \n",
6391
       "TCGA-2F-A9KQ       1.6846       0.7711      -0.3061        -0.5016   \n",
6392
       "TCGA-2F-A9KR       0.8344       1.5075       3.6068        -0.5004   \n",
6393
       "...                   ...          ...          ...            ...   \n",
6394
       "TCGA-ZF-AA54      -0.1058      -0.6721       0.2802         1.9504   \n",
6395
       "TCGA-ZF-AA58       0.3673      -0.2208       0.3034         3.2580   \n",
6396
       "TCGA-ZF-AA5H      -0.7003      -0.7661      -1.7035        -0.5423   \n",
6397
       "TCGA-ZF-AA5N      -1.0240      -1.2890      -1.5666        -0.1270   \n",
6398
       "TCGA-ZF-AA5P      -0.3570      -0.4843      -0.3792        -0.1964   \n",
6399
       "\n",
6400
       "              ZYG11B_rnaseq  ZYX_rnaseq  ZZEF1_rnaseq  \n",
6401
       "case_id                                                \n",
6402
       "TCGA-2F-A9KO        -0.2561     -0.2866        1.8770  \n",
6403
       "TCGA-2F-A9KP         2.6955     -0.6741        1.0323  \n",
6404
       "TCGA-2F-A9KP         2.6955     -0.6741        1.0323  \n",
6405
       "TCGA-2F-A9KQ         2.8548     -0.6171       -0.8608  \n",
6406
       "TCGA-2F-A9KR        -0.0747     -0.2185       -0.4379  \n",
6407
       "...                     ...         ...           ...  \n",
6408
       "TCGA-ZF-AA54        -0.8784      0.9506        0.0607  \n",
6409
       "TCGA-ZF-AA58        -0.2089      1.6053       -0.8746  \n",
6410
       "TCGA-ZF-AA5H        -0.3488      1.3713       -0.4365  \n",
6411
       "TCGA-ZF-AA5N        -1.4662      0.3981       -0.5976  \n",
6412
       "TCGA-ZF-AA5P         0.4200      3.2547       -0.1232  \n",
6413
       "\n",
6414
       "[437 rows x 20403 columns]"
6415
      ]
6416
     },
6417
     "execution_count": 15,
6418
     "metadata": {},
6419
     "output_type": "execute_result"
6420
    }
6421
   ],
6422
   "source": [
6423
    "slide_data"
6424
   ]
6425
  }
6426
 ],
6427
 "metadata": {
6428
  "kernelspec": {
6429
   "display_name": "Python 3",
6430
   "language": "python",
6431
   "name": "python3"
6432
  },
6433
  "language_info": {
6434
   "codemirror_mode": {
6435
    "name": "ipython",
6436
    "version": 3
6437
   },
6438
   "file_extension": ".py",
6439
   "mimetype": "text/x-python",
6440
   "name": "python",
6441
   "nbconvert_exporter": "python",
6442
   "pygments_lexer": "ipython3",
6443
   "version": "3.7.7"
6444
  }
6445
 },
6446
 "nbformat": 4,
6447
 "nbformat_minor": 4
6448
}