7241 lines (7240 with data), 306.0 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"import pandas as pd\n",
"import os,sys\n",
"import pybedtools as pbt\n",
"from StringIO import StringIO\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import time\n",
"from mapper import expand, parse_mapping_table, apply_mappers\n",
"%matplotlib inline\n",
"\n",
"\n",
"chr_dict = dict(zip(range(1,22),map(str,range(1,22))))\n",
"chr_dict.update({22: 'X', 23: \"Y\"})\n",
"\n",
"root_dir = \"/home/olya/SFU/Hossein/v2/\"\n",
"gene_coords_file = root_dir + \"ref_GRCh37.p5_top_level.gff3.bed\" # must contain chromosome, start, end and Entrez Gene ID for hg19"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TCGA \n",
"\n",
"Assume that segmentation files from GDAC : http://gdac.broadinstitute.org/runs/stddata__2015_08_21/data/*/*snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.seg.txt are dowmnoaded\n",
"\n",
"1) Filtering segments:\n",
" - segments containing less than 5 probes removed\n",
" - keep only segments with segment mean below -0.23 or above 0.2. This means that one copy gains and losses are detectable when their CCF (canncer cell fraction) is 0.3 or higher. \n",
" \n",
"TODO: remove segements overlapping with germline CNA forund in normals (add this as the first step)\n",
"2). For each samples aggregte to gene-level:\n",
" - rename chromosomes 22 and 23 to X and Y\n",
" - overpal segemntation file with Entrez gene coordinates for hg19\n",
" - if a gene overlaps by multiple segments, keep the one with most extreme values"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"num_marker_thr = 5\n",
"# to detect 1 copy gains or losses presenting at CCF >= 0.3\n",
"pos_seg_mean_thr = 0.20\n",
"neg_seg_mean_thr = -0.23 \n",
"\n",
"preprocessed_dir = root_dir+\"preprocessed/CNA/\"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"### functions for TCGA and CCLE #################################\n",
"def filter_lowconf_segments(df,num_marker_thr,pos_seg_mean_thr, neg_seg_mean_thr ):\n",
" # filter low-confidence segments with too few probes\n",
" df = df[df[\"Num_Probes\"] >= num_marker_thr ]\n",
" # filter low-confidence segments with Segment_Mean too close to zero:\n",
" df = df[ (df[\"Segment_Mean\"] >= pos_seg_mean_thr) | (df[\"Segment_Mean\"] <= neg_seg_mean_thr)]\n",
" return df\n",
"\n",
"def sample_type(barcode):\n",
" if barcode[13:16] in [\"10A\",\"10B\",\"11A\",\"11B\",\"10C\",\"11C\"]:\n",
" return \"Normal\"\n",
" else:\n",
" return \"Tumor\"\n",
"\n",
"def find_matching_normal(tumor_barcode,barcodes_list):\n",
" patient_id = tumor_barcode[:12]\n",
" normal_barcodes = []\n",
" for barcode in barcodes_list:\n",
" if barcode.startswith(patient_id) and sample_type(barcode) == \"Normal\":\n",
" normal_barcodes.append(barcode)\n",
" return normal_barcodes\n",
"\n",
"def cnv2bed(seg):\n",
" #cnv_bed = seg[[\"Chromosome\",\"Start\",\"End\",\"Segment_Mean\"]]\n",
" #cnv_bed.columns = [\"chrom\",\"start\",\"stop\",\"Segment_Mean\"]\n",
" cnv_bed = seg.rename({\"Chromosome\":\"chrom\",\"Start\":\"start\",\n",
" \"End\":\"stop\"},axis=\"columns\")\n",
" cnv_bed = cnv_bed.loc[:,[\"chrom\",\"start\",\"stop\",\"Segment_Mean\",\"Sample\",\"Num_Probes\"]]\n",
" return pbt.BedTool.from_dataframe(cnv_bed)\n",
"\n",
"def bed2cnv(cnv_bed):\n",
" cnv_bed = str(cnv_bed)\n",
" if len(cnv_bed) > 0:\n",
" seg = pd.read_csv(StringIO(cnv_bed),sep = \"\\t\",header=None)\n",
" seg.columns = [\"Chromosome\",\"Start\",\"End\",\"Segment_Mean\",\"Sample\",\"Num_Probes\"]\n",
" seg = seg.loc[:,[\"Sample\",\"Chromosome\",\"Start\",\"End\",\"Num_Probes\",\"Segment_Mean\"]]\n",
" else:\n",
" seg = pd.DataFrame(columns=[\"Sample\",\"Chromosome\",\"Start\",\"End\",\"Num_Probes\",\"Segment_Mean\"])\n",
" return seg\n",
"def remove_ovelapping_segments(tumor, normal,sample_name):\n",
" tumor_bed = cnv2bed(tumor)\n",
" normal_bed = cnv2bed(normal)\n",
" tumor_wo_germline = tumor_bed.subtract(normal_bed,r=True,f=0.8,A=True)\n",
" tumor_wo_germline = bed2cnv(tumor_wo_germline)\n",
" n_segs_removed = tumor.shape[0] - tumor_wo_germline.shape[0]\n",
" if n_segs_removed*1.0/tumor.shape[0] > 0.5 and n_segs_removed>5 :\n",
" print(n_segs_removed,\"of\",tumor.shape[0],\"segments removed in\",sample_name,\"due to overlap with normal\",file = sys.stderr)\n",
" return tumor_wo_germline\n",
"\n",
"def cnv2genelevel(cnv_bed,gene_intervals_bed,sample_name,verbose = True,sorted_index = \"\"):\n",
" \n",
" # intersect \n",
" cnv2gene = str(gene_intervals_bed.intersect(cnv_bed,wb = True,wa=True))\n",
" if len(cnv2gene)==0: # if no intersection, return all zeroes\n",
" print(sample_name,\"has no genes with altered CN\",file = sys.stderr)\n",
" return pd.DataFrame(columns=[sample])\n",
" cnv2gene = pd.read_csv(StringIO(cnv2gene),sep = \"\\t\",header=None)\n",
" cnv2gene = cnv2gene[[3,7]].copy()\n",
" cnv2gene.columns = [\"gene\",\"Segment_Mean\"] \n",
" \n",
" # find genes overlapping with more than one segment:\n",
" # take the most exterme segement_mean value\n",
" \n",
" dups = cnv2gene.loc[cnv2gene.duplicated(subset=[\"gene\"],keep=False),]\n",
" if dups.shape[0] > 0:\n",
" cnv2gene = cnv2gene.drop_duplicates(subset=[\"gene\"],keep=False)\n",
" dups[\"abs_seg_mean\"] = abs(dups[\"Segment_Mean\"])\n",
" if verbose:\n",
" print(sample_name,\"contain \",len(set(dups[\"gene\"].values)),\"genes overalpped with more than one segment\",file=sys.stderr)\n",
" #print(dups.head(10),file=sys.stderr)\n",
" dups = dups.groupby(['gene'], group_keys=False).apply(lambda row: row.loc[row['abs_seg_mean'].idxmax()])\n",
" cnv2gene = pd.concat([cnv2gene,dups],sort=False)\n",
"\n",
" cnv2gene = cnv2gene[[\"gene\",\"Segment_Mean\"]]\n",
" cnv2gene.set_index(\"gene\",inplace=True,drop=True)\n",
" cnv2gene.rename(int,axis=0,inplace=True)\n",
" # add copy-neutral genes with 0s\n",
" \n",
" cnv2gene = cnv2gene.loc[sorted_index,:]\n",
" cnv2gene.columns = [sample_name]\n",
" return cnv2gene\n",
"\n",
"\n",
"### functions for GDSC and PDX #################################\n",
"\n",
"def CN2log2R(col, median_ploidy=2 ):\n",
" # this is fr GDSC only\n",
" lRs = []\n",
" genes = col.index.values\n",
" for code in col.values:\n",
" if not code == \"-1,-1,-,-\":\n",
" [max_cn,min_cn,zygosity,disruption] = code.split(\",\")\n",
" if int(max_cn) == 0:\n",
" lRs.append(-4.32) # CN=0 with 95% purity\n",
" else:\n",
" max_lR = np.log2(float(max_cn)/median_ploidy)\n",
" if not disruption == \"D\":\n",
" lRs.append(max_lR)\n",
" else:\n",
" if int(min_cn) == 0:\n",
" min_lR = -4.32\n",
" else:\n",
" min_lR = np.log2(float(min_cn)/median_ploidy)\n",
" if abs(min_lR) > abs(max_lR):\n",
" lRs.append(min_lR)\n",
" else:\n",
" lRs.append(max_lR)\n",
" \n",
" else:\n",
" lRs.append(np.NaN)\n",
" return pd.Series(dict(zip(genes, lRs)))\n",
"\n",
"def define_avg_ploidy(col):\n",
" n,pl = 0,0\n",
" CN_non_disrupted = []\n",
" for code in col.values:\n",
" if not code == \"-1,-1,-,-\":\n",
" [max_cn,min_cn,zygosity,disruption] = code.split(\",\")\n",
" n+=1\n",
" cn = (int(max_cn)+int(min_cn))*0.5\n",
" pl += cn\n",
" if not disruption == \"D\":\n",
" CN_non_disrupted.append((cn))\n",
" return pd.Series({\"avg_pl\":pl/n , \"median_pl\":np.median(CN_non_disrupted)})\n",
"\n",
"def clean_logR(logR_value, pos_seg_mean_thr, neg_seg_mean_thr):\n",
" if logR_value >= pos_seg_mean_thr:\n",
" return logR_value \n",
" elif logR_value <= neg_seg_mean_thr:\n",
" return logR_value \n",
" else:\n",
" return 0\n",
" \n",
"def handle_dups(df,corr_thr = 0.75):\n",
" '''Detect dupliated row IDs. Merge 2 or more rows with the same ID, \n",
" if averaged correlation in all pairvise comparision is >= corr_thhr;\\n\n",
" otherwise drop all duplicates. Keeps abs. max value (negative preferred).'''\n",
" dups = df.index\n",
" dups = list(set(dups[dups.duplicated()]))\n",
" if len(dups)==0:\n",
" print(\"No duplicated row IDs. Do nothing.\")\n",
" return df\n",
" print(len(dups), \"duplicated IDs in\",df.loc[dups,:].shape[0],\"rows found.\")\n",
" dups_merge = [] # if corr > corr_thr\n",
" dups_remove = [] # corr < \n",
" for dup in dups:\n",
" r = df.loc[dup,:].T.corr()\n",
" n_dups = df.loc[dup,:].shape[0]\n",
" r_avg = []\n",
" for i in range(0,n_dups):\n",
" for j in range(i+1,n_dups):\n",
" r_avg.append(r.iloc[i,j])\n",
" if np.average(r_avg) < corr_thr :\n",
" #print(dup,r_avg, n_dups)\n",
" dups_remove.append(dup)\n",
" else:\n",
" dups_merge.append(dup)\n",
" \n",
" # remove not similar duplicates\n",
" df_size = df.shape[0]\n",
" df = df.loc[~df.index.isin(dups_remove),:]\n",
" print(\"duplicate rows removed due to low correlation of duplicated profiles\",df_size -df.shape[0] )\n",
" df_size = df.shape[0]\n",
" \n",
" # merge simialr duplicates\n",
" d1 = df.loc[~df.index.isin(dups_merge),:]\n",
" d2 = df.loc[dups_merge,:]\n",
" d2 = d2.groupby(d2.index).agg(lambda x: -max(-x.max(),-x.min(),key= abs))\n",
" df = pd.concat([d1,d2])\n",
" df.sort_index(inplace=True)\n",
" print(\"Merged \",df_size-df.shape[0]+len(dups_merge),\"duplicated rows into\",len(dups_merge),\"rows\")\n",
" return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### next few tabs demonstrate necessity of removing low-confidence and germline segments: \n",
"\n",
"(e.g. fragment 11:126596926-127130276 presents in both tumor and normal\n",
"therefore, it is germline; see chr11:126596926-12713027 in UCSC browser - it covers part of KIRELL3)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"segemtns in tumor 204 segemtns in normal 121\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample</th>\n",
" <th>Chromosome</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Num_Probes</th>\n",
" <th>Segment_Mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>57803</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>456120</td>\n",
" <td>8896255</td>\n",
" <td>4489.0</td>\n",
" <td>-0.0113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57804</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>8899400</td>\n",
" <td>8899668</td>\n",
" <td>3.0</td>\n",
" <td>-1.3344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57805</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>8900394</td>\n",
" <td>126596817</td>\n",
" <td>67487.0</td>\n",
" <td>0.0010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57806</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>126596926</td>\n",
" <td>127130276</td>\n",
" <td>453.0</td>\n",
" <td>-1.0306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57807</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>127132920</td>\n",
" <td>128342803</td>\n",
" <td>864.0</td>\n",
" <td>-0.0031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57808</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>128342819</td>\n",
" <td>128350888</td>\n",
" <td>44.0</td>\n",
" <td>0.2824</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57809</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>128353007</td>\n",
" <td>134142530</td>\n",
" <td>3708.0</td>\n",
" <td>0.0082</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sample Chromosome Start End \\\n",
"57803 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 456120 8896255 \n",
"57804 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 8899400 8899668 \n",
"57805 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 8900394 126596817 \n",
"57806 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 126596926 127130276 \n",
"57807 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 127132920 128342803 \n",
"57808 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 128342819 128350888 \n",
"57809 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 128353007 134142530 \n",
"\n",
" Num_Probes Segment_Mean \n",
"57803 4489.0 -0.0113 \n",
"57804 3.0 -1.3344 \n",
"57805 67487.0 0.0010 \n",
"57806 453.0 -1.0306 \n",
"57807 864.0 -0.0031 \n",
"57808 44.0 0.2824 \n",
"57809 3708.0 0.0082 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#file_path = \"../../TCGA/CNA/data/gdac.broadinstitute.org_CESC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0/CESC.snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.seg.txt\"\n",
"file_path = \"../../TCGA/CNA/data__2016_01_28/gdac.broadinstitute.org_CESC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2016012800.0.0/CESC.snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.seg.txt\"\n",
"df = pd.read_csv(file_path, sep = \"\\t\")\n",
"tumor_barcode = \"TCGA-ZJ-AAXJ-01A-11D-A42N-01\"\n",
"t = df.loc[df[\"Sample\"]==tumor_barcode,:]\n",
"t_shape = t.shape[0]\n",
"n = find_matching_normal(tumor_barcode,list(set(df[\"Sample\"].values)))\n",
"n = df.loc[df[\"Sample\"]==n[0],:]\n",
"print(\"segemtns in tumor\",t.shape[0],\"segemtns in normal\",n.shape[0])\n",
"\n",
"n.loc[n['Chromosome']==11,:]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample</th>\n",
" <th>Chromosome</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Num_Probes</th>\n",
" <th>Segment_Mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>57960</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>456120</td>\n",
" <td>64200041</td>\n",
" <td>34710.0</td>\n",
" <td>0.0054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57961</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>64208988</td>\n",
" <td>64319750</td>\n",
" <td>61.0</td>\n",
" <td>-0.6748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57962</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>64325209</td>\n",
" <td>126596817</td>\n",
" <td>37207.0</td>\n",
" <td>0.0571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57963</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>126596926</td>\n",
" <td>127130276</td>\n",
" <td>454.0</td>\n",
" <td>-1.0760</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57964</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>127132920</td>\n",
" <td>132080656</td>\n",
" <td>3591.0</td>\n",
" <td>0.0449</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57965</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>132080885</td>\n",
" <td>132099465</td>\n",
" <td>15.0</td>\n",
" <td>-0.6123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57966</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>132099856</td>\n",
" <td>134142530</td>\n",
" <td>1010.0</td>\n",
" <td>0.0483</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sample Chromosome Start End \\\n",
"57960 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 456120 64200041 \n",
"57961 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 64208988 64319750 \n",
"57962 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 64325209 126596817 \n",
"57963 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 126596926 127130276 \n",
"57964 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 127132920 132080656 \n",
"57965 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 132080885 132099465 \n",
"57966 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 132099856 134142530 \n",
"\n",
" Num_Probes Segment_Mean \n",
"57960 34710.0 0.0054 \n",
"57961 61.0 -0.6748 \n",
"57962 37207.0 0.0571 \n",
"57963 454.0 -1.0760 \n",
"57964 3591.0 0.0449 \n",
"57965 15.0 -0.6123 \n",
"57966 1010.0 0.0483 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t.loc[t[\"Chromosome\"] ==11,:]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"segemtns in normal after dropping low.conf.: 38\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample</th>\n",
" <th>Chromosome</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Num_Probes</th>\n",
" <th>Segment_Mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>57804</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>8899400</td>\n",
" <td>8899668</td>\n",
" <td>3.0</td>\n",
" <td>-1.3344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57806</th>\n",
" <td>TCGA-ZJ-AAXJ-10A-01D-A42Q-01</td>\n",
" <td>11</td>\n",
" <td>126596926</td>\n",
" <td>127130276</td>\n",
" <td>453.0</td>\n",
" <td>-1.0306</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sample Chromosome Start End \\\n",
"57804 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 8899400 8899668 \n",
"57806 TCGA-ZJ-AAXJ-10A-01D-A42Q-01 11 126596926 127130276 \n",
"\n",
" Num_Probes Segment_Mean \n",
"57804 3.0 -1.3344 \n",
"57806 453.0 -1.0306 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n = filter_lowconf_segments(n,0,0.46, -0.68 )\n",
"print(\"segemtns in normal after dropping low.conf.:\",n.shape[0])\n",
"n.loc[n['Chromosome']==11,:]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"segemtns in tumor after removing germlines: 194\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample</th>\n",
" <th>Chromosome</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Num_Probes</th>\n",
" <th>Segment_Mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>456120</td>\n",
" <td>64200041</td>\n",
" <td>34710.0</td>\n",
" <td>0.0054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>64208988</td>\n",
" <td>64319750</td>\n",
" <td>61.0</td>\n",
" <td>-0.6748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>64325209</td>\n",
" <td>126596817</td>\n",
" <td>37207.0</td>\n",
" <td>0.0571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>127132920</td>\n",
" <td>132080656</td>\n",
" <td>3591.0</td>\n",
" <td>0.0449</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>132080885</td>\n",
" <td>132099465</td>\n",
" <td>15.0</td>\n",
" <td>-0.6123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>132099856</td>\n",
" <td>134142530</td>\n",
" <td>1010.0</td>\n",
" <td>0.0483</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sample Chromosome Start End \\\n",
"96 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 456120 64200041 \n",
"97 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 64208988 64319750 \n",
"98 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 64325209 126596817 \n",
"99 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 127132920 132080656 \n",
"100 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 132080885 132099465 \n",
"101 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 132099856 134142530 \n",
"\n",
" Num_Probes Segment_Mean \n",
"96 34710.0 0.0054 \n",
"97 61.0 -0.6748 \n",
"98 37207.0 0.0571 \n",
"99 3591.0 0.0449 \n",
"100 15.0 -0.6123 \n",
"101 1010.0 0.0483 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"t = remove_ovelapping_segments(t, n,tumor_barcode)\n",
"print(\"segemtns in tumor after removing germlines:\",t.shape[0])\n",
"t.loc[t[\"Chromosome\"] ==11,:]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"segemtns in tumor after dropping low.conf.: 101\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample</th>\n",
" <th>Chromosome</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Num_Probes</th>\n",
" <th>Segment_Mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>64208988</td>\n",
" <td>64319750</td>\n",
" <td>61.0</td>\n",
" <td>-0.6748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>TCGA-ZJ-AAXJ-01A-11D-A42N-01</td>\n",
" <td>11</td>\n",
" <td>132080885</td>\n",
" <td>132099465</td>\n",
" <td>15.0</td>\n",
" <td>-0.6123</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sample Chromosome Start End \\\n",
"97 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 64208988 64319750 \n",
"100 TCGA-ZJ-AAXJ-01A-11D-A42N-01 11 132080885 132099465 \n",
"\n",
" Num_Probes Segment_Mean \n",
"97 61.0 -0.6748 \n",
"100 15.0 -0.6123 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t = filter_lowconf_segments(t,num_marker_thr,pos_seg_mean_thr, neg_seg_mean_thr )\n",
"print(\"segemtns in tumor after dropping low.conf.:\",t.shape[0])\n",
"t.loc[t[\"Chromosome\"] ==11,:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TCGA "
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HNSC samples: 1089 CNA events per sample on avg.: 101.275482094\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1089 tumors: 530 normals: 559\n",
"\ttumors without matched normal 28\n",
"\ttumors with at least one sCNA 497\n",
"\ttumors without any somatic CNA 5\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"HNSC samples: 525 Segments per sample on avg.: 60.6876190476\n",
"ESCA samples: 373 CNA events per sample on avg.: 163.010723861\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 373 tumors: 185 normals: 188\n",
"\ttumors without matched normal 3\n",
"\ttumors with at least one sCNA 181\n",
"\ttumors without any somatic CNA 1\n",
"total samples: 248 tumors: 125 normals: 123\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"ESCA samples: 184 Segments per sample on avg.: 141.836956522\n",
"THYM samples: 248 CNA events per sample on avg.: 62.7862903226\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 5\n",
"\ttumors with at least one sCNA 95\n",
"\ttumors without any somatic CNA 25\n",
"total samples: 132 tumors: 66 normals: 66\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"THYM samples: 100 Segments per sample on avg.: 9.41\n",
"KICH samples: 132 CNA events per sample on avg.: 77.0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 0\n",
"\ttumors with at least one sCNA 65\n",
"\ttumors without any somatic CNA 1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"KICH samples: 65 Segments per sample on avg.: 51.4923076923\n",
"LUSC samples: 1032 CNA events per sample on avg.: 130.682170543\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1032 tumors: 501 normals: 531\n",
"\ttumors without matched normal 23\n",
"\ttumors with at least one sCNA 476\n",
"\ttumors without any somatic CNA 2\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"LUSC samples: 499 Segments per sample on avg.: 94.6533066132\n",
"BLCA samples: 797 CNA events per sample on avg.: 130.927227102\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 797 tumors: 414 normals: 383\n",
"\ttumors without matched normal 46\n",
"\ttumors with at least one sCNA 366\n",
"\ttumors without any somatic CNA 2\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"BLCA samples: 412 Segments per sample on avg.: 94.8859223301\n",
"GBM samples: 1104 CNA events per sample on avg.: 133.018115942\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1104 tumors: 590 normals: 514\n",
"\ttumors without matched normal 78\n",
"\ttumors with at least one sCNA 511\n",
"\ttumors without any somatic CNA 1\n",
"total samples: 85 tumors: 36 normals: 49\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"GBM samples: 589 Segments per sample on avg.: 70.2139219015\n",
"CHOL samples: 85 CNA events per sample on avg.: 89.0588235294\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 0\n",
"\ttumors with at least one sCNA 36\n",
"\ttumors without any somatic CNA 0\n",
"total samples: 111 tumors: 56 normals: 55\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"CHOL samples: 36 Segments per sample on avg.: 56.6944444444\n",
"UCS samples: 111 CNA events per sample on avg.: 173.855855856\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 2\n",
"\ttumors with at least one sCNA 54\n",
"\ttumors without any somatic CNA 0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"UCS samples: 56 Segments per sample on avg.: 179.125\n",
"LGG samples: 1015 CNA events per sample on avg.: 78.6118226601\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1015 tumors: 530 normals: 485\n",
"\ttumors without matched normal 33\n",
"\ttumors with at least one sCNA 494\n",
"\ttumors without any somatic CNA 3\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"LGG samples: 527 Segments per sample on avg.: 29.1157495256\n",
"THCA samples: 1013 CNA events per sample on avg.: 54.4096742349\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1013 tumors: 506 normals: 507\n",
"\ttumors without matched normal 15\n",
"\ttumors with at least one sCNA 367\n",
"\ttumors without any somatic CNA 124\n",
"total samples: 365 tumors: 185 normals: 180\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"THCA samples: 382 Segments per sample on avg.: 3.8219895288\n",
"PAAD samples: 365 CNA events per sample on avg.: 95.3643835616\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 10\n",
"\ttumors with at least one sCNA 161\n",
"\ttumors without any somatic CNA 14\n",
"total samples: 1059 tumors: 529 normals: 530\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"PAAD samples: 171 Segments per sample on avg.: 32.4093567251\n",
"KIRC samples: 1059 CNA events per sample on avg.: 80.298394712\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 22\n",
"\ttumors with at least one sCNA 501\n",
"\ttumors without any somatic CNA 6\n",
"total samples: 160 tumors: 80 normals: 80\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"KIRC samples: 523 Segments per sample on avg.: 20.5009560229\n",
"UVM samples: 160 CNA events per sample on avg.: 81.08125\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 0\n",
"\ttumors with at least one sCNA 80\n",
"\ttumors without any somatic CNA 0\n",
"total samples: 586 tumors: 297 normals: 289\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"UVM samples: 80 Segments per sample on avg.: 38.425\n",
"CESC samples: 586 CNA events per sample on avg.: 101.450511945\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 16\n",
"\ttumors with at least one sCNA 280\n",
"\ttumors without any somatic CNA 1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"CESC samples: 296 Segments per sample on avg.: 58.1351351351\n",
"LUAD samples: 1095 CNA events per sample on avg.: 105.78630137\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1095 tumors: 518 normals: 577\n",
"\ttumors without matched normal 19\n",
"\ttumors with at least one sCNA 494\n",
"\ttumors without any somatic CNA 5\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"LUAD samples: 513 Segments per sample on avg.: 70.469785575\n",
"STAD samples: 904 CNA events per sample on avg.: 130.961283186\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 904 tumors: 442 normals: 462\n",
"\ttumors without matched normal 26\n",
"\ttumors with at least one sCNA 410\n",
"\ttumors without any somatic CNA 6\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"STAD samples: 436 Segments per sample on avg.: 96.4220183486\n",
"UCEC samples: 1089 CNA events per sample on avg.: 116.707070707\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1089 tumors: 540 normals: 549\n",
"\ttumors without matched normal 23\n",
"\ttumors with at least one sCNA 504\n",
"\ttumors without any somatic CNA 13\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"UCEC samples: 527 Segments per sample on avg.: 78.89943074\n",
"SKCM samples: 937 CNA events per sample on avg.: 115.351120598\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 937 tumors: 472 normals: 465\n",
"\ttumors without matched normal 7\n",
"\ttumors with at least one sCNA 463\n",
"\ttumors without any somatic CNA 2\n",
"total samples: 172 tumors: 87 normals: 85\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"SKCM samples: 470 Segments per sample on avg.: 82.9957446809\n",
"MESO samples: 172 CNA events per sample on avg.: 106.598837209\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 2\n",
"\ttumors with at least one sCNA 82\n",
"\ttumors without any somatic CNA 3\n",
"total samples: 346 tumors: 168 normals: 178\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"MESO samples: 84 Segments per sample on avg.: 60.8333333333\n",
"PCPG samples: 346 CNA events per sample on avg.: 90.3352601156\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 6\n",
"\ttumors with at least one sCNA 159\n",
"\ttumors without any somatic CNA 3\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"PCPG samples: 165 Segments per sample on avg.: 43.5878787879\n",
"STES samples: 1277 CNA events per sample on avg.: 140.322631167\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1277 tumors: 627 normals: 650\n",
"\ttumors without matched normal 29\n",
"\ttumors with at least one sCNA 591\n",
"\ttumors without any somatic CNA 7\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"STES samples: 620 Segments per sample on avg.: 109.9\n",
"SARC samples: 513 CNA events per sample on avg.: 208.068226121\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 513 tumors: 263 normals: 250\n",
"\ttumors without matched normal 17\n",
"\ttumors with at least one sCNA 245\n",
"\ttumors without any somatic CNA 1\n",
"total samples: 380 tumors: 191 normals: 189\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"SARC samples: 262 Segments per sample on avg.: 187.057251908\n",
"LAML samples: 380 CNA events per sample on avg.: 74.5368421053\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 3\n",
"\ttumors with at least one sCNA 167\n",
"\ttumors without any somatic CNA 21\n",
"total samples: 590 tumors: 288 normals: 302\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"LAML samples: 170 Segments per sample on avg.: 7.18823529412\n",
"KIRP samples: 590 CNA events per sample on avg.: 79.5152542373\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 15\n",
"\ttumors with at least one sCNA 271\n",
"\ttumors without any somatic CNA 2\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"KIRP samples: 286 Segments per sample on avg.: 21.8846153846\n",
"LIHC samples: 760 CNA events per sample on avg.: 122.8\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 760 tumors: 373 normals: 387\n",
"\ttumors without matched normal 21\n",
"\ttumors with at least one sCNA 348\n",
"\ttumors without any somatic CNA 4\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"LIHC samples: 369 Segments per sample on avg.: 81.1327913279\n",
"OV samples: 1168 CNA events per sample on avg.: 224.04109589\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1168 tumors: 597 normals: 571\n",
"\ttumors without matched normal 26\n",
"\ttumors with at least one sCNA 571\n",
"\ttumors without any somatic CNA 0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"OV samples: 597 Segments per sample on avg.: 207.924623116\n",
"TGCT samples: 304 CNA events per sample on avg.: 83.8125\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 304 tumors: 156 normals: 148\n",
"\ttumors without matched normal 2\n",
"\ttumors with at least one sCNA 154\n",
"\ttumors without any somatic CNA 0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"TGCT samples: 156 Segments per sample on avg.: 37.7820512821\n",
"COAD samples: 918 CNA events per sample on avg.: 98.6209150327\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 918 tumors: 453 normals: 465\n",
"\ttumors without matched normal 44\n",
"\ttumors with at least one sCNA 406\n",
"\ttumors without any somatic CNA 3\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"COAD samples: 450 Segments per sample on avg.: 48.4755555556\n",
"BRCA samples: 2199 CNA events per sample on avg.: 129.35788995\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 2199 tumors: 1088 normals: 1111\n",
"\ttumors without matched normal 35\n",
"\ttumors with at least one sCNA 1046\n",
"\ttumors without any somatic CNA 7\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"BRCA samples: 1081 Segments per sample on avg.: 102.808510638\n",
"PRAD samples: 1023 CNA events per sample on avg.: 114.706744868\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 1023 tumors: 493 normals: 530\n",
"\ttumors without matched normal 17\n",
"\ttumors with at least one sCNA 458\n",
"\ttumors without any somatic CNA 18\n",
"total samples: 96 tumors: 52 normals: 44\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"PRAD samples: 475 Segments per sample on avg.: 60.3831578947\n",
"DLBC samples: 96 CNA events per sample on avg.: 97.3229166667\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 10\n",
"\ttumors with at least one sCNA 40\n",
"\ttumors without any somatic CNA 2\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"DLBC samples: 50 Segments per sample on avg.: 44.44\n",
"READ samples: 316 CNA events per sample on avg.: 113.180379747\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 316 tumors: 166 normals: 150\n",
"\ttumors without matched normal 23\n",
"\ttumors with at least one sCNA 141\n",
"\ttumors without any somatic CNA 2\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"READ samples: 164 Segments per sample on avg.: 70.012195122\n",
"ACC samples: 180 CNA events per sample on avg.: 116.955555556\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"total samples: 180 tumors: 90 normals: 90\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"after filtering\n",
"ACC samples: 89 Segments per sample on avg.: 107.449438202\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\ttumors without matched normal 2\n",
"\ttumors with at least one sCNA 87\n",
"\ttumors without any somatic CNA 1\n"
]
}
],
"source": [
"\n",
"# Per-cohort filtering of the GDAC segmentation files found in data_dir:\n",
"#   1) split samples into tumors and normals (sample_type);\n",
"#   2) for each tumor, drop segments overlapping segments of its matching normal(s);\n",
"#   3) drop low-confidence tumor segments (filter_lowconf_segments);\n",
"#   4) store the results in dfs, dfs_normals and tumors_without_CNA, keyed by cohort.\n",
"data_dir = \"../../TCGA/CNA/data__2016_01_28//\"\n",
"# columns kept in the per-sample segment tables\n",
"seg_columns = [\"Sample\",\"Chromosome\",\"Start\",\"End\",\"Num_Probes\",\"Segment_Mean\"]\n",
"\n",
"dfs = {}                 # cohort -> filtered tumor segments\n",
"dfs_normals = {}         # cohort -> filtered segments of the normals matched to tumors\n",
"tumors_without_CNA = {}  # cohort -> tumor samples left without any segment\n",
"# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps runs reproducible\n",
"for f in sorted(os.listdir(data_dir)):\n",
"    if not f.endswith(\"tar.gz\"):\n",
"        continue\n",
"    # the seg file is read from a directory named like the archive without \".tar.gz\"\n",
"    fp = f.replace(\".tar.gz\",\"\")\n",
"    cohort = fp.split(\".\")[2].replace(\"org_\",\"\")\n",
"    file_path = fp+\"/\"+cohort+\".\"+fp.split(\".\")[3].replace(\"Merge_\",\"\")+\".seg.txt\"\n",
"    df = pd.read_csv(data_dir+file_path, sep = \"\\t\")\n",
"\n",
"    # NOTE(review): chr_dict (first cell) maps 1..21 to strings, 22 to 'X' and 23 to 'Y';\n",
"    # values missing from chr_dict become NaN here - confirm this matches the seg file encoding\n",
"    df[\"Chromosome\"] = df[\"Chromosome\"].map(chr_dict)\n",
"    n_samples = len(set(df[\"Sample\"].values))\n",
"    print(cohort,\"samples:\",n_samples,\n",
"          \"CNA events per sample on avg.:\",float(df.shape[0])/max(n_samples,1))\n",
"\n",
"    #### remove segments overlapping with segments in normals by 80% or more reciprocally ####\n",
"    # (original author's note; the overlap rule itself lives in remove_ovelapping_segments)\n",
"    df[\"type\"] = df[\"Sample\"].apply(sample_type)\n",
"    df_normals = df.loc[df[\"type\"]== \"Normal\",seg_columns]\n",
"    df_tumors = df.loc[df[\"type\"]== \"Tumor\",seg_columns]\n",
"    normal_samples = list(set(df_normals[\"Sample\"].values))\n",
"    tumor_samples = list(set(df_tumors[\"Sample\"].values))\n",
"    print(\"total samples:\", n_samples,\n",
"          \"tumors:\",len(tumor_samples),\"normals:\",len(normal_samples),file= sys.stderr)\n",
"\n",
"    tumors_without_somatic_CNA = []      # sample names\n",
"    tumors_germline_removed = []         # segment tables of tumors that had a matching normal\n",
"    tumors_without_matching_normal = []  # segment tables of tumors without a matching normal\n",
"    normals_used = []                    # filtered segment tables of the matched normals\n",
"    for tumor_sample in tumor_samples:\n",
"        tumor = df_tumors.loc[df_tumors[\"Sample\"]== tumor_sample,:]\n",
"        matching_normals = find_matching_normal(tumor_sample,normal_samples)\n",
"        # no-op when the tumor has no matching normal\n",
"        for normal_sample in matching_normals:\n",
"            normal = df_normals.loc[df_normals[\"Sample\"]== normal_sample,:]\n",
"            # thresholds for +1 and -1 copy in 75% of normal cells;\n",
"            # this is to retain segments that appeared due to slight tumor contamination\n",
"            normal = filter_lowconf_segments(normal,0,0.46, -0.68 )\n",
"            normals_used.append(normal)\n",
"            tumor = remove_ovelapping_segments(tumor, normal,tumor_sample)\n",
"        # the same low-confidence filter is applied whether or not a normal was available\n",
"        tumor = filter_lowconf_segments(tumor,num_marker_thr,pos_seg_mean_thr, neg_seg_mean_thr )\n",
"        if tumor.shape[0] == 0:\n",
"            tumors_without_somatic_CNA.append(tumor_sample)\n",
"        elif len(matching_normals) >0:\n",
"            tumors_germline_removed.append(tumor)\n",
"        else:\n",
"            tumors_without_matching_normal.append(tumor)\n",
"\n",
"    print(\"\\ttumors without matched normal\",len(tumors_without_matching_normal),file= sys.stderr)\n",
"    print(\"\\ttumors with at least one sCNA\",len(tumors_germline_removed),file= sys.stderr)\n",
"    print(\"\\ttumors without any somatic CNA\",len(tumors_without_somatic_CNA),file= sys.stderr)\n",
"\n",
"    # pd.concat() raises ValueError on an empty list (e.g. a cohort without matched normals),\n",
"    # so fall back to an empty table with the expected columns\n",
"    kept_tumors = tumors_germline_removed+tumors_without_matching_normal\n",
"    if kept_tumors:\n",
"        filtered_tumors = pd.concat(kept_tumors)\n",
"    else:\n",
"        filtered_tumors = pd.DataFrame(columns=seg_columns)\n",
"    if normals_used:\n",
"        filtered_normals = pd.concat(normals_used)\n",
"    else:\n",
"        filtered_normals = pd.DataFrame(columns=seg_columns)\n",
"    dfs[cohort] = filtered_tumors\n",
"    dfs_normals[cohort] = filtered_normals\n",
"    tumors_without_CNA[cohort] = tumors_without_somatic_CNA\n",
"    print(\"after filtering\")\n",
"    n_filtered = len(set(filtered_tumors[\"Sample\"].values))\n",
"    # max(..., 1) avoids ZeroDivisionError when no tumor survived the filtering\n",
"    print(cohort,\"samples:\",n_filtered,\n",
"          \"Segments per sample on avg.:\",float(filtered_tumors.shape[0])/max(n_filtered,1))\n",
"    "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Aggregating to gene-level\n",
"\n",
"Gene annotation must be:\n",
" - with Entrez gene IDs \n",
" - in hg19 coordinates\n",
" - with columns \"chrom\",\"start\",\"stop\",\"gene\" (this is four-column BED format)\n",
" \n",
"wget ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.3/GFF/ref_GRCh37.p5_top_level.gff3.gz\n",
"\n",
"echo -e \"chrom\\tstart\\tstop\\tgene\\tname\" > ref_GRCh37.p5_top_level.gff3.bed;\n",
"zcat ref_GRCh37.p5_top_level.gff3.gz | awk '$3==\"gene\"' | cut -f 1,4,5,9| sed -e 's/;/\\t/g'| cut -f 1-3,5,6 | grep GeneID | sed -re 's/(Dbxref=GeneID:[0-9]*),.*/\\1/' | sed -e 's/Name=//' -e 's/Dbxref=GeneID://' | awk '{print $1\"\\t\"$2\"\\t\"$3\"\\t\"$5\"\\t\"$4}' >> \n",
"ref_GRCh37.p5_top_level.gff3.bed\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"rename_chroms = {\"NC_000001.10\":1,\"NC_000002.11\":2,\"NC_000003.11\":3,\"NC_000004.11\":4,\n",
" \"NC_000005.9\":5,\"NC_000006.11\":6,\"NC_000007.13\":7,\"NC_000008.10\":8,\n",
" \"NC_000009.11\":9,\"NC_000010.10\":10,\"NC_000011.9\":11,\"NC_000012.11\":12,\"NC_000013.10\":13,\n",
" \"NC_000014.8\":14,\"NC_000015.9\":15,\"NC_000016.9\":16,\"NC_000017.10\":17,\n",
" \"NC_000018.9\":18,\"NC_000019.9\":19,\"NC_000020.10\":20,\"NC_000021.8\":21,\n",
" \"NC_000022.10\":22,\"NC_000023.10\":23,\"NC_000024.9\":24}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(36019, 5)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chrom</th>\n",
" <th>start</th>\n",
" <th>stop</th>\n",
" <th>gene</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10954</td>\n",
" <td>11507</td>\n",
" <td>100506145</td>\n",
" <td>LOC100506145</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>12190</td>\n",
" <td>13639</td>\n",
" <td>100652771</td>\n",
" <td>LOC100652771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>14362</td>\n",
" <td>29370</td>\n",
" <td>653635</td>\n",
" <td>WASH7P</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>30366</td>\n",
" <td>30503</td>\n",
" <td>100302278</td>\n",
" <td>MIR1302-2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" chrom start stop gene name\n",
"0 1 10954 11507 100506145 LOC100506145\n",
"1 1 12190 13639 100652771 LOC100652771\n",
"2 1 14362 29370 653635 WASH7P\n",
"3 1 30366 30503 100302278 MIR1302-2"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the gene annotation, keep only genes located on the sequences listed in\n",
"# rename_chroms and replace the accessions with the corresponding chromosome numbers.\n",
"gene_intervals = pd.read_csv(gene_coords_file, sep = \"\\t\")\n",
"# .copy() makes the filtered table independent of the loaded one, so the column\n",
"# assignment below does not write into a slice (avoids SettingWithCopyWarning)\n",
"gene_intervals = gene_intervals.loc[gene_intervals[\"chrom\"].isin(list(rename_chroms)),:].copy()\n",
"# every remaining value is a key of rename_chroms, so map() leaves no NaN\n",
"gene_intervals[\"chrom\"] = gene_intervals[\"chrom\"].map(rename_chroms)\n",
"gene_intervals = gene_intervals.sort_values(by=[\"chrom\",\"start\",\"stop\"],ascending=True)\n",
"# NOTE(review): this is written under .../v1/ while root_dir points to .../v2/ - confirm it is intended\n",
"gene_intervals.to_csv(\"/home/olya/SFU/Hossein/v1/ref_GRCh37.p5_top_level.gff3.chroms_renamed.bed\",sep = \"\\t\",index=False)\n",
"print(gene_intervals.shape)\n",
"gene_intervals.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# BedTool with one interval per gene: chrom, start, stop and the gene ID\n",
"gene_intervals_bed = pbt.BedTool.from_dataframe(gene_intervals[[\"chrom\",\"start\",\"stop\",\"gene\"]])\n",
"# copy-neutral table: Segment_Mean = 0 for every gene, indexed and sorted by gene ID\n",
"cnv_baseline = (gene_intervals[[\"gene\"]]\n",
"                .assign(Segment_Mean=0)\n",
"                .set_index(\"gene\")\n",
"                .sort_index())\n",
"# gene IDs in the sorted order of the baseline table\n",
"sorted_index = list(cnv_baseline.index.values)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ESCA\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"ESCA (36019, 185)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"DLBC\n",
"TCGA-G8-6914-14A-01D-2209-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"DLBC (36019, 52)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"READ\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"READ (36019, 166)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"GBM\n",
"TCGA-06-0165-01A-01D-0236-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-06-0119-01A-08D-0214-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n",
"... 400 processed.\n",
"... 500 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-06-5410-01A-01D-1694-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GBM (36019, 590)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"STES\n",
"TCGA-MX-A5UG-01A-21D-A31K-01 has no genes with altered CN\n",
"TCGA-RD-A8NB-01A-12D-A396-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BR-7957-01A-11D-2200-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n",
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BR-6563-01A-13D-2052-01 has no genes with altered CN\n",
"TCGA-D7-6522-01A-11D-1799-01 has no genes with altered CN\n",
"TCGA-BR-7196-01A-11D-2052-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-D7-A6ET-01A-32D-A32M-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 500 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-HU-A4GJ-01A-11D-A253-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 600 processed.\n",
"STES (36019, 627)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BLCA\n",
"TCGA-YC-A8S6-01A-31D-A38F-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-DK-A3WY-01A-11D-A22Y-01 has no genes with altered CN\n",
"TCGA-XF-A9SL-01A-11D-A390-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-E7-A7XN-01A-11D-A34T-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n",
"... 400 processed.\n",
"BLCA (36019, 414)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"UCEC\n",
"TCGA-D1-A16Y-01A-31D-A12G-01 has no genes with altered CN\n",
"TCGA-BK-A6W4-01A-12D-A34P-01 has no genes with altered CN\n",
"TCGA-BS-A0V7-01A-21D-A120-01 has no genes with altered CN\n",
"TCGA-B5-A11Y-01A-21D-A10L-01 has no genes with altered CN\n",
"TCGA-D1-A17F-01A-11D-A12G-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AX-A062-01A-11D-A00X-01 has no genes with altered CN\n",
"TCGA-D1-A16D-01A-11D-A12G-01 has no genes with altered CN\n",
"TCGA-BG-A0VZ-01A-11D-A107-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AJ-A2QL-01A-11D-A18N-01 has no genes with altered CN\n",
"TCGA-BS-A0UA-01A-11D-A120-01 has no genes with altered CN\n",
"TCGA-B5-A11U-01A-11D-A120-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-EO-A3AU-01A-21D-A19X-01 has no genes with altered CN\n",
"TCGA-QF-A5YS-01A-11D-A31T-01 has no genes with altered CN\n",
"TCGA-D1-A0ZV-01A-11D-A10L-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-QS-A5YR-01A-31D-A31T-01 has no genes with altered CN\n",
"TCGA-DI-A1BU-01A-11D-A134-01 has no genes with altered CN\n",
"TCGA-AP-A0LG-01A-11D-A042-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 500 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-D1-A0ZS-01A-11D-A120-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"UCEC (36019, 540)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"PCPG\n",
"TCGA-RW-A7CZ-01A-11D-A35C-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-WB-A817-01A-11D-A35H-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PCPG (36019, 168)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"STAD\n",
"TCGA-MX-A5UG-01A-21D-A31K-01 has no genes with altered CN\n",
"TCGA-RD-A8NB-01A-12D-A396-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BR-7957-01A-11D-2200-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BR-6563-01A-13D-2052-01 has no genes with altered CN\n",
"TCGA-D7-6522-01A-11D-1799-01 has no genes with altered CN\n",
"TCGA-BR-7196-01A-11D-2052-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-D7-A6ET-01A-32D-A32M-01 has no genes with altered CN\n",
"TCGA-HU-A4GJ-01A-11D-A253-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n",
"STAD (36019, 442)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"CESC\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n",
"CESC (36019, 297)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"UCS\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"UCS (36019, 56)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TGCT\n",
"TCGA-YU-A90S-01A-11D-A434-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"TGCT (36019, 156)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"THCA\n",
"TCGA-EL-A4JZ-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-DJ-A13X-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-EL-A3ZT-01A-12D-A23L-01 has no genes with altered CN\n",
"TCGA-DE-A0XZ-01A-11D-A17S-01 has no genes with altered CN\n",
"TCGA-DJ-A2PP-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-KS-A4I5-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-DJ-A2PS-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-EL-A3GW-01A-11D-A201-01 has no genes with altered CN\n",
"TCGA-BJ-A0ZG-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-J8-A3O2-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-FY-A3RA-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-CE-A483-01A-11D-A23T-01 has no genes with altered CN\n",
"TCGA-EM-A1CW-01A-21D-A13V-01 has no genes with altered CN\n",
"TCGA-DJ-A4V4-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-E3-A3E1-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-ET-A2MZ-01A-12D-A19I-01 has no genes with altered CN\n",
"TCGA-E8-A414-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-EL-A3T6-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-DJ-A4V5-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-DJ-A3UY-01A-21D-A22C-01 has no genes with altered CN\n",
"TCGA-EL-A3D4-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-FY-A76V-01A-11D-A396-01 has no genes with altered CN\n",
"TCGA-FY-A4B3-01A-11D-A23T-01 has no genes with altered CN\n",
"TCGA-DJ-A3UO-01A-11D-A22C-01 has no genes with altered CN\n",
"TCGA-EL-A4K7-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-DJ-A1QI-01A-11D-A14V-01 has no genes with altered CN\n",
"TCGA-EL-A3N2-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-E3-A3E5-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-EM-A1YD-01A-11D-A14V-01 has no genes with altered CN\n",
"TCGA-GE-A2C6-01A-11D-A16M-01 has no genes with altered CN\n",
"TCGA-DJ-A2Q5-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-ET-A3DP-01A-11D-A219-01 has no genes with altered CN\n",
"TCGA-DJ-A4UT-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-DJ-A2PT-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-DJ-A4V2-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-L6-A4ET-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-BJ-A0ZJ-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-DE-A4M9-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EL-A4KD-01A-11D-A256-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-QD-A8IV-01A-11D-A396-01 has no genes with altered CN\n",
"TCGA-ET-A3DV-01A-12D-A201-01 has no genes with altered CN\n",
"TCGA-EM-A22K-01A-11D-A17S-01 has no genes with altered CN\n",
"TCGA-DJ-A3VE-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-EL-A3D1-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-BJ-A2P4-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-CE-A3ME-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-E8-A417-01A-21D-A23L-01 has no genes with altered CN\n",
"TCGA-KS-A41I-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-FK-A3SB-01A-11D-A22C-01 has no genes with altered CN\n",
"TCGA-BJ-A28S-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-MK-A4N9-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-E8-A437-01A-12D-A23T-01 has no genes with altered CN\n",
"TCGA-EM-A3AP-01A-12D-A20A-01 has no genes with altered CN\n",
"TCGA-EL-A3TA-01A-12D-A22C-01 has no genes with altered CN\n",
"TCGA-IM-A41Z-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-EM-A2CQ-01A-11D-A17S-01 has no genes with altered CN\n",
"TCGA-EM-A3O7-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-FE-A3PC-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-DJ-A2PY-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-EM-A4FQ-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EM-A3FO-01A-11D-A219-01 has no genes with altered CN\n",
"TCGA-BJ-A0Z9-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-EM-A3FK-01A-11D-A219-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-ET-A3BU-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-BJ-A0Z5-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-EL-A3MY-01A-11D-A219-01 has no genes with altered CN\n",
"TCGA-ET-A39L-01A-12D-A19I-01 has no genes with altered CN\n",
"TCGA-E8-A415-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-ET-A40Q-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-KS-A4I7-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-MK-A4N7-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-L6-A4EQ-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-FY-A3TY-01A-11D-A22Y-01 has no genes with altered CN\n",
"TCGA-ET-A2N1-01A-11D-A18E-01 has no genes with altered CN\n",
"TCGA-DJ-A2PO-01A-21D-A19I-01 has no genes with altered CN\n",
"TCGA-J8-A3O2-06A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-CE-A485-01A-11D-A23T-01 has no genes with altered CN\n",
"TCGA-ET-A3BX-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-DJ-A3VK-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-DE-A4M8-01A-21D-A256-01 has no genes with altered CN\n",
"TCGA-ET-A40T-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-BJ-A18Z-01A-21D-A13V-01 has no genes with altered CN\n",
"TCGA-DJ-A3UT-01A-11D-A22C-01 has no genes with altered CN\n",
"TCGA-DJ-A2Q2-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-BJ-A18Y-01A-11D-A13V-01 has no genes with altered CN\n",
"TCGA-ET-A39T-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-EL-A3CL-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-DJ-A4V0-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EL-A3H8-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-ET-A39J-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-FY-A3I4-01A-11D-A219-01 has no genes with altered CN\n",
"TCGA-EM-A2CU-01A-12D-A17S-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-EM-A3FM-01A-11D-A219-01 has no genes with altered CN\n",
"TCGA-EM-A4FF-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EL-A3GX-01A-11D-A201-01 has no genes with altered CN\n",
"TCGA-DJ-A3UN-01A-11D-A22C-01 has no genes with altered CN\n",
"TCGA-EM-A4FO-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EL-A3TB-01A-11D-A22C-01 has no genes with altered CN\n",
"TCGA-ET-A25N-01A-11D-A16M-01 has no genes with altered CN\n",
"TCGA-ET-A39M-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-DE-A4MA-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-ET-A39O-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-DE-A0Y2-01A-11D-A10T-01 has no genes with altered CN\n",
"TCGA-FY-A3R8-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-EM-A3AL-01A-11D-A201-01 has no genes with altered CN\n",
"TCGA-EM-A2CN-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-FY-A3BL-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-EM-A1CS-01A-11D-A13V-01 has no genes with altered CN\n",
"TCGA-EL-A3D0-01A-12D-A201-01 has no genes with altered CN\n",
"TCGA-E3-A3DZ-01A-11D-A20A-01 has no genes with altered CN\n",
"TCGA-DJ-A1QF-01A-12D-A14V-01 has no genes with altered CN\n",
"TCGA-J8-A3YH-01A-11D-A22Y-01 has no genes with altered CN\n",
"TCGA-EL-A4K1-01A-11D-A256-01 has no genes with altered CN\n",
"TCGA-EM-A3O8-01A-11D-A21Y-01 has no genes with altered CN\n",
"TCGA-DJ-A3VJ-01A-11D-A23L-01 has no genes with altered CN\n",
"TCGA-BJ-A45D-01A-11D-A23T-01 has no genes with altered CN\n",
"TCGA-FY-A4B4-01A-11D-A23T-01 has no genes with altered CN\n",
"TCGA-EM-A1CU-01A-11D-A13V-01 has no genes with altered CN\n",
"TCGA-EL-A3CX-01A-11D-A19I-01 has no genes with altered CN\n",
"TCGA-ET-A25O-01A-11D-A16M-01 has no genes with altered CN\n",
"TCGA-E8-A433-01A-11D-A23L-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"THCA (36019, 506)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"CHOL\n",
"TCGA-W5-AA2H-01A-31D-A416-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHOL (36019, 36)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"HNSC\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n",
"... 300 processed.\n",
"... 400 processed.\n",
"... 500 processed.\n",
"HNSC (36019, 530)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"UVM\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"UVM (36019, 80)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"SKCM\n",
"TCGA-ER-A19A-06A-21D-A191-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n",
"... 300 processed.\n",
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-EB-A4OZ-01A-12D-A25P-01 has no genes with altered CN\n",
"TCGA-EE-A2GK-06A-11D-A194-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SKCM (36019, 472)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"COAD\n",
"TCGA-G4-6302-01A-11D-1717-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AA-A03F-01A-11D-A080-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n",
"... 300 processed.\n",
"... 400 processed.\n",
"COAD (36019, 453)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"ACC\n",
"TCGA-OR-A5KQ-01A-11D-A309-01 has no genes with altered CN\n",
"TCGA-OR-A5KV-01A-11D-A29H-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ACC (36019, 90)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"PAAD\n",
"TCGA-IB-AAUR-01A-21D-A38F-01 has no genes with altered CN\n",
"TCGA-HZ-8002-01A-11D-2200-01 has no genes with altered CN\n",
"TCGA-XD-AAUG-01A-61D-A40V-01 has no genes with altered CN\n",
"TCGA-Z5-AAPL-01A-12D-A40V-01 has no genes with altered CN\n",
"TCGA-IB-A5SQ-01A-11D-A32M-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-IB-AAUS-01A-12D-A38F-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PAAD (36019, 185)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"THYM\n",
"TCGA-4V-A9QW-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-ZB-A96B-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-X7-A8DB-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-X7-A8M4-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-X7-A8D8-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-3S-AAYX-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-YT-A95E-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-X7-A8M8-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-ZT-A8OM-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-ZB-A96E-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-3Q-A9WF-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-X7-A8M1-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-ZB-A96A-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-ZB-A96R-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-ZB-A963-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-ZC-AAAA-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-XM-A8RB-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-ZB-A96G-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-X7-A8M7-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-XU-AAXZ-01A-11D-A427-01 has no genes with altered CN\n",
"TCGA-XH-A853-01A-11D-A422-01 has no genes with altered CN\n",
"TCGA-XM-AAZ3-01A-11D-A422-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"THYM (36019, 125)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"LUSC\n",
"TCGA-56-8623-01A-11D-2391-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n",
"... 300 processed.\n",
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-98-A53H-01A-12D-A25M-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LUSC (36019, 501)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"MESO\n",
"TCGA-TS-A8AS-01A-21D-A39Q-01 has no genes with altered CN\n",
"TCGA-TS-A7P8-01A-11D-A34B-01 has no genes with altered CN\n",
"TCGA-TS-A8AV-01A-12D-A39Q-01 has no genes with altered CN\n",
"TCGA-3H-AB3O-01A-11D-A39Q-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MESO (36019, 87)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"OV\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n",
"... 300 processed.\n",
"... 400 processed.\n",
"... 500 processed.\n",
"OV (36019, 597)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"SARC\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-WK-A8Y0-10D-01D-A419-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-WK-A8XS-10E-01D-A37E-01 has no genes with altered CN\n",
"TCGA-QQ-A5V2-01A-11D-A32H-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SARC (36019, 263)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"KIRP\n",
"TCGA-Y8-A8S1-01A-11D-A36W-01 has no genes with altered CN\n",
"TCGA-GL-A4EM-01A-11D-A253-01 has no genes with altered CN\n",
"TCGA-4A-A93Y-01A-11D-A36W-01 has no genes with altered CN\n",
"TCGA-AL-3467-01A-02D-1348-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-A4-7828-01A-11D-2135-01 has no genes with altered CN\n",
"TCGA-DW-7838-01A-11D-2135-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n",
"KIRP (36019, 288)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"LGG\n",
"TCGA-HT-8106-01A-11D-2391-01 has no genes with altered CN\n",
"TCGA-S9-A6WI-01A-21D-A33S-01 has no genes with altered CN\n",
"TCGA-HT-7602-01A-21D-2085-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-DU-7011-01A-11D-2023-01 has no genes with altered CN\n",
"TCGA-TM-A84B-12A-01D-A366-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-FG-8181-01A-11D-2252-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-FG-8189-01B-11D-A288-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-DU-5872-02A-21D-A36N-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 500 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-HT-7680-01A-11D-2252-01 has no genes with altered CN\n",
"TCGA-P5-A5EY-01A-11D-A27J-01 has no genes with altered CN\n",
"TCGA-CS-6669-01A-11D-1892-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LGG (36019, 530)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"LAML\n",
"TCGA-AB-2884-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2932-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2842-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2969-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2826-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2836-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2871-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2845-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2840-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2837-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2844-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2854-03A-01D-0756-21 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AB-3006-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2931-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2851-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2978-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2880-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2922-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2947-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2998-03A-01D-0756-21 has no genes with altered CN\n",
"TCGA-AB-2824-03A-01D-0756-21 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LAML (36019, 191)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"LIHC\n",
"TCGA-2V-A95S-10D-01D-A36Z-01 has no genes with altered CN\n",
"TCGA-UB-AA0V-01A-11D-A381-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-G3-A25V-01A-11D-A16U-01 has no genes with altered CN\n",
"TCGA-DD-A3A6-01A-11D-A22E-01 has no genes with altered CN\n",
"TCGA-DD-A4NL-01A-11D-A28W-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-ED-A5KG-01A-11D-A27H-01 has no genes with altered CN\n",
"TCGA-CC-A9FV-01A-11D-A36W-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-MR-A520-01A-11D-A25U-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LIHC (36019, 373)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"PRAD\n",
"TCGA-J9-A52C-01A-11D-A26L-01 has no genes with altered CN\n",
"TCGA-V1-A8MJ-01A-11D-A363-01 has no genes with altered CN\n",
"TCGA-XJ-A9DQ-01A-11D-A376-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-J4-A6G1-01A-11D-A30W-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-J4-A67R-01A-21D-A30D-01 has no genes with altered CN\n",
"TCGA-EJ-A7NJ-01A-22D-A34T-01 has no genes with altered CN\n",
"TCGA-EJ-7791-01A-11D-2112-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-EJ-A8FU-01A-11D-A363-01 has no genes with altered CN\n",
"TCGA-EJ-A6RC-01A-11D-A32A-01 has no genes with altered CN\n",
"TCGA-HC-7740-01A-11D-2112-01 has no genes with altered CN\n",
"TCGA-EJ-A65B-01A-12D-A30D-01 has no genes with altered CN\n",
"TCGA-HC-8260-01A-11D-2259-01 has no genes with altered CN\n",
"TCGA-FC-A8O0-01A-41D-A376-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-VN-A88I-01A-11D-A34T-01 has no genes with altered CN\n",
"TCGA-EJ-A7NK-01A-12D-A34T-01 has no genes with altered CN\n",
"TCGA-CH-5743-01A-21D-1574-01 has no genes with altered CN\n",
"TCGA-G9-6367-01A-11D-1785-01 has no genes with altered CN\n",
"TCGA-KC-A4BO-01A-61D-A256-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PRAD (36019, 493)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"LUAD\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n",
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-L4-A4E6-01A-11D-A24C-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-44-3398-01A-01D-1877-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-55-8619-01A-11D-2389-01 has no genes with altered CN\n",
"TCGA-86-A4P8-01A-11D-A24O-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 500 processed.\n",
"LUAD (36019, 518)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BRCA\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AO-A0JC-01A-11D-A059-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BH-A0H5-01A-21D-A111-01 has no genes with altered CN\n",
"TCGA-A2-A0CR-01A-11D-A227-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BH-A1FE-06A-11D-A20R-01 has no genes with altered CN\n",
"TCGA-AN-A0FN-01A-11D-A036-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n",
"... 500 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-PL-A8LY-01A-11D-A41E-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 600 processed.\n",
"... 700 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-GM-A3XG-01A-31D-A242-01 has no genes with altered CN\n",
"TCGA-LD-A74U-01A-13D-A33D-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 800 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-GM-A2DO-10D-01D-A18N-01 has no genes with altered CN\n",
"TCGA-A2-A0EP-01A-52D-A22W-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 900 processed.\n",
"... 1000 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-AO-A1KO-01A-31D-A13J-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BRCA (36019, 1088)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"KIRC\n",
"TCGA-B4-5378-01A-01D-1499-01 has no genes with altered CN\n",
"TCGA-B0-5400-01A-01D-1499-01 has no genes with altered CN\n",
"TCGA-CJ-4890-01A-01D-1302-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 100 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-A3-A8OX-01A-11D-A36W-01 has no genes with altered CN\n",
"TCGA-B0-4817-01A-01D-1274-01 has no genes with altered CN\n",
"TCGA-B0-5080-01A-01D-1499-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 200 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-DV-A4VZ-01A-11D-A25U-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 300 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-CJ-4891-01A-01D-1302-01 has no genes with altered CN\n",
"TCGA-CJ-4889-01A-01D-1302-01 has no genes with altered CN\n",
"TCGA-BP-4769-01A-01D-1283-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 400 processed.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"TCGA-BP-4760-01A-02D-1417-01 has no genes with altered CN\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"... 500 processed.\n",
"KIRC (36019, 529)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"KICH\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"KICH (36019, 66)\n"
]
}
],
"source": [
"for cohort in dfs.keys():\n",
" print(cohort, file=sys.stderr)\n",
" df = dfs[cohort]\n",
" cna_table = []\n",
" n_samples = 0\n",
" for sample in list(set(df.Sample.values)):\n",
" n_samples +=1\n",
" cnv2gene = cnv2genelevel(cnv2bed(df[df.Sample == sample]),gene_intervals_bed,sample,\n",
" verbose = False,sorted_index = sorted_index)\n",
" cna_table.append(cnv2gene)\n",
" if n_samples % 100 == 0:\n",
" print(\"...\",n_samples, \"processed.\")\n",
" cna_table = pd.concat(cna_table,axis =1)\n",
" \n",
"\n",
" for sample in tumors_without_CNA[cohort]:\n",
" cna_table[sample] = 0\n",
" \n",
" cna_table.fillna(0, inplace = True)\n",
" cna_table.to_csv(preprocessed_dir+\"/TCGA-\"+cohort+\".Segment_Mean.CNA.tsv\",\n",
" sep = \"\\t\",header=True,index=True)\n",
" print(cohort,cna_table.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'t = time.time()\\ncnv2gene = cnv2genelevel(cnv2bed(df[df.Sample == sample]),gene_intervals_bed,sample,\\n verbose = False,sorted_index = sorted_index)\\nprint( time.time() - t)\\ncnv2gene'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"t = time.time()\n",
"cnv2gene = cnv2genelevel(cnv2bed(df[df.Sample == sample]),gene_intervals_bed,sample,\n",
" verbose = False,sorted_index = sorted_index)\n",
"print( time.time() - t)\n",
"cnv2gene\"\"\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CCLE \n",
"\n",
"the same pipeline as for TCGA except filtering out germline CNA (because no )\n",
"\n",
"wget https://data.broadinstitute.org/ccle_legacy_data/dna_copy_number/CCLE_copynumber_2013-12-03.seg.txt\n",
"\n",
"? should we use a stronger segment_mean threshold because this data are for cell lines and purity must be 100%"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"47 duplicated IDs in 94 rows found.\n",
"duplicate rows removed due to low correlation of duplicated profiles 0\n",
"Merged 94 duplicated rows into 47 rows\n",
"CCLE: genes: 35972 samples 1043\n"
]
}
],
"source": [
"df = pd.read_csv(\"../../CCLE/CCLE_copynumber_2013-12-03.seg.txt\",sep = \"\\t\")\n",
"df.rename({\"CCLE_name\":\"Sample\"},inplace=True, axis=\"columns\")\n",
"df[\"End\"] = df[\"End\"].apply(int)\n",
"ccle = []\n",
"for sample_name in list(set(df[\"Sample\"].values)):\n",
" cl = df.loc[df[\"Sample\"]==sample_name, :]\n",
" # keep high-conf segments \n",
" cl_filtered = filter_lowconf_segments(cl,num_marker_thr,pos_seg_mean_thr, neg_seg_mean_thr )\n",
" #print(sample_name, cl.shape[0], \"after filtration\",cl_filtered.shape[0])\n",
" # map to genes \n",
" cnv2gene = cnv2genelevel(cnv2bed(cl_filtered),gene_intervals_bed,sample_name,\n",
" verbose = False,sorted_index = sorted_index)\n",
" ccle.append(cnv2gene)\n",
" \n",
"ccle = pd.concat(ccle,axis =1)\n",
"ccle.fillna(0, inplace = True)\n",
"ccle = handle_dups(ccle)\n",
"ccle.to_csv(preprocessed_dir+\"/\"+\"CCLE\"+\".Segment_Mean.CNA.tsv\",\n",
" sep = \"\\t\",header=True,index=True)\n",
"print(\"CCLE:\",\"genes:\",ccle.shape[0],\"samples\",ccle.shape[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# GDSC\n",
"Assume that supplementary file with gene-level CN is downloaded :\n",
"\n",
"wget \n",
"\n",
"GDSC provides gene-level integer estimated CN, max. and min. CN over all segments covering a gene. In order to make it comparable with TCGA and CCLE, we divide estimated CN by CN of copy-neutral state and log2-transform it. \n",
"\n",
"1) Copy-neutral state was defined from average ploiy, as median of integer CN values in non-disrupted genes.\n",
"\n",
"2) Compute log2(CN/neutral-CN) for min and max CN; keep the value with most extreme estimate\n",
"\n",
"3) Replace estimates below thresholds with zeroes. \n",
"\n",
"\n",
"DGSC uses 4 comma-separated values for gene-level CN (max_cn,min_cn,zygosity,disruption): e.g. (from \"legend\" tab)\n",
"\n",
"2,2,H,-\tGene resides on a single genomic segment in a diploid region of the genome.\n",
"2,0,L,D\tGene spans multiple segments, higest copy number is 2 but part of the coding sequence is homozygously deleted, the gene is disrupted.\n",
"13,13,H,-\tGene resides on a single genomic segment of copy number 13 in a heterozygous part of the genome (amplification).\n",
"14,12,L,D\tGene spans multiple genomic segments all of which are amplified to 12 or more copies, some or all segments have LOH, the gene is disrupted.\n",
"0,0,0,-\tComplete gene sequence falls within a homozygous deletion.\n",
"-1,-1,-,- gene level CN not assigned\n",
"\n",
"* min and max CN are integers \n",
"* zygosity - can be L (LOH in any overlapping segment) or H (heterozygous) or 0 (homozygous deleteion of the whole gene) or - (undefined)\n",
"* disruption - D (if disrupted) or \"-\" (not disrupted) \n",
"\n",
"Average ploidies of cell lines were downloaded from COSMIC:\n",
"\n",
"wget https://cog.sanger.ac.uk/cosmic/GRCh37/cell_lines/v86/PICNIC_average_ploidies.tsv?AWSAccessKeyId=KRV7P7QR9DL41J9EWGA2&Expires=1540792525&Signature=mcSB6oFv%2BXCF4%2Fezm4a3Ds1JXo4%3D\n",
"\n",
"wget ftp:// ftp.sanger.ac.uk/pub/project/cancerrxgene/releases/release-7.0/Gene_level_CN.xlsx\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gene</th>\n",
" <th>chr</th>\n",
" <th>start</th>\n",
" <th>stop</th>\n",
" <th>201T</th>\n",
" <th>22RV1</th>\n",
" <th>23132-87</th>\n",
" <th>42-MG-BA</th>\n",
" <th>451Lu</th>\n",
" <th>5637</th>\n",
" <th>...</th>\n",
" <th>WSU-NHL</th>\n",
" <th>YAPC</th>\n",
" <th>YH-13</th>\n",
" <th>YKG-1</th>\n",
" <th>YMB-1-E</th>\n",
" <th>YT</th>\n",
" <th>ZR-75-30</th>\n",
" <th>huH-1</th>\n",
" <th>no-10</th>\n",
" <th>no-11</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1287381</td>\n",
" <td>924100</td>\n",
" <td>910924</td>\n",
" <td>687561</td>\n",
" <td>1287706</td>\n",
" <td>687452</td>\n",
" <td>...</td>\n",
" <td>909785</td>\n",
" <td>909904</td>\n",
" <td>909905</td>\n",
" <td>687592</td>\n",
" <td>1303911</td>\n",
" <td>946358</td>\n",
" <td>909907</td>\n",
" <td>1298146</td>\n",
" <td>908452</td>\n",
" <td>908450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DDX11L1</td>\n",
" <td>1</td>\n",
" <td>11869.0</td>\n",
" <td>14412.0</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>...</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WASH7P</td>\n",
" <td>1</td>\n",
" <td>14363.0</td>\n",
" <td>29806.0</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>...</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" <td>-1,-1,-,-</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 1000 columns</p>\n",
"</div>"
],
"text/plain": [
" gene chr start stop 201T 22RV1 23132-87 42-MG-BA \\\n",
"0 NaN NaN NaN NaN 1287381 924100 910924 687561 \n",
"1 DDX11L1 1 11869.0 14412.0 -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"2 WASH7P 1 14363.0 29806.0 -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"\n",
" 451Lu 5637 ... WSU-NHL YAPC YH-13 \\\n",
"0 1287706 687452 ... 909785 909904 909905 \n",
"1 -1,-1,-,- -1,-1,-,- ... -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"2 -1,-1,-,- -1,-1,-,- ... -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"\n",
" YKG-1 YMB-1-E YT ZR-75-30 huH-1 no-10 no-11 \n",
"0 687592 1303911 946358 909907 1298146 908452 908450 \n",
"1 -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"2 -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- -1,-1,-,- \n",
"\n",
"[3 rows x 1000 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"GDSC_CNA = \"/home/olya/SFU/Hossein/GDSC/Gene_level_CN.xlsx\"\n",
"\n",
"gdsc = pd.read_excel(GDSC_CNA,\"Gene_level_CN\")\n",
"gdsc.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25 gene IDs excluded due to string to datetime conversion in Excel.\n",
"Strings containing duplicated gene IDs: 0\n"
]
}
],
"source": [
"gdsc.set_index(\"gene\",inplace = True)\n",
"gdsc.drop([\"chr\",\"start\",\"stop\"],inplace=True,axis=1)\n",
"gdsc.columns = gdsc.iloc[0,:]\n",
"gdsc = gdsc.iloc[1:,:]\n",
"gdsc.columns.name = None\n",
"# replace 2001-12-01 with DEC1 and get remove gene names converted to datetimes\n",
"gdsc.index.values[37778] = \"DEC1\"\n",
"df_size = gdsc.shape[0]\n",
"ndxs=pd.Series(gdsc.index).apply(lambda x : type(x) == unicode or type(x) == str)\n",
"gdsc = gdsc.loc[gdsc.index.values[ndxs[ndxs].index],:]\n",
"print(df_size - gdsc.shape[0],\"gene IDs excluded due to string to datetime conversion in Excel.\")\n",
"\n",
"gdsc.index.name = \"gene_id\"\n",
"ids = gdsc.index\n",
"ids = list(set(ids[ids.duplicated()]))\n",
"print(\"Strings containing duplicated gene IDs:\",gdsc.loc[ids,:].shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### distribution of averaged ploidies in GDSC\n",
"\n",
"we compared average ploidies reported in PICNIC_average_ploidies.tsv provided by COSMIC with "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1020\n",
"1016\n"
]
}
],
"source": [
"GDSC_Ploidies = \"/home/olya/SFU/Hossein/GDSC/PICNIC_average_ploidies.tsv\"\n",
"GDSC_Ploidies = pd.read_csv(GDSC_Ploidies,sep = \"\\t\")\n",
"GDSC_Ploidies.drop(\"#sample_name\",axis = 1, inplace= True)\n",
"GDSC_Ploidies.set_index(\"sample_id\",inplace=True)\n",
"print(GDSC_Ploidies.shape[0])\n",
"GDSC_Ploidies.dropna(inplace=True)\n",
"print(GDSC_Ploidies.shape[0])\n",
"\n",
"est_ploidies = gdsc.apply(define_avg_ploidy).T\n",
"df_ploidies = pd.DataFrame.from_dict({\"est. avg. ploidy from CN profile\":est_ploidies[\"avg_pl\"],\"PICNIC avg. pl.\":GDSC_Ploidies[\"average_ploidy\"],\n",
" \"est. median. ploidy\":est_ploidies[\"median_pl\"]})\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIgAAAE/CAYAAAAt2/ipAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xu8pWVd///XO/CUoihMiBwcD2ip1agTah4i0UJA0VKEDNGo0cKy7Psr1L6imd8fVkaWqV8UBEoQBElU6it5zBJtUEIO+hVw/DE4MiPIQVEL+Pz+uK8Na/Zh9prZex1m7tfz8diPfa/rvtZan33PrOu61+e+rutOVSFJkiRJkqT++rFJByBJkiRJkqTJMkEkSZIkSZLUcyaIJEmSJEmSes4EkSRJkiRJUs+ZIJIkSZIkSeo5E0SSJEmSJEk9Z4JIUy3JAUnWD1n33Un+5xb2V5JHbkMM90nykSQ3J/ng1j6/D9J5X5LvJvlikqcn+drA/nVJnjXJGCVpR5fk00l+c9JxSFJfJHlZks8NPP5ekodPII6hvuckeUmSj29hv/1Iz5kg2kElWdkaip0nHcu4VNUrq+rNI3jpFwJ7ALtV1YtG8PqLSvKoJB9M8p2WqLo0yWuS7DTwb33BrOf8Q5I3jinEpwHPBvauqv2r6l+r6tFjem9JO4CWSP5BO7m+PsmpSe7X9m12wprk/kn+Osn/1+pf3R7vPvBaG5Pcd+A5v5nk0wOPNzuZ3lI7O5YDIEla1LR/x6mq+1XVNZOOYyFV9f6q+qVJx6HpZYJIWtxDgf9bVbfPt3PUHVSSRwBfAK4FfrqqHgC8CFgN7DJQ9UlJfn4E758ki7UVDwXWVdX3l/v9JfXKc6vqfsAT6Nq4P5ldIck9gU8AjwUOAu4PPAW4Adh/oOpOwKuHedOtaGclSZJ2WCaItgNJHpLk3CSbknwjye8N7Ns/ydokt7Qrrn/Vdn22/b6pXV19yhDv8/IkVya5Nck1SV4xsO/KJIcOPN65xfOE9vilSb6Z5IYk/3NrphS1uq9NckWbovS+JPdeoO5PtSvJNyW5PMnzBvadmuTPBh7/P0k2JPlWkt8YKP+5dqx2Gij7lST/Oc/7vQl4A/DidhyPaUNJ/y3JiUluAN6Y5MeS/Ek7BhuTnJ7kAe01Zq50vDzJte1vfGWL49L2t7xjC4foTcC/V9VrqmoDQFV9rap+rapuGqj358BbFjncM3/XzN/wjnal/KtJDhzY/+kkb0nyb8BtwMPb/8Pzk9yY5Kokv9XqHgO8F3hKO0ZvyhamBrZjdVy6K/43JDk7yYOGiVtSP1TVdcA/AY+bZ/dLgX2BF1TVFVV1Z1VtrKo3V9XgSMq/AP5Hkl2HeMth21kAkjwwyUdbP/jdtr132/fiJGtn1f+DJOe37d3STVu+Jcl/JPmzDExP2JLF2m5J2p6M8TvOp1tb++/tOR9pbfH7B9rilQP1fzLJhe2c92tJDh/Yt1s7H74lyReBR8x6r7tGpyY5JMmXW91rMzCyf+D7wdHpRsN+J8nrt/A3nJpuOY0L031X+0yShy5Q9wHtu8im9t3kT9Iu9mbulLhnt77k5vZ9JK38nu3v/+mBuj+R5LYkKxY75tp+mSCacu3D/BHgP4G9gAOB30/yy63K24G3V9X96Rqos1v5M9rvXdtQx88P8XYbgUPprsa+HDgxLQEEnAkcOVD3l4HvVNWXkjwGeCfwEmBP4AEt1q3xkvaajwAexfxXje9Bdyw+DvwE8LvA+5PMmcqU5CDgf9BNe9oPuCtZVVX/QXeleXB45VHA6bNfp6qOB/4XcFY7jie3XU8CrqGbevYW4GXt5xeBhwP3A2YnfZ7UYnkx8NfA61tcjwUOT/IL8xwXWp1zFtg36J3AozL8Wj9PAq4GdgeOBz40K1FzFLCG7ur5N4EPAOuBh9BNu/tfSZ7Zjskrgc+3Y3T8Iu/7u8DzgV9or/
Vd4O+GjFlSDyTZBzgY+PI8u58F/HNVfW+Rl1kLfJquL1jMsO3sjB8D3kc3enJf4Afc3eZ/BHh0kv0G6v8acEbb/jvg+8CDgaPbz9ZYrO2WpKk35u84AEfQndvu1V7v83Tt+IOAK+naU9JNTb6Qrs3+ifa8d7bvO9C14T+k+87zG+1nId+nu6ixK3AI8NtJnj+rztOAR7e//w1JfmoLr/cS4M107f8lwPsXqPe3dN/HHk53vv1Suu92m0k3LftDdN+7dqfrW54KUFX/RXfu/+sDTzkS+ERVbdpCjNrOmSCafj8HrKiqP62q/2pzWt9D11gB/DfwyCS7V9X3quqibX2jqvpYVV1dnc/QJWKe3nafATwvyY+3x79GlzSCLlnwkar6XGtM3gDUVr79O6rq2qq6kS7hcuQ8dZ5Ml3g5oR2LTwIfXaDu4cD7quqyNu3pjbP2n0Zr8NqJ9S9z98n7ML5VVX9bVbdX1Q/oGuy/qqpr2peW1wJHZPPpZ2+uqh9W1cfpOowz21Xv64B/BR6/wHvtBmwYIqYf0B27P1usYrMR+Ouq+u+qOgv4Gl3nNePUqrq8Ta17MF2H8cftb7iEbtTQS4d8r0GvBF5fVeur6kd0/zYvzJTOJZc0Vv+Y5Cbgc8Bn6BL0sw3bJkLXH/3uEFc7t+Y1qaobqurcqrqtqm6la3t/oe27DfgwrW9qiaKfBM5PN3L1V4Hj23OvoOuPtsZibbckbQ/G9h2neV/7nnMz3QjVq6vqX9p57ge5+zz8ULplE97XzvO/DJwLvGigDX9DVX2/qi5jC214VX26qr7SRrpeSvfdafYF4TdV1Q+q6j/pkmU/u4W/4WNV9dl2/vx6utH7+wxWaDEeAby2qm6tqnXA2+iSY7MdDFxeVedU1X/TXcD+9sD+04Ajk6Q9Pgr4+y3Epx2ACaLp91DgIemmId3UTpxfRzdyBeAYuhE3X23DIw9d6IUWk+Q5SS5qwwlvoms0dgeoqqvosuvPbUmi53F3QuUhdOs20OreRjdCZ2tcO7D9zfaasz0EuLaq7pxVd77RSpvF1OoN+ge6v+W+dMmkf52ZVrAN8c683+B7fBPYmbv/nQCuH9j+wTyP77fAe91Ad5ViGO8F9kjy3CHqXldVg4m82cd98G98CHBj+yI0WH9rR4pB93/6vIH/z1cCd7D5sZLUT8+vql2r6qFV9TstAT/b0G1iO3n/KHDcIlW3pp0lyY8n+d9t6P4tdFMeds3dU5fP4O6LF78G/GPrG1fQ9Q2D7evs/mQxi7XdkrQ9GNt3nGbY8/CH0q3rORjXS+guls7Xhs/+jnGXJE9K8qk21etmuouku8+qNpiQuY2Fvw/A5t+3vgfcyNz2f3fgHsz9XrLo96XWtww+/kKL6YAkPwk8Ejh/C/FpB2CCaPpdC3yjnTDP/OxSVQcDVNXXq+pIuiGQbwXOaUmPrRrBk+RedNnxvwT2qKpdgQto81CbmWlmhwFXtKQRdFdd9x54rfvQXY3dGoPZ732Bb81T51vAPtl8weR9gevmqbthnte8Sxu183ngV9i2bPjs4/stug5l8P1uZ/POZ1v9C93VisWD6kZwvYlu+GkWqb7XwBUBmHvcB//GbwEPSrLLrPrzHfvFXAs8Z9b/6Xu3fxNJWsy/AL+cgTuULeJ44LfYckJ76Ha2+UO6KQFPatMfZqY8zLSpFwIrkqyi6zdnLqhsousb9h54rc2u/g5hsbZbkrYHY/mOs41xfWZWXPerqt/m7jZ8we8Ys5xBl1DZp7qbH7ybxc/Pt+Su9013l88HMbf9/w7d6KvZ30sW/b7U+pbZfdLMrIujgHOq6ofbGry2DyaIpt8XgVuT/HGS+6S7rfnjkvwcQJJfT7KijaqZWUjzTroG7E66uafDuCdwr/a825M8h83X6IFuHuovAb/N5tOxzqEbjfPz6e4u80a2vvE7NsnebbrX64Gz5qkzk8X+oyT3SHIA8NwW12xnAy9L8pg24mm+dXFOB/4I+G
m6+bdLcSbwB0ke1hrsmXWL5r3z2VY6Hvj5JH+R5MEASR6Z7jb28y2++vfAvenu7rMlPwH8XjuWLwJ+ii4pOEdVXQv8O/D/Jrl3kp+hu7LzD9vw97wbeEvawnpJViQ5bBteR1I//T3dCfy56RYS/bF0i4a+LsnBsyu3ixlnAb83e9+ArW1nd6G74nxT67c262PaUP0P0i2U/SC6hBFVdQddf/PGNgrpJ9n6qbpDt92SNMXG9R1na32Ubk3Po1o7e490N5b5qXna8Mew5XXkdqEbgf/DJPvTjShdioOTPK1933ozcFE7R79Li/FsunPtXdr59muY/5z9Y8Bj092sZ2e6fvLBs+r8A/ACuiTRnPVateMxQTTl2of8UGAV8A26rPB76RYegy4JcHmS79Et5nZEm8d6G92aCP/Whkc+OcnTW7353udWukbhbLpFg3+NWUMI2xSszwM/z0ACp6oup1t4+AN0mejv0a2R8COAJC9Jcvkif+oZdGseXUO3QNqcdXTa6JjnAs9px+GdwEur6qvz1P0nunm0nwSuar9nO4823akdr6U4he5Ly2fp/p1+SHdMlqyqrqa7hfNKun/rm+lGe60Fbp2n/h10624stmjpF+gWzf4O3f+VF1bVlqYGHtli+BbdsTu+qv5la/6W5u10/7c+nuRW4CK6RVclaVFt7YVnAV+lS7zcQvdFY3e6dm0+fwosOOJoa9tZuv7lPnTt50XAP89T54wW5wdnXSx4FV0f/m26fuNMWn8JkO4OnS9ZKFaGbLu31OdL0qSN6zvONsR1K90F8SPoznm/TTeC6V6tyqvopoF9GziVbqHrhfwO8KftfPcN3L3Q9rY6g+6CxI3AE9l8AelBv0u33uk1dGv6nUH3XWUzVfUd4EXACXRTrfcD/m1WnWuBL9GN3PrXJcav7UA2n8YuLV0bQXMTsF9VfWOI+uuA39zGZMOSJLkaeMUk3nuSkryM7pg/bdKxSFKfJXkr8OCqWvRuZrbdktRPSU4F1lfVnDs9j+G9T6G7Qc/Y31vj5wgiLYskz21DLe9Lt47RV4B1k41qy5L8Kl02fL7RRZIkLbs2Le5n0tmfbrrueZOOS5Kk2ZKspFuz9eTJRqJxMUGk5XIY3TDMb9ENTzyipnh4WpJPA+8Cjp11VzRJkkZpF7o1LL5PN137bcCHJxqRJEmzJHkzcBnwF8PMCtGOwSlmkiRJkiRJPecIIkmSJEmSpJ4zQSRJkiRJktRzO086AIDdd9+9Vq5cOekwJGkqXXzxxd+pqhWTjmOS7CckaX72ER37CUma39b0E1ORIFq5ciVr166ddBiSNJWSfHPSMUya/YQkzc8+omM/IUnz25p+wilmkiRJkiRJPWeCSJIkSZIkqedMEEmSJEmSJPWcCSJJkiRJkqSeM0EkSZIkSZLUcyaIJEmSJEmSes4EkSRJkiRJUs+ZIJIkSZIkSeo5E0SSJEmSJEk9Z4JIkiRJkiSp50wQSZIkSZIk9dzOkw5g2qw87mND1Vt3wiEjjkSSNI3sJyRpx2K7LkkdRxBJkpYkySlJNia5bKDsrCSXtJ91SS5p5SuT/GBg37snF7kkSZKkGY4gkiQt1anAO4DTZwqq6sUz20neBtw8UP/qqlo1tugkSZIkLcoEkSRpSarqs0lWzrcvSYDDgWeOMyZJkiRJW8cpZpKkUXo6cH1VfX2g7GFJvpzkM0mevtATk6xJsjbJ2k2bNo0+UkmSJKnHTBBJkkbpSODMgccbgH2r6vHAa4Azktx/vidW1UlVtbqqVq9YsWIMoUqSJEn9ZYJIkjQSSXYGfgU4a6asqn5UVTe07YuBq4FHTSZCSZIkSTNMEEmSRuVZwFerav1MQZIVSXZq2w8H9gOumVB8kiRJkhoTRJKkJUlyJvB54NFJ1ic5pu06gs2nlwE8A7i03fb+HOCVVXXj+KKVJEmSNB/vYiZJWpKqOnKB8pfNU3YucO6oY5IkSZK0dRYdQZTklCQbk1w2UHZWkk
vaz7p2JZgkK5P8YGDfu0cZvCRJkiRJkpZumBFEpwLvAE6fKaiqF89sJ3kbcPNA/auratVyBShJkiRJkqTRWjRBVFWfTbJyvn1JAhwOPHN5w5IkSZIkSdK4LHWR6qcD11fV1wfKHpbky0k+k+TpS3x9SZIkSZIkjdhSF6k+ks3vULMB2LeqbkjyROAfkzy2qm6Z/cQka4A1APvuu+8Sw5AkSZIkSdK22uYRREl2Bn4FOGumrKp+VFU3tO2LgauBR833/Ko6qapWV9XqFStWbGsYkiRJkiRJWqKlTDF7FvDVqlo/U5BkRZKd2vbDgf2Aa5YWoiRJkiRJkkZpmNvcnwl8Hnh0kvVJjmm7jmDz6WUAzwAubbe9Pwd4ZVXduJwBS5IkSZIkaXkNcxezIxcof9k8ZecC5y49LEmSJEk7kjbTYC1wXVUdmuRhwAeA3YCLgaOq6r+S3As4HXgicAPw4qpaN6GwJak3lnoXM0mSJEkaxquBKwcevxU4saoeCXwXmJmpcAzw3VZ+YqsnSRoxE0SSJEmSRirJ3sAhwHvb4wDPpFuWAuA04Plt+7D2mLb/wFZfkjRCJogkSZIkjdpfA38E3Nke7wbcVFW3t8frgb3a9l7AtQBt/82tviRphEwQSZIkSRqZJIcCG6vq4mV+3TVJ1iZZu2nTpuV8aUnqJRNEkiRJkkbpqcDzkqyjW5T6mcDbgV2TzNw0Z2/gurZ9HbAPQNv/ALrFqjdTVSdV1eqqWr1ixYrR/gWS1AMmiCRJkiSNTFW9tqr2rqqVwBHAJ6vqJcCngBe2akcDH27b57fHtP2frKoaY8iS1EsmiCRJkiRNwh8Dr0lyFd0aQye38pOB3Vr5a4DjJhSfJPXKzotXkSRJkqSlq6pPA59u29cA+89T54fAi8YamCTJEUSSJEmSJEl9Z4JIkiRJkiSp50wQSZIkSZIk9ZwJIkmSJEmSpJ4zQSRJkiRJktRzJogkSZIkSZJ6zgSRJEmSJElSz+086QAkSZIkSdNl5XEfG6reuhMOGXEkksbFEUSSJEmSJEk9Z4JIkiRJkiSp50wQSZKWJMkpSTYmuWyg7I1JrktySfs5eGDfa5NcleRrSX55MlFLkiRJGmSCSJK0VKcCB81TfmJVrWo/FwAkeQxwBPDY9px3JtlpbJFKkiRJmpcJIknSklTVZ4Ebh6x+GPCBqvpRVX0DuArYf2TBSZIkSRqKCSJJ0qi8KsmlbQraA1vZXsC1A3XWtzJJkiRJE2SCSJI0Cu8CHgGsAjYAb9vaF0iyJsnaJGs3bdq03PFJkiRJGmCCSJK07Krq+qq6o6ruBN7D3dPIrgP2Gai6dyub7zVOqqrVVbV6xYoVow1YkiRJ6jkTRJKkZZdkz4GHLwBm7nB2PnBEknsleRiwH/DFcccnSZIkaXM7TzoASdL2LcmZwAHA7knWA8cDByRZBRSwDngFQFVdnuRs4ArgduDYqrpjEnFLkiRJutuiCaIkpwCHAhur6nGt7I3AbwEzi0K8buAWxq8FjgHuAH6vqv7PCOKWJE2JqjpynuKTt1D/LcBbRheRJEmSpK01zBSzU4GD5ik/sapWtZ+Z5NBjgCOAx7bnvDPJTssVrCRJkiRJkpbfogmiqvoscOOQr3cY8IGq+lFVfQO4irsXJpUkSZIkSdIUWsoi1a9KcmmSU5I8sJXtBVw7UGd9K5MkSZIkSdKU2tYE0buARwCrgA3A27b2BZKsSbI2ydpNmzYt/gRJkiRJkiSNxDYliKrq+qq6o6ruBN7D3dPIrgP2Gai6dyub7zVOqqrVVbV6xYoV2xKGJEmSJEmSlsE2JYiS7Dnw8AXAZW37fOCIJPdK8jBgP+CLSwtRkiRJkiRJozTMbe7PBA4Adk+yHjgeOCDJKqCAdcArAKrq8iRnA1cAtwPHVtUdowldkiRJkiRJy2HRBFFVHTlP8clbqP8W4C1LCUqSJEmSJEnjs5S7mEmSJEmSJGkHYIJIkiRJkiSp50wQSZIkSZ
Ik9ZwJIkmSJEmSpJ4zQSRJkiRJktRzJogkSZIkSZJ6zgSRJEmSJElSz5kgkiRJkiRJ6jkTRJIkSZIkST1ngkiSJEmSJKnnTBBJkiRJkiT1nAkiSZIkSZKknjNBJEmSJEmS1HMmiCRJkiRJknrOBJEkSZIkSVLPmSCSJEmSJEnqORNEkiRJkiRJPWeCSJIkSZIkqedMEEmSJEmSJPWcCSJJ0pIkOSXJxiSXDZT9RZKvJrk0yXlJdm3lK5P8IMkl7efdk4tckiRJ0gwTRJKkpToVOGhW2YXA46rqZ4D/C7x2YN/VVbWq/bxyTDFKkiRJ2gITRJKkJamqzwI3zir7eFXd3h5eBOw99sAkSZIkDc0EkSRp1H4D+KeBxw9L8uUkn0ny9EkFJUmSJOluJogkSSOT5PXA7cD7W9EGYN+qejzwGuCMJPdf4LlrkqxNsnbTpk3jCViStOyS3DvJF5P8Z5LLk7yplT8syReSXJXkrCT3bOX3ao+vavtXTjJ+SeoLE0SSpJFI8jLgUOAlVVUAVfWjqrqhbV8MXA08ar7nV9VJVbW6qlavWLFiTFFLkkbgR8Azq+pngVXAQUmeDLwVOLGqHgl8Fzim1T8G+G4rP7HVkySNmAkiSdKyS3IQ8EfA86rqtoHyFUl2atsPB/YDrplMlJKkcajO99rDe7SfAp4JnNPKTwOe37YPa49p+w9MkjGFK0m9tWiCyNsXS5K2JMmZwOeBRydZn+QY4B3ALsCFs/qDZwCXJrmE7qT/lVV147wvLEnaYSTZqbX9G+nudHk1cNPADQ3WA3u17b2AawHa/puB3cYbsST1z85D1DmV7kT/9IGyC4HXVtXtSd5Kd/viP277rq6qVcsapSRpalXVkfMUn7xA3XOBc0cbkSRp2lTVHcCqdmH5POAnl/qaSdYAawD23Xffpb6cJPXeoiOIvH2xJEmSpOVQVTcBnwKeAuyaZOaC9d7AdW37OmAfgLb/AcAN87yWa9VJ0jJajjWIvH2xJEmSpHm19edmlqS4D/Bs4Eq6RNELW7WjgQ+37fPbY9r+T87c7ECSNDrDTDFb0BZuX3xDkicC/5jksVV1yzzPdUioJEmStOPbEzit3aTgx4Czq+qjSa4APpDkz4Avc/f05JOBv09yFd1MhiMmEbQk9c02J4gGbl984ODti+luY0lVXZxk5vbFa2c/v6pOAk4CWL16tVcEJEmSpB1QVV0KPH6e8muA/ecp/yHwojGEJkkasE1TzLx9sSRJkiRJ0o5j0RFE7fbFBwC7J1kPHE9317J70d2+GOCiqnol3e2L/zTJfwN34u2LJUmSJEmSpt6iCSJvXyxJkiRJkrRjW467mEmSJEmSJGk7ZoJIkiRJkiSp50wQSZIkSZIk9ZwJIkmSJEmSpJ4zQSRJkiRJktRzJogkSZIkSZJ6zgSRJEmSJElSz5kgkiRJkiRJ6jkTRJIkSZIkST1ngkiSJEmSJKnnTBBJkiRJkiT1nAkiSZIkSZKknjNBJEmSJEmS1HMmiCRJkiRJknrOBJEkSZIkSVLPmSCSJEmSJEnqORNEkiRJkiRJPWeCSJIkSZIkqedMEEmSJEmSJPWcCSJJkiRJkqSeM0EkSZIkSZLUcyaIJElLkuSUJBuTXDZQ9qAkFyb5evv9wFaeJH+T5KoklyZ5wuQilyRJkjTDBJEkaalOBQ6aVXYc8Imq2g/4RHsM8Bxgv/azBnjXmGKUJEmStAUmiCRJS1JVnwVunFV8GHBa2z4NeP5A+enVuQjYNcme44lUkiRJ0kJMEEmSRmGPqtrQtr8N7NG29wKuHai3vpXNkWRNkrVJ1m7atGl0kUqSJEkyQSRJGq2qKqC24XknVdXqqlq9YsWKEUQmSZIkacZQCSIXIJUkbaXrZ6aOtd8bW/l1wD4D9fZuZZIkSZImaNgRRKfiAqSSpOGdDxzdto8GPjxQ/tJ2MeHJwM0DU9EkSZIkTchQCSIXIJUkLSTJmcDngUcnWZ/kGOAE4NlJvg
48qz0GuAC4BrgKeA/wOxMIWZIkSdIsOy/huVu7AKlXiCVpB1RVRy6w68B56hZw7GgjkiRJkrS1lmWR6m1ZgNS700iSJEmSJE2HpSSIlrQAqXenkSRJkiRJmg5LSRC5AKkkSZIkSdIOYKg1iNoCpAcAuydZDxxPt+Do2W0x0m8Ch7fqFwAH0y1Aehvw8mWOWZIkSZIkSctoqASRC5BKkiRJkiTtuJZlkWpJkiRJkiRtv0wQSZIkSZIk9ZwJIkmSJEmSpJ4zQSRJkiRJktRzQy1SLUmSts7K4z42VL11Jxwy4kgkSZKkxTmCSJIkSZIkqedMEEmSJEmSJPWcCSJJkiRJkqSeM0EkSZIkSZLUcyaIJEmSJEmSes4EkSRJkiRJUs+ZIJIkSZIkSeo5E0SSJEmSJEk9Z4JIkiRJ0sgk2SfJp5JckeTyJK9u5Q9KcmGSr7ffD2zlSfI3Sa5KcmmSJ0z2L5CkfjBBJEmSJGmUbgf+sKoeAzwZODbJY4DjgE9U1X7AJ9pjgOcA+7WfNcC7xh+yJPWPCSJJkiRJI1NVG6rqS237VuBKYC/gMOC0Vu004Plt+zDg9OpcBOyaZM8xhy1JvWOCSJIkSdJYJFkJPB74ArBHVW1ou74N7NG29wKuHXja+lY2+7XWJFmbZO2mTZtGFrMk9cXOkw5AkiRt/1Ye97Gh6q074ZARRyJpWiW5H3Au8PtVdUuSu/ZVVSWprXm9qjoJOAlg9erVW/VcSdJcjiCSJEmSNFJJ7kGXHHp/VX2oFV8/M3Ws/d7Yyq8D9hl4+t6tTJI0QiaIJEmSJI1MuqFCJwNXVtVfDew6Hzi6bR8NfHig/KXtbmZPBm4emIomSRoRp5hJkiRJGqWnAkcBX0lySSt7HXACcHaSY4BvAoe3fRcABwNXAbcBLx9vuJLUTyaIJEmSJI1MVX0OyAK7D5ynfgHHjjQoSdIcJogkSSOR5NHAWQNFDwfeAOwK/BYwc8uZ11XVBWMOT5IkSdIAE0SSpJGoqq8BqwCS7ES3wOh5dFMFTqyqv5xgeJIkSZIGuEi1JGkcDgSurqpvTjoQSZIkSXOZIJIkjcMRwJkDj1+V5NIkpyR54KSCkiRJktTZ5gRRkkcnuWTg55Ykv5/kjUm+scBIAAAQ5klEQVSuGyg/eDkDliRtX5LcE3ge8MFW9C7gEXTTzzYAb1vgeWuSrE2ydtOmTfNVkSRJkrRMtjlBVFVfq6pVVbUKeCLdLSjPa7tPnNnnwqOS1HvPAb5UVdcDVNX1VXVHVd0JvAfYf74nVdVJVbW6qlavWLFijOFKkiRJ/bNcU8xcW0KStJAjGZhelmTPgX0vAC4be0SSJEmSNrNcCSLXlpAkzZHkvsCzgQ8NFP95kq8kuRT4ReAPJhKcJEmSpLssOUHk2hKSpIVU1ferarequnmg7Kiq+umq+pmqel5VbZhkjJIkSZKWZwSRa0tIkiRJkiRtx3ZehteYs7bEwNVg15aQJGkKrTzuY0PVW3fCISOORJIkSdNgSQmigbUlXjFQ/OdJVgEFrJu1T9spv0hIkiRJkrTjWlKCqKq+D+w2q+yoJUUkSZKmxrAXCCRJkrR9W667mEmSJEmSJGk7tRxrEEmSJEmSJFyeQ9svE0Q7KBslSZIkSZI0LKeYSZIkSZIk9ZwJIkmSJEmSpJ4zQSRJkiRJktRzJogkSZIkSZJ6zgSRJEmSJElSz5kgkiRJkiRJ6jlvc99zK4/72KRDkCRJkiRJE+YIIkmSJEmSpJ4zQSRJkiRJktRzTjHbRsNOzVp3wiEjjkSSJEmSJGlpTBBpIkywSZIkSZI0PUwQSZKksfECgSRJ0nRyDSJJkiRJkqSeM0EkSZIkSZLUc04xkyRpBzLsFK5pN8zf4TQ0SZKk5eMIIkmSJEmSpJ7rzQiiHeWKqiRJkiRJ0nLrTYJoR2GiS5IkSZIkLTenmEmSJEmSJPWcCSJJkiRJkqSec4qZlpVT4CQNSrIOuBW4A7i9qlYneRBwFr
ASWAccXlXfnVSMkiRJkhxBJEkavV+sqlVVtbo9Pg74RFXtB3yiPZYkSZI0QSaIJEnjdhhwWts+DXj+BGORJEmSxDJMMXP6gCRpCwr4eJIC/ndVnQTsUVUb2v5vA3tMLLrtiFN4JUmSNErLNYLI6QOSpPk8raqeADwHODbJMwZ3VlXRJZHmSLImydokazdt2jSGUCVJkqT+GtUUM6cPSJKoquva743AecD+wPVJ9gRovzcu8NyTqmp1Va1esWLFuEKWJEmSemk5EkQz0wcuTrKmlTl9QJJ6Lsl9k+wysw38EnAZcD5wdKt2NPDhyUQoSZIkacZy3Ob+aVV1XZKfAC5M8tXBnVVVbe2JzbRk0hqAfffddxnCkCRNmT2A85JA19+cUVX/nOQ/gLOTHAN8Ezh8gjFKkiRJYhkSRIPTB5JsNn2gqjYsNH2gLVR6EsDq1avnXX+iT1x8VNKOpqquAX52nvIbgAPHH5EkSZKkhSxpipnTByRJkiRJkrZ/Sx1B5PQBSZIkSZKk7dySEkROH5AkSTuKYad7rzvhkBFHIkmSNH6jus29JEmSJEmSthMmiCRJkiRJknpuOW5zL0nSds+7SUrSaCQ5BTgU2FhVj2tlDwLOAlYC64DDq+q76RY3fTtwMHAb8LKq+tIk4pakvnEEkSRJkqRROhU4aFbZccAnqmo/4BPtMcBzgP3azxrgXWOKUZJ6zwSRJEmSpJGpqs8CN84qPgw4rW2fBjx/oPz06lwE7Jpkz/FEKkn9ZoJIkiRJ0rjtUVUb2va3gT3a9l7AtQP11rcySdKImSCSJEmSNDFVVUBt7fOSrEmyNsnaTZs2jSAySeoXE0SSJEmSxu36malj7ffGVn4dsM9Avb1b2RxVdVJVra6q1StWrBhpsJLUByaIJEmSJI3b+cDRbfto4MMD5S9N58nAzQNT0SRJI+Rt7jXVhr3t9LoTDhnra0mSJGk4Sc4EDgB2T7IeOB44ATg7yTHAN4HDW/UL6G5xfxXdbe5fPvaAJamnTBBJkqTtkol/aftQVUcusOvAeeoWcOxoI5IkzccEkSRJ2qENm0iSJEnqM9cgkiRJkiRJ6jkTRJIkSZIkST3nFDPtEJw+IEmSJEnStnMEkSRJkiRJUs+ZIJIkSZIkSeo5E0SSJEmSJEk95xpEkiRNkGuoSZIkaRqYIBoxT/wlSZIkSdK0M0EkSZK0FYa9+LPuhENGHIkkSdLyMUEkzeKJvyRJkiSpb1ykWpIkSZIkqedMEEmSJEmSJPWcCSJJkiRJkqSeM0EkSRqJJPsk+VSSK5JcnuTVrfyNSa5Lckn7OXjSsUqSJEl9t80JIk/8JUmLuB34w6p6DPBk4Ngkj2n7TqyqVe3ngsmFKEmSJAmWdhezmRP/LyXZBbg4yYVt34lV9ZdLD0+StL2qqg3AhrZ9a5Irgb0mG5UkSZKk+WzzCKKq2lBVX2rbtwKe+EuS5pVkJfB44Aut6FVJLk1ySpIHLvCcNUnWJlm7adOmMUUqSZIk9dOyrEG0LSf+kqR+SHI/4Fzg96vqFuBdwCOAVXQjjN423/Oq6qSqWl1Vq1esWDG2eCVJkqQ+WnKCaFtP/L0yLEk7viT3oOsj3l9VHwKoquur6o6quhN4D7D/JGOUJEmStMQE0VJO/L0yLEk7tiQBTgaurKq/Gijfc6DaC4DLxh2bJEmSpM1t8yLVWzrxbwuTgif+ktRnTwWOAr6S5JJW9jrgyCSrgALWAa+YTHiSJEmSZizlLmae+EuSFlRVnwMyzy5vay9JkiRNmW1OEHniL0mSJEmStGNYlruYSZIkSZIkaftlgkiSJEmSJKnnlrIGkdRrK4/72FD11p1wyIgjkSRJkiRpaRxBJEmSJEmS1HOOIJIkSRoBR5pKkqTtiSOIJEmSJEmSem67H0E07NU5SZIkSZIkzc8RRJIkSZIkST233Y8gkiRpSxxpKkmSJC3OBJEkSdJ2wEWvJUnSKDnFTJIkSZIkqedMEEmSJEmSJP
WcCSJJkiRJkqSeM0EkSZIkSZLUcyaIJEmSJEmSes4EkSRJkiRJUs95m3tpSnj7YkmSJEnSpJggkrYzJpIkSZIkScvNKWaSJEmSJEk9Z4JIkiRJkiSp50wQSZIkSZIk9ZxrEEkjNuyaQZKkflrufmK516pz7TtJkvrBEUSSJEmSJEk95wgiSZIkSZI0Uo5InX4miKQdlFMMJEmSJEnDMkEkSZLUQ66RJ0mSBo0sQZTkIODtwE7Ae6vqhFG9l6Rt5xcETYr9hCRpIfYRkjR+I0kQJdkJ+Dvg2cB64D+SnF9VV4zi/SRtf3aUKWs7yt8xbvYT0o5nuS847Cjtpv3E1rOPkKTJGNUIov2Bq6rqGoAkHwAOA2zUpR3ctI9I8kR9athPSNqiSfUntv9TwT5CUi9M23eTUd3mfi/g2oHH61uZJElgPyFJWph9hCRNwMQWqU6yBljTHn4vydcmFcsidge+M+kgppDHZS6PyfyWdFzy1mWMZILvO+v1tvaYPHRZg9lObEf9xCDbgbk8JvPzuMw1FcdkyvqdYY5JL/sIWNZ+Yqj/e2P+vzEVn4d5zIlrUp+ZAdN4rKbx/xRsJ8fK/1Nz5a1LimnofmJUCaLrgH0GHu/dyu5SVScBJ43o/ZdNkrVVtXrScUwbj8tcHpP5eVzm8pgAO1A/Mch/27k8JvPzuMzlMZmrx8dk0T4Clq+fmMbjPI0xwXTGZUzDm8a4jGk444ppVFPM/gPYL8nDktwTOAI4f0TvJUna/thPSJIWYh8hSRMwkhFEVXV7klcB/4fu1pSnVNXlo3gvSdL2x35CkrQQ+whJmoyRrUFUVRcAF4zq9cdou5reMEYel7k8JvPzuMzlMWGH6icG+W87l8dkfh6XuTwmc/X2mIy5j5jG4zyNMcF0xmVMw5vGuIxpOGOJKVU1jveRJEmSJEnSlBrVGkSSJEmSJEnaTpggWkCSfZJ8KskVSS5P8upJxzRpSe6d5ItJ/rMdkzdNOqZpkmSnJF9O8tFJxzINkqxL8pUklyRZO+l4pkWSXZOck+SrSa5M8pRJx6Sls8+Yyz5jYfYXc9lnzGV/sbyGaafT+ZskVyW5NMkTpiCmA5Lc3D4blyR5w4hjWrTtTnKvJGe14/SFJCtHGdNWxPWyJJsGjtVvjjqu9r4LtumTOFZDxDSp47TFdn7cn78hYxrr56+95xbb/lEfp5GtQbQDuB34w6r6UpJdgIuTXFhVV0w6sAn6EfDMqvpeknsAn0vyT1V10aQDmxKvBq4E7j/pQKbIL1bVdyYdxJR5O/DPVfXCdHdm+fFJB6RlYZ8xl33Gwuwv5mefsTn7i+U1TDv9HGC/9vMk4F3t9yRjAvjXqjp0hHEMGqbtPgb4blU9MskRwFuBF09BXABnVdWrRhzLbFtq0ydxrBaLCSZznGDL7fy4P3/DxATj/fzB4m3/SI+TI4gWUFUbqupLbftWug/YXpONarKq87328B7tx0WsgCR7A4cA7510LJpeSR4APAM4GaCq/quqbppsVFoO9hlz2WfMz/5Cw7C/WH5DttOHAae39usiYNcke044prEasu0+DDitbZ8DHJgkUxDX2A3Rpo/9WG3H/cxYP3/TaMi2f6THyQTRENpQwMcDX5hsJJPXhiteAmwELqyq3h+T5q+BPwLunHQgU6SAjye5OMmaSQczJR4GbALe14b9vjfJfScdlJaXfcbd7DPmZX8xP/uMzdlfjNAW2um9gGsHHq9nTAmbRfqOp7SpVf+U5LFjiGWxtvuu41RVtwM3A7tNQVwAv9qm3ZyTZJ9Rx8TibfokjtUw/cy4jxMs3s5P4vM3TN8zzs/fMG3/SI+TCaJFJLkfcC7w+1V1y6TjmbSquqOqVgF7A/snedykY5q0JIcCG6vq4knHMmWeVlVPoBsGeWySZ0w6oCmwM/AE4F1V9Xjg+8Bxkw1Jy8k+Y3P2GZuzv9gi+4zN2V+MyDS204vE9CXgoVX1s8DfAv846nimte0eIq6PAC
ur6meAC7l75M5ITGObPmRMYz1OA6axnV8spnF//ibe9psg2oI2v/Vc4P1V9aFJxzNN2lC3TwEHTTqWKfBU4HlJ1gEfAJ6Z5B8mG9LkVdV17fdG4Dxg/8lGNBXWA+sHrnidQ9cJaAdgn7Ew+4y72F8swD5jDvuLERiinb4OGBxNsXcrm1hMVXXLzNSqqroAuEeS3UcZ08B7L9R233WckuwMPAC4YRwxbSmuqrqhqn7UHr4XeOKIQxmmTR/3sVo0pgkcp5n3XaydH/vnb7GYJvD5G6btH+lxMkG0gDY39GTgyqr6q0nHMw2SrEiya9u+D/Bs4KuTjWryquq1VbV3Va0EjgA+WVW/PuGwJirJfdtii7Rhkb8EXDbZqCavqr4NXJvk0a3oQKDPixjvMOwz5rLPmMv+Yn72GXPZXyy/Idvp84GXpvNk4Oaq2jDJmJI8eGbNmiT7031/G1mCYci2+3zg6Lb9Qrq2bKTrAQ0T16x1WJ5Ht6bTyAzZpo/1WA0T07iPU3vPYdr5cX/+Fo1p3J+/Idv+kR4n72K2sKcCRwFfaXNdAV7XMod9tSdwWpKd6D4cZ1eVt+jVfPYAzmvt6c7AGVX1z5MNaWr8LvD+dHcluAZ4+YTj0fKwz5jLPkPDss+Yn/3F8pq3nQb2BaiqdwMXAAcDVwG3MfpjPkxMLwR+O8ntwA+AI0acjJm37U7yp8DaqjqfLqn190muAm6kS0SM2jBx/V6S59HdHe5G4GVjiGuOKThWi8U0ieM0bzuf5JUwsc/fMDGN+/MH87T94zxOGf3fJ0mSJEmSpGnmFDNJkiRJkqSeM0EkSZIkSZLUcyaIJEmSJEmSes4EkSRJkiRJUs+ZIJIkSZIkSeo5E0SSJEmSJEk9Z4JIkiRJkiSp50wQSZIkSZIk9dz/D24mQN1DAO6HAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 1440x360 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEKCAYAAAARnO4WAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3X2clXWd//HX55wzDMi9gBjgCC4/1wABa5RclDbNtryhWslKy1W3tX67P1ZNozVLsx7tbppbamVrWmpLKUKKlFmaN0mJCTogN+5KKQoq6iwio3iGmfn8/riug2eGM2fO3TXnnLnez8djHpxznevmMwf4nO/5Xt/v52vujoiIDHyJagcgIiL9QwlfRCQmlPBFRGJCCV9EJCaU8EVEYkIJX0QkJpTwRURiQglfRCQmlPBFRGIiVe0Aso0dO9YnT55c7TBEROrGmjVrXnX3cYXsW1MJf/LkyaxevbraYYiI1A0z21LovurSERGJiUgTvpmNMrOlZvaUmW0ys6OjvJ6IiPQu6i6dq4F73H2BmQ0C9ov4eiIi0ovIEr6ZjQTmAWcBuHs70B7V9UREJL8ou3SmAK8APzazJ8zsBjMbGuH1REQkjygTfgp4F3Cdux8BvAH8S8+dzOxcM1ttZqtfeeWVCMMREYm3KBP+VmCruz8aPl9K8AHQjbtf7+7N7t48blxBQ0lFROpCa1uatc+/Rmtbuqx9KiWyPnx3f8nMnjezv3T3/waOBzZGdT0RkVqyvGUbX1y2joZEgj1dXVxx6kzmz55Y9D6VFPU4/IXAYjNbB8wG/jXi64mIVF1rW5ovLlvHW3u62JXu4K09XSxatq5bK76QfSot0mGZ7t4CNEd5DRGRWrN1x24aEgneomvvtoZEgq07djNmWGPB+1SaZtqKiFTYpNFD2NPV1W3bnq4uJo0eUtQ+laaELyJSYWOGNXLFqTMZ3JBgeGOKwQ0Jrjh1ZreWeyH7VJq5e2QnL1Zzc7OreJqIDBStbWm27tjNpNFDek3kheyTj5mtcfeCus5rqlqmiMhAMmZYY59JvJB9KkVdOiIiMaGELyISE0r4IiIxoYQvIhITSvgiIjGhhC8iEhNK+CIiMaGELyISE0r4IiIxoYQvIhITSvgiIjGhhC8iEhNK+CIiof5cX7YaVC1TRIT+X1+2GtTCF5HYq8b6stWghC8isZdZXzZbZn3ZgUQJX0Rirxrry1aDEr6IxF411petBt20FREB5s+eyNypY8taX7bWKeGLiIT6c33ZalCXjohITCjhi4jEhBK+iNSNgT4TNmrqwxeRuhCHmbBRUwtfRGpeXGbCRk0JX0RqXlxmwkZNCV9Eal5cZsJGTQlfRGpeXGbCRk03bUWkLsRhJmzUlPBFpG4M9JmwUVOXjohITCjhi0jN04SrylCXjojUNE24qpxIW/hm9qyZPWlmLWa2OspricjAowlXldUfLfz3ufur/XAdEakTrW3pgkbbZCZcvcXbY/AzE65087Z46tIRkX5VTBeNJlxVVtQ3bR34jZmtMbNzI76WiNSozE3Xzdt3FdVFowlXlRV1C/8Yd99mZgcA95rZU+7+u+wdwg+CcwGampoiDkdE+lt2iz7d2YW5d3u9ry4aTbiqnEhb+O6+LfzzZeAO4Kgc+1zv7s3u3jxu3LgowxGRftbzpmt7Rxfpzu4Jv5AumjHDGpl10Cgl+zJFlvDNbKiZDc88Bj4ArI/qeiJSe3JVuRzckGBQ0tRFUwVRdumMB+4ws8x1furu90R4PRGpMbluugLc/c/H8kZ7p7po+llkCd/d/wzMiur8IlL7MjddF/UYlTN1/PBqhxZLGpYpIpHSTdfaoYQvIpFTlcvaoOJpIlKSzdt3sXT182zevqvaoUiB1MIXkaJdeueT3LLqub3PT5pxIF/7yAy14mucWvgiUpTN23d1S/YAv1z/Ekf/233c1bKtSlFJIZTwRaQoLc+/ln
N7eyeqZFnjlPBFpFe5Fh6ZfdCoXvfPlEmQ2qQ+fBHJqbeqllPHD+fMo5u45ZHn9jlGlSxrm1r4IjFTyHKBfS088rUPH859F8zjyINHdzvutOZJunFbw5TwRWJkecs25n7zfj51w6PM/eb9vd5kzVUDp2d3zaPP/C+PbdnRbZ8lq7eqD7+GKeGLxESuVvtFt6/NOY6+r4VHWtvSXP6LjfsclzRTH34NU8IXiYlcrfb2TufEa1fu09Lva+GRrTt2Myhp+1xjT6f68GuZbtqKxERvlSvbO7q4aOk6pr1jRLeiZvlq4EwaPYSOLt/nXJedMl19+DVMLXyRmMi02nO1zNs7ujjxmoe5q2Vbt5u6vS08kv0NYGhjkkGpBN/46AzOeM/B/fXrSAnMfd9P6Wppbm721atXVzsMkQFt8/ZdnHjtSto79m3tpxKQTCQYlOx7gXEI+vJVBbO6zGyNuzcXsq9a+CIxM3X8cL61YCaDUvv+9+/ognRHYQuMg5YerDclJXwzO7DSgYhI/5k/eyJ3LzwmZ/dONs2cHVhKbeHfWNEoRKTfTR0/nG99bNbekTiNKaOhxweAZs4OLCWN0nH3kyodiIhUTq6+9Vzbeo7E+f3mV/dZjlDdNQNHrwnfzPbPd6C7/2/lwxGRcrS2pVn86HN874GnGZRM7k3aDjnr4kD31ai0HOHA1usoHTN7BnAgVyefu/shlQ5Go3RESre8ZRtfuH0t7Z3d/083phKAk+54e/vghgS//+JxSugDQDGjdHpt4bv7lMqFJCJRaW1Ls+GFnVxwawv7DrSEZMLADejcuy1zM1YJP14K6sM3s78FjiFo8T/s7ndGGpWIFCRTwriry3Mme4DOLif4r/s23YyNpz4Tvpl9H5gK/Czc9DkzO8Hd/ynSyEQkr+xiaL1pSMKVC2YC6GasFNTCPw54p4ed/WZ2M7Ah0qhEpE+ZYmhv9dq2h5995j00TxkDoJuxUlDC3ww0AVvC5weF20QkYvlKF0waPYS3Ojp7OTIok9CQSu59nj0aR+KpkIQ/HNhkZn8k6Ag8ClhtZncBuPv8COMTia3Fq7Zw+YoNNCSNPV1w2SnTOGPOwXs/BIYOSpKvFlYyYeqnl24KSfiXRh6FiHSzeNUWLrlzPcDeYZaX3LGetc+/xl1rX6AhkSDd2UXSjA5yJ/1LT1apYumuz4Tv7g/1RyAiEti8fRdfCZN9T0tWbwXI228PMLQxyYyJIysem9Q3VcsUqSHLW7Zx4jUr+0jnbxvckNin/g0EQzHVnSM9KeGL1IjMMMv2zkLTfeBX/3wsF55wKI0py7kcoUiGljgUiVAxC4RseOF1EjkrmeTWkDSuOHUmU8cPZ+H44Zw+p0nDLiWvkhK+mX3V3b9a4VhEBpTMLNhcBcty7bto6TrSOVah6k3CgrH1GRp2KX0ptUtnTUWjEBlgsmfB9rV6VGbfYpI9wKBkUouTSFFKrYe/otKBiAwkuWbB9lawbOuO3eRaeGpQ0vjpZ+aw/oXXaUwl+OqKDd0qXqoejhSrkFo61+TYvBNY7e7LKx+SSP2bNHoIe7q6t9h7S9Drt+3kjfZ9W/cNqQQNqSRnzQ0K1w5tTKkejpSlkBb+YOAw4Pbw+anAM8AsM3ufu58fVXAi9WrMsEauOHVmnwm6tS3N13+5Mec5eg6t1OIkUq5CEv5MYK67dwKY2XXAwwTlkp/s62AzSwKrgW3ufnIZsYrUvOxROfNnT2TaO0awcvMrjB02mKP/Ysw++/dWAG1QKvfQSt2YlXIUkvBHA8MIunEAhgL7u3unme17B2pf5wGbgBGlhShSH4KRNmtJWoKOrk7mHXoADzz1MpkFqAz4/AmHcvqcpr1JO1fXz6CkcffCY5g6fng//wYy0BUySucKoMXMfmxmNwFPAFea2VDgvnwHmtkk4CTghnIDFallrW1pLlzSQrrDeXNPJ+2dcN+mt5M9BJUHr7r3f/irf7+fu1q2AW93/QxuSOydNPWtj81Ssp
dIFFJL50Yzu5ugSibAl9z9hfDxF/o4/DvAIoKKmzmZ2bnAuQBNTU19BixSix75UyuFjqpMdwRDNOdOHcuYYY3qm5d+02cL38xWAH8N3Ofuy7OSfV/HnQy87O55x+y7+/Xu3uzuzePGjSvk1CI1ZfGqLZx/W0tRx2SGaGaMGdbIrINGKdlLpArp0vkWcCyw0cyWmtkCMxtcwHFzgflm9ixwK3Ccmf1X6aGK1J5MGeOOrt7r0ufyVkenxtBLv+sz4bv7Q+7+j8AhwH8CpwEvF3Dcxe4+yd0nA58A7nf3T5UZr0hVtbalWfv8a7S2pWltS/daxrgv+RYuEYlKQTNtzWwIcArwceBdwM1RBiVSizK1cZJm7Ons4vSjmgouY9xTYyqZc9atSJQKmWm7hOCG7T3Ad4GH3L2of+fu/iDwYAnxiVTV5u27aHn+NSaP2Y9FS9d2K21w0yNb8hyZ355OlUWQ/ldIC/9G4JOZiVcicXHpnU9yy6rnIjn3Zado+UHpf4UMy/y1mc0ws2kEZRYy22+JNDKRKtq8fVfJyd6AOVNGs+qZHd227deYZE+n712MXKS/FdKlcxnBsMxpwN3Ah4CVgBK+DFgtz79W8rE3n3Mk8w49YG930OyDRjF66CCNs5eqK6RLZwEwC3jC3c82s/GAhlfKgHbvppdKPnbCyKBvfur44d1mzCrRS7UVkvB3u3uXmXWY2QiCIZkHRRyXSEUUs8RgZt9frt3Grzf0OfI4p8ENCd5o1+0uqU2FJPzVZjYK+CHBSldtwCORRiVSAcUsMXj9Q3/iil8/BQ4dZQ6R1+gbqVWF3LT9x/DhD8zsHmCEu6+LNiyR8mQvMZgpPbxo2TqmvWMEb7R3dmvxL1q6liWrt5Z0ndmTRvLU9l1alETqQlFLHLr7sxHFIVJRvdWZP/Gah2lMJWnv7OKkww9k8pj9Sk72SYMbzzpy7/V0Q1ZqXUlr2orUulx15t/aEzxv7+wA4OdPFFQHMK/fb36V+bMnKtFLXSikeJpI3elZZ35QKpFzofBydHrQTdTaVsg6QCLV12sL38yOBMa6+696bD8R2N5X2WORasuuM3//pu1cff/mil8jU+ZYLXypB/la+N8Ecq2uvAG4MppwRCprzLBGJo0ewnW/+3NFztfzS8KeLtXEkfqRrw9/uLvvUx3K3beY2dgIYxIpWa5x98ENXKO9iPMMShpmhjucPucgJo8ZyjFTx7LxxddZ1GOop1r3Ui/yJfzReV7br9KBiJSjtS3Ntb99mp+s2kJjKkEX8JWTpjFj4kiGDkqSLnT9QeCaT8zudcnBqeOHazlCqVv5Ev59ZvYN4MsertZgZgZcDtzfH8GJFGJ5yzYuuK2FzKJTb4ajcS65cz3JhOHuFLog1dLPvofmKWOA3kshjBnWqEQvdSlfwr8QuAHYbGaZBTtnAauBz0QdmEghWtvSfOH2tb0m9M4ilh4c3JCgIZWsUGQitafXhO/ubwCfNLNDgOnh5g3uXpm7XyIVsPjR52jvrNxygboBKwNZvmGZ78p6ui38c1Rmu7s/HmVgIn1pbUtz9X3/U/Z59mtI0oXrBqwMePm6dK7K85oDx1U4FpGCtbalWbH2Bcpt3DemEvzg0+9m+oQRSvYy4OXr0nlffwYi0pfMkMtVf27lqnv/h47OUpcQh8akYQnjilNnMu/QcRWMUqR25evSmZfnOHf3hyOIRyiuhntcZEodu3u3hcQLlQDOPPpgjnvneCaMHLxPxUyROMjXpfOFHNscmEmwAIqGM0SgmBrucZFd6rgUZxzVxOc/cKiSu8Revi6dU7Kfm9lc4MvAS8DCiOOKpd5quM+dOjaWySrzTef5/32zqOGV2RoSKNmLhApZxPx44CsErft/dfd7I48qpnLVcI9Lca6e3ViLV23h8hUb6Op0Oso472nNTQP+vRMpVL4+/JOAS4CdBLNtV/ZbVDGVq4Z7HIpzZXdjtXd2ctSU/Xn46daKnPvsuZMrch6RgSBftc
wVwCSgA1hkZndl//RPePHSs4b74IbEgB8bnt2NtSvdQbrDK5bszzy6ianjh1fkXCIDQb4uHQ3LrILsGu5xGEXS21KEpRiUSnDd6Uew4809zD5olJK9SA/5Ev5GYJy7d6uJb2bTgFcijSrm4lSca9LoIexuL6eXHoY2JunsCmbKHj/twApFJjLw5Ev41wLfz7F9DMFondMjiUhi5aIlaylhWD1DUoab7S2BHIdvQyLlypfwp7r773pudPeHzey6CGOSGFj9TCt/f9Nj7Ex3Fn1syuDrHzmc9x12gJK8SBHyrniV57WGSgciA0/PoZatbWke+VMr/373RrbuLH3h70TClOxFSpAv4W82sxPd/e7sjWb2IUAlkiWvzFDLpBntnV28Z8r+/H5zawVuzcLC4/6Pkr1ICfIl/POBX5rZacCacFszcDRwctSBSf3KVQrh4c2VGWrZmEpw+pymipxLJG7ylVZ42swOJ7g5OyPc/BDwWXd/qz+Ck/qS6cLZuXtP3gkepdhvUIIuZ8DPSxCJUt7SCu6eBn7cT7FIHcueLftme0fZdeozGlMJLj1lGjMmaCSOSLnylVbYRVA/Z5+XCMojj4gsKqkbrW1pNrywk0VL15Lu8IpMoDKgIQkLjzuU0+eoFo5IpeTr0ilrmqKZDQZ+BzSG11nq7peVc06pLZlWvUFJNepzGZSEG/7uSKZPGKlEL1Jh+Vr4g4HPAVOBdcCP3L2YKZFp4Dh3bzOzBmClmf3K3VeVFbHUhHJr1PfUfQWqAypyThHpLl8f/s3AHuBh4ERgOnBeoSd2dwfawqcN4U+Fenalv/UcU7/hhddLrlGfbVDS+Oln5tCQSqqPXiRi+RL+NHc/HMDMbgT+WOzJzSxJMKRzKvA9d3+0pCilqnquwjV/1gR+/vg2OspI+I2pBGbBqJvmKWMqGK2I9CZfwt+TeeDuHWZW9MndvROYbWajgDvMbIa7r8/ex8zOBc4FaGrS+Opak2sVriWrt5Z8PgMuPvEw5kwZoxa9SD/Ll/Bnmdnr4WMDhoTPix6l4+6vmdkDwAeB9T1eux64HqC5uVldPjXmNxteorNCYywTBr85f57KFotUSb5ROmUtUm5m44A9YbIfApwAfLOcc0r0svvqz7v1CVZWYIZsKgHJhHHlgllK9iJV1OeatmV4B3Bz2I+fAJa4+y8ivJ6UaXnLNhYtDYZZdnR5WX30GY2pBD88s5npE0ao+0akyiJL+O6+DjgiqvNL5QSTp17n87etpdMr16vWmEpw5YKZzDt0XMXOKSKli7KFL3UgMwIH94ol+1QCzjtes2RFao0SfoxVevKUAZed8k5OmTVRiV6kBinhx0j2DVmAB556mVSi+OG2vWlsSCjZi9QwJfyYyJ489VZHJ52dTkPSSFdgyOV+g5J0uat0sUiNU8KPgVyTp4Cyk31jyrj05OlaRFykTijhx8DWHbuDln1FFhgM+upvPkcVLUXqjRJ+DEwaPYR0R/nJPgE0pIIJVKpoKVJ/lPAHuNa2NF+580naO0tP+Of8VRMnHj5BFS1F6pwS/gC2eNUWvrpiA3vK6Ks/duoYLp1/eAWjEpFqUcIfgFrb0lz726e56ZEtJZ9j7iH7c8EJh6p0scgAooQ/wCxetYVL79pQ8uIkSYOvfWQGZ8w5uMKRiUi1KeHXuezJVLf84Vmuvn9zyecy4MdnH6XaNyIDlBJ+HcueTPVmewflzqFqbEgwfULByxyISJ1Rwq9TvU2mKlVjKqGZsiIDnBJ+HWptS/PAUy/TUebY+iSQTMLC41TZUiQOlPDrzOJVW7h8xQbay+i/MeDiDx3GnEO0rqxInCjh15HFq7ZwyZ3r+94xh3OPncLIIQ0cPGYoR//FGCV5kRhSwq8Tm7fv4svLS0v273/nAXzppGkVjkhE6o0Sfg3KLDkIzoSRQ/jJqi3cXOIkqlTC+OapMysboIjUJSX8GrO8ZRsXLmmhArXOaEgaV31slrpvRARQwq8prW1pFi
1dW1ayTxl88UOH8ZcHDlf5YhHpRgm/hmzdsZt0R3mzp8zgb981SYleRPaRqHYAEti8fRcLf7qm7PMMSiXZumN3BSISkYFGLfwqya6B87UVG1i+9sWiz5Ew6FkjrdN97yLlIiLZlPCrIFMDJ2nGG+2dRR9/2AFD+dLJ05g+YST3rH+Jy1dsoCGZoFMLiYtIHkr4/Sy7Bk6xPjh9PJ85Zkq3GvVnvOdgPjjjwL3fFpTsRaQ3Svj9bMMLr5dUA6cxZXzjo4fnTOhjhjUq0YtIn5Tw+0EwkWonv930Mj95ZEvRtS2TBlcu0Hh6ESmPEn7Elrds46Lb15a8rmzC4Nfnz2Pq+OEVjkxE4kYJP0KtbWkuXLKWjhKWGxzSkKDL4coFM5XsRaQilPAjsnn7Li6+48mik30qYVz+4enMmDBSN2FFpKKU8CustS3Nebe2sHLzq0UdZ8C1nzxCpYtFJDJK+GXI3IwFY7+GBD977HmWPb6t6PMkgO98YjYnz5pQ8RhFRDKU8EtUiaqWCeCz7z2Ezxx7iFr1IhI5JfwStLal+fxtLZSxyiDJhPHr847VDVkR6TcqnlaCnz++taxk35hK8O3TZinZi0i/Ugu/CK1taU77zz/wp1feLPkc/1ddOCJSJZElfDM7CLgFGA84cL27Xx3V9aK0+plWfvjwM/x64/aSz5FMGF/78HTOmHNwBSMTESlclC38DuBCd3/czIYDa8zsXnffGOE1K6q1Lc05Nz3G2q07Sz7Hv310BhNHD9HqUyJSdZElfHd/EXgxfLzLzDYBE4G6SPjLW7Zx4W0tlLoAVdLg2x+fzfzZEysbmIhIifqlD9/MJgNHAI/2x/XKlZk8VaqTDz+Qyz88Qy16EakpkSd8MxsGLAPOd/fXc7x+LnAuQFNTU9Th5JWZSHX2jx4r+RwNSVOyF5GaFGnCN7MGgmS/2N1/nmsfd78euB6gubm5vBW8y7C8ZRufX7KWzhIKnQEMThmOceUCrTglIrUpylE6BtwIbHL3/4jqOpXQ2pbm/FtbKCXVn/e+Qzhz7iFacUpEal6ULfy5wKeBJ80s0yH+JXe/O8JrFq21Lc3ffPuhopO9GVyddVNWiV5Eal2Uo3RWEhSBrDmtbWm27tjN+m07+cry9RTTi5MA/vWjMzhh+oFK8iJSV2I303bxqi1cdtd63Cm6PMKsicNZvnBeNIGJiEQsVgl/8aotXHLn+pKObUjAj86eU+GIRET6TywSfmtbmkf+9GrJyT6VgKtOm60uHBGpawM+4S9etYWv3LmeYsrWD07CiTMncOTk/Zk4ej+mTxihZC8idW9AJ/xSu3B+f/H7leBFZMAZkAk/6MJpLSnZf+MjmiUrIgPTgEv4y1u2cdHta9lTxBAcAxpSCS47ZZrKF4vIgDVgEn5QB+d1LlrSwp4i15m9+ZwjVb5YRAa8AZHwl7ds44vL1vFWEZm+IVzc8arTZjPv0AMiikxEpHbUfcJvbUuzaOk60h2FJfvGVIJLT57GjIkjVftGRGKl7hP+4kefKyjZJ4ALTjiU0+c0KcmLSCzVdcJvbUvzvQeezrtPAvisFg4XEanvhL91x24GJZOkOzpyvj570khuPOtIJXoREeo84U8aPYQ9Xd27c1IGZx8zmb+ZdiDNU8ZUKTIRkdqTqHYA5RgzrJErTp3J4IYEwxtTDG5I8B8fn80lJ01XshcR6aGuW/gA82dPZO7UsVpxSkSkD3Wf8CFo6SvRi4jkV9ddOiIiUjglfBGRmFDCFxGJCSV8EZGYUMIXEYkJJXwRkZgw98IXComamb0CbKl2HHmMBV6tdhA1SO9L7/Te9E7vTW7Fvi8Hu/u4QnasqYRf68xstbs3VzuOWqP3pXd6b3qn9ya3KN8XdemIiMSEEr6ISEwo4Rfn+moHUKP0vvRO703v9N7kFtn7oj58EZGYUAtfRCQmlPD7YGYHmdkDZrbRzDaY2XnVjqlWmNlgM/ujma0N35vLqx1TLTGzpJk9YWa/qH
YstcTMnjWzJ82sxcxWVzueWmJmo8xsqZk9ZWabzOzoSp5/QJRHjlgHcKG7P25mw4E1Znavu2+sdmA1IA0c5+5tZtYArDSzX7n7qmoHViPOAzYBI6odSA16n7trDP6+rgbucfcFZjYI2K+SJ1cLvw/u/qK7Px4+3kXwH3hidaOqDR5oC582hD+6KQSY2STgJOCGasci9cHMRgLzgBsB3L3d3V+r5DWU8ItgZpOBI4BHqxtJ7Qi7LVqAl4F73V3vTeA7wCKgq68dY8iB35jZGjM7t9rB1JApwCvAj8OuwBvMbGglL6CEXyAzGwYsA85399erHU+tcPdOd58NTAKOMrMZ1Y6p2szsZOBld19T7Vhq1DHu/i7gQ8A/mdm8agdUI1LAu4Dr3P0I4A3gXyp5ASX8AoT908uAxe7+82rHU4vCr54PAB+sdiw1YC4w38yeBW4FjjOz/6puSLXD3beFf74M3AEcVd2IasZWYGvWt+SlBB8AFaOE3wczM4I+tU3u/h/VjqeWmNk4MxsVPh4CnAA8Vd2oqs/dL3b3Se4+GfgEcL+7f6rKYdUEMxsaDn4g7K74ALC+ulHVBnd/CXjezP4y3HQ8UNHBIRql07e5wKeBJ8O+aoAvufvdVYypVrwDuNnMkgSNhyXuriGIks944I6gHUUK+Km731PdkGrKQmBxOELnz8DZlTy5ZtqKiMSEunRERGJCCV9EJCaU8EVEYkIJX0QkJpTwRURiQglf8jKzs8xsQrXjyCeM8bt97PM5Mzszx/bJZlbUOHAz+5mZrTOzC4qNtcjrNJjZv5vZ02b2uJk9YmYfCl971syWZe27wMxuiiiOj4WVGx8ws2Yzuybc3uf7LrVF4/ClL2cRTIx5ocpxlMXdf1CJ85jZgcCR7j41x2spd++oxHVCXyeY6zDD3dNmNh54b9br7zazaeVWbg0nF5q791b35++Bf3D3leFzlTSuU2rhx5CZfSqsY99iZv8ZFkBLmtlNZrY+rFV+gZktAJoJJoK0hLNpezvnUWEL9Akz+0NmtqCZrTKz6Vn7PRi2EseZ2b1hHf0bzGyLmY3tI+4HzezqMJb1ZrbPlPywxX5/2AL/rZk1hdu/amYXhY/fHdbwXwv8U9axvzOz2VnQD22HAAAEa0lEQVTPV5rZrB6X+A0wMYzh2DCm74R13c/Lc/2bzOy68P34s5n9tZn9KGw535Tj99gP+AdgobunAdx9u7svydrtKuCSPt6zs8xseRjn02Z2Wdb79N9mdgvBB/pBZvbJ8O9+vZl9M9zvUuAY4EYzuzKMe5/JdeHf5zIzeyz8mZsvLqkSd9dPjH6AdwIrgIbw+feBM4F3E1S7zOw3KvzzQaC5gPOOAFLh4/cDy8LHFwCXh4/fAfx3+Pi7wMXh4w8SVFAc28c1HgR+GD6eB6wPH58FfDd8vAL4u/DxOcCd4eOvAheFj9cB88LHV2ad5++A74SPDwVW54hhcmb/rJi+n/W8t+vfRFBXx4APA68DhxM0utYAs3tcZybwRJ734lmCWaubgKnAAuCmHPudBbwIjAGGECT35vD36ALeE+43AXgOGEfwzf9+4CM9/w0Afw38Isf7/lOComgATQSlSKr+710/3X/Uwo+f4wmS+2NhqYjjgUMIpnEfYmbXmtkHCRJSMUYCt4f94d8GMq36JQTJCOA0goJQELQabwXwYGr9jgKv87PwmN8BIyys5ZPlaILkA/CT8Dp7hfuPCo/P7JNxO3CyBcXyziFI0oW4rcDrr/AgIz4JbHf3Jz3oRtlAkICL1UnwgXVxH/vd6+6t7r4b+HlWTFv87cVqjgQedPdXPOiWWkzwoVqo9wPfDf9N3UXwdzOsiOOlH6gPP34MuNnd90kSYffF3wCfI0jO5xRx3q8DD7j7Ry1YN+BBCCojmlmrmc0EPh6euxw9a4FUrDaIu79pZvcStMBPI/hgLMQbBe6XDv/synqced7z/+JmoMnMRnj+ctw/IUj4+W489/aeFRp3IR
IE3xbequA5pcLUwo+f3wILzOwAADPb38wODvvPE+6+DPgyb5dl3QUML+C8I4Ft4eOzerx2G8FiICPdfV247fcESRUz+wAwusD4Px4ecwyw09139nj9DwQVKgHOAB7OftGDMs6vhcdn9sl2A3AN8Ji7F/qto+DrF8rd3ySo0nq1BYW0Mv3kH+ux3x6Cb1T5RgydEP49DwE+QvDe9/RH4L1mNtaCYnifBB4qIuTfEBT+Iox1dp59pUqU8GPGgxEdXyZYcWgdcC9B3/pE4MHwK/l/8XY3wU3ADzI3bc3sa2Y2P8eprwD+zcyeYN/W6lKCJJh9w/Fy4ANhF9DHgJcIPlwws7ut96Ggb4XX+AHB6JGeFgJnh7/bpwnWle3pbOB74e9q2S94sGjJ68CPe7l+Xwq5fqG+TLAC0sbwffoFubvabiT/t/U/EqznsI7g3so+o2zc/UWCxTYeANYCa9x9eRGx/jPQHN6s3kj53+QkAqqWKVVhZo1Ap7t3mNnRBKv85G0VmtmDBDdeIxsWGH7QPAgc5r0PU6wbZnYWwQ3X/1ftWKT61Icv1dIELDGzBNBOMASxqiyYmPUN4PMDIdmL9KQWvohITKgPX0QkJpTwRURiQglfRCQmlPBFRGJCCV9EJCaU8EVEYuL/AwAdTVsfmGjjAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compare the ploidy estimated from the CN profiles (est_ploidies) with the PICNIC values (GDSC_Ploidies).\n",
"# Names are prefixed with hist_/ploidy_ so the loop does not overwrite unrelated notebook globals.\n",
"hist_panels = [\n",
"    (est_ploidies[\"avg_pl\"], \"est. avg. ploidy from CN profile\"),\n",
"    # hist() bins the values itself, so the former sorted(list(...)) copy was unnecessary\n",
"    (GDSC_Ploidies[\"average_ploidy\"].values, \"PICNIC avg. pl.\"),\n",
"    (est_ploidies[\"median_pl\"], \"est. median ploidy\"),\n",
"]\n",
"# One row of three histograms, same 20x5 inch canvas as before.\n",
"ploidy_fig, ploidy_axes = plt.subplots(1, 3, figsize=(20, 5))\n",
"for hist_ax, (hist_values, hist_title) in zip(ploidy_axes, hist_panels):\n",
"    hist_ax.hist(hist_values, bins=30)\n",
"    hist_ax.set_title(hist_title)\n",
"    hist_ax.set_ylabel(\"count\")\n",
"\n",
"# DataFrame.plot.scatter is called without ax=, as before; the stored output shows it as a separate figure.\n",
"tmp = df_ploidies.plot.scatter(x = \"est. avg. ploidy from CN profile\",y=\"PICNIC avg. pl.\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEcCAYAAADN+K/qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3X+cXFV9//HXmyQmQGJQwCAQiT+oBoP8SL5gMUKWoFKgylexNv7EpqbxYbdY6ENi0wqkphKrVgtKFSMJgonID8FQ8gXLrjRYwUSCCEuLhYBAEEEICVBM4uf7xzkbJrOzO7OZ2Z3ZO+/n4zGPnZl75tzPnJ35zL3nnnuuIgIzMyue3ZodgJmZDQ0neDOzgnKCNzMrKCd4M7OCcoI3MysoJ3gzs4Jygh8hJC2T9Nlmx9FsA7WDpNMlrRnumFqVpFmSHi55fLekWU0MCUlTJIWk0TWU/VtJ3xxg+QZJJzQ2wmKp2si2M0kbgEnAdmAr8GNgfkT8qplxlZIUwMER8ctmx9JuWrntI+KNzY5hMCLiH5sdw0jnLfhd88cRMR54JfBr4IImxzNklPhzYjYC+Ytbh4j4X+BK4JDe5yRNlHSppN9IelDS3/UmSEkXSbqqpOwSSf+ek+gsSQ/n3dIn8u7nB/pbt6SPSfqlpN9Kuk7S/vn5W3KROyVtkfS+Cq8dJemLeT0PSPrL0t1mSd2SFku6FXgOeI2k/fN6fpvX+7GS+nbqNqnQNbBB0qcl3SPpKUmXSBpXsvwUSeslPS3px5LeVLLsCEk/k7RZ0neBHa/rv2l0oaRNku6VNDs/+V5J68oKninp2n4qmShpqaSNkh6R9FlJo/Ky10n6UV7HEzmumtq+wnqWSfqapBvya26VtJ+kL+e2ulfSESXl95d0Vf58PSDpr0qW7Z7re0rSPcD/KVvXji4NSUdJ+s/c5htzm72kpGxImi/pvlzmq5JU7f3k13ZL+pyk2yU9I+laSS/vp+xAn6tzJV1W8vhDSt+pJyUtLHl+P0nPSdq75LkjcxuNqSXmwooI3wZxAzYAJ+T7ewDLgUtLll8KXAtMAKYA/w3MLSn/38DpwFuBJ4AD87JZwDbgS8BY4DjgWeD1efky4LP5/vH5tUfmshcAt5TEEMDrBngP84F7gAOBlwE/zK8ZnZd3Aw8BbyR1440BbgG+RkqwhwO/AY4vj63kvTxc1ma/ACYDLwduLXkvRwCPA0cDo4CP5PJjgZcADwJ/nWM4jdQt9tl+3tfpuQ17y78P2JTXORb4LTC1pPwdwHv6qesa4OvAnsArgNuBv8jLVgALSRtI44CZtbZ9hfUsy//L6bmum4EHgA/n9vgs0JXL7gasAz6T2+Y1wP3AO/Ly84H/yO93cm7z8v9D72d3OvDm/P+dAvQAnyx7H6uAvYBX5f/3iTW+p27gEWBabr+rgMvysins/Fkb6HN1bsnrDgG2AMfm/+WX8v+69/38G/Dxkhj+Gbig2fmi2bemBzDSbvlLsgV4OiebR4FD87JRwO+AQ0rK/wXQXfL46JxoHgTmlDw/K39g9yx57grg7/P9ZbyYFJcCny8pNz7HMiU/rpbgbyYnq/z4BPom+EUlyyeTjjlMKHnuc8Cy8thK3kt5Yplf8vgk4H/y/YuAfyiL779IP3DH5vZVybIfM3CCLy9/O/ChknUtzvffCDwFjK1QzyTgBWD3kufm8GKivRT4BvnHuey1u5LgLy553An0lDw+FHi65LPzUNnrPw1cku/fT0kSBuZV+D+c0E8cnwSuKXsfpT9cVwALanxP3cD5JY8PIX0vRlGS4Gv4XJ3Liwn+M8DKknJ75jp7E/z7gFtLvoePAUfV810vws1dNLvm1IjYi7TV8ZfAjyTtB+xD2nJ8sKTsg8ABvQ8i4jbSF1GkL02ppyLi2bLX7l9h/fuXriMitgBPlq6niv2B0oPClQ4Qlz63P/DbiNhcFlut6yuvr/
R9HQSclbsBnpb0NOmLv3++PRL5W1vy2oFUKt+7ruXA+3NXw4eAKyLihQp1HET6P24sienrpC15gE+R/n+3K41M+bMqMVXz65L7z1d4PL4krv3L2upvST9I0Pf/2m9bSfoDSaskPSbpGeAfSZ/fUo+V3H+uJI5alMcxpkL9g/lc7fTe8vfkyZLl1wKHSHo18DZgU0TcPoh4C8kJvg4RsT0iriZthcwk7WpvJX0Re72KtLsKgKRPkHYxHyUlilIvk7Rn2WsfrbDqR0vXkV+zd+l6qthI6p7pNblCmdIk+SjwckkTymLrXd+zpO6nXvtVqK90HaXv61ekreq9Sm57RMSKHOcBZX2/r+rvTWWVyj8KEBE/IW31vRV4P/Dtfur4FWkLfp+SmF4aeRRKRDwWER+LiP1Je2hfk/S6KnENmqRu4L1lcT1Q1lYTIuKkvHwjfdu5PxcB95JG/LyU9ENRUx97jcrj2Er6fpSq9rkqtdN7k7QH6TMP7DgedgXwQdKPd3//27biBF8HJe8i9WP3RMR20odssaQJkg4CzgQuy+X/gNSn2vsh/JSkw8uqPU/SSyS9FTgF+F6FVa8APirpcEljSVtft0XEhrz816T+2f5cAZwh6QBJewFnD/Q+Iw0B/THwOUnjlA6Czu19X8B64CRJL897Mp8E9pP0vKQtpC/m5/JW48uBlaTkAnAx8HFJKyU9lA80bpT0VdLxim3AE5IelzQHOCq35Z/nBEh+HKQt7FcAfyXpEEn/STrQ+FVJP5d0JumLfyGwNSIqjpmPiI3AjcAXJb1U0m6SXivpuLyu90rq/YF8ivRj+Pv8uFrb1+N2YLOks/MB1VGSpknqPZh6BfBpSS/L8XUOUNcE4Blgi6Q3AB9vcKwfzP+DPYBFwJX5+7FDDZ+rUlcCp0iamQ8GL6Jv/rqU1E33TpzgASf4XfWDnLieARYDH4mIu/OyTtIW7f3AGuA7wLeURqhcBiyJiDsj4j7SVtO3c5KGtEv8FGnL5nJSv3VvItwhIn4I/D3p4NVG4LXAn5YUORdYnnfj/6RC/BeTEtjPSQca/42USLdXKNtrDqn/9FHSAchzchyQvkx3kvp4bwS+m5/vHU76KGmL+Me5XZ4Hrs9lfp7b649IB/Q2Az8lHac4Ang3qWtgX1J/89UDxAhwG+kA4t3AG4D35y3U9wIzgO+TDv5VSiKlPkw6kHkP6X9yJWlYLKQfjdvyZ+A64IyIuD8vO5eStpf0qvyjVW3Po6qcIE8hHYx8gLRF/E1gYi5yHqmL4wHS/2GgJPc3pL2YzaTPw3cHKLsTSW/N730g3yYdX3iM1JX5V/2UG+hztUP+fn2C9H3aSPqfPFxW5lbSD+3PIqJaV157aPZBAN/SjbIDk8O87j8CHmxwnRt48QDYBtLW5ar8uBv483z/z0lbveOr1LWAlPT3Knldd0mZHQc3Scn7+n7q2p2U1A4uee5lpBEjvyEljlW8OLrpfcDasjr+Grgu398b+AHpx/6npD20NTW20emkEUUXkkb73AvMLlm+o51G0q2ZcZMGEIy4Nhuqm7fg21DevT9J0mhJBwDnkLaehsooUtfKHRWWnQCsjnSgeCBrSYnjb2pY3wmkLe5KPg78NNIeVK/dgEtIxzVeRdrDuDAv+wHwekkHl5R/P2lLEuCrpD2Q/UhDPD9SQ3yljgb+h3QA8hzgavUzZtwGlruqjmQQeyNF5wTfnkTanX+KlHR7SMPQGu37eaTHK0ldMZVOPd+btMtdi88AnZL2rVKuYp1K00ycAZxV+nxEPBkRV0XEc5FGdCwmDdMkIp4jjdCYk+s4mNT1c53SiU/vIXUrPBcR95BG6gzG48CXI2JrRHyXNET05EHW0fYkLSedz/HJ2HlUTlvzXDQtIiK62Xlky1Cu6znKznIcIqdGhf7UMk/yYt/2gCLiF5JWkbpregZbZ0RMqVQ4Hwj8Z+BEUncNwARJoyL1e38H+CLpwN77ge9HxHP5gPJoqg85Hc
hAwzpHpIiY1YR1DnbPqS14C96a7YfAO8qGhw7kHOBjDDwG/4ekLetanQW8Hjg60gHZY/PzvcMGbwL2zSOe5vBi98xvSAenqw05HUi/wzrN6uUEb832bdJW71WS3pCHJO6tNCfPSeWFI83S+F36H5UB6UfgGEn/lLeye+ePuSwPCy03gdTv/nTu/z6nbJ1bScNV/4k0DcBN+fntpFE950raIw83/PCg3v2LwzrHSHovMJU0qsmsbk7w1lSRziQ9gTSC5CbSaJTbSQcdb+vnZYtIp6r3V+f/AH9IGn53t6RNpCGla0kjaMp9mTS65gngJ8DqCmW+k+P8XkRsK3n+L0nDFB8j/VitIA0JBXbMwd7vpHGk93hwXvdi4LSIeLK8UI1DE812op27/8ysHpKWAPvV0ics6XTSkL6ZQx6YtSVvwZvVIXcrvSmf1XwU6UzMoRxyalYzj6Ixq88EUrfM/qQTtr5IGlZp1nTuojEzKyh30ZiZFZQTvJlZQQ1JH/w+++wTU6ZMGYqqB+XZZ59lzz1rPX+mPbhN+nKb9OU2qaxV2mXdunVPRES1KTuGJsFPmTKFtWvXDkXVg9Ld3c2sWbOaHUZLcZv05Tbpy21SWau0i6SapkOuqYtG0l6SrlS6wnuPpD+sLzwzMxtqtW7Bf4U0petp+Woqe1R7gZmZNVfVBC9pImnypdMBIuJ3pOtamplZC6uli+bVpFnzLpF0h6RvDmLmPzMza5KqJzpJmkGagOktEXGbpK8Az0TE35eVmwfMA5g0adL0lStXDlHItduyZQvjx49vdhgtxW3Sl9ukL7dJZa3SLh0dHesiYka1crUk+P2An/ReLEHSW4EFEdHvVWdmzJgRHkXTmtwmfblN+nKbVNYq7SKppgRftYsmIh4DfiXp9fmp2aQrzZuZWQurdRRNJ3B5HkFzP/DRoQvJzMwaoaYEHxHrgaq7A2ZmrejQ5Yc2rrLBXla9grs+clf9ldTA0wWbWeFt7jmfDef3e9iwZo3og5+y4Pq646iVJxszMysoJ3gzs4JygjczKygneDOzgnKCNzMrKCd4M7OCcoI3MysoJ3gzs4LyiU5m1hYadoLR6vrqmbj7mMbEUQMneDMrvEacxQrpR6JRdQ0Hd9GYmRWUE7yZWUE5wZuZFZQTvJlZQTnBm5kVlBO8mVlBOcGbmRWUE7yZWUH5RCczM0BSbeWWDLw8IhoQTWN4C97MjJSYq926urqqlmklTvBmZgXlBG9mVlBO8GZmBeUEb2ZWUE7wZmYF5QRvZlZQTvBmZgXlBG9mVlA+k9WsDdV61mY1rXZij+3MW/Bmbaja2ZgHnb2qpjM7rbU5wZuZFZQTvJlZQTnBm5kVlBO8mVlBOcGbmRWUE7yZWUHVNA5e0gZgM7Ad2BYRM4YyKDMzq99gTnTqiIgnhiwSMzNrKHfRmJkVlGo5G03SA8BTQABfj4hvVCgzD5gHMGnSpOkrV65scKiDt2XLFsaPH9/sMFqK26Qvt0lfp69+lmUn7tnsMFpOq3xWOjo61tXSVV5rF83MiHhE0iuAmyTdGxG3lBbISf8bADNmzIhZs2YNNuaG6+7uphXiaCVuk77cJhWsvt5tUsFI+6zUlOAj4pH893FJ1wBHAbcM/Coza4bDzruRTc9vrbueKQuur7uOibuP4c5z3l53PbZrqiZ4SXsCu0XE5nz/7cCiIY/MzHbJpue3suH8k+uqo1Fbqo34kbBdV8sW/CTgmjy96GjgOxGxekijMjOzulVN8BFxP3DYMMRiZmYN5GGSZmYF5QRvZlZQTvDWtlasWMG0adOYPXs206ZNY8WKFc0OyayhfE1Wa0srVqxg4cKFLF26lO3btzNq1Cjmzp0LwJw5c5ocnVljeAve2tLixYtZunQpHR0djB49mo6ODpYuXcrixYubHZpZwzjBW1vq6elh5syZOz03c+ZMenp6mhSRWeO5i8ba0tSpU1mzZg0dHR07nluzZg1Tp05tYlSNMWHqAg5dvq
D+ipY3IhaA+k66sl3nBG9taeHChcydO3dHH3xXVxdz584tRBfN5p7zfSarAU7w1qbmzJnDsmXLmD17NhGBJN72trf5AKsVivvgrS11dnZy880384UvfIEbbriBL3zhC9x88810dnY2OzSzhnGCt7Z08cUXs2TJEs4880zGjRvHmWeeyZIlS7j44oubHZpZw7iLxtrSCy+8wPz583d6bv78+Zx11llNiqixGtL3vbox0wVb8zjBW1saO3Ys8+bNY/369fT09DB16lQOP/xwxo4d2+zQ6lbvAVZIPxCNqMeay1001paOO+44Lr/8co499liuvfZajj32WC6//HKOO+64Zodm1jDegre29Mgjj3DqqafyrW99i4suuoixY8dy6qmnct999zU7NLOGcYK3ttTT08Mdd9zBmDFjdoz53rp1K+PGjWt2aGYN4y4aa0u9Z7KWKsqZrGa9nOCtLfWeydrV1cW2bdt2nMm6cOHCZodm1jDuorG21HvGamdn545RNIsXL/aZrFYoTvDWtubMmcOcOXMaNu+KWatxgrfCk9SQeiKiIfWYDRf3wVvhRcSAt4POXlW1TNGSu6QBbw8uOaVqmUb9cNrQcYI3a0PVfsy6urra7keviJzgzcwKygnezKygnODNzArKCd7MrKCc4M3MCsoJ3sysoJzgzcwKygnezKygnODNzArKCd7MrKCc4M3MCsoJ3sysoJzgzcwKquYEL2mUpDskrRrKgMzMrDEGswV/BtAzVIGYmVlj1ZTgJR0InAx8c2jDMTOzRql1C/7LwKeA3w9hLGZm1kBVr8kq6RTg8YhYJ2nWAOXmAfMAJk2aRHd3d6Ni3GVbtmxpiThaidukMrfJzvw5qWyktYuqXXZL0ueADwHbgHHAS4GrI+KD/b1mxowZsXbt2kbGuUu6u7uZNWtWs8NoKW6TvqYsuJ4N55/c7DBaij8nlbVKu0haFxEzqpWr2kUTEZ+OiAMjYgrwp8DNAyV3MzNrDVW7aMxa2WHn3cim57fWXc+UBdfXXcfE3cdw5zlvr7ses0YZVIKPiG6ge0giMdsFm57fWnf3SqN2uxvxI2HWSD6T1cysoJzgzcwKygnezKygnODNzArKCd7MrKCc4M3MCsoJ3sysoHyik41oE6Yu4NDlC+qvaHkjYoE06apZa3CCLxhJDamn2hxFrWJzz/k+0cmsH+6iKZiIGPB20NmrqpYZKcndzAbmBG9mVlBO8GZmBeUEb2ZWUE7wZmYF5VE0NuI1ZPTK6sbMB2/WSpzgbURrxKX2fMk+Kyp30ZiZFZQTvJlZQTnBm5kVlBO8mVlBOcGbmRVUIRP8ihUrmDZtGrNnz2batGmsWLGi2SGZmQ27wg2TXLFiBQsXLmTp0qVs376dUaNGMXfuXADmzJnT5OjMzIZP4bbgFy9ezNKlS+no6GD06NF0dHSwdOlSFi9e3OzQzMyGVeESfE9PDzNnztzpuZkzZ9LT09OkiMzMmqNwCX7q1KmsWbNmp+fWrFnD1KlTmxSRmVlzFC7BL1y4kLlz59LV1cW2bdvo6upi7ty5LFy4sNmhmZkNq8IdZO09kNrZ2UlPTw9Tp05l8eLFPsBqZm2ncAkeUpKfM2dOw661aWY2EhWui8bMzBIneDOzgnKCNzMrKCd4M7OCKuRB1qI67Lwb2fT81rrracQl7ibuPoY7z3l73fUMB0nVyyypXk9ENCAas+HjBD+CbHp+a92XlmvUyKKGXAd1mFRLzB5tZUXlLhozs4JygjczK6iqCV7SOEm3S7pT0t2SzhuOwMzMrD619MG/ABwfEVskjQHWSLohIn4yxLGZmVkdqib4SEeotuSHY/LNwwnMzFpcTX3wkkZJWg88DtwUEbcNbVhmZlYvDWZsr6S9gGuAzoj4RdmyecA8gEmTJk1fuXJlI+PcJVu2bGH8+PHNDqNhTl/9LMtO3LOuOhrVJo2IpVUU7XPSCG6TylqlXTo6OtZFxIyqBSNiUDfgM8DfDFRm+v
Tp0Qq6urqaHUJDHXT2qrrraFSbNCKWVlG0z0kjuE0qa5V2AdZGDfm6ah+8pH2BrRHxtKTdgbcBNZz3Z402YeoCDl2+oP6KljciFoD6Troys6FVyyiaVwLLJY0i9dlfERGrhjYsq2Rzz/k+k9XMalbLKJqfA0cMQyxmZtZAPpPVzKygnODNzArKCd7MrKCc4M3MCsoJ3sysoHzBjxGmIcMTVzfmik5m1tqc4EeQesfAQ/qBaEQ9Ztb63EVjZlZQhUzwnZ2djBs3jo6ODsaNG0dnZ2ezQzIzG3aF66Lp7Ozkwgsv3PH4hRde2PH4ggsuaFZYZmbDrnBb8L3J/JhjjuF73/sexxxzzE7Pm5m1i8IleICjjz6aW2+9lX322Ydbb72Vo48+utkhmZkNu0Im+He84x0DPjYzawcjtg9eUr/LFi1axKJFi2p+TQziqlZmZiPFiN2C7+8KJpMnT65YfvLkyQNdpcrMrHBGbILvz0MPPdQnyU+ePJmHHnqoSRGZmTVH4RI8pCQfERx09ioiwsndzNpSIRO8mZk5wZuZFZYTvJlZQTnBm5kVlBO8mVlBjdgTnayygU4A21FmSfV6fH6A2cjnLfiC6e9krt5bV1dX1TJO7mbF4ARvZlZQTvBmZgXlBG9mVlBO8GZmBeUEb2ZWUE7wZmYF1bLj4A8770Y2Pb+17nqmLLi+rtdP3H0Md57z9rrjMDMbbi2b4Dc9v5UN559cVx3d3d3MmjWrrjrq/YEwM2sWd9GYmRWUE7yZWUE5wZuZFZQTvJlZQTnBm5kVVNUEL2mypC5J90i6W9IZwxGYmZnVp5ZhktuAsyLiZ5ImAOsk3RQR9wxxbGZmVoeqCT4iNgIb8/3NknqAA4AhTfATpi7g0OUL6q9oeb1xANQ3Ht/MrBk0mIs7SJoC3AJMi4hnypbNA+YBTJo0afrKlSvrCuz01c+y7MQ966pjy5YtjB8/vulxtJJGtEnRuE36cptU1irt0tHRsS4iZlQtWMvVffKPwHhgHfDuamWnT58e9Tro7FV119HV1dUScbSSRrRJ0bhN+nKbVNYq7QKsjRrydk1TFUgaA1wFXB4RV+/6787gNGSagNX1z0VjZjYSVU3wSldxXgr0RMSXhj6kpN55aCD9QDSiHjOzkaiWcfBvAT4EHC9pfb6dNMRxmZlZnWoZRbMG0DDEYmZmDeQzWc3MCsoJ3sysoJzgzcwKygnezKygnODNzArKCd7MrKCc4M3MCsoJ3sysoJzgzcwKygnezKygnODNzArKCd7MrKBqmg++FaVZjGsot6R6mRjEVa3MzEaKEbsFX8vVTLq6umq9WpWZWeGM2ARvZmYDc4I3MysoJ3gzs4JygjczKygneDOzgnKCNzMrKCd4M7OCcoI3MysoDcWJPpJ+AzzY8IoHbx/giWYH0WLcJn25Tfpym1TWKu1yUETsW63QkCT4ViFpbUTMaHYcrcRt0pfbpC+3SWUjrV3cRWNmVlBO8GZmBVX0BP+NZgfQgtwmfblN+nKbVDai2qXQffBmZu2s6FvwZmZta8QneEmTJXVJukfS3ZLOqFBGkv5F0i8l/VzSkc2IdbhIGifpdkl35jY5r0KZsZK+m9vkNklThj/S4SdplKQ7JK2qsKzt2kTSBkl3SVovaW2F5W313QGQtJekKyXdK6lH0h+WLR8xbTLiEzywDTgrIg4B3gx8QtIhZWX+CDg43+YBFw1viMPuBeD4iDgMOBw4UdKby8rMBZ6KiNcB/wzUcO2rQjgD6OlnWbu2SUdEHN7P8L92++4AfAVYHRFvAA6j7+dlxLTJiE/wEbExIn6W728m/TMOKCv2LuDSSH4C7CXplcMc6rDJ73NLfjgm38oPtrwLWJ7vXwnMVq3XQRyhJB0InAx8s58ibdcmNWir746kicCxwFKAiPhdRDxdVmzEtMmIT/Cl8i71EcBtZYsOAH5V8vhh+v4IFEruilgPPA7cFBH9tklEbAM2AXsPb5TD7svAp4Df97
O8HdskgBslrZM0r8LydvvuvBr4DXBJ7sr7pqQ9y8qMmDYpTIKXNB64CvhkRDzT7HiaLSK2R8ThwIHAUZKmNTumZpJ0CvB4RKxrdiwtZmZEHEnqdviEpGObHVCTjQaOBC6KiCOAZ4EFzQ1p1xUiwUsaQ0rul0fE1RWKPAJMLnl8YH6u8PLuZRdwYtmiHW0iaTQwEXhyeKMbVm8B3ilpA7ASOF7SZWVl2q1NiIhH8t/HgWuAo8qKtNt352Hg4ZI93itJCb/UiGmTEZ/gcx/pUqAnIr7UT7HrgA/no99vBjZFxMZhC3KYSdpX0l75/u7A24B7y4pdB3wk3z8NuDkKfFJERHw6Ig6MiCnAn5Le7wfLirVVm0jaU9KE3vvA24FflBVrq+9ORDwG/ErS6/NTs4F7yoqNmDYZ3ewAGuAtwIeAu3KfM8DfAq8CiIh/Bf4NOAn4JfAc8NEmxDmcXgkslzSK9CN+RUSskrQIWBsR15F+FL8t6ZfAb0lJr+20eZtMAq7Jx5FHA9+JiNWS5kPbfncAOoHLJb0EuB/46EhtE5/JamZWUCO+i8bMzCpzgjczKygneDOzgnKCNzMrKCd4M7OCcoK3ppB0uqT9h3mdUyT9It+fIelfhnHdsyrNYFlW5p2SKp41KWlLpefNBlKEcfA2Mp1OOqnm0WasPCLWAn2mx22mPBb/umbHYcXhLXhrGEkfzPPQr5f09Tzh2ShJyyT9Is87/teSTgNmkE4mWZ/Ptu2vznMlLZf0H5IelPRuSZ/Pda3O01QgabqkH+VJs/5f7+x++fk7Jd0JfKKk3h1b1JKOkvSfeXKpH/eexZj3Mq7O67lP0udraINlkv5V0lpJ/53nwCkv83JJ31eaS/wnkt5Usr4L8/1X55jukvTZktdeKunUkseXS3pXtbisPTnBW0NImgq8D3hLnuRsO/AB0nz0B0TEtIg4FLgkIq4kbT1/IM9D/nyV6l8LHA+8E7gM6Mp1PQ+cnJP8BcBpETEd+BawOL/2EqAzz43fn3uBt+bJpT4D/GPJssPz+zoUeJ+kyRVeX24KaU6Xk4F/lTSubPl5wB0R8SbSWdeXVqjjK6QJrw4FSk+DX0ra++md2vYY4PoaYrI25C4aa5TZwHTgp/l4vMN0AAACI0lEQVTU991JUxX/AHiNpAtIiejGXaj7hojYKukuYBSwOj9/FymZvh6YBtyU1z0K2Jjn49krIm7J5b9NmjWx3ETS1A4Hk6bPHVOy7N8jYhOApHuAg9h5qthKroiI3wP3SbofeEPZ8pnAewAi4mZJe0t6aVmZt/SWyXEvyeV/JOlrkvbNy6/KUxub9eEEb40iYHlEfLrPAukw4B3AfOBPgD8bZN0vAETE7yVtLZkA7Pekz7CAuyOi/NJqe9VY/z+Q9gr+r9I1BbrL151tp7bvTPn8H7s6H0h/r7sU+CBprpyWnQfFms9dNNYo/w6cJukVsKOf+SBJ+wC7RcRVwN/x4tSrm4EJDVr3fwH7Kl87U9IYSW/MUyU/LWlmLveBfl4/kRenez29AfG8V9Jukl4LvCbHV+o/emORNAt4osI1DG7lxcnOyuNeBnwSICLKZzo028EJ3hoiJ5q/I10d6OfATaRZLQ8AuvNMn5cBvVv4y0j90+sl7S5pkaR37uK6f0ea3ndJPpi6ntQ3DWkL96t5/f1dfu/zwOck3UGNe7VKV/qpdA1TgIeA24EbgPkR8b9ly88Fpud2Op8XpygudQbpAhx3UXa1oIj4NenSlJfUEqu1L88madZAkpYBq/KB5KFaxx6k4w9H9h4fMKvEW/BmI4ikE0hb7xc4uVs13oI3Mysob8GbmRWUE7yZWUE5wZuZFZQTvJlZQTnBm5kVlBO8mVlB/X+KIRNPxs+29gAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# PICNIC average ploidy vs estimated copy-neutral \n",
"tmp = df_ploidies.boxplot(column=\"PICNIC avg. pl.\", by = \"est. median. ploidy\" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Convert gene-level integer CN into log2R-like format in order to make it compatible with TCGA and CCLE\n",
"\n",
    "1) The copy-neutral state was defined from the average ploidy, as the median of integer CN values in non-disrupted genes.\n",
"\n",
    "2) Compute log2(CN/neutral-CN) for both the min and the max CN; keep the more extreme of the two estimates.\n",
"\n",
"3) Replace estimates below thresholds with zeroes. "
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"estimated_CN = est_ploidies[\"median_pl\"].to_dict()\n",
"estimated_CN[1287381]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>1287381</th>\n",
" <th>924100</th>\n",
" <th>910924</th>\n",
" <th>687561</th>\n",
" <th>1287706</th>\n",
" <th>687452</th>\n",
" <th>906798</th>\n",
" <th>906797</th>\n",
" <th>906800</th>\n",
" <th>910922</th>\n",
" <th>...</th>\n",
" <th>909785</th>\n",
" <th>909904</th>\n",
" <th>909905</th>\n",
" <th>687592</th>\n",
" <th>1303911</th>\n",
" <th>946358</th>\n",
" <th>909907</th>\n",
" <th>1298146</th>\n",
" <th>908452</th>\n",
" <th>908450</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5S_rRNA</th>\n",
" <td>-4.320000</td>\n",
" <td>1.807355</td>\n",
" <td>2.0</td>\n",
" <td>-0.415037</td>\n",
" <td>-4.32</td>\n",
" <td>-4.320000</td>\n",
" <td>-1.00</td>\n",
" <td>2.807355</td>\n",
" <td>2.169925</td>\n",
" <td>-1.00</td>\n",
" <td>...</td>\n",
" <td>-4.32</td>\n",
" <td>2.222392</td>\n",
" <td>-4.32</td>\n",
" <td>-4.320000</td>\n",
" <td>-4.32</td>\n",
" <td>1.807355</td>\n",
" <td>-4.320000</td>\n",
" <td>1.736966</td>\n",
" <td>2.169925</td>\n",
" <td>-1.584963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5_8S_rRNA</th>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.00</td>\n",
" <td>-0.584963</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>...</td>\n",
" <td>0.00</td>\n",
" <td>0.415037</td>\n",
" <td>0.00</td>\n",
" <td>-0.584963</td>\n",
" <td>-4.32</td>\n",
" <td>0.000000</td>\n",
" <td>-0.415037</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.584963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7SK</th>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>-4.320000</td>\n",
" <td>-4.32</td>\n",
" <td>-0.584963</td>\n",
" <td>-4.32</td>\n",
" <td>0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>-4.32</td>\n",
" <td>...</td>\n",
" <td>-4.32</td>\n",
" <td>0.000000</td>\n",
" <td>-4.32</td>\n",
" <td>-4.320000</td>\n",
" <td>-4.32</td>\n",
" <td>-0.415037</td>\n",
" <td>-4.320000</td>\n",
" <td>-4.320000</td>\n",
" <td>0.000000</td>\n",
" <td>-4.320000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 996 columns</p>\n",
"</div>"
],
"text/plain": [
" 1287381 924100 910924 687561 1287706 687452 906798 \\\n",
"5S_rRNA -4.320000 1.807355 2.0 -0.415037 -4.32 -4.320000 -1.00 \n",
"5_8S_rRNA -0.584963 0.000000 0.0 -0.415037 0.00 -0.584963 0.00 \n",
"7SK 1.000000 0.000000 0.0 -4.320000 -4.32 -0.584963 -4.32 \n",
"\n",
" 906797 906800 910922 ... 909785 909904 909905 \\\n",
"5S_rRNA 2.807355 2.169925 -1.00 ... -4.32 2.222392 -4.32 \n",
"5_8S_rRNA 0.000000 0.000000 0.00 ... 0.00 0.415037 0.00 \n",
"7SK 0.584963 0.000000 -4.32 ... -4.32 0.000000 -4.32 \n",
"\n",
" 687592 1303911 946358 909907 1298146 908452 908450 \n",
"5S_rRNA -4.320000 -4.32 1.807355 -4.320000 1.736966 2.169925 -1.584963 \n",
"5_8S_rRNA -0.584963 -4.32 0.000000 -0.415037 0.000000 0.000000 -0.584963 \n",
"7SK -4.320000 -4.32 -0.415037 -4.320000 -4.320000 0.000000 -4.320000 \n",
"\n",
"[3 rows x 996 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdsc = gdsc.apply(lambda x : CN2log2R(x,estimated_CN[x.name] ))\n",
"# drop genes without any determined value\n",
"gdsc = gdsc.dropna(axis=0,how=\"all\")\n",
"# fill with zeroes the remaining ones\n",
"gdsc.fillna(0,inplace=True)\n",
"gdsc.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"\n",
"gdsc = gdsc.applymap(lambda x : clean_logR(x, pos_seg_mean_thr, neg_seg_mean_thr))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Ok: no empty rows detected\n",
"Ok: no duplicated pairs detected\n",
"Ok: All Symbol rows are not empty.\n",
"Ok: All Symbol are mapped to GeneID\n",
"16 Symbol mapped to multiple GeneID\n",
"Ok: All GeneID are unique\n",
"59266 Symbol can be mapped directly to GeneID\n"
]
}
],
"source": [
"NCBI = pd.read_csv(root_dir+\"Homo_sapiens.gene_info\",sep = \"\\t\")\n",
"NCBI = NCBI[[\"#tax_id\",\"GeneID\",\"Symbol\",\"Synonyms\",\"type_of_gene\"]]\n",
"NCBI = NCBI.loc[NCBI[\"#tax_id\"] == 9606]\n",
"NCBI = NCBI.loc[NCBI[\"type_of_gene\"] != \"unknown\"]\n",
"ncbi_symbols = parse_mapping_table(NCBI, \"Symbol\",\"GeneID\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Ok: no empty rows detected\n",
"Ok: no duplicated pairs detected\n",
"Ok: All Synonyms rows are not empty.\n",
"Ok: All Synonyms are mapped to GeneID\n",
"3145 Synonyms mapped to multiple GeneID\n",
"49179 different Synonyms mapped to the same GeneID\n",
"10839 Synonyms can be mapped directly to GeneID\n"
]
}
],
"source": [
"ncbi_synonyms = expand(NCBI[[\"Synonyms\",\"GeneID\"]],column=\"Synonyms\",sep=\"|\") \n",
"ncbi_synonyms = parse_mapping_table(ncbi_synonyms, \"Synonyms\",\"GeneID\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mapped: 24545 \n",
"\tdirectly via main_mapper 22363 \n",
"\tvia alternative mapper 766 \n",
"\tvia one of multiple synonyms in alternative mapper 1416 \n",
"\tLOC 0 \n",
"Unmapped: 21587 \n",
"\trecognized symbols without Entrez ID 0 \n",
"\tmultiple query_ids map to the same target_id 0 \n",
"\tquery_ids map to multiple target_ids in the main mapper 0 \n",
"\tquery_ids map to multiple target_ids in the alternative mapper 76 \n",
"\tLOC not found in Entrez 0 \n",
"\tNot found at all: 21511\n",
"Warning: query IDs mapping to duplicated target IDs in mapping table: 156\n",
"Warning: query IDs not mapped to any target IDs excluded: 21587\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/frame.py:3781: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" return super(DataFrame, self).rename(**kwargs)\n",
"IDs mapped to multiple target IDs are kept:\n",
" [143872, 286464, 140290, 414212, 414213, 51463, 642826, 84631, 574445, 399761, 100132115, 647060, 284565, 6551, 161176, 341019, 4253, 9502, 442416, 51236, 643749, 54438, 728113, 100302179, 414761, 29099, 729438, 256815, 10160, 645425, 653234, 644019, 26165, 3255, 644509, 2749, 653505, 653067, 643479, 100462820, 100418977, 26824, 79817, 6218, 728695, 100034743, 221262, 647507, 677844, 728917, 26583, 100289124, 84316, 200030, 768096, 642658, 23523, 401508, 23334, 119016, 106478953, 84458, 1517, 246126, 26095, 100033392, 92017, 374, 26871, 100132948, 125050, 387707, 653308, 79741, 728798]\n",
"mapper.py:204: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" df.sort_index(inplace=True)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>1287381</th>\n",
" <th>924100</th>\n",
" <th>910924</th>\n",
" <th>687561</th>\n",
" <th>1287706</th>\n",
" <th>687452</th>\n",
" <th>906798</th>\n",
" <th>906797</th>\n",
" <th>906800</th>\n",
" <th>910922</th>\n",
" <th>...</th>\n",
" <th>909785</th>\n",
" <th>909904</th>\n",
" <th>909905</th>\n",
" <th>687592</th>\n",
" <th>1303911</th>\n",
" <th>946358</th>\n",
" <th>909907</th>\n",
" <th>1298146</th>\n",
" <th>908452</th>\n",
" <th>908450</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.584963</td>\n",
" <td>0.415037</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>0.415037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.736966</td>\n",
" <td>0.321928</td>\n",
" <td>0.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.736966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.584963</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>-0.415037</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.415037</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 996 columns</p>\n",
"</div>"
],
"text/plain": [
" 1287381 924100 910924 687561 1287706 687452 906798 \\\n",
"1 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 \n",
"2 0.000000 0.584963 0.0 -0.415037 0.321928 0.000000 0.584963 \n",
"9 -0.584963 0.584963 0.0 -0.415037 -0.415037 -0.584963 0.000000 \n",
"\n",
" 906797 906800 910922 ... 909785 909904 909905 \\\n",
"1 0.584963 0.0 0.0 ... 0.0 -0.584963 0.584963 \n",
"2 0.584963 0.0 0.0 ... 0.0 -0.584963 0.000000 \n",
"9 0.000000 0.0 0.0 ... 0.0 -0.584963 0.000000 \n",
"\n",
" 687592 1303911 946358 909907 1298146 908452 908450 \n",
"1 0.415037 0.000000 0.000000 0.0 0.000000 0.584963 0.415037 \n",
"2 0.000000 0.736966 0.321928 0.0 -0.584963 0.000000 0.736966 \n",
"9 0.415037 0.000000 0.000000 -1.0 -0.584963 0.000000 0.000000 \n",
"\n",
"[3 rows x 996 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdsc,query2target,not_mapped = apply_mappers(gdsc, ncbi_symbols, ncbi_synonyms, verbose = True,handle_duplicates = \"keep\")\n",
"gdsc.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"75 duplicated IDs in 156 rows found.\n",
"duplicate rows removed due to low correlation of duplicated profiles 25\n",
"Merged 131 duplicated rows into 63 rows\n"
]
}
],
"source": [
"gdsc = handle_dups(gdsc,corr_thr = 0.75)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>683665</th>\n",
" <th>683667</th>\n",
" <th>684052</th>\n",
" <th>684055</th>\n",
" <th>684057</th>\n",
" <th>684059</th>\n",
" <th>684062</th>\n",
" <th>684072</th>\n",
" <th>684681</th>\n",
" <th>687448</th>\n",
" <th>...</th>\n",
" <th>1659818</th>\n",
" <th>1659819</th>\n",
" <th>1659823</th>\n",
" <th>1659928</th>\n",
" <th>1659929</th>\n",
" <th>1660034</th>\n",
" <th>1660035</th>\n",
" <th>1660036</th>\n",
" <th>1674021</th>\n",
" <th>1789883</th>\n",
" </tr>\n",
" <tr>\n",
" <th>gene_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>-0.415037</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.000000</td>\n",
" <td>0.415037</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.0</td>\n",
" <td>-0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.321928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.584963</td>\n",
" <td>-0.415037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.0</td>\n",
" <td>0.321928</td>\n",
" <td>0.0</td>\n",
" <td>0.584963</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>-0.584963</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>-1.0</td>\n",
" <td>-1.584963</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>-1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.0</td>\n",
" <td>0.321928</td>\n",
" <td>0.0</td>\n",
" <td>0.584963</td>\n",
" <td>0.584963</td>\n",
" <td>0.0</td>\n",
" <td>0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>-0.584963</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>-1.0</td>\n",
" <td>-1.584963</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>-1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.0</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.321928</td>\n",
" <td>0.0</td>\n",
" <td>-0.415037</td>\n",
" <td>0.0</td>\n",
" <td>0.415037</td>\n",
" <td>0.584963</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 996 columns</p>\n",
"</div>"
],
"text/plain": [
" 683665 683667 684052 684055 684057 684059 684062 \\\n",
"gene_id \n",
"1 0.0 0.000000 0.0 0.000000 -0.415037 0.0 -0.415037 \n",
"2 0.0 0.000000 0.0 0.584963 0.000000 0.0 0.000000 \n",
"9 0.0 0.321928 0.0 0.584963 0.584963 0.0 0.321928 \n",
"10 0.0 0.321928 0.0 0.584963 0.584963 0.0 0.321928 \n",
"12 0.0 -1.000000 0.0 0.000000 -1.000000 0.0 -0.415037 \n",
"\n",
" 684072 684681 687448 ... 1659818 1659819 1659823 \\\n",
"gene_id ... \n",
"1 0.000000 0.415037 0.0 ... 0.0 0.0 0.000000 \n",
"2 0.584963 0.000000 0.0 ... 0.0 0.0 0.000000 \n",
"9 0.000000 -0.584963 0.0 ... 0.0 0.0 -0.415037 \n",
"10 0.000000 -0.584963 0.0 ... 0.0 0.0 -0.415037 \n",
"12 0.000000 0.000000 0.0 ... -1.0 0.0 0.321928 \n",
"\n",
" 1659928 1659929 1660034 1660035 1660036 1674021 1789883 \n",
"gene_id \n",
"1 0.0 -0.415037 0.0 -0.584963 0.000000 0.000000 0.321928 \n",
"2 0.0 0.000000 0.0 0.000000 0.000000 0.584963 -0.415037 \n",
"9 0.0 0.000000 -1.0 -1.584963 0.000000 -1.000000 -1.000000 \n",
"10 0.0 0.000000 -1.0 -1.584963 0.000000 -1.000000 -1.000000 \n",
"12 0.0 -0.415037 0.0 0.415037 0.584963 0.000000 0.000000 \n",
"\n",
"[5 rows x 996 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdsc.index.name = \"gene_id\"\n",
"gdsc = gdsc.T.sort_index().T\n",
"gdsc.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"gdsc.to_csv(preprocessed_dir+\"/\"+\"GDSC\"+\".Segment_Mean.CNA.tsv\",\n",
" sep = \"\\t\",header=True,index=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PDX \n",
"\n",
    "For the PDX dataset, only gene-level estimated copy numbers (non-integer) are reported. \n",
    "From the ploidy distributions, calculated as the average over all genes, we concluded that CN estimates were called under the assumption that the copy-neutral state of each xenograft corresponds to CN = 2.\n",
"\n",
"\n",
"For gene ID conversion we used the same approach as for RNA-seq."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(23852, 375)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X-1004</th>\n",
" <th>X-1008</th>\n",
" <th>X-1027</th>\n",
" <th>X-1095</th>\n",
" <th>X-1119</th>\n",
" <th>X-1156</th>\n",
" <th>X-1167</th>\n",
" <th>X-1169</th>\n",
" <th>X-1172</th>\n",
" <th>X-1173</th>\n",
" <th>...</th>\n",
" <th>X-5694</th>\n",
" <th>X-5696</th>\n",
" <th>X-5713</th>\n",
" <th>X-5717</th>\n",
" <th>X-5727</th>\n",
" <th>X-5739</th>\n",
" <th>X-5808</th>\n",
" <th>X-5959</th>\n",
" <th>X-5975</th>\n",
" <th>X-6047</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Sample</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A1BG</th>\n",
" <td>2.58</td>\n",
" <td>1.60</td>\n",
" <td>2.17</td>\n",
" <td>2.08</td>\n",
" <td>2.00</td>\n",
" <td>3.94</td>\n",
" <td>2.04</td>\n",
" <td>11.39</td>\n",
" <td>2.17</td>\n",
" <td>2.01</td>\n",
" <td>...</td>\n",
" <td>2.08</td>\n",
" <td>2.10</td>\n",
" <td>2.14</td>\n",
" <td>2.95</td>\n",
" <td>2.06</td>\n",
" <td>2.07</td>\n",
" <td>1.99</td>\n",
" <td>2.07</td>\n",
" <td>1.43</td>\n",
" <td>2.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1BG-AS1</th>\n",
" <td>2.58</td>\n",
" <td>1.60</td>\n",
" <td>2.17</td>\n",
" <td>2.08</td>\n",
" <td>2.00</td>\n",
" <td>3.94</td>\n",
" <td>2.04</td>\n",
" <td>11.39</td>\n",
" <td>2.17</td>\n",
" <td>2.01</td>\n",
" <td>...</td>\n",
" <td>2.08</td>\n",
" <td>2.10</td>\n",
" <td>2.14</td>\n",
" <td>2.95</td>\n",
" <td>2.06</td>\n",
" <td>2.07</td>\n",
" <td>1.99</td>\n",
" <td>2.07</td>\n",
" <td>1.43</td>\n",
" <td>2.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1CF</th>\n",
" <td>2.87</td>\n",
" <td>2.97</td>\n",
" <td>2.01</td>\n",
" <td>2.06</td>\n",
" <td>2.10</td>\n",
" <td>1.58</td>\n",
" <td>2.01</td>\n",
" <td>1.64</td>\n",
" <td>1.89</td>\n",
" <td>1.99</td>\n",
" <td>...</td>\n",
" <td>2.04</td>\n",
" <td>0.97</td>\n",
" <td>1.58</td>\n",
" <td>2.08</td>\n",
" <td>1.95</td>\n",
" <td>1.92</td>\n",
" <td>1.54</td>\n",
" <td>1.28</td>\n",
" <td>1.33</td>\n",
" <td>2.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A2LD1</th>\n",
" <td>5.74</td>\n",
" <td>1.64</td>\n",
" <td>2.06</td>\n",
" <td>2.01</td>\n",
" <td>2.07</td>\n",
" <td>1.74</td>\n",
" <td>2.06</td>\n",
" <td>1.59</td>\n",
" <td>1.40</td>\n",
" <td>2.53</td>\n",
" <td>...</td>\n",
" <td>2.03</td>\n",
" <td>2.07</td>\n",
" <td>2.25</td>\n",
" <td>2.00</td>\n",
" <td>1.01</td>\n",
" <td>2.00</td>\n",
" <td>1.08</td>\n",
" <td>1.85</td>\n",
" <td>1.93</td>\n",
" <td>1.45</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4 rows × 375 columns</p>\n",
"</div>"
],
"text/plain": [
" X-1004 X-1008 X-1027 X-1095 X-1119 X-1156 X-1167 X-1169 \\\n",
"Sample \n",
"A1BG 2.58 1.60 2.17 2.08 2.00 3.94 2.04 11.39 \n",
"A1BG-AS1 2.58 1.60 2.17 2.08 2.00 3.94 2.04 11.39 \n",
"A1CF 2.87 2.97 2.01 2.06 2.10 1.58 2.01 1.64 \n",
"A2LD1 5.74 1.64 2.06 2.01 2.07 1.74 2.06 1.59 \n",
"\n",
" X-1172 X-1173 ... X-5694 X-5696 X-5713 X-5717 X-5727 \\\n",
"Sample ... \n",
"A1BG 2.17 2.01 ... 2.08 2.10 2.14 2.95 2.06 \n",
"A1BG-AS1 2.17 2.01 ... 2.08 2.10 2.14 2.95 2.06 \n",
"A1CF 1.89 1.99 ... 2.04 0.97 1.58 2.08 1.95 \n",
"A2LD1 1.40 2.53 ... 2.03 2.07 2.25 2.00 1.01 \n",
"\n",
" X-5739 X-5808 X-5959 X-5975 X-6047 \n",
"Sample \n",
"A1BG 2.07 1.99 2.07 1.43 2.03 \n",
"A1BG-AS1 2.07 1.99 2.07 1.43 2.03 \n",
"A1CF 1.92 1.54 1.28 1.33 2.10 \n",
"A2LD1 2.00 1.08 1.85 1.93 1.45 \n",
"\n",
"[4 rows x 375 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"PDX_xls = \"/home/olya/SFU/Hossein/PDX/nm.3954-S2.xlsx\"\n",
"pdx = pd.read_excel(PDX_xls,\"copy number\")\n",
"pdx.set_index(\"Sample\",drop=True,inplace=True)\n",
"focal = pdx.T[\"FocalCNScore\"]\n",
"pdx.drop([\"ArmLevelCNScore\",\"FocalCNScore\"],inplace = True)\n",
"print(pdx.shape)\n",
"pdx.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Strings containing duplicated gene IDs: 544\n",
"268 duplicated IDs in 544 rows found.\n",
"duplicate rows removed due to low correlation of duplicated profiles 134\n",
"Merged 410 duplicated rows into 205 rows\n"
]
}
],
"source": [
"pdx.index.name = \"gene_id\"\n",
"ids = pdx.index\n",
"ids = list(set(ids[ids.duplicated()]))\n",
"print(\"Strings containing duplicated gene IDs:\",pdx.loc[ids,:].shape[0])\n",
"pdx = handle_dups(pdx,corr_thr = 0.75)\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5,0,'CN Averaged over all')"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGyJJREFUeJzt3XuYHVWd7vHvCwRFAwaGNicCMXhFFAnQIgoqIKjgwwFGUaMgcJyJjkfUOaMDelRgvEyYR1ARj+cEQcELl1FuCioIAl6RBCMhIIoQFIgkCMhFAQPv+aNWk01Pd3Z10rV3uuv9PM9+eu+qVVW/vZ6kfnutWrVKtomIiPZar98BREREfyURRES0XBJBRETLJRFERLRcEkFERMslEUREtFwSQTRC0uWS/qFGuZmSHpC0/ijrj5H0tTU4/mr3G//VmtZ1THxJBLHGJC2V9Ndywr1T0lckTR3LPmz/3vZU24+OZ2xrs19Ju0t6rHyv+yXdKOnwsm6WJJd1Q9/7O5L27th+aqmbt3Us21jS7yW9cXy+YcT4SSKItbWf7anAjsAg8JE+xzNe7ijfaxPgSOBkSdt2rJ9W1m8PXAKcK+kwANsPAO8EPitpoJT/D2CB7W/26gtE1JVEEOPC9u3Ad4EXDV8naT1JH5F0q6Tlkk6X9LSybugX9gbl89aSrii/xC8BNu/Yz4WSjhi272slHTjCMYfv93JJH5f0k7LviyVtPny7Eb6XbZ8H3ANsO8L6P9r+HHAMcJyk9cry7wMXAidK2h14E/Du0Y4j6TBJN5fYbhlqTUh6tqTLJP1J0l2Svi5pWsd2SyV9sNTDg5JOkTRd0nfLvn4gadNhdTJX0h2Slkn6wGpi2kXSTyXdK+lX5XusNt6YmJIIYlxI2grYF/jlCKsPK689gGcBU4GTRtnVN4CFVAng48ChHetOAw7uOOb2wBZUJ9w63gocDjwd2BAY9STYcYz1SqKZBixeTdFzyn6f37Hsn4HdgW8CH7D9x1GO8VTgRGAf2xsDLwcWDa0G/h14BvACYCuqpNPpDcDewPOA/agS8oeBAar/4+8dVn4P4LnAa4AjJe01QkxD9foJYDOquvqWpIEu8cYElEQQa+s8SfcCPwauAD41Qpm3ASfYvrl0m3wIeMvQr/UhkmYCLwE+avth21cC3+4ocgHwPEnPLZ8PAc6y/UjNWL9s+ze2/wqcDcxeTdlnlO91F3A0cIjtG1dT/o7yd7OhBbbvAZYAT6FKFKvzGPAiSRvZXmZ7SdnHTbYvKfWxAjgBeNWwbT9v+87SKvsRcJXtX9p+CDgX2GFY+WNtP2h7MfBlYM4I8RwMXGT7ItuP2b4EWECV7EeNNyamJIJYWwfYnmb7mbbfXU6ywz0DuLXj863ABsD0EcrdY/vBYWUBKCe2s4CDSxfMHOCrY4i18xf5X6haJqO5o3yvzWzPtn1ml31vUf7ePbRA0sHALOAHwHGjbVi+75uBdwHLShfYNmUf0yWdKel2SfcBX6Oju6y4s+P9X0f4PPx7/qHj/a1U9T7cM4GDSrfQvSUp7gbMWF28MTElEUQv3EF1YhkyE1jJE09YAMuATUvXQ2fZTqdRtTBeDfzF9s/GOdY1dSCwHLgRQNLTgc8A/0h14fhNkl4x2sa2v297b2AG8Gvg5LLqU4CB7WxvQvVLXWsZ61Yd72eyqjXT6Q/AV0syHHo91fa8LvHGBJREEL1wBvDP5ULwVKqT21m2V3YWsn0rVffDsZI2lLQbVZ93Z5mfUXVLHM/YWgONKL/Y30PVffQh24+VVScB59n+oe1lwL9SjTx60ij72L8kwIeBB6i+I8DG5fOfS7/9B8ch7I9KeoqkF1JdMzlrhDJfA/aT9FpJ60t6sqphtVt2iTcmoCSC6IVTqU7aVwK3AA8BR4xS9q3AS6m6WI4GTh+hzOnAdlQnq365V9KDVBeQ9wUOsn0qgKQDqLpRHj9p2/
4S1S/vj42wr/WA/1XW3011DeCfyrpjqYbm/pnq4m23aw11XAHcBFwKfNr2xcML2P4DsD/VRecVVC2ED5ZYVxdvTEDKg2liopH0dmCu7d36HctEImkWVSKeMrw1Fu2WFkFMKJKeQjUef36/Y4mYLJIIYsKQ9Fqqboo7qe43iIhxkK6hiIiWS4sgIqLlNuhepP8233xzz5o1q99hRERMKAsXLrzL9kC3chMiEcyaNYsFCxb0O4yIiAlF0q3dS6VrKCKi9ZIIIiJaLokgIqLlkggiIlouiSAiouWSCCIiWq6xRFCmrf1FedbpEknHluVfKc84XVReq3tKVERENKzJ+wgeBva0/YCkKcCPJX23rPug7W82eOyIiKipsUTgahKjB8rHKeWViY0iItYxjd5ZLGl9YCHwHOALtq+S9E/AJyV9jOrBGEfZfniEbecCcwFmzhz+tMKY6GYddWGtckvnvb7hSCKi0YvFth+1PRvYEthZ0ouADwHbAC8BNgOOHGXb+bYHbQ8ODHSdKiMiItZQT0YN2b4X+CHwOtvLXHkY+DKwcy9iiIiIkTU5amhA0rTyfiNgb+DXkmaUZQIOAK5rKoaIiOiuyWsEM4DTynWC9YCzbX9H0mWSBgABi4B3NRhDRER00eSooWuBHUZYvmdTx4yIiLHLncURES2XRBAR0XJJBBERLZdEEBHRckkEEREtl0QQEdFySQQRES2XRBAR0XJJBBERLZdEEBHRckkEEREtl0QQEdFySQQRES2XRBAR0XJJBBERLZdEEBHRckkEEREt1+SjKqNlZh11Yb9DiIg1kBZBRETLJRFERLRcY4lA0pMl/ULSryQtkXRsWb61pKsk3STpLEkbNhVDRER012SL4GFgT9vbA7OB10naBTgO+Izt5wD3AO9oMIaIiOiisUTgygPl45TyMrAn8M2y/DTggKZiiIiI7hodNSRpfWAh8BzgC8DvgHttryxFbgO2GGXbucBcgJkzZzYZZqzD6o5EWjrv9Q1HEjF5NXqx2PajtmcDWwI7A9uMYdv5tgdtDw4MDDQWY0RE2/Vk1JDte4EfAi8DpkkaaolsCdzeixgiImJkTY4aGpA0rbzfCNgbuIEqIbyxFDsUOL+pGCIiorsmrxHMAE4r1wnWA862/R1J1wNnSvoE8EvglAZjiIiILhpLBLavBXYYYfnNVNcLIiJiHZC5hqKriTCHUEYXRay5TDEREdFySQQRES2XRBAR0XJJBBERLZdEEBHRchk1NAlNhFE+EbHuSIsgIqLlkggiIlouiSAiouWSCCIiWi6JICKi5ZIIIiJaLokgIqLlkggiIlouiSAiouWSCCIiWi6JICKi5TLXULRKnmQW8V+lRRAR0XJJBBERLddYIpC0laQfSrpe0hJJ7yvLj5F0u6RF5bVvUzFERER3TV4jWAn8i+1rJG0MLJR0SVn3GdufbvDYERFRU2OJwPYyYFl5f7+kG4AtmjpeRESsmZ5cI5A0C9gBuKoseo+kayWdKmnTUbaZK2mBpAUrVqzoRZgREa3UeCKQNBX4FvB+2/cBXwSeDcymajEcP9J2tufbHrQ9ODAw0HSYERGt1WgikDSFKgl83fY5ALbvtP2o7ceAk4Gdm4whIiJWr8lRQwJOAW6wfULH8hkdxQ4ErmsqhoiI6K7JUUO7AocAiyUtKss+DMyRNBswsBR4Z4MxREREF02OGvoxoBFWXdTUMSMiYuxyZ3FERMslEUREtFwSQUREyyURRES0XBJBRETLJRFERLRcEkFERMslEUREtFwSQUREy3VNBJKeLelJ5f3ukt4raVrzoUVERC/UaRF8C3hU0nOA+cBWwDcajSoiInqmTiJ4zPZKqplCP2/7g8CMLttERMQEUWfSub9JmgMcCuxXlk1pLqSI/pt11IW1yi2d9/qGI4loXp0WweHAy4BP2r5F0tbAV5sNKyIieqVri8D29ZKOBGaWz7cAxzUdWERE9EadUUP7AYuA75XPsyVd0HRgER
HRG3W6ho6heq7wvQC2FwHPajCmiIjooTqJ4G+2/zxs2WNNBBMREb1XZ9TQEklvBdaX9FzgvcBPmw0rIiJ6pU6L4AjghcDDwBnAfcD7mwwqIiJ6p86oob8A/7u8apO0FXA6MB0wMN/25yRtBpwFzAKWAm+yfc/Ywo6IiPEyaiKQ9G2qE/iIbP/3LvteCfyL7WskbQwslHQJcBhwqe15ko4CjgKOHHPkERExLlbXIvj02uzY9jJgWXl/v6QbgC2A/YHdS7HTgMtJIoiI6JtRE4HtK4beS9oQ2IaqhXCj7UfGchBJs4AdgKuA6SVJAPyRqutopG3mAnMBZs6cOZbDRUTEGNS5oez1wO+AE4GTgJsk7VP3AJKmUs1g+n7b93Wus21G6X6yPd/2oO3BgYGBuoeLiIgxqjN89HhgD9s3QfV8AuBC4LvdNpQ0hSoJfN32OWXxnZJm2F4maQawfM1Cj4iI8VBn+Oj9Q0mguBm4v9tGkgScAtxg+4SOVRdQzWRK+Xt+zVgjIqIBdVoECyRdBJxN1Y1zEHC1pL8H6PilP9yuwCHAYkmLyrIPA/OAsyW9A7gVeNNaxB8REWupTiJ4MnAn8KryeQWwEdWzCQyMmAhs/xjQKPt89djCjIiIptS5oezwXgQSERH90TURlAfRHEF1J/Dj5WvcUBYRERNAna6h86gu+n6bzDoaETHp1EkED9k+sfFIIiKiL+okgs9JOhq4mGoGUgBsX9NYVBER0TN1EsF2VMNA92RV15DL54iImODqJIKDgGeNdX6hiIiYGOrcWXwdMK3pQCIioj/qtAimAb+WdDVPvEaQ4aMREZNAnURwdONRRERE39S5s/iKbmUiImLiqvM8gl0kXS3pAUmPSHpU0n3dtouIiImhzsXik4A5wG+pJpv7B+ALTQYVERG9UycRUJ5HsL7tR21/GXhds2FFRESv1LlY/JfyzOJFkv6D6oH0tRJIRESs++qc0A8p5d4DPAhsBbyhyaAiIqJ36owaurW8fUjSicBWwx5dGRERE1idUUOXS9pE0mbANcDJkk7otl1EREwMdbqGnmb7PuDvgdNtvxTYq9mwIiKiV+okgg0kzaB6yPx3Go4nIiJ6rE4i+Dfg+8BNtq+W9CyqewoiImIS6JoIbP+n7Rfbfnf5fLPtrqOGJJ0qabmk6zqWHSPpdkmLymvftQs/IiLWVpP3A3yFkW88+4zt2eV1UYPHj4iIGhpLBLavBO5uav8RETE++nGH8HskXVu6jjYdrZCkuZIWSFqwYsWKXsYXEdEqXW8ok/QkqjuJZ3WWt/1va3C8LwIfp3rm8ceB44H/MVJB2/OB+QCDg4Neg2NFREQNdeYaOh/4M7CQjieUrQnbdw69l3QyGY4aEdF3dRLBlrbHZbZRSTNsLysfD6R6HnJERPRRnUTwU0nb2V48lh1LOgPYHdhc0m1Uj7zcXdJsqq6hpcA7xxZuRESMtzqJYDfgMEm3UHUNCbDtF69uI9tzRlh8ythDjIiIJtVJBPs0HkVERPTNWKahjoiISShPGouIaLkkgoiIlksiiIhouSSCiIiWSyKIiGi5JIKIiJZLIoiIaLkkgoiIlksiiIhouSSCiIiWqzPXUKyhWUddWKvc0nmvbziSiIjRpUUQEdFySQQRES2XRBAR0XJJBBERLZdEEBHRchk1FLEW+jkyLKPSYrykRRAR0XKNJQJJp0paLum6jmWbSbpE0m/L302bOn5ERNTTZIvgK8Drhi07CrjU9nOBS8vniIjoo8YSge0rgbuHLd4fOK28Pw04oKnjR0REPb2+RjDd9rLy/o/A9B4fPyIihunbqCHbluTR1kuaC8wFmDlzZs/i6qbuSI1+7zMioq5etwjulDQDoPxdPlpB2/NtD9oeHBgY6FmAERFt0+tEcAFwaHl/KHB+j48fERHDNDl89AzgZ8DzJd0m6R3APGBvSb8F9iqfIyKijxq7RmB7ziirXt3UMSMiYuxyZ3FERMtlrqGIHhjLyL
DMDRS9lhZBRETLJRFERLRcEkFERMslEUREtFwSQUREyyURRES0XBJBRETLJRFERLRcEkFERMslEUREtFymmIhYx4z3g4rq7q/u1Bbjvb/ov7QIIiJaLokgIqLlkggiIlouiSAiouWSCCIiWi6JICKi5ZIIIiJaLokgIqLl+nJDmaSlwP3Ao8BK24P9iCMiIvp7Z/Eetu/q4/EjIoJ0DUVEtF6/EoGBiyUtlDR3pAKS5kpaIGnBihUrehxeRER79CsR7GZ7R2Af4H9KeuXwArbn2x60PTgwMND7CCMiWqIvicD27eXvcuBcYOd+xBEREX1IBJKeKmnjoffAa4Dreh1HRERU+jFqaDpwrqSh43/D9vf6EEdERNCHRGD7ZmD7Xh83IiJGlieURUQjxvKktTzNrL9yH0FERMslEUREtFwSQUREyyURRES0XBJBRETLZdRQMZYRDhGT0WT6P1D3u2S0UiUtgoiIlksiiIhouSSCiIiWSyKIiGi5JIKIiJaT7X7H0NXg4KAXLFiwRttOppEQEdEfE3V0kaSFtge7lUuLICKi5ZIIIiJaLokgIqLlkggiIlouiSAiouUy11BExDhpYpRiL0YspUUQEdFySQQRES3Xl0Qg6XWSbpR0k6Sj+hFDRERUep4IJK0PfAHYB9gWmCNp217HERERlX60CHYGbrJ9s+1HgDOB/fsQR0RE0J9RQ1sAf+j4fBvw0uGFJM0F5paPD0i6sQexrY3Ngbv6HcQ6IPWwSupilQldFzpu3HY15npYy2M/s06hdXb4qO35wPx+x1GXpAV1Jnea7FIPq6QuVkldVNbVeuhH19DtwFYdn7csyyIiog/6kQiuBp4raWtJGwJvAS7oQxwREUEfuoZsr5T0HuD7wPrAqbaX9DqOBkyYbqyGpR5WSV2skrqorJP1MCEeTBMREc3JncURES2XRBAR0XJJBGMg6VRJyyVdN8r6p0n6tqRfSVoi6fBex9gLkraS9ENJ15fv+b4RykjSiWUakWsl7diPWJtWsy7eVupgsaSfStq+H7E2qU49dJR9iaSVkt7Yyxh7pW5dSNpd0qJS5opex/kEtvOq+QJeCewIXDfK+g8Dx5X3A8DdwIb9jruBepgB7Fjebwz8Bth2WJl9ge8CAnYBrup33H2si5cDm5b3+0zGuqhTD2Xd+sBlwEXAG/sddx//TUwDrgdmls9P72fMaRGMge0rqU7uoxYBNpYkYGopu7IXsfWS7WW2rynv7wduoLpjvNP+wOmu/ByYJmlGj0NtXJ26sP1T2/eUjz+nundmUqn5bwLgCOBbwPIehtdTNevircA5tn9fyvW1PpIIxtdJwAuAO4DFwPtsP9bfkJolaRawA3DVsFUjTSUy0olh0lhNXXR6B1VLadIarR4kbQEcCHyx91H1x2r+TTwP2FTS5ZIWSnp7r2PrtM5OMTFBvRZYBOwJPBu4RNKPbN/X37CaIWkq1a+790/W71hXnbqQtAdVItitl7H1Upd6+CxwpO3Hqkbz5NalLjYAdgJeDWwE/EzSz23/psdhPh5MjJ/DgXmuOv1uknQLsA3wi/6GNf4kTaH6R/512+eMUKQ1U4nUqAskvRj4ErCP7T/1Mr5eqVEPg8CZJQlsDuwraaXt83oYZk/UqIvbgD/ZfhB4UNKVwPZU1xN6Ll1D4+v3VBkeSdOB5wM39zWiBpRrIKcAN9g+YZRiFwBvL6OHdgH+bHtZz4LskTp1IWkmcA5wSL9+8TWtTj3Y3tr2LNuzgG8C756kSaDO/4/zgd0kbSDpKVQzMN/QqxiHS4tgDCSdAewObC7pNuBoYAqA7f8LfBz4iqTFVKNljrQ9YafeXY1dgUOAxZIWlWUfBmbC43VxEdXIoZuAv1C1liajOnXxMeDvgP9Tfg2v9Do4A+VaqlMPbdG1LmzfIOl7wLXAY8CXbI84LL0XMsVERETLpWsoIqLlkggiIlouiSAiouWSCCIiWi6JICKi5ZIIoqck/TdJZ0r6Xb
m1/iJJz5M0S5IlHdFR9iRJh61mX4skndmTwNeSpMMkndTvOIaT9ED5O2u0WXVj8ksiiJ4pN9qcC1xu+9m2dwI+BEwvRZYD7yvPsu62rxdQzWT5CklPHaf4JsV9NZPle0TvJBFEL+0B/K3z5iLbv7L9o/JxBXApcGiNfc0BvgpcTDXTKZK2kfT4dB7lV+7i8n4nSVeUVsj3h2ZCLZN+fVbSAqoktJ+kqyT9UtIPyh3iSBqQdEmZO/5Lkm6VtHlZd7CkX5QWyv+TtH5Zfrik35SYdh3pS0jaTNJ5qp5X8HNJL5a0nqSlkqZ1lPutpOkljm9Jurq8di3rj5H0VUk/KfXSeYypki6VdI2qZyLsX6N+o0WSCKKXXgQs7FLmOOADQyfT1XgzcCZwBlVSwPavgQ0lbd1R5qwy78vnqea/3wk4Ffhkx742tD1o+3jgx8Autnco+//XUuZo4DLbL6SaHmEmPN4yeTOwq+3ZwKPA20qiOZYqAewGbDvK9zgW+KXtF1PdfXp6mbH2fKqZOpH0UuBW23cCnwM+Y/slwBuo5i8asi2wl+05w47xEHCg7R2pkvHxasOsb1FbmpCxTrF9s6SrqOZrH5GkQeAu27+XdDtwqqTNbN8NnE11Yp5X/r6Zas6nF1HNBgtVl1LnvEdndbzfkip5zAA2BG4py3ejnJhtf0/S0PMFXk01i+TVZd8bUXVxvZSqC2xFifksqqmHh9uN6oSO7csk/Z2kTUpMHwO+DLylI8a9gG07zuObqJrlEuAC238dqcqAT0l6JdV0BltQdcf9cYSy0UJJBNFLS4A6jyf8FNWv7tEe3zcH2EbS0vJ5E6qT6clUJ8z/lHQOYNu/lbQdsMT2y0bZ34Md7z8PnGD7Akm7A8d0iVXAabY/9ISF0gFdtuvmZ8BzJA0ABwCfKMvXo2qxPDTsePDE79HpbVRPzNvJ9t9KvT15LeOLSSRdQ9FLlwFPkjR3aEHpE39FZ6HSxXM9sN/wHUhaD3gTsF3HTJb7s6p76HdU3TMfZdWv6BuBAUkvK/uYIumFo8T4NFZNl915reIn5bhIeg2waVl+KfBGSU8v6zaT9EyqB5G8qvzCnwIcNMrxfkR1oqYknrts31emMj8XOIFqFsuhqasvpnrK11B9zB5lv8O/0/KSBPYAnlljm2iRJILomXJyOxDYqwwfXQL8OyN3UXySkR/p+Argdtt3dCy7kqq7ZOhRmGcBB1N1E2H7EaqWyHGSfkX18KCXjxLmMVQtioVA58yxxwKvKUMsDyox32/7euAjwMWSrgUuAWaUKbePofpl/xNGn2L4GGCnsu08nph8hr5HZ9fVe4HBcnH5euBdo+y309fLNouBtwO/rrFNtEhmH42oQdKTgEdtrywtiy+Wi8MRE16uEUTUMxM4u3RNPQL8Y5/jiRg3aRFERLRcrhFERLRcEkFERMslEUREtFwSQUREyyURRES03P8H9AxF0s+Zly8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Per-sample ploidy estimate: mean copy number over all rows (genes) of pdx,\n",
"# one value per column (sample). DataFrame.mean is the built-in, vectorized\n",
"# equivalent of apply(np.mean).\n",
"average_ploidies = pdx.mean(axis=0)\n",
"p = plt.hist(average_ploidies, bins=30)\n",
"plt.title(\"Ploidy in PDX samples\")\n",
"plt.ylabel(\"n samples\")\n",
"# The trailing ';' keeps the matplotlib Text repr out of the cell output.\n",
"plt.xlabel(\"CN Averaged over all\");"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X-1004</th>\n",
" <th>X-1008</th>\n",
" <th>X-1027</th>\n",
" <th>X-1095</th>\n",
" <th>X-1119</th>\n",
" <th>X-1156</th>\n",
" <th>X-1167</th>\n",
" <th>X-1169</th>\n",
" <th>X-1172</th>\n",
" <th>X-1173</th>\n",
" <th>...</th>\n",
" <th>X-5694</th>\n",
" <th>X-5696</th>\n",
" <th>X-5713</th>\n",
" <th>X-5717</th>\n",
" <th>X-5727</th>\n",
" <th>X-5739</th>\n",
" <th>X-5808</th>\n",
" <th>X-5959</th>\n",
" <th>X-5975</th>\n",
" <th>X-6047</th>\n",
" </tr>\n",
" <tr>\n",
" <th>gene_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A1BG</th>\n",
" <td>0.367371</td>\n",
" <td>-0.321928</td>\n",
" <td>0.117695</td>\n",
" <td>0.056584</td>\n",
" <td>0.000000</td>\n",
" <td>0.978196</td>\n",
" <td>0.028569</td>\n",
" <td>2.509696</td>\n",
" <td>0.117695</td>\n",
" <td>0.007196</td>\n",
" <td>...</td>\n",
" <td>0.056584</td>\n",
" <td>0.070389</td>\n",
" <td>0.097611</td>\n",
" <td>0.560715</td>\n",
" <td>0.042644</td>\n",
" <td>0.049631</td>\n",
" <td>-0.007232</td>\n",
" <td>0.049631</td>\n",
" <td>-0.483985</td>\n",
" <td>0.021480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1BG-AS1</th>\n",
" <td>0.367371</td>\n",
" <td>-0.321928</td>\n",
" <td>0.117695</td>\n",
" <td>0.056584</td>\n",
" <td>0.000000</td>\n",
" <td>0.978196</td>\n",
" <td>0.028569</td>\n",
" <td>2.509696</td>\n",
" <td>0.117695</td>\n",
" <td>0.007196</td>\n",
" <td>...</td>\n",
" <td>0.056584</td>\n",
" <td>0.070389</td>\n",
" <td>0.097611</td>\n",
" <td>0.560715</td>\n",
" <td>0.042644</td>\n",
" <td>0.049631</td>\n",
" <td>-0.007232</td>\n",
" <td>0.049631</td>\n",
" <td>-0.483985</td>\n",
" <td>0.021480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1CF</th>\n",
" <td>0.521051</td>\n",
" <td>0.570463</td>\n",
" <td>0.007196</td>\n",
" <td>0.042644</td>\n",
" <td>0.070389</td>\n",
" <td>-0.340075</td>\n",
" <td>0.007196</td>\n",
" <td>-0.286304</td>\n",
" <td>-0.081614</td>\n",
" <td>-0.007232</td>\n",
" <td>...</td>\n",
" <td>0.028569</td>\n",
" <td>-1.043943</td>\n",
" <td>-0.340075</td>\n",
" <td>0.056584</td>\n",
" <td>-0.036526</td>\n",
" <td>-0.058894</td>\n",
" <td>-0.377070</td>\n",
" <td>-0.643856</td>\n",
" <td>-0.588574</td>\n",
" <td>0.070389</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 375 columns</p>\n",
"</div>"
],
"text/plain": [
" X-1004 X-1008 X-1027 X-1095 X-1119 X-1156 \\\n",
"gene_id \n",
"A1BG 0.367371 -0.321928 0.117695 0.056584 0.000000 0.978196 \n",
"A1BG-AS1 0.367371 -0.321928 0.117695 0.056584 0.000000 0.978196 \n",
"A1CF 0.521051 0.570463 0.007196 0.042644 0.070389 -0.340075 \n",
"\n",
" X-1167 X-1169 X-1172 X-1173 ... X-5694 \\\n",
"gene_id ... \n",
"A1BG 0.028569 2.509696 0.117695 0.007196 ... 0.056584 \n",
"A1BG-AS1 0.028569 2.509696 0.117695 0.007196 ... 0.056584 \n",
"A1CF 0.007196 -0.286304 -0.081614 -0.007232 ... 0.028569 \n",
"\n",
" X-5696 X-5713 X-5717 X-5727 X-5739 X-5808 \\\n",
"gene_id \n",
"A1BG 0.070389 0.097611 0.560715 0.042644 0.049631 -0.007232 \n",
"A1BG-AS1 0.070389 0.097611 0.560715 0.042644 0.049631 -0.007232 \n",
"A1CF -1.043943 -0.340075 0.056584 -0.036526 -0.058894 -0.377070 \n",
"\n",
" X-5959 X-5975 X-6047 \n",
"gene_id \n",
"A1BG 0.049631 -0.483985 0.021480 \n",
"A1BG-AS1 0.049631 -0.483985 0.021480 \n",
"A1CF -0.643856 -0.588574 0.070389 \n",
"\n",
"[3 rows x 375 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert copy number to a log2 ratio relative to two copies: log2(CN / 2).\n",
"# Computed on the whole frame at once instead of an element-wise applymap;\n",
"# dividing by 2.0 forces true division even for integer values under Python 2.\n",
"# NOTE: this cell is not idempotent - re-running it transforms pdx a second time.\n",
"pdx = np.log2(pdx / 2.0)\n",
"pdx.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X-1004</th>\n",
" <th>X-1008</th>\n",
" <th>X-1027</th>\n",
" <th>X-1095</th>\n",
" <th>X-1119</th>\n",
" <th>X-1156</th>\n",
" <th>X-1167</th>\n",
" <th>X-1169</th>\n",
" <th>X-1172</th>\n",
" <th>X-1173</th>\n",
" <th>...</th>\n",
" <th>X-5694</th>\n",
" <th>X-5696</th>\n",
" <th>X-5713</th>\n",
" <th>X-5717</th>\n",
" <th>X-5727</th>\n",
" <th>X-5739</th>\n",
" <th>X-5808</th>\n",
" <th>X-5959</th>\n",
" <th>X-5975</th>\n",
" <th>X-6047</th>\n",
" </tr>\n",
" <tr>\n",
" <th>gene_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A1BG</th>\n",
" <td>0.367371</td>\n",
" <td>-0.321928</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.978196</td>\n",
" <td>0.0</td>\n",
" <td>2.509696</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.560715</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.483985</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1BG-AS1</th>\n",
" <td>0.367371</td>\n",
" <td>-0.321928</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.978196</td>\n",
" <td>0.0</td>\n",
" <td>2.509696</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.560715</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.483985</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A1CF</th>\n",
" <td>0.521051</td>\n",
" <td>0.570463</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.340075</td>\n",
" <td>0.0</td>\n",
" <td>-0.286304</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>-1.043943</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.37707</td>\n",
" <td>-0.643856</td>\n",
" <td>-0.588574</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 375 columns</p>\n",
"</div>"
],
"text/plain": [
" X-1004 X-1008 X-1027 X-1095 X-1119 X-1156 X-1167 \\\n",
"gene_id \n",
"A1BG 0.367371 -0.321928 0.0 0.0 0.0 0.978196 0.0 \n",
"A1BG-AS1 0.367371 -0.321928 0.0 0.0 0.0 0.978196 0.0 \n",
"A1CF 0.521051 0.570463 0.0 0.0 0.0 -0.340075 0.0 \n",
"\n",
" X-1169 X-1172 X-1173 ... X-5694 X-5696 X-5713 \\\n",
"gene_id ... \n",
"A1BG 2.509696 0.0 0.0 ... 0.0 0.000000 0.000000 \n",
"A1BG-AS1 2.509696 0.0 0.0 ... 0.0 0.000000 0.000000 \n",
"A1CF -0.286304 0.0 0.0 ... 0.0 -1.043943 -0.340075 \n",
"\n",
" X-5717 X-5727 X-5739 X-5808 X-5959 X-5975 X-6047 \n",
"gene_id \n",
"A1BG 0.560715 0.0 0.0 0.00000 0.000000 -0.483985 0.0 \n",
"A1BG-AS1 0.560715 0.0 0.0 0.00000 0.000000 -0.483985 0.0 \n",
"A1CF 0.000000 0.0 0.0 -0.37707 -0.643856 -0.588574 0.0 \n",
"\n",
"[3 rows x 375 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pass every log-ratio through clean_logR (defined outside this cell) with the\n",
"# gain/loss thresholds set in the TCGA section. Judging by the displayed result,\n",
"# values lying between the two thresholds become 0 - confirm against clean_logR.\n",
"pdx = pdx.applymap(\n",
"    lambda log_ratio: clean_logR(log_ratio, pos_seg_mean_thr, neg_seg_mean_thr)\n",
")\n",
"pdx.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mapped: 23313 \n",
"\tdirectly via main_mapper 21188 \n",
"\tvia alternative mapper 466 \n",
"\tvia one of multiple synonyms in alternative mapper 926 \n",
"\tLOC 733 \n",
"Unmapped: 200 \n",
"\trecognized symbols without Entrez ID 0 \n",
"\tmultiple query_ids map to the same target_id 0 \n",
"\tquery_ids map to multiple target_ids in the main mapper 0 \n",
"\tquery_ids map to multiple target_ids in the alternative mapper 52 \n",
"\tLOC not found in Entrez 29 \n",
"\tNot found at all: 119\n",
"Warning: query IDs mapping to duplicated target IDs in mapping table: 77\n",
"Warning: query IDs not mapped to any target IDs excluded: 200\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"IDs mapped to multiple target IDs are kept:\n",
" [143872, 286464, 51463, 642826, 653067, 399761, 647060, 284565, 84631, 161176, 341019, 83869, 9502, 83871, 728113, 729438, 4253, 645425, 26165, 6218, 728695, 100132948, 100134869, 84316, 200030, 642658, 100302179, 401508, 119016, 84458, 574445, 26095, 84968, 80759, 3192, 387707, 79741]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X-1004</th>\n",
" <th>X-1008</th>\n",
" <th>X-1027</th>\n",
" <th>X-1095</th>\n",
" <th>X-1119</th>\n",
" <th>X-1156</th>\n",
" <th>X-1167</th>\n",
" <th>X-1169</th>\n",
" <th>X-1172</th>\n",
" <th>X-1173</th>\n",
" <th>...</th>\n",
" <th>X-5694</th>\n",
" <th>X-5696</th>\n",
" <th>X-5713</th>\n",
" <th>X-5717</th>\n",
" <th>X-5727</th>\n",
" <th>X-5739</th>\n",
" <th>X-5808</th>\n",
" <th>X-5959</th>\n",
" <th>X-5975</th>\n",
" <th>X-6047</th>\n",
" </tr>\n",
" <tr>\n",
" <th>gene_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.367371</td>\n",
" <td>-0.321928</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.978196</td>\n",
" <td>0.0</td>\n",
" <td>2.509696</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.560715</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.483985</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.761285</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.500802</td>\n",
" <td>0.0</td>\n",
" <td>0.700440</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.201634</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.739848</td>\n",
" <td>0.0</td>\n",
" <td>0.739848</td>\n",
" <td>0.327687</td>\n",
" <td>-0.494109</td>\n",
" <td>-0.535332</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.761285</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.500802</td>\n",
" <td>0.0</td>\n",
" <td>0.700440</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.201634</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.739848</td>\n",
" <td>0.0</td>\n",
" <td>0.739848</td>\n",
" <td>0.327687</td>\n",
" <td>-0.494109</td>\n",
" <td>-0.535332</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 375 columns</p>\n",
"</div>"
],
"text/plain": [
" X-1004 X-1008 X-1027 X-1095 X-1119 X-1156 X-1167 \\\n",
"gene_id \n",
"1 0.367371 -0.321928 0.0 0.000000 0.0 0.978196 0.0 \n",
"2 0.761285 0.000000 0.0 0.500802 0.0 0.700440 0.0 \n",
"3 0.761285 0.000000 0.0 0.500802 0.0 0.700440 0.0 \n",
"\n",
" X-1169 X-1172 X-1173 ... X-5694 X-5696 X-5713 X-5717 \\\n",
"gene_id ... \n",
"1 2.509696 0.000000 0.0 ... 0.0 0.0 0.0 0.560715 \n",
"2 0.000000 0.201634 0.0 ... 0.0 0.0 0.0 0.739848 \n",
"3 0.000000 0.201634 0.0 ... 0.0 0.0 0.0 0.739848 \n",
"\n",
" X-5727 X-5739 X-5808 X-5959 X-5975 X-6047 \n",
"gene_id \n",
"1 0.0 0.000000 0.000000 0.000000 -0.483985 0.0 \n",
"2 0.0 0.739848 0.327687 -0.494109 -0.535332 0.0 \n",
"3 0.0 0.739848 0.327687 -0.494109 -0.535332 0.0 \n",
"\n",
"[3 rows x 375 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-index pdx from gene symbols to the target IDs of the NCBI mapping tables\n",
"# (presumably Entrez gene IDs - confirm against the mapper module).\n",
"# verbose=True prints the mapping report; with handle_duplicates=\"keep\" the rows\n",
"# whose IDs end up duplicated after mapping are retained and listed on stderr.\n",
"pdx, query2target, not_mapped = apply_mappers(\n",
"    pdx,\n",
"    ncbi_symbols,\n",
"    ncbi_synonyms,\n",
"    verbose=True,\n",
"    handle_duplicates=\"keep\"\n",
")\n",
"pdx.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X-1004</th>\n",
" <th>X-1008</th>\n",
" <th>X-1027</th>\n",
" <th>X-1095</th>\n",
" <th>X-1119</th>\n",
" <th>X-1156</th>\n",
" <th>X-1167</th>\n",
" <th>X-1169</th>\n",
" <th>X-1172</th>\n",
" <th>X-1173</th>\n",
" <th>...</th>\n",
" <th>X-5694</th>\n",
" <th>X-5696</th>\n",
" <th>X-5713</th>\n",
" <th>X-5717</th>\n",
" <th>X-5727</th>\n",
" <th>X-5739</th>\n",
" <th>X-5808</th>\n",
" <th>X-5959</th>\n",
" <th>X-5975</th>\n",
" <th>X-6047</th>\n",
" </tr>\n",
" <tr>\n",
" <th>gene_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>143872</th>\n",
" <td>0.000000</td>\n",
" <td>0.560715</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.330973</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.350497</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>0.000000</td>\n",
" <td>-0.321928</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.514573</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143872</th>\n",
" <td>0.000000</td>\n",
" <td>0.560715</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.330973</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.350497</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>0.000000</td>\n",
" <td>-0.321928</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.514573</td>\n",
" </tr>\n",
" <tr>\n",
" <th>286464</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>-1.321928</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>286464</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>-1.321928</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>286464</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>-1.321928</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51463</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.839960</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>-0.349235</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51463</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.839960</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.438293</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642826</th>\n",
" <td>0.608809</td>\n",
" <td>0.859970</td>\n",
" <td>0.531069</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.871844</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-1.494109</td>\n",
" <td>-0.312939</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642826</th>\n",
" <td>0.608809</td>\n",
" <td>0.859970</td>\n",
" <td>0.531069</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.871844</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-1.494109</td>\n",
" <td>-0.312939</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653067</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653067</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653067</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653067</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399761</th>\n",
" <td>0.531069</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.790772</td>\n",
" <td>...</td>\n",
" <td>0.367371</td>\n",
" <td>-1.535332</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.599462</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399761</th>\n",
" <td>0.531069</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.790772</td>\n",
" <td>...</td>\n",
" <td>0.367371</td>\n",
" <td>-1.535332</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.599462</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>647060</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.312939</td>\n",
" <td>0.000000</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.621488</td>\n",
" <td>0.000000</td>\n",
" <td>-0.377070</td>\n",
" <td>-0.749038</td>\n",
" <td>-0.405451</td>\n",
" <td>0.000000</td>\n",
" <td>-0.395929</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>647060</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.312939</td>\n",
" <td>0.000000</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.621488</td>\n",
" <td>0.000000</td>\n",
" <td>-0.377070</td>\n",
" <td>-0.749038</td>\n",
" <td>-0.405451</td>\n",
" <td>0.000000</td>\n",
" <td>-0.395929</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>284565</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.570463</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.411426</td>\n",
" <td>0.448901</td>\n",
" <td>...</td>\n",
" <td>0.469886</td>\n",
" <td>-0.504305</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>284565</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.310340</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.411426</td>\n",
" <td>0.448901</td>\n",
" <td>...</td>\n",
" <td>0.469886</td>\n",
" <td>-0.524915</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84631</th>\n",
" <td>-0.823677</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>-1.074001</td>\n",
" <td>0.000000</td>\n",
" <td>-0.875672</td>\n",
" <td>-0.678072</td>\n",
" <td>0.378512</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.500802</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84631</th>\n",
" <td>-0.823677</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>-1.074001</td>\n",
" <td>0.000000</td>\n",
" <td>-0.875672</td>\n",
" <td>-0.678072</td>\n",
" <td>0.378512</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.500802</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161176</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.358454</td>\n",
" <td>0.250962</td>\n",
" <td>0.339137</td>\n",
" <td>...</td>\n",
" <td>-0.875672</td>\n",
" <td>0.000000</td>\n",
" <td>-0.483985</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.610433</td>\n",
" <td>0.000000</td>\n",
" <td>0.411426</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161176</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.358454</td>\n",
" <td>0.250962</td>\n",
" <td>0.339137</td>\n",
" <td>...</td>\n",
" <td>-0.875672</td>\n",
" <td>0.000000</td>\n",
" <td>-0.483985</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.610433</td>\n",
" <td>0.000000</td>\n",
" <td>0.411426</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>341019</th>\n",
" <td>0.959770</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.304006</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.666576</td>\n",
" <td>0.000000</td>\n",
" <td>-0.304006</td>\n",
" <td>-1.000000</td>\n",
" <td>-0.610433</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.444184</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>341019</th>\n",
" <td>0.959770</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.304006</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.312939</td>\n",
" <td>0.000000</td>\n",
" <td>-0.304006</td>\n",
" <td>-1.000000</td>\n",
" <td>-0.610433</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.444184</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83869</th>\n",
" <td>-3.000000</td>\n",
" <td>-3.184425</td>\n",
" <td>-3.321928</td>\n",
" <td>-3.000000</td>\n",
" <td>-2.599462</td>\n",
" <td>-3.556393</td>\n",
" <td>-2.785875</td>\n",
" <td>-0.875672</td>\n",
" <td>-2.736966</td>\n",
" <td>-2.514573</td>\n",
" <td>...</td>\n",
" <td>-2.556393</td>\n",
" <td>-3.943416</td>\n",
" <td>-2.785875</td>\n",
" <td>-2.152003</td>\n",
" <td>-3.556393</td>\n",
" <td>-3.251539</td>\n",
" <td>-2.736966</td>\n",
" <td>-3.473931</td>\n",
" <td>-3.120294</td>\n",
" <td>-3.643856</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83869</th>\n",
" <td>-3.000000</td>\n",
" <td>-3.184425</td>\n",
" <td>-3.321928</td>\n",
" <td>-3.000000</td>\n",
" <td>-2.599462</td>\n",
" <td>-3.556393</td>\n",
" <td>-2.785875</td>\n",
" <td>-0.875672</td>\n",
" <td>-2.736966</td>\n",
" <td>-2.514573</td>\n",
" <td>...</td>\n",
" <td>-2.556393</td>\n",
" <td>-3.943416</td>\n",
" <td>-2.785875</td>\n",
" <td>-2.152003</td>\n",
" <td>-3.556393</td>\n",
" <td>-3.251539</td>\n",
" <td>-2.736966</td>\n",
" <td>-3.473931</td>\n",
" <td>-3.120294</td>\n",
" <td>-3.643856</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9502</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9502</th>\n",
" <td>0.000000</td>\n",
" <td>0.378512</td>\n",
" <td>0.000000</td>\n",
" <td>0.550901</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>0.000000</td>\n",
" <td>-0.902389</td>\n",
" <td>1.220330</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.580145</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.577767</td>\n",
" <td>-1.494109</td>\n",
" <td>-1.494109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83871</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.500802</td>\n",
" <td>0.000000</td>\n",
" <td>-0.304006</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>0.327687</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.463947</td>\n",
" <td>-0.358454</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>-0.535332</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100134869</th>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.232661</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.389567</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.463947</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.790772</td>\n",
" <td>0.000000</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100134869</th>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.232661</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.389567</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.463947</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.790772</td>\n",
" <td>0.000000</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84316</th>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>-0.349235</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-0.985645</td>\n",
" <td>-0.358454</td>\n",
" <td>-0.340075</td>\n",
" <td>-0.971431</td>\n",
" <td>-0.545824</td>\n",
" <td>0.310340</td>\n",
" <td>0.438293</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84316</th>\n",
" <td>-0.810966</td>\n",
" <td>-0.689660</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.718088</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.731183</td>\n",
" <td>0.367371</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.434403</td>\n",
" <td>1.060047</td>\n",
" <td>0.490570</td>\n",
" <td>0.000000</td>\n",
" <td>-0.632629</td>\n",
" <td>-0.655172</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200030</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.839960</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.438293</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200030</th>\n",
" <td>1.238787</td>\n",
" <td>1.090853</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.839960</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.438293</td>\n",
" <td>0.000000</td>\n",
" <td>0.618239</td>\n",
" <td>0.201634</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.469886</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642658</th>\n",
" <td>1.049631</td>\n",
" <td>1.358959</td>\n",
" <td>0.000000</td>\n",
" <td>0.599318</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.321928</td>\n",
" <td>0.700440</td>\n",
" <td>0.298658</td>\n",
" <td>...</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>0.769772</td>\n",
" <td>0.232661</td>\n",
" <td>1.121015</td>\n",
" <td>0.831877</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>1.121015</td>\n",
" <td>0.459432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642658</th>\n",
" <td>1.049631</td>\n",
" <td>1.358959</td>\n",
" <td>0.000000</td>\n",
" <td>0.599318</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.321928</td>\n",
" <td>0.700440</td>\n",
" <td>0.298658</td>\n",
" <td>...</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>0.769772</td>\n",
" <td>0.232661</td>\n",
" <td>1.121015</td>\n",
" <td>0.831877</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>1.121015</td>\n",
" <td>0.459432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100302179</th>\n",
" <td>0.778209</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.201634</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.463947</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100302179</th>\n",
" <td>0.778209</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.201634</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.463947</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>401508</th>\n",
" <td>0.232661</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>-0.666576</td>\n",
" <td>1.629939</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-2.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.330973</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>401508</th>\n",
" <td>0.232661</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>-0.666576</td>\n",
" <td>1.629939</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-2.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.260152</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.330973</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119016</th>\n",
" <td>0.378512</td>\n",
" <td>0.570463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.043943</td>\n",
" <td>-0.524915</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.377070</td>\n",
" <td>-0.643856</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119016</th>\n",
" <td>0.448901</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.242977</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84458</th>\n",
" <td>0.389567</td>\n",
" <td>-0.666576</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.610433</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84458</th>\n",
" <td>0.389567</td>\n",
" <td>-0.666576</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.610433</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>574445</th>\n",
" <td>0.490570</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.074001</td>\n",
" <td>0.000000</td>\n",
" <td>0.238787</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>-0.823677</td>\n",
" <td>-0.957356</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>-0.985645</td>\n",
" <td>-0.454032</td>\n",
" <td>0.000000</td>\n",
" <td>-0.599462</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>574445</th>\n",
" <td>0.490570</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.074001</td>\n",
" <td>0.000000</td>\n",
" <td>0.238787</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>-0.823677</td>\n",
" <td>-0.957356</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>-0.985645</td>\n",
" <td>-0.454032</td>\n",
" <td>0.000000</td>\n",
" <td>-0.599462</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26095</th>\n",
" <td>0.531069</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.790772</td>\n",
" <td>...</td>\n",
" <td>0.269033</td>\n",
" <td>-1.043943</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26095</th>\n",
" <td>0.531069</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.251539</td>\n",
" <td>0.000000</td>\n",
" <td>-0.286304</td>\n",
" <td>0.000000</td>\n",
" <td>0.790772</td>\n",
" <td>...</td>\n",
" <td>0.269033</td>\n",
" <td>-1.043943</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.588574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84968</th>\n",
" <td>-0.736966</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.570463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.875672</td>\n",
" <td>-0.678072</td>\n",
" <td>0.378512</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84968</th>\n",
" <td>-0.736966</td>\n",
" <td>-0.321928</td>\n",
" <td>0.000000</td>\n",
" <td>0.570463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.875672</td>\n",
" <td>-0.678072</td>\n",
" <td>0.378512</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.545824</td>\n",
" <td>-0.535332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80759</th>\n",
" <td>-0.556393</td>\n",
" <td>0.599318</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.567041</td>\n",
" <td>-0.556393</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.621488</td>\n",
" <td>-0.454032</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.621488</td>\n",
" <td>0.761285</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80759</th>\n",
" <td>-0.556393</td>\n",
" <td>0.599318</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.567041</td>\n",
" <td>-0.556393</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.621488</td>\n",
" <td>-0.454032</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.621488</td>\n",
" <td>0.761285</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3192</th>\n",
" <td>1.350497</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.220330</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>-0.268817</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.480265</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.330973</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>1.021480</td>\n",
" <td>0.599318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3192</th>\n",
" <td>1.350497</td>\n",
" <td>0.632268</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.220330</td>\n",
" <td>0.000000</td>\n",
" <td>0.448901</td>\n",
" <td>-0.268817</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.480265</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.330973</td>\n",
" <td>0.250962</td>\n",
" <td>0.000000</td>\n",
" <td>1.021480</td>\n",
" <td>0.599318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>387707</th>\n",
" <td>0.389567</td>\n",
" <td>-0.666576</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.610433</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>387707</th>\n",
" <td>0.389567</td>\n",
" <td>-0.666576</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.340075</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>-1.014500</td>\n",
" <td>-0.473931</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.386468</td>\n",
" <td>-0.610433</td>\n",
" <td>-0.577767</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79741</th>\n",
" <td>0.000000</td>\n",
" <td>0.589763</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.242977</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>-0.798366</td>\n",
" <td>-0.985645</td>\n",
" <td>-0.358454</td>\n",
" <td>0.000000</td>\n",
" <td>0.700440</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>-0.588574</td>\n",
" <td>0.459432</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79741</th>\n",
" <td>0.000000</td>\n",
" <td>0.589763</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.242977</td>\n",
" <td>0.000000</td>\n",
" <td>0.207893</td>\n",
" <td>0.000000</td>\n",
" <td>0.220330</td>\n",
" <td>...</td>\n",
" <td>-0.798366</td>\n",
" <td>-0.985645</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.700440</td>\n",
" <td>0.000000</td>\n",
" <td>-0.367732</td>\n",
" <td>-0.588574</td>\n",
" <td>0.459432</td>\n",
" <td>0.269033</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>77 rows × 375 columns</p>\n",
"</div>"
],
"text/plain": [
" X-1004 X-1008 X-1027 X-1095 X-1119 X-1156 \\\n",
"gene_id \n",
"143872 0.000000 0.560715 0.000000 0.000000 0.000000 -0.330973 \n",
"143872 0.000000 0.560715 0.000000 0.000000 0.000000 -0.330973 \n",
"286464 0.000000 0.378512 0.000000 0.550901 0.000000 -0.524915 \n",
"286464 0.000000 0.378512 0.000000 0.550901 0.000000 -0.524915 \n",
"286464 0.000000 0.378512 0.000000 0.550901 0.000000 -0.524915 \n",
"51463 1.238787 1.090853 0.000000 0.000000 0.000000 1.839960 \n",
"51463 1.238787 1.090853 0.000000 0.000000 0.000000 1.839960 \n",
"642826 0.608809 0.859970 0.531069 0.000000 0.000000 0.871844 \n",
"642826 0.608809 0.859970 0.531069 0.000000 0.000000 0.871844 \n",
"653067 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"653067 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"653067 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"653067 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"399761 0.531069 0.718088 0.000000 0.000000 0.000000 -0.251539 \n",
"399761 0.531069 0.718088 0.000000 0.000000 0.000000 -0.251539 \n",
"647060 0.000000 0.000000 0.000000 0.000000 0.000000 -0.312939 \n",
"647060 0.000000 0.000000 0.000000 0.000000 0.000000 -0.312939 \n",
"284565 1.238787 1.090853 0.000000 0.000000 0.000000 1.570463 \n",
"284565 1.238787 1.090853 0.000000 0.000000 0.000000 1.310340 \n",
"84631 -0.823677 -0.524915 0.000000 0.632268 0.000000 -1.074001 \n",
"84631 -0.823677 -0.524915 0.000000 0.632268 0.000000 -1.074001 \n",
"161176 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"161176 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"341019 0.959770 0.000000 0.000000 0.000000 0.000000 -0.304006 \n",
"341019 0.959770 0.000000 0.000000 0.000000 0.000000 -0.304006 \n",
"83869 -3.000000 -3.184425 -3.321928 -3.000000 -2.599462 -3.556393 \n",
"83869 -3.000000 -3.184425 -3.321928 -3.000000 -2.599462 -3.556393 \n",
"9502 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"9502 0.000000 0.378512 0.000000 0.550901 0.000000 -0.434403 \n",
"83871 0.000000 0.000000 0.000000 0.500802 0.000000 -0.304006 \n",
"... ... ... ... ... ... ... \n",
"100134869 0.000000 0.207893 0.000000 0.000000 0.000000 0.000000 \n",
"100134869 0.000000 0.207893 0.000000 0.000000 0.000000 0.000000 \n",
"84316 -0.251539 0.000000 0.000000 0.000000 0.000000 -0.340075 \n",
"84316 -0.810966 -0.689660 0.000000 0.000000 0.000000 0.718088 \n",
"200030 1.238787 1.090853 0.000000 0.000000 0.000000 1.839960 \n",
"200030 1.238787 1.090853 0.000000 0.000000 0.000000 1.839960 \n",
"642658 1.049631 1.358959 0.000000 0.599318 0.000000 0.000000 \n",
"642658 1.049631 1.358959 0.000000 0.599318 0.000000 0.000000 \n",
"100302179 0.778209 0.000000 0.000000 0.000000 0.000000 -0.340075 \n",
"100302179 0.778209 0.000000 0.000000 0.000000 0.000000 -0.340075 \n",
"401508 0.232661 0.000000 0.000000 0.000000 0.250962 0.000000 \n",
"401508 0.232661 0.000000 0.000000 0.000000 0.250962 0.000000 \n",
"119016 0.378512 0.570463 0.000000 0.000000 0.000000 -0.251539 \n",
"119016 0.448901 -0.286304 0.000000 0.000000 0.000000 -0.242977 \n",
"84458 0.389567 -0.666576 0.000000 0.000000 0.000000 -0.340075 \n",
"84458 0.389567 -0.666576 0.000000 0.000000 0.000000 -0.340075 \n",
"574445 0.490570 0.000000 -0.286304 0.000000 0.000000 -1.074001 \n",
"574445 0.490570 0.000000 -0.286304 0.000000 0.000000 -1.074001 \n",
"26095 0.531069 0.000000 0.000000 0.000000 0.000000 -0.251539 \n",
"26095 0.531069 0.000000 0.000000 0.000000 0.000000 -0.251539 \n",
"84968 -0.736966 -0.321928 0.000000 0.570463 0.000000 0.000000 \n",
"84968 -0.736966 -0.321928 0.000000 0.570463 0.000000 0.000000 \n",
"80759 -0.556393 0.599318 0.000000 0.000000 0.000000 0.250962 \n",
"80759 -0.556393 0.599318 0.000000 0.000000 0.000000 0.250962 \n",
"3192 1.350497 0.632268 0.000000 0.000000 0.000000 0.220330 \n",
"3192 1.350497 0.632268 0.000000 0.000000 0.000000 0.220330 \n",
"387707 0.389567 -0.666576 0.000000 0.000000 0.000000 -0.340075 \n",
"387707 0.389567 -0.666576 0.000000 0.000000 0.000000 -0.340075 \n",
"79741 0.000000 0.589763 0.000000 0.000000 0.000000 -0.242977 \n",
"79741 0.000000 0.589763 0.000000 0.000000 0.000000 -0.242977 \n",
"\n",
" X-1167 X-1169 X-1172 X-1173 ... X-5694 \\\n",
"gene_id ... \n",
"143872 0.000000 -0.367732 0.350497 0.000000 ... 0.000000 \n",
"143872 0.000000 -0.367732 0.350497 0.000000 ... 0.000000 \n",
"286464 0.000000 -0.902389 -1.321928 0.367371 ... 0.000000 \n",
"286464 0.000000 -0.902389 -1.321928 0.367371 ... 0.000000 \n",
"286464 0.000000 -0.902389 -1.321928 0.367371 ... 0.000000 \n",
"51463 0.000000 0.448901 0.000000 0.000000 ... 0.469886 \n",
"51463 0.000000 0.448901 0.000000 0.000000 ... 0.438293 \n",
"642826 0.000000 -0.286304 0.000000 0.000000 ... 0.000000 \n",
"642826 0.000000 -0.286304 0.000000 0.000000 ... 0.000000 \n",
"653067 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"653067 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"653067 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"653067 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"399761 0.000000 -0.286304 0.000000 0.790772 ... 0.367371 \n",
"399761 0.000000 -0.286304 0.000000 0.790772 ... 0.367371 \n",
"647060 0.000000 -0.321928 0.000000 0.618239 ... 0.000000 \n",
"647060 0.000000 -0.321928 0.000000 0.618239 ... 0.000000 \n",
"284565 0.000000 0.448901 0.411426 0.448901 ... 0.469886 \n",
"284565 0.000000 0.448901 0.411426 0.448901 ... 0.469886 \n",
"84631 0.000000 -0.875672 -0.678072 0.378512 ... 0.000000 \n",
"84631 0.000000 -0.875672 -0.678072 0.378512 ... 0.000000 \n",
"161176 0.000000 -0.358454 0.250962 0.339137 ... -0.875672 \n",
"161176 0.000000 -0.358454 0.250962 0.339137 ... -0.875672 \n",
"341019 0.000000 -0.349235 0.000000 0.000000 ... 0.000000 \n",
"341019 0.000000 -0.349235 0.000000 0.000000 ... 0.000000 \n",
"83869 -2.785875 -0.875672 -2.736966 -2.514573 ... -2.556393 \n",
"83869 -2.785875 -0.875672 -2.736966 -2.514573 ... -2.556393 \n",
"9502 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"9502 0.000000 -0.902389 1.220330 0.367371 ... 0.000000 \n",
"83871 0.000000 -0.349235 0.327687 0.000000 ... 0.000000 \n",
"... ... ... ... ... ... ... \n",
"100134869 0.000000 0.000000 0.232661 0.000000 ... 0.000000 \n",
"100134869 0.000000 0.000000 0.232661 0.000000 ... 0.000000 \n",
"84316 0.000000 -0.349235 -0.588574 0.000000 ... 0.000000 \n",
"84316 0.000000 0.000000 0.731183 0.367371 ... 0.000000 \n",
"200030 0.000000 0.448901 0.000000 0.000000 ... 0.438293 \n",
"200030 0.000000 0.448901 0.000000 0.000000 ... 0.438293 \n",
"642658 0.000000 0.321928 0.700440 0.298658 ... 0.632268 \n",
"642658 0.000000 0.321928 0.700440 0.298658 ... 0.632268 \n",
"100302179 0.000000 0.000000 0.000000 0.000000 ... 0.000000 \n",
"100302179 0.000000 0.000000 0.000000 0.000000 ... 0.000000 \n",
"401508 0.207893 0.000000 -0.666576 1.629939 ... 0.000000 \n",
"401508 0.207893 0.000000 -0.666576 1.629939 ... 0.000000 \n",
"119016 0.000000 -0.286304 0.000000 0.000000 ... 0.000000 \n",
"119016 0.000000 -0.286304 0.000000 0.000000 ... 0.000000 \n",
"84458 0.000000 -0.367732 0.000000 0.000000 ... 0.000000 \n",
"84458 0.000000 -0.367732 0.000000 0.000000 ... 0.000000 \n",
"574445 0.000000 0.238787 0.000000 0.000000 ... -0.823677 \n",
"574445 0.000000 0.238787 0.000000 0.000000 ... -0.823677 \n",
"26095 0.000000 -0.286304 0.000000 0.790772 ... 0.269033 \n",
"26095 0.000000 -0.286304 0.000000 0.790772 ... 0.269033 \n",
"84968 0.000000 -0.875672 -0.678072 0.378512 ... 0.000000 \n",
"84968 0.000000 -0.875672 -0.678072 0.378512 ... 0.000000 \n",
"80759 0.000000 -0.386468 -0.567041 -0.556393 ... 0.000000 \n",
"80759 0.000000 -0.386468 -0.567041 -0.556393 ... 0.000000 \n",
"3192 0.000000 0.448901 -0.268817 0.000000 ... 0.480265 \n",
"3192 0.000000 0.448901 -0.268817 0.000000 ... 0.480265 \n",
"387707 0.000000 -0.367732 0.000000 0.000000 ... 0.000000 \n",
"387707 0.000000 -0.367732 0.000000 0.000000 ... 0.000000 \n",
"79741 0.000000 0.207893 0.000000 0.000000 ... -0.798366 \n",
"79741 0.000000 0.207893 0.000000 0.220330 ... -0.798366 \n",
"\n",
" X-5696 X-5713 X-5717 X-5727 X-5739 X-5808 \\\n",
"gene_id \n",
"143872 -0.349235 0.000000 -0.321928 -1.014500 -0.588574 0.000000 \n",
"143872 -0.349235 0.000000 -0.321928 -1.014500 -0.588574 0.000000 \n",
"286464 -1.014500 0.718088 0.000000 -1.014500 0.000000 0.000000 \n",
"286464 -1.014500 0.718088 0.000000 -1.014500 0.000000 0.000000 \n",
"286464 -1.014500 0.718088 0.000000 -1.014500 0.000000 0.000000 \n",
"51463 0.000000 0.618239 0.201634 0.000000 -0.260152 -0.349235 \n",
"51463 0.000000 0.618239 0.201634 0.000000 -0.260152 0.000000 \n",
"642826 -1.014500 -0.473931 0.000000 0.618239 0.000000 -0.386468 \n",
"642826 -1.014500 -0.473931 0.000000 0.618239 0.000000 -0.386468 \n",
"653067 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"653067 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"653067 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"653067 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"399761 -1.535332 0.000000 0.000000 0.000000 0.000000 -0.599462 \n",
"399761 -1.535332 0.000000 0.000000 0.000000 0.000000 -0.599462 \n",
"647060 -0.621488 0.000000 -0.377070 -0.749038 -0.405451 0.000000 \n",
"647060 -0.621488 0.000000 -0.377070 -0.749038 -0.405451 0.000000 \n",
"284565 -0.504305 0.618239 0.201634 0.000000 -0.260152 0.000000 \n",
"284565 -0.524915 0.618239 0.201634 0.000000 -0.260152 0.000000 \n",
"84631 -1.014500 0.500802 0.000000 0.000000 0.000000 0.000000 \n",
"84631 -1.014500 0.500802 0.000000 0.000000 0.000000 0.000000 \n",
"161176 0.000000 -0.483985 0.000000 0.000000 -0.610433 0.000000 \n",
"161176 0.000000 -0.483985 0.000000 0.000000 -0.610433 0.000000 \n",
"341019 -0.666576 0.000000 -0.304006 -1.000000 -0.610433 0.000000 \n",
"341019 -0.312939 0.000000 -0.304006 -1.000000 -0.610433 0.000000 \n",
"83869 -3.943416 -2.785875 -2.152003 -3.556393 -3.251539 -2.736966 \n",
"83869 -3.943416 -2.785875 -2.152003 -3.556393 -3.251539 -2.736966 \n",
"9502 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"9502 -1.014500 0.580145 0.000000 -1.014500 0.000000 0.000000 \n",
"83871 0.000000 -0.463947 -0.358454 0.000000 0.000000 0.000000 \n",
"... ... ... ... ... ... ... \n",
"100134869 0.389567 0.000000 0.000000 -0.463947 0.000000 -0.367732 \n",
"100134869 0.389567 0.000000 0.000000 -0.463947 0.000000 -0.367732 \n",
"84316 -0.985645 -0.358454 -0.340075 -0.971431 -0.545824 0.310340 \n",
"84316 0.000000 0.000000 -0.434403 1.060047 0.490570 0.000000 \n",
"200030 0.000000 0.618239 0.201634 0.000000 -0.260152 0.000000 \n",
"200030 0.000000 0.618239 0.201634 0.000000 -0.260152 0.000000 \n",
"642658 0.000000 0.769772 0.232661 1.121015 0.831877 -0.367732 \n",
"642658 0.000000 0.769772 0.232661 1.121015 0.831877 -0.367732 \n",
"100302179 0.000000 0.201634 -0.251539 0.000000 -0.463947 0.000000 \n",
"100302179 0.000000 0.201634 -0.251539 0.000000 -0.463947 0.000000 \n",
"401508 -2.321928 0.000000 0.000000 -0.260152 0.000000 -0.545824 \n",
"401508 -2.321928 0.000000 0.000000 -0.260152 0.000000 -0.545824 \n",
"119016 -1.043943 -0.524915 0.000000 0.000000 0.000000 -0.377070 \n",
"119016 -1.014500 -0.473931 0.000000 0.000000 0.000000 -0.367732 \n",
"84458 -1.014500 -0.473931 0.000000 0.000000 0.000000 -0.386468 \n",
"84458 -1.014500 -0.473931 0.000000 0.000000 0.000000 -0.386468 \n",
"574445 -0.957356 -0.321928 0.000000 -0.985645 -0.454032 0.000000 \n",
"574445 -0.957356 -0.321928 0.000000 -0.985645 -0.454032 0.000000 \n",
"26095 -1.043943 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"26095 -1.043943 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"84968 -1.000000 0.250962 0.000000 0.000000 0.000000 0.000000 \n",
"84968 -1.000000 0.250962 0.000000 0.000000 0.000000 0.000000 \n",
"80759 0.000000 -0.621488 -0.454032 0.000000 0.000000 -0.386468 \n",
"80759 0.000000 -0.621488 -0.454032 0.000000 0.000000 -0.386468 \n",
"3192 0.000000 0.207893 0.000000 0.000000 -0.330973 0.250962 \n",
"3192 0.000000 0.207893 0.000000 0.000000 -0.330973 0.250962 \n",
"387707 -1.014500 -0.473931 0.000000 0.000000 0.000000 -0.386468 \n",
"387707 -1.014500 -0.473931 0.000000 0.000000 0.000000 -0.386468 \n",
"79741 -0.985645 -0.358454 0.000000 0.700440 0.000000 -0.367732 \n",
"79741 -0.985645 0.000000 0.000000 0.700440 0.000000 -0.367732 \n",
"\n",
" X-5959 X-5975 X-6047 \n",
"gene_id \n",
"143872 0.000000 0.000000 -0.514573 \n",
"143872 0.000000 0.000000 -0.514573 \n",
"286464 0.000000 -1.494109 -1.494109 \n",
"286464 0.000000 -1.494109 -1.494109 \n",
"286464 0.000000 -1.494109 -1.494109 \n",
"51463 0.000000 0.469886 0.000000 \n",
"51463 0.000000 0.469886 0.000000 \n",
"642826 -1.494109 -0.312939 0.000000 \n",
"642826 -1.494109 -0.312939 0.000000 \n",
"653067 -0.577767 -1.494109 -1.494109 \n",
"653067 -0.577767 -1.494109 -1.494109 \n",
"653067 -0.577767 -1.494109 -1.494109 \n",
"653067 -0.577767 -1.494109 -1.494109 \n",
"399761 -0.588574 0.000000 0.000000 \n",
"399761 -0.588574 0.000000 0.000000 \n",
"647060 -0.395929 0.000000 0.000000 \n",
"647060 -0.395929 0.000000 0.000000 \n",
"284565 0.000000 0.469886 0.000000 \n",
"284565 0.000000 0.469886 0.000000 \n",
"84631 -0.545824 -0.545824 -0.535332 \n",
"84631 -0.545824 -0.545824 -0.535332 \n",
"161176 0.411426 0.000000 0.000000 \n",
"161176 0.411426 0.000000 0.000000 \n",
"341019 0.000000 -0.444184 0.000000 \n",
"341019 0.000000 -0.444184 0.000000 \n",
"83869 -3.473931 -3.120294 -3.643856 \n",
"83869 -3.473931 -3.120294 -3.643856 \n",
"9502 -0.577767 -1.494109 -1.494109 \n",
"9502 -0.577767 -1.494109 -1.494109 \n",
"83871 0.448901 -0.535332 0.000000 \n",
"... ... ... ... \n",
"100134869 0.790772 0.000000 -0.535332 \n",
"100134869 0.790772 0.000000 -0.535332 \n",
"84316 0.438293 -0.473931 0.000000 \n",
"84316 -0.632629 -0.655172 0.000000 \n",
"200030 0.000000 0.469886 0.000000 \n",
"200030 0.000000 0.469886 0.000000 \n",
"642658 0.000000 1.121015 0.459432 \n",
"642658 0.000000 1.121015 0.459432 \n",
"100302179 0.000000 0.000000 0.000000 \n",
"100302179 0.000000 0.000000 0.000000 \n",
"401508 -0.330973 0.000000 0.000000 \n",
"401508 -0.330973 0.000000 0.000000 \n",
"119016 -0.643856 0.000000 0.000000 \n",
"119016 -0.577767 0.000000 0.000000 \n",
"84458 -0.610433 -0.577767 0.000000 \n",
"84458 -0.610433 -0.577767 0.000000 \n",
"574445 -0.599462 -0.577767 0.000000 \n",
"574445 -0.599462 -0.577767 0.000000 \n",
"26095 -0.588574 0.000000 0.000000 \n",
"26095 -0.588574 0.000000 0.000000 \n",
"84968 -0.545824 -0.545824 -0.535332 \n",
"84968 -0.545824 -0.545824 -0.535332 \n",
"80759 -0.621488 0.761285 0.000000 \n",
"80759 -0.621488 0.761285 0.000000 \n",
"3192 0.000000 1.021480 0.599318 \n",
"3192 0.000000 1.021480 0.599318 \n",
"387707 -0.610433 -0.577767 0.000000 \n",
"387707 -0.610433 -0.577767 0.000000 \n",
"79741 -0.588574 0.459432 0.000000 \n",
"79741 -0.588574 0.459432 0.269033 \n",
"\n",
"[77 rows x 375 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# unique gene IDs that occur more than once in the PDX index, then show all their rows\n",
"dups = list(set(pdx.index[pdx.index.duplicated(keep=False)].values))\n",
"pdx.loc[dups, :]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"37 duplicated IDs in 77 rows found.\n",
"duplicate rows removed due to low correlation of duplicated profiles 4\n",
"Merged 73 duplicated rows into 35 rows\n"
]
}
],
"source": [
"# Most of these duplicates correspond to genes merged in the current assembly, e.g. gene and gene-AS.\n",
"pdx = handle_dups(pdx, corr_thr=0.75)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Order the columns by label (equivalent to the former double transpose, without copying the\n",
"# matrix twice) and write the PDX table as a tab-separated file.\n",
"pdx = pdx.sort_index(axis=1)\n",
"# preprocessed_dir already ends with a separator; os.path.join avoids a doubled '/'\n",
"pdx.to_csv(os.path.join(preprocessed_dir, \"PDX.Segment_Mean.CNA.tsv\"),\n",
"           sep = \"\\t\",header=True,index=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Evaluation of the results\n",
"1). How many genes are shared by all four datasets?\n",
"\n",
"2). Do CNA profiles of the same cell line from GDSC and CCLE correlate?\n",
"\n",
"3). Do CNA profiles of the same cancer type from TCGA and PDX look similar?\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"ename": "IOError",
"evalue": "File /home/olya/SFU/Hossein/v1/preprocessed/CNA/BRCA.Segment_Mean.CNA.tsv does not exist",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-39-1f476096b0ec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m### 1). How many common genes between four datasets?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# we take BRCA from TCGA because\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtcga\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpreprocessed_dir\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"BRCA\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\".Segment_Mean.CNA.tsv\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\\t\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;31m#print(tcga.head(3))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mgdsc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpreprocessed_dir\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"GDSC\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\".Segment_Mean.CNA.tsv\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\\t\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 676\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 785\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 786\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 787\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 788\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1012\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1014\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1015\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1016\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1706\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'usecols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1708\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1709\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1710\u001b[0m \u001b[0mpassed_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mIOError\u001b[0m: File /home/olya/SFU/Hossein/v1/preprocessed/CNA/BRCA.Segment_Mean.CNA.tsv does not exist"
]
}
],
"source": [
"### 1). How many common genes between four datasets?\n",
"# BRCA is used as the TCGA cohort here (TODO: document why this cohort was chosen)\n",
"cna_suffix = \".Segment_Mean.CNA.tsv\"\n",
"tcga = pd.read_csv(preprocessed_dir+\"BRCA\"+cna_suffix, sep = \"\\t\", index_col=0)\n",
"gdsc = pd.read_csv(preprocessed_dir+\"GDSC\"+cna_suffix, sep = \"\\t\", index_col=0)\n",
"# row labels (first column of each file) shared by the two tables loaded above\n",
"# TODO: intersect with the CCLE and PDX tables as well to cover all four datasets\n",
"common_genes = tcga.index.intersection(gdsc.index)\n",
"print(\"genes in TCGA-BRCA:\", tcga.shape[0], \"GDSC:\", gdsc.shape[0], \"shared:\", len(common_genes))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### distribution of logR values in GDSC and CCLE \n",
"# NOTE(review): relies on `df` and `cna_table` left in the kernel by earlier cells;\n",
"# presumably these are the GDSC and CCLE gene-level tables -- confirm before Run All.\n",
"# Flatten each table into one 1-D array of values (vectorized; a histogram does not\n",
"# depend on the order of the values, so no sorting is needed).\n",
"cn_values_gdsc = df.values.ravel()\n",
"cn_values_ccle = cna_table.values.ravel()\n",
"\n",
"# side-by-side normalized histograms over the same range and binning\n",
"fig, axes = plt.subplots(1, 2, figsize=(20,5))\n",
"for ax, title, cn_values in zip(axes, [\"GDSC\", \"CCLE\"], [cn_values_gdsc, cn_values_ccle]):\n",
"    ax.hist(cn_values, bins=100, density=True, range=(-5,4))\n",
"    ax.set_title(title)\n",
"    ax.set_xlabel(\"logR\")\n",
"    ax.set_ylabel(\"density\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}