[d90d15]: / preprocessing_scr / mutations.ipynb

Download this file

15486 lines (15485 with data), 509.2 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import pandas as pd\n",
    "import os,sys\n",
    "import pybedtools as pbt\n",
    "from StringIO import StringIO\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import numpy as np\n",
    "import re\n",
    "from mapper import expand, parse_mapping_table, apply_mappers\n",
    "\n",
    "root_dir = \"/home/olya/SFU/Hossein/v2/\"\n",
    "preprocessed_dir = root_dir+\"preprocessed/mutations/\"\n",
    "w = 0.05\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Converting mutations to gene-level score (-1;1)\n",
    "* differentiate TSG and OG\n",
    "    \n",
    "    We use two lists of TSG and OG and their driver mutations obtained from OncoKB:\n",
    "    \n",
    "\n",
    "* differentiate mutations by their potential effect\n",
    "    \n",
    "    We take into account the potential effect of every mutation in each altered gene, proposed based on \n",
    "        - type of protein change\n",
    "        - proximity to known driver mutations\n",
    "\n",
    "#### TSG potential loss-of-function \n",
    "    * -1 if any truncating variant is found \n",
    "    * -1/(x+1) if any variant is found at distance x from a database LoF variant\n",
    "#### OG potential gain-of-function \n",
    "    * +1 if any variant match the position of database GoF variant \n",
    "    * +1/(x+1) if any variant found at the distance x of database GoF\n",
    "    * However, some genes are annotated as both TSG and OG. In `mutations2score` the TSG rule is checked first, so such a gene gets -1 if it has a truncating variant; otherwise it is scored by its distance to the nearest hotspot, like any other TSG. \n",
    "    * Many OG are activated via overexpression or amplification rather than via somatic mutations. We exclude such OG from consideration, because it is impossible to propose the effect of a mutation in such OGs. Every OG that is kept has at least one mutation hotspot.\n",
    "    \n",
    "### Sources of mutations\n",
    "We need:\n",
    "    * a list of genes labelled as TSG or OG \n",
    "    * a list of driver mutations or hotspots with protein coordinates \n",
    "    \n",
    "1). COSMIC CGC - 1). list of driver genes, classified as TSG, OG or fusions; Tiers 1 and 2  2). list of mutations in these genes in every sample included in COSMIC\n",
    "\n",
    "2). OncoKB - curated list of 1) driver genes and 2) mutations with effects.\n",
    "\n",
    "3). Bailey et al. 2018 Cell \"Comprehensive Characterization\n",
    "of Cancer Driver Genes and Mutations\" - they report only driver genes with any mutations or hotspots. \n",
    "\n",
    "4). Iorio et al. 2016 - \n",
    "\n",
    "5). cBioPortal - 1D https://www.cancerhotspots.org and 3D hotspots https://www.3dhotspots.org.\n",
    "\n",
    "\n",
    "# Workflow\n",
    "\n",
    "1). Mutations - union of 1D and 3D hotspots \n",
    "2). List of genes:\n",
    "    * TSG: union of Bailey et al. 2018 and OncoKB\n",
    "    * OG: genes from union of Bailey et al., 2018 and OncoKB OG lists which had any mutation hotspot\n",
    "3). Mutation classification by effect on protein:\n",
    "    * potential LoF: frameshift,nonsense,ess_splice,stop_lost\n",
    "    * potential GoF: missense, inframe indels\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OncoKB OG: 214 TSG: 246 OG_TSG: 25\n"
     ]
    }
   ],
   "source": [
    "# Source: http://oncokb.org/#/cancerGenes\n",
    "OncoKB_mut = pd.read_csv(\"../../hotspots/OncoKB/allAnnotatedVariants.tsv\",sep =\"\\t\")\n",
    "OncoKB_gtype = pd.read_csv(\"../../hotspots/OncoKB/OncoKB_gene_type.txt\",sep =\"\\t\")[['Hugo Symbol ', u'Oncogene /TSG ']]\n",
    "# Role -> raw label as written in the 'Oncogene /TSG ' column (trailing spaces included).\n",
    "# Inside the loop each label is replaced by the set of gene symbols carrying it.\n",
    "OncoKB = {\"OG\":'Oncogene ',\"TSG\":'TSG ',\"OG_TSG\":'Oncogene,TSG '}\n",
    "for key in OncoKB.keys():\n",
    "    role_mask = OncoKB_gtype['Oncogene /TSG '] == OncoKB[key]\n",
    "    role_symbols = OncoKB_gtype.loc[role_mask, 'Hugo Symbol '].values\n",
    "    # symbols are stripped of trailing whitespace before being stored\n",
    "    OncoKB[key] = set(map(str.rstrip, set(role_symbols)))\n",
    "\n",
    "# genes labelled with both roles are counted in the OG set and in the TSG set\n",
    "OncoKB[\"OG\"] |= OncoKB[\"OG_TSG\"]\n",
    "OncoKB[\"TSG\"] |= OncoKB[\"OG_TSG\"]\n",
    "print(\"OncoKB OG:\",len(OncoKB[\"OG\"]),\"TSG:\",len(OncoKB[\"TSG\"]),\"OG_TSG:\",len(OncoKB[\"OG_TSG\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Algorithm:\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gene_id - Entrez Gene ID \n",
    "# df - all mutations in sample\n",
    "#    columns: \"aa_pos\",\"function\", \"Entrez_gene_ID\"\n",
    "# hotspots - table of known hotspots with columns \"Entrez_gene_ID\" and \"aa_pos\"\n",
    "\n",
    "def distance_to_hotspot(gene_id,df_gene, hotspots):\n",
    "    '''Return the minimal distance between a point mutation in the gene and a known hotspot.\n",
    "\n",
    "    gene_id  - value matched against hotspots[\"Entrez_gene_ID\"]\n",
    "    df_gene  - mutations of one gene; columns \"function\" and \"aa_pos\",\n",
    "               where \"aa_pos\" holds (start, end) tuples\n",
    "    hotspots - known hotspots; columns \"Entrez_gene_ID\" and \"aa_pos\" ((start, end) tuples)\n",
    "\n",
    "    The distance between a mutation and a hotspot is the gap between the two\n",
    "    closed intervals: 0 when they overlap (e.g. a substitution inside an indel\n",
    "    hotspot range), otherwise the distance between their nearest end points.\n",
    "    Returns 100000 when the gene has no point mutation or no hotspot.\n",
    "    '''\n",
    "    # consider only GoF because LoF affect the whole protein and not a certain point\n",
    "    point_muts = df_gene.loc[df_gene[\"function\"] == \"point_mutation\"]\n",
    "    gene_hotspots = hotspots.loc[hotspots[\"Entrez_gene_ID\"] == gene_id, :]\n",
    "    dist = 100000  # sentinel: \"no hotspot nearby\"\n",
    "    for s,e in set(point_muts[\"aa_pos\"].values):\n",
    "        m_lo, m_hi = min(s,e), max(s,e)\n",
    "        for hs, he in set(gene_hotspots[\"aa_pos\"].values):\n",
    "            h_lo, h_hi = min(hs,he), max(hs,he)\n",
    "            # comparing only end points would give a positive distance to a\n",
    "            # mutation lying inside a range hotspot; the interval gap gives 0\n",
    "            gap = max(0, h_lo - m_hi, m_lo - h_hi)\n",
    "            dist = min(dist, gap)\n",
    "    return dist\n",
    "\n",
    "def mutations2score(df,TSG,OG,hotspots, w=0.05,verbose = False):\n",
    "    '''Collapse all mutations of one sample into one score per gene.\n",
    "\n",
    "    df       - mutations with columns \"Entrez_gene_ID\", \"function\" and \"aa_pos\"\n",
    "    TSG, OG  - containers of gene IDs treated as tumor suppressors / oncogenes;\n",
    "               TSG membership is tested first\n",
    "    hotspots - hotspot table passed on to distance_to_hotspot\n",
    "    w        - score magnitude for a non-truncating mutation in an unannotated gene\n",
    "    verbose  - report oncogenes carrying both point and truncating mutations on stderr\n",
    "\n",
    "    Returns a pd.Series indexed by gene ID:\n",
    "      TSG:   -1 with a truncating mutation, else -max(1/(dist+1), 2w)\n",
    "      OG:    -1 without point mutations; max(1/(dist+1), w) with point mutations only;\n",
    "             with both kinds 1/(dist+1) if that exceeds 2w, else -1\n",
    "      other: -2w with a truncating mutation, else -w\n",
    "    '''\n",
    "    scores = {}\n",
    "    for gene_id in list(set(df[\"Entrez_gene_ID\"].values)):\n",
    "        gene_rows = df.loc[df[\"Entrez_gene_ID\"] == gene_id, :]\n",
    "        functions_seen = set(gene_rows[\"function\"].values)\n",
    "        has_truncating = \"truncating_mutation\" in functions_seen\n",
    "        has_point = \"point_mutation\" in functions_seen\n",
    "\n",
    "        # --- tumor suppressor ---\n",
    "        if gene_id in TSG:\n",
    "            if has_truncating:\n",
    "                # any truncating mutation counts as full loss of function\n",
    "                scores[gene_id] = -1\n",
    "            else:\n",
    "                # not truncating: use the distance to the closest hotspot, if any;\n",
    "                # the magnitude is at least 2w, the score of a LoF in an unannotated gene\n",
    "                dist = distance_to_hotspot(gene_id, gene_rows, hotspots)\n",
    "                scores[gene_id] = -max(1.0/(dist+1),2*w)\n",
    "            continue\n",
    "\n",
    "        # --- gene of unknown significance ---\n",
    "        if gene_id not in OG:\n",
    "            scores[gene_id] = -2.0*w if has_truncating else -w\n",
    "            continue\n",
    "\n",
    "        # --- oncogene ---\n",
    "        if not has_point:\n",
    "            # only LoF mutations: the oncogene is likely not functional, or it is\n",
    "            # turned on after truncation; -1 separates it from neutral mutations\n",
    "            scores[gene_id] = -1\n",
    "            continue\n",
    "        dist = distance_to_hotspot(gene_id, gene_rows, hotspots)\n",
    "        proximity = 1.0/(dist+1)\n",
    "        if not has_truncating:\n",
    "            # non-hotspot mutation in an oncogene without LoF - small positive value\n",
    "            scores[gene_id] = max(proximity,w)\n",
    "            continue\n",
    "        if verbose:\n",
    "            print(\"GoF at dist\",dist,\"from hotspot in Oncogene with GoF\",file = sys.stderr)\n",
    "            print(gene_rows,file = sys.stderr)\n",
    "        if proximity > 2*w:\n",
    "            # a mutation close to a hotspot is likely a driver despite the LoF;\n",
    "            # the second copy of the gene is probably the broken one\n",
    "            # (this happens in sample 905968 in the PIK3CA gene, 5290)\n",
    "            scores[gene_id] = proximity\n",
    "        else:\n",
    "            # otherwise -1, to separate it from neutral mutations, because an\n",
    "            # oncogene might be turned on after domain truncation\n",
    "            scores[gene_id] = -1\n",
    "\n",
    "    return pd.Series(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bailey_2018 OG: 115 TSG: 162 both: 18\n"
     ]
    }
   ],
   "source": [
    "# Driver genes from Bailey et al. 2018 (Table S1), split by the 20/20+ role prediction.\n",
    "bailye_drivers = pd.read_excel(\"../../hotspots/Bailey_2018.mmc1.xlsx\", sheet_name=\"Table S1\", \n",
    "                               skiprows = 3)\n",
    "# .copy(): the edits below must act on an independent frame, not on a column slice\n",
    "bailye_drivers = bailye_drivers[[\"Gene\",\"Tumor suppressor or oncogene prediction (by 20/20+)\"]].copy()\n",
    "bailye_drivers.columns = [\"gene\", \"role\"]\n",
    "bailye_drivers = bailye_drivers.fillna(\"NA\")\n",
    "# \"possible oncogene\" / \"possible tsg\" are merged with the plain labels\n",
    "bailye_drivers[\"role\"] = bailye_drivers[\"role\"].apply(lambda x: x.replace(\"possible \",\"\"))\n",
    "# one row per distinct (gene, role) pair\n",
    "bailye_drivers = bailye_drivers.drop_duplicates()\n",
    "bailye = {\n",
    "    \"OG\": set(bailye_drivers.loc[bailye_drivers[\"role\"] == \"oncogene\", \"gene\"].values),\n",
    "    \"TSG\": set(bailye_drivers.loc[bailye_drivers[\"role\"] == \"tsg\", \"gene\"].values),\n",
    "}\n",
    "# genes that appear with the oncogene role in one row and the tsg role in another\n",
    "bailye[\"OG_TSG\"] = bailye[\"OG\"].intersection(bailye[\"TSG\"])\n",
    "print(\"Bailey_2018 OG:\",len(bailye[\"OG\"]),\"TSG:\",len(bailye[\"TSG\"]),\"both:\", len(bailye[\"OG_TSG\"]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hotspots "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1D mutations 3004 495\n",
      "1D hotspots 1024 55\n",
      "3D mutations 9512\n",
      "3D hotspots  3395\n",
      "genes with hotspot 688\n",
      "unique hotspots 4258\n"
     ]
    }
   ],
   "source": [
    "### 1D hotspots: single-codon substitutions and indels\n",
    "hotspots2D_subst = pd.read_excel(\"../../hotspots/hotspots_v2.xls\",\"24K_hotspots_cancerhotspots-org_singlecodon\")\n",
    "hotspots2D_indels = pd.read_excel(\"../../hotspots/hotspots_v2.xls\",\"24K_hotspots_cancerhotspots-org_indels\")\n",
    "print(\"1D mutations\",hotspots2D_subst.shape[0],hotspots2D_indels.shape[0] )\n",
    "# rows whose position contains \"splice\" are dropped\n",
    "hotspots2D_subst = hotspots2D_subst.loc[~hotspots2D_subst[\"Amino_Acid_Position\"].str.contains(\"splice\")]\n",
    "hotspots2D_subst = hotspots2D_subst[[\"Hugo_Symbol\",\"Amino_Acid_Position\"]].drop_duplicates()\n",
    "hotspots2D_indels = hotspots2D_indels[[\"Hugo_Symbol\",\"Amino_Acid_Position\"]].drop_duplicates()\n",
    "print(\"1D hotspots\",hotspots2D_subst.shape[0],hotspots2D_indels.shape[0] )\n",
    "cBioPortal_muts = pd.concat([hotspots2D_subst,hotspots2D_indels],axis =0)\n",
    "\n",
    "# Positions are either a single residue (\"123\") or a range (\"100-110\").\n",
    "# Both become (start, end) tuples in \"aa_pos\". The two subsets are copied so\n",
    "# that the new column is written to independent frames, not to slices.\n",
    "is_range = cBioPortal_muts[\"Amino_Acid_Position\"].str.contains(\"-\")\n",
    "cBioPortal_muts_ = cBioPortal_muts.loc[is_range,:].copy()\n",
    "cBioPortal_muts = cBioPortal_muts.loc[~is_range,:].copy()\n",
    "cBioPortal_muts[\"Amino_Acid_Position\"] = cBioPortal_muts[\"Amino_Acid_Position\"].apply(int)\n",
    "cBioPortal_muts[\"aa_pos\"] = cBioPortal_muts[\"Amino_Acid_Position\"].apply(lambda x : (x,x))\n",
    "cBioPortal_muts_[\"aa_pos\"] = cBioPortal_muts_[\"Amino_Acid_Position\"].apply(lambda x : tuple(map(int,x.split(\"-\"))))\n",
    "cBioPortal_muts = pd.concat([cBioPortal_muts_,cBioPortal_muts],axis = 0)\n",
    "\n",
    "### 3D hotspots\n",
    "cBioPortal_3D = pd.read_excel(\"../../hotspots/3d_hotspots.xls\",\"Table S5\")\n",
    "cBioPortal_3D = cBioPortal_3D[[\"Gene\",\"Amino_Acid_Position\"]]\n",
    "print(\"3D mutations\",cBioPortal_3D.shape[0])\n",
    "cBioPortal_3D = cBioPortal_3D.drop_duplicates().copy()\n",
    "print(\"3D hotspots \",cBioPortal_3D.shape[0])\n",
    "cBioPortal_3D[\"Amino_Acid_Position\"] = cBioPortal_3D[\"Amino_Acid_Position\"].apply(int)\n",
    "cBioPortal_3D[\"aa_pos\"] = cBioPortal_3D[\"Amino_Acid_Position\"].apply(lambda x : (x,x))\n",
    "cBioPortal_3D = cBioPortal_3D.rename(columns={\"Gene\":'Hugo_Symbol'})\n",
    "\n",
    "### union of 1D and 3D hotspots, keeping only gene symbol and position\n",
    "cBioPortal_muts = cBioPortal_muts[[\"Hugo_Symbol\",\"aa_pos\"]]\n",
    "cBioPortal_3D = cBioPortal_3D[[\"Hugo_Symbol\",\"aa_pos\"]]\n",
    "cBioPortal_muts = pd.concat([cBioPortal_3D, cBioPortal_muts],axis=0)\n",
    "cBioPortal_muts = cBioPortal_muts.sort_values([\"Hugo_Symbol\"],ascending = True)\n",
    "\n",
    "cBioPortal_genes = set(cBioPortal_muts[\"Hugo_Symbol\"].values)\n",
    "print(\"genes with hotspot\",len(cBioPortal_genes))\n",
    "cBioPortal_muts = cBioPortal_muts[[\"Hugo_Symbol\",\"aa_pos\"]].drop_duplicates()\n",
    "print(\"unique hotspots\",cBioPortal_muts.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "genes with and without mutations\n",
      "OG oncokb 110 / 214 Bailye_2018 81 / 115\n",
      "TSG oncokb 123 / 246 Bailye_2018 81 / 162\n",
      "OG_TSG oncokb 15 / 25 Bailye_2018 14 / 18\n"
     ]
    }
   ],
   "source": [
    "# For every gene role: how many of the listed genes carry at least one cBioPortal hotspot.\n",
    "print(\"genes with and without mutations\")\n",
    "for kind in (\"OG\",\"TSG\",\"OG_TSG\"):\n",
    "    oncokb = OncoKB[kind] & cBioPortal_genes\n",
    "    b = bailye[kind] & cBioPortal_genes\n",
    "    oncokb_total, b_total = len(OncoKB[kind]), len(bailye[kind])\n",
    "    print(kind,\"oncokb\", len(oncokb),\"/\",oncokb_total,\"Bailye_2018\", len(b),\"/\",b_total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OG oncokb 214 Bailye_2018 115 union 277 mutated: 144\n",
      "TSG oncokb 246 Bailye_2018 162 union 316 mutated: 136\n",
      "OG_TSG oncokb 25 Bailye_2018 18 union 41 mutated: 28\n"
     ]
    }
   ],
   "source": [
    "# For every gene role: list sizes, size of the OncoKB/Bailey union, and how many\n",
    "# genes of that union have a cBioPortal hotspot.\n",
    "for kind in (\"OG\",\"TSG\",\"OG_TSG\"):\n",
    "    oncokb = OncoKB[kind]\n",
    "    b = bailye[kind]\n",
    "    either = b | oncokb\n",
    "    print(kind,\"oncokb\", len(oncokb),\"Bailye_2018\", len(b),\"union\",len(either),\"mutated:\",len(either & cBioPortal_genes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TSG: 316 with hotspots: 136 \n",
      "OG with hotspots: 144\n",
      "Hotspots (all from cBioPortal): 4258 \n",
      "\ton OG: 1116 \n",
      "\ton TSG: 1012\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hugo_Symbol</th>\n",
       "      <th>aa_pos</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(206, 206)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1307</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(258, 258)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1128</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(328, 328)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2542</th>\n",
       "      <td>ACVR1B</td>\n",
       "      <td>(398, 398)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2056</th>\n",
       "      <td>ACVR1B</td>\n",
       "      <td>(401, 401)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Hugo_Symbol      aa_pos\n",
       "1456       ACVR1  (206, 206)\n",
       "1307       ACVR1  (258, 258)\n",
       "1128       ACVR1  (328, 328)\n",
       "2542      ACVR1B  (398, 398)\n",
       "2056      ACVR1B  (401, 401)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# TSG - not necessarily with a hotspot: union of OncoKB and Bailey_2018\n",
    "TSG = OncoKB[\"TSG\"].union(bailye[\"TSG\"])\n",
    "# OG - must have a hotspot \n",
    "OG = OncoKB[\"OG\"].union(bailye[\"OG\"]).intersection(cBioPortal_genes)\n",
    "print(\"TSG:\",len(TSG),\"with hotspots:\",len(TSG.intersection(cBioPortal_genes)),\n",
    "      \"\\nOG with hotspots:\",len(OG))\n",
    "# Hotspots located in OG or TSG genes.\n",
    "# .copy() makes this an independent frame, so that adding a column to it\n",
    "# (the Entrez gene ID) does not write into a slice of cBioPortal_muts.\n",
    "hotspots = cBioPortal_muts.loc[cBioPortal_muts[\"Hugo_Symbol\"].isin(OG|TSG),:].copy()\n",
    "print(\"Hotspots (all from cBioPortal):\", cBioPortal_muts.shape[0],\n",
    "      \"\\n\\ton OG:\", hotspots.loc[hotspots[\"Hugo_Symbol\"].isin(OG)].shape[0],\n",
    "      \"\\n\\ton TSG:\", hotspots.loc[hotspots[\"Hugo_Symbol\"].isin(TSG)].shape[0])\n",
    "hotspots.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### map gene name to Entrez gene ID "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Ok: no empty rows detected\n",
      "Ok: no duplicated pairs detected\n",
      "Ok: All Symbol rows are not empty.\n",
      "Ok: All Symbol are mapped to GeneID\n",
      "16 Symbol mapped to multiple GeneID\n",
      "Ok: All GeneID are unique\n",
      "59266 Symbol can be mapped directly to GeneID\n"
     ]
    }
   ],
   "source": [
    "# NCBI gene_info table -> Symbol-to-GeneID mapper.\n",
    "ncbi_columns = [\"#tax_id\",\"GeneID\",\"Symbol\",\"Synonyms\",\"type_of_gene\"]\n",
    "NCBI = pd.read_csv(root_dir+\"Homo_sapiens.gene_info\",sep = \"\\t\")[ncbi_columns]\n",
    "# keep rows with tax_id 9606 whose gene type is not \"unknown\"\n",
    "is_tax_9606 = NCBI[\"#tax_id\"] == 9606\n",
    "has_known_type = NCBI[\"type_of_gene\"] != \"unknown\"\n",
    "NCBI = NCBI.loc[is_tax_9606 & has_known_type]\n",
    "ncbi_symbols = parse_mapping_table(NCBI, \"Symbol\",\"GeneID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Ok: no empty rows detected\n",
      "Ok: no duplicated pairs detected\n",
      "Ok: All Synonyms rows are not empty.\n",
      "Ok: All Synonyms are mapped to GeneID\n",
      "3145 Synonyms mapped to multiple GeneID\n",
      "49179 different Synonyms mapped to the same GeneID\n",
      "10839 Synonyms can be mapped directly to GeneID\n"
     ]
    }
   ],
   "source": [
    "# Synonyms-to-GeneID mapper. expand() comes from mapper.py; presumably it splits the\n",
    "# \"|\"-separated Synonyms field into separate rows - TODO confirm against mapper.py\n",
    "ncbi_synonyms = parse_mapping_table(\n",
    "    expand(NCBI[[\"Synonyms\",\"GeneID\"]],column=\"Synonyms\",sep=\"|\"),\n",
    "    \"Synonyms\",\"GeneID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mapped: 421 \n",
      "\tdirectly via main_mapper 414 \n",
      "\tvia alternative mapper 2 \n",
      "\tvia one of multiple synonyms in alternative mapper 5 \n",
      "\tLOC 0 \n",
      "Unmapped: 0 \n",
      "\trecognized symbols without Entrez ID 0 \n",
      "\tmultiple query_ids map to the same target_id 0 \n",
      "\tquery_ids map to multiple target_ids in the main mapper 0 \n",
      "\tquery_ids map to multiple target_ids in the alternative mapper 0 \n",
      "\tLOC not found in Entrez 0 \n",
      "\tNot found at all: 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IDs mapped to multiple target IDs are kept:\n",
      " []\n",
      "/home/olya/miniconda2/lib/python2.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hugo_Symbol</th>\n",
       "      <th>aa_pos</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(206, 206)</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1307</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(258, 258)</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1128</th>\n",
       "      <td>ACVR1</td>\n",
       "      <td>(328, 328)</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2542</th>\n",
       "      <td>ACVR1B</td>\n",
       "      <td>(398, 398)</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2056</th>\n",
       "      <td>ACVR1B</td>\n",
       "      <td>(401, 401)</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Hugo_Symbol      aa_pos  Entrez_gene_ID\n",
       "1456       ACVR1  (206, 206)              90\n",
       "1307       ACVR1  (258, 258)              90\n",
       "1128       ACVR1  (328, 328)              90\n",
       "2542      ACVR1B  (398, 398)              91\n",
       "2056      ACVR1B  (401, 401)              91"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Map every OG/TSG gene symbol to an Entrez gene ID (symbols first, synonyms as fallback).\n",
    "tsg_og_gene_names = pd.DataFrame.from_dict({\"genes\":{gene: gene for gene in OG | TSG}})\n",
    "tsg_og_gene_names, query2target, not_mapped = apply_mappers(tsg_og_gene_names, ncbi_symbols, ncbi_synonyms, verbose = True,handle_duplicates = \"keep\")\n",
    "OG_entrez = set(query2target[gene] for gene in OG)\n",
    "TSG_entrez = set(query2target[gene] for gene in TSG)\n",
    "# assign() returns a new frame instead of writing a column into `hotspots`, which is a\n",
    "# .loc slice of cBioPortal_muts (that write raised SettingWithCopyWarning); it also makes\n",
    "# the cell safe to re-run.\n",
    "hotspots = hotspots.assign(Entrez_gene_ID=hotspots[\"Hugo_Symbol\"].apply(lambda x : query2target[x]))\n",
    "hotspots.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GDSC mutations \n",
    "ID, cDNA and aa coordinates; the gene name is sometimes an Ensembl 56 ID, sometimes an HGNC symbol. When gene names are mapped to current Entrez gene IDs, many important genes, such as MLL2 and MLL4 (ambiguously renamed as KMT2D and KMT2B or vice versa), are lost. \n",
    "https://en.wikipedia.org/wiki/KMT2D\n",
    "\n",
    " Only protein coding mutations reported; of them: \n",
    " * potential LoF : 'frameshift','nonsense','stop_lost','ess_splice'\n",
    " * potential GoF : 'missense','Missense','inframe'\n",
    " * 'gene_fusion' records are excluded because they do not correspond to a point mutation and are not present in all datasets. Although most recurrent fusions are GoF, some may be LoF (PMC4889949)\n",
    " \n",
    "wget ftp.sanger.ac.uk/pub/project/cancerrxgene/releases/release-7.0/ WES_variants.xlsx."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>COSMIC_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>AA</th>\n",
       "      <th>Classification</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.Y281C</td>\n",
       "      <td>missense</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>998184</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A197T</td>\n",
       "      <td>missense</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A191T</td>\n",
       "      <td>missense</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   COSMIC_ID  Gene       AA Classification\n",
       "0     907272  A1BG  p.Y281C       missense\n",
       "1     998184  A1BG  p.A197T       missense\n",
       "2     907272  A1BG  p.A191T       missense"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# GDSC WES variants: keep sample ID, gene, protein change and variant class.\n",
    "gdsc_columns = [\"COSMIC_ID\",\"Gene\",\"AA\",\"Classification\"]\n",
    "gdsc = pd.read_excel(\"/home/olya/SFU/Hossein/GDSC/WES_variants.xlsx\",\"WES_variants\")[gdsc_columns]\n",
    "gdsc.head(3)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Classification\n",
       "missense       413252\n",
       "frameshift      31933\n",
       "nonsense        23359\n",
       "ess_splice      13106\n",
       "inframe          3185\n",
       "Missense          884\n",
       "stop_lost         486\n",
       "gene_fusion        38\n",
       "dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# number of variants per classification, most frequent first\n",
    "class_counts = gdsc.groupby(\"Classification\").size()\n",
    "class_counts.sort_values(ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean 485.7 median 288.0\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABIcAAAE/CAYAAADc0KMkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xu8lWWZ+P/PJSDgkRQiBA00j4F42CAzjkZZZmHit1EzNcUD5E+drCaTpr5F/mp+NDWZzpiF2ojmMZqUUTuYhw6TJzQQlFS0bYAnQsUjKnr9/lgPtNjuw9qHtdfee33er9d67ee5n/t+nutea214uLjv+4nMRJIkSZIkSfVpk1oHIEmSJEmSpNoxOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA6p7kREY0R8sEbXHh4Rv42IFyPi32sRQ3tExAER8VCt4+hrIuLSiPhGB9u+FBE7dnVMkiRJkuqXySGpe80A/gpslZn/XM0LRcTkiFjRzjYZEe9Zv5+Zv8vMXbs+OnVUZm6RmY91pG1EbBoRX42IhyLi5YhYGRE/j4iDy+o0RsSrRQLz+Yj4Q0ScGhGblNUZFRE/jYi/RsSaiFgSEdOaXGdWRDxSXKcxIn4UEaM70XVJkiRJVWJySOqgiOjfgWbvBh7MzOzqeLSxKPHPuI3NA6YCxwPvAMYA5wFTmtT7WGZuSen7Ohs4G7ik7PjlwPLi+LbAp4Cnm1znMOAYYGtgPHAvcFDXdkeSJElSV/AfTuoRipEFX4iI+4uRCNdExKDi2LSI+H2T+htGuBRTdL5fjIB4KSL+NyLeFRHfi4jnIuJPEbF3k0tOiIgHi+P/tf5axfkOjYiFZaMm9mwS59kRcT/wcnMJooj4+4i4p+jHPRHx9+vjBE4AvljE+bapbe3tS9ORPuunK0XE5sDPge2K87wUEdtFxMSIuKPo25MR8Z8RsWnR9rfFaRYV9T/RdPRRROweEbcX7R+IiMOaXPuCiLixGHVyV0TsVByLiDg3Ip6JiBciYnFEjG3hu3B7RPx/EXF3Uff6iNim7Pik4nN5PiIWRcTkJm2/GRH/C7wCvG36VfH5rSxifCgiDirKW3xvyt7r04rRMC9GxP8bETsVsbwQEdeWvZeTI2JFRPxLlEbXNEbEsc31t6jf4neumbpNv/vNvufNtPsg8CFgambelZmvF69fZOaZzbXJzDWZOR/4BHBC2Wc2Abg0M1/OzHWZ+cfM/Hkz17mnOL4mMy/IzEuau44kSZKk2jI5pJ7kKOAQSqMZ9gSmtbPtV4ChwGvAHcB9xf484LtN6h8LfBjYCdilaEuRePkR8GlKIyJ+CMyPiIFlbT9JaaTFkMxcV37SIolxI3B+0f67wI0RsW1mTgOuAP6tmBr06y7qy9tk5svAR4AnimttkZlPAG8CnyvO9XeURnKcVrQ5sGg+vqh/TZO+DQD+B/gV8E7gn4ArIqJ82tnRwNcpjUpZBnyzKD8YOJDSe7110cfVrXTheOAkYASwjtL7SUSMpPT+fgPYBvgC8NOIGFbW9lOUpu9tCTzepA+7AmcAE4qRMR8GGovDLb43ZT4M7AtMAr4IzAGOA7YHxlL6bqz3ruJcIyklBec0ea/Wx1TJd641Lb3nTX0QuCsz2zXVECAz7wZWAAcURXcCF0TE0RGxQzPXuTszl7f3OpIkSZJqw+SQepLzM/OJzHyWUhJir3a0/Vlm3puZa4GfAWsz87LMfBO4Bmg6cug/M3N5ca1v8rd/1M8AfliMrHgzM+dSStBMahLn8sx8tZk4pgCPZOblxYiJq4A/AR+rYl8qVpz3ziK2RkqJiPdV2HwSsAUwuxhxcitwAxsnRH6WmXcXSbMr+Ntn+AalZM1uQGTm0sx8spVrXZ6ZS4ok1/8FjoqIfpQSMTdl5k2Z+VZm3gwsAD5a1vbSzHyg
6OMbTc77JjAQ2CMiBmRmY2Y+2o735t8y84XMfABYAvwqMx/LzDWURmo1/Wz+b2a+lpm/oZTUOqqZvlbynWtNS+95U0OBp9bvRMQ2xUilNRGxtoLrPEEpIQdwJPA7Sp/Nn4tRTxOKY9sCrX22kiRJknoYk0PqSZ4q236FUiKiUuXrnbzazH7Tc5WPangc2K7Yfjfwz8U/mp+PiOcpjQrZroW2TW1Hk9Eqxf7I1sPfSHv7UrGI2CUiboiIpyLiBeBfKSUNKrEdsDwz3yora9q3Zj/DIpH0n8AFwDMRMScitmrlWk0/nwFFnO8Gjmzy+fwDpRFGzbXdSGYuAz4LzCriuDoitoOK35v2fDbPFcmt8n6Uf4/Wq+Q715pKf29WU/Y+ZeazmTmE0kioSkYpjQSeLdo+l5kzM/O9wHBgIXBdRETT60iSJEnq+UwOqTd4Gdhs/U5EvKsLzrl92fYOlEZFQCmx8M3MHFL22qwYAbRea4tJP0HpH/vldgBWdjri5r1C2XtDaSrTes3FeSGlkUw7Z+ZWwL8AUeG1ngC2j40Xea64b5l5fmbuC+xBaXrZWa1Ub/r5vEHpKW/LKY0qKv98Ns/M2eWXaiOOKzPzHyh9Tgl8qzjUmfemOe+I0tpP5f14opl6lXznusItlNbaGtXehsWooJHA75sey8y/At+hlMzaBvg1MLEj15EkSZJUGyaH1BssAt4bEXtFaeHoWV1wztOj9DjubYAvU5quBXARcGpE7Fcsorx5REyJiC0rPO9NwC4RcUxE9I+IT1BKhtzQBTE3ZyFwTET0i4hD2Hga1NPAthGxdVnZlsALwEsRsRvw/zQ539M0s4hz4S5KyagvRsSAYiHojwFXtxVkREwo3tMBlJJ9a4G3WmlyXETsERGbAecA84ppdT8GPhYRHy76PKhY/LmiRERE7BoRHyjW81lLabTP+jjaem864utReqz7AcChwE+aqdPZ71xFMvNXwG2URvjsV8Q1gFamr0XEVhFxKKXP+MeZubgo/1ZEjC2+41tSeq+WZebqYi2tm4GfRcS+6+tExKkRcVJX9kmSJElS1zA5pB4vMx+mlCD4NfAIzYxe6IArKS2s/BjwKKUFjsnMBcB0SlOgnqO0wO+0dsS6mlIS4J8pTa/5InBoMbqiGs6klKB5ntIi29eVxfIn4CrgsWK60naUFnA+BniRUlLimibnmwXMLepvtD5OZr5eXOsjlEbxfB84vrhOW7YqrvccpelVq4Fvt1L/cuBSSlOmBgGfKWJYTulR7P8CrKI06uYsKv+zbCClR7P/tTj3O4EvFcfaem/a6ylK/X2C0lpApzb3XnX2O9dO/4dSovLHlL4zf+Zvi7OX+5+IeJHS+/tlSougn1h2fDNK62E9T+l36N2UHl2/3hGUEqXXAGsorc/UQOl3WJIkSVIPE5mtzsCQpG4VEbdTGqVyca1j6ahiVNWPM9OpVZIkSZJ6PEcOSZIkSZIk1TGTQ5IkSZIkSXXMaWWSJEmSJEl1zJFDkiRJkiRJdczkkCRJkiRJUh3rX+sAAIYOHZqjR4+udRiSJKlK7r333r9m5rBax6GNeQ8mSVLfVuk9WI9IDo0ePZoFCxbUOgxJklQlEfF4rWPQ23kPJklS31bpPZjTyiRJkiRJkuqYySFJkqQeKCJ2jYiFZa8XIuKzEbFNRNwcEY8UP99R1I+IOD8ilkXE/RGxT637IEmSegeTQ5IkST1QZj6UmXtl5l7AvsArwM+AmcAtmbkzcEuxD/ARYOfiNQO4sPujliRJvVGPWHNIkiRJrToIeDQzH4+IqcDkonwucDtwNjAVuCwzE7gzIoZExIjMfLIWAUuSOuaNN95gxYoVrF27ttahqBcZNGgQo0aNYsCAAR1qb3JIkiSp5zsauKrYHl6W8HkKGF5sjwSWl7VZUZSZHJKkXmTFihVsueWWjB49moiodTjqBTKT1atXs2LFCsaMGdOhczitTJIkqQeLiE2Bw4CfND1WjBLKdp5v
RkQsiIgFq1at6qIoJUldZe3atWy77bYmhlSxiGDbbbft1Ggzk0OSJEk920eA+zLz6WL/6YgYAVD8fKYoXwlsX9ZuVFG2kcyck5kNmdkwbNiwKoYtSeooE0Nqr85+Z0wOSZIk9Wyf5G9TygDmAycU2ycA15eVH188tWwSsMb1hiRJUiVcc0iSJKmHiojNgQ8Bny4rng1cGxEnA48DRxXlNwEfBZZRerLZid0YqiSpSkbPvLFLz9c4e0qXng/glFNO4fOf/zx77LFHi3Wuu+46dtlll1brdNall17KwQcfzHbbbdeuepXE39eZHJIkSeqhMvNlYNsmZaspPb2sad0ETu+m0CRJ2uDiiy9us851113HoYceWvXk0NixYytKDpXXqyT+vs5pZZIkSZIkaYPGxkZ22203jj32WHbffXeOOOIIXnnlFW655Rb23ntvxo0bx0knncRrr70GwOTJk1mwYAEAW2yxBV/+8pcZP348kyZN4umnn+YPf/gD8+fP56yzzmKvvfbi0Ucfbfa6kydP5nOf+xwNDQ3svvvu3HPPPXz84x9n55135itf+cqG2MaOHbuhzXe+8x1mzZrFvHnzWLBgAcceeyx77bUXr776Kueccw4TJkxg7NixzJgxg8xstl55/FdddRXjxo1j7NixnH322Ruu01y/AH7yk58wduxYxo8fz4EHHtj1H0Y3MTlUA6Nn3tjmS5IkqVebtXXbL0lSj/XQQw9x2mmnsXTpUrbaaiu++93vMm3aNK655hoWL17MunXruPDCC9/W7uWXX2bSpEksWrSIAw88kIsuuoi///u/57DDDuPb3/42CxcuZKeddmrxuptuuikLFizg1FNPZerUqVxwwQUsWbKESy+9lNWrV7fY7ogjjqChoYErrriChQsXMnjwYM444wzuuecelixZwquvvsoNN9zQbL31nnjiCc4++2xuvfVWFi5cyD333MN1113XYr8AzjnnHH75y1+yaNEi5s+f39G3u+ZMDkmSJEmSpI1sv/327L///gAcd9xx3HLLLYwZM4ZddtkFgBNOOIHf/va3b2u36aabcuihhwKw77770tjY2K7rHnbYYQCMGzeO9773vYwYMYKBAwey4447snz58nad67bbbmO//fZj3Lhx3HrrrTzwwAOt1r/nnnuYPHkyw4YNo3///hx77LEb+thSv/bff3+mTZvGRRddxJtvvtmu+HoSk0OSJEmSJGkjTR+NPmTIkIraDRgwYEPbfv36sW7dunZdd+DAgQBssskmG7bX769bt47+/fvz1ltvbShfu3Zts+dZu3Ytp512GvPmzWPx4sVMnz69xbqVaKlfP/jBD/jGN77B8uXL2XfffVsd3dSTmRySJEmSJEkb+ctf/sIdd9wBwJVXXklDQwONjY0sW7YMgMsvv5z3ve99FZ9vyy235MUXX+x0XMOHD+eZZ55h9erVvPbaa9xwww3NXmN9Imjo0KG89NJLzJs3r81YJk6cyG9+8xv++te/8uabb3LVVVe12cdHH32U/fbbj3POOYdhw4a1e3RTT+HTyiRJkiRJ6qGq8ej5Suy6665ccMEFnHTSSeyxxx6cf/75TJo0iSOPPJJ169YxYcIETj311IrPd/TRRzN9+nTOP/985s2b1+q6Q60ZMGAAX/3qV5k4cSIjR45kt91223Bs2rRpnHrqqQwePJg77riD6dOnM3bsWN71rncxYcKEFuutN2LECGbPns373/9+MpMpU6YwderUVuM566yzeOSRR8hMDjroIMaPH9+hftValJ56WlsNDQ25fmXwelDJgtO1+gNAkqRqiIh7M7Oh1nFoY1W9B6tkwelZa6pzbUnqxZYuXcruu+9e0xgaGxs59NBDWbJkSU3jUPs0992p9B7MaWWSJEmSJEl1zGllkiRJkiRpg9GjR1d11NDpp5/O//7v/25UduaZZ3LiiSdW7ZpqnckhSZIkSZLUbS644IJah6AmnFYmSZIkSZJUx0wOSZIkSZIk
1TGTQ5IkSZIkSXXM5JAkSZIkSVIdc0FqSZIkSZJ6qllbd/H51rR6+Pnnn+fKK6/ktNNO69rrVujSSy/l4IMPZrvttgPglFNO4fOf/zx77LFHTeJpqrGxkT/84Q8cc8wx7aq3YMECLrvsMs4///zuCLPdHDkkSZIkSZKAUnLo+9///tvK161b1y3Xv/TSS3niiSc27F988cU9JjEEpaTPlVde2e56DQ0NPTYxBCaHJEmSJElSYebMmTz66KPstddeTJgwgQMOOIDDDjuMPfbYg8bGRsaOHbuh7ne+8x1mzZoFwOTJkzn77LOZOHEiu+yyC7/73e8AePPNN/nCF77A2LFj2XPPPfmP//gPAM455xwmTJjA2LFjmTFjBpnJvHnzWLBgAcceeyx77bUXr776KpMnT2bBggUAbLHFFnz5y19m/PjxTJo0iaeffhqARx99lEmTJjFu3Di+8pWvsMUWW7TYv9tvv533ve99TJ06lR133JGZM2dyxRVXMHHiRMaNG8ejjz4KwLRp05g3b96GduvPOXPmTH73u9+x1157ce6559LY2MgBBxzAPvvswz777MMf/vCHZuvdfvvtHHrooQA8++yzHH744ey5555MmjSJ+++/H4BZs2Zx0kknMXnyZHbccccNyaSXX36ZKVOmMH78eMaOHcs111zTiU+4eSaHJEmSJEkSALNnz2annXZi4cKFfPvb3+a+++7jvPPO4+GHH26z7bp167j77rv53ve+x9e//nUA5syZQ2NjIwsXLuT+++/n2GOPBeCMM87gnnvuYcmSJbz66qvccMMNHHHEETQ0NHDFFVewcOFCBg8evNH5X375ZSZNmsSiRYs48MADueiiiwA488wzOfPMM1m8eDGjRo1qM85Fixbxgx/8gKVLl3L55Zfz8MMPc/fdd3PKKadsSF619v4ccMABLFy4kM997nO8853v5Oabb+a+++7jmmuu4TOf+Uyz9cp97WtfY++99+b+++/nX//1Xzn++OM3HPvTn/7EL3/5S+6++26+/vWv88Ybb/CLX/yC7bbbjkWLFrFkyRIOOeSQNvvYXiaHJEmSJElSsyZOnMiYMWMqqvvxj38cgH333ZfGxkYAfv3rX/PpT3+a/v1LSx5vs802ANx2223st99+jBs3jltvvZUHHnigzfNvuummG0bflF/jjjvu4MgjjwRocy0ggAkTJjBixAgGDhzITjvtxMEHHwzAuHHjNpyzUm+88QbTp09n3LhxHHnkkTz44INttvn973/Ppz71KQA+8IEPsHr1al544QUApkyZwsCBAxk6dCjvfOc7efrppxk3bhw333wzZ599Nr/73e/YeusuXoeKCpJDEfGjiHgmIpaUlX07Iv4UEfdHxM8iYkjZsS9FxLKIeCgiPtzlEUuSJEmSpG6x+eabb9ju378/b7311ob9tWvXblR34MCBAPTr16/VNYrWrl3Laaedxrx581i8eDHTp09/27maM2DAACKiomu0Zn2cAJtsssmG/U022WTDOcv7+tZbb/H66683e65zzz2X4cOHs2jRIhYsWNBivY7Etr6Pu+yyC/fdd9+GaXPnnHNOp67RnEpGDl0KNB2zdDMwNjP3BB4GvgQQEXsARwPvLdp8PyL6dVm0kiRJkiSparbccktefPHFZo8NHz6cZ555htWrV/Paa69xww03tHm+D33oQ/zwhz/ckHR59tlnNySChg4dyksvvbTR2j6tXb8lkyZN4qc//SkAV199dbvatmT06NHce++9AMyfP5833nij2fjWrFnDiBEj2GSTTbj88st58803m61X7oADDuCKK64ASmsgDR06lK222qrFWJ544gk222wzjjvuOM466yzuu+++LuljuTYfZZ+Zv42I0U3KflW2eydwRLE9Fbg6M18D/hwRy4CJwB1dEq0kSZIkSfWkjUfPd7Vtt92W/fffn7FjxzJ48GCGDx++4diAAQP46le/ysSJExk5ciS77bZbm+c75ZRTePjh
h9lzzz0ZMGAA06dP54wzzmD69OmMHTuWd73rXUyYMGFD/WnTpnHqqacyePBg7rijslTC9773PY477ji++c1vcsghh3TJtKvp06czdepUxo8fzyGHHLJhBNWee+5Jv379GD9+PNOmTeO0007jH//xH7nssstarbf33ntvOPf6haf33HNPNttsM+bOndtqLIsXL+ass85ik002YcCAAVx44YWd7l9TkZltVyolh27IzLHNHPsf4JrM/HFE/CdwZ2b+uDh2CfDzzJzXtF25hoaGXL/6eD0YPfPGNus0zp7SDZFIktQ9IuLezGyodRzaWFXvwWZVcGPezf/gkaTeYOnSpey+++61DqNXeeWVVxg8eDARwdVXX81VV13F9ddfX+uwul1z351K78HaHDnUmoj4MrAOuKIDbWcAMwB22GGHzoQhSZIkSZLq1L333ssZZ5xBZjJkyBB+9KMf1TqkXqfDyaGImAYcChyUfxt+tBLYvqzaqKLsbTJzDjAHSv9r1dE4JEmSJElS/TrggANYtGjRRmWLFy/e8ESw9QYOHMhdd93VnaH1Gh1KDkXEIcAXgfdl5itlh+YDV0bEd4HtgJ2BuzsdpSRJkiRJUoXGjRvHwoULax1Gr9FmcigirgImA0MjYgXwNUpPJxsI3Fw8Ru7OzDw1Mx+IiGuBBylNNzs9M9+sVvCSJEmSJPU1mbnhke1SJSpZT7o1lTyt7JPNFF/SSv1vAt/sTFCSJEmSJNWjQYMGsXr1arbddlsTRKpIZrJ69WoGDRrU4XN0akFqSZIkSZLUdUaNGsWKFStYtWpVrUNRLzJo0CBGjRrV4fYmhyRJknqoiBgCXAyMBRI4CXgIuAYYDTQCR2Xmc1H67+XzgI8CrwDTMvO+GoQtSeqEAQMGMGbMmFqHoTqzSa0DkCRJUovOA36RmbsB44GlwEzglszcGbil2Af4CKWHgewMzAAu7P5wJUlSb2RySJIkqQeKiK2BAynWeszM1zPzeWAqMLeoNhc4vNieClyWJXcCQyJiRDeHLUmSeiGTQ5IkST3TGGAV8F8R8ceIuDgiNgeGZ+aTRZ2ngOHF9khgeVn7FUWZJElSq0wOSZIk9Uz9gX2ACzNzb+Bl/jaFDIAsPbe2Xc+ujYgZEbEgIha42KkkSQKTQ5IkST3VCmBFZt5V7M+jlCx6ev10seLnM8XxlcD2Ze1HFWUbycw5mdmQmQ3Dhg2rWvCSJKn3MDkkSZLUA2XmU8DyiNi1KDoIeBCYD5xQlJ0AXF9szweOj5JJwJqy6WeSJEkt8lH2kiRJPdc/AVdExKbAY8CJlP5z79qIOBl4HDiqqHsTpcfYL6P0KPsTuz9cSZLUG5kc6qFGz7yxonqNs6dUORJJklQrmbkQaGjm0EHN1E3g9KoHJUmS+hynlUmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1rH+tA1D1jZ55Y5t1GmdP6YZIJEmSJElST+PIIUmSJEmSpDpmckiSJEmSJKmOmRySJEmSJEmqY6451MtVsp6QJEmSJElSSxw5JEmSJEmSVMfaTA5FxI8i4pmIWFJWtk1E3BwRjxQ/31GUR0ScHxHLIuL+iNinmsFLkiRJkiSpcyoZOXQpcEiTspnALZm5M3BLsQ/wEWDn4jUDuLBrwpQkSZIkSVI1tJkcyszfAs82KZ4KzC225wKHl5VfliV3AkMiYkRXBStJkiRJkqSu1dE1h4Zn5pPF9lPA8GJ7JLC8rN6KokySJEmSJEk9UKcXpM7MBLK97SJiRkQsiIgFq1at6mwYkiRJkiRJ6oCOJoeeXj9drPj5TFG+Eti+rN6oouxtMnNOZjZkZsOwYcM6GIYkSZIkSZI6o6PJ
ofnACcX2CcD1ZeXHF08tmwSsKZt+JkmSJEmSpB6mf1sVIuIqYDIwNCJWAF8DZgPXRsTJwOPAUUX1m4CPAsuAV4ATqxCzJEmSJEmSukibyaHM/GQLhw5qpm4Cp3c2KEmSJEmSJHWPTi9ILUmSpOqIiMaIWBwRCyNiQVG2TUTcHBGPFD/fUZRHRJwfEcsi4v6I2Ke20UuSpN6izZFDap/RM2+sdQiSJKlveX9m/rVsfyZwS2bOjoiZxf7ZwEeAnYvXfsCFxU9JkqRWOXJIkiSpd5kKzC225wKHl5VfliV3AkPWP11WkiSpNSaHJEmSeq4EfhUR90bEjKJseNnTYJ8ChhfbI4HlZW1XFGWSJEmtclqZJElSz/UPmbkyIt4J3BwRfyo/mJkZEdmeExZJphkAO+ywQ9dFKkmSei1HDkmSJPVQmbmy+PkM8DNgIvD0+ulixc9niuorge3Lmo8qypqec05mNmRmw7Bhw6oZviRJ6iVMDkmSJPVAEbF5RGy5fhs4GFgCzAdOKKqdAFxfbM8Hji+eWjYJWFM2/UySJKlFTiuTJEnqmYYDP4sIKN2zXZmZv4iIe4BrI+Jk4HHgqKL+TcBHgWXAK8CJ3R+yJEnqjUwOSZIk9UCZ+Rgwvpny1cBBzZQncHo3hCZJkvoYk0MCYPTMG9us0zh7SjdEIkmSJEmSupNrDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zOSQJEmSJElSHTM5JEmSJEmSVMdMDkmSJEmSJNWxTiWHIuJzEfFARCyJiKsiYlBEjImIuyJiWURcExGbdlWwkiRJkiRJ6lodTg5FxEjgM0BDZo4F+gFHA98Czs3M9wDPASd3RaCSJEm9VUSMq3UMkiRJLenstLL+wOCI6A9sBjwJfACYVxyfCxzeyWtIkiT1dt+PiLsj4rSI2LrWwUiSJJXrcHJJ5bv3AAAakUlEQVQoM1cC3wH+QikptAa4F3g+M9cV1VYAIzsbpCRJUm+WmQcAxwLbA/dGxJUR8aEahyVJkgR0blrZO4CpwBhgO2Bz4JB2tJ8REQsiYsGqVas6GoYkSVKvkJmPAF8BzgbeB5wfEX+KiI/XNjJJklTvOjOt7IPAnzNzVWa+Afw3sD8wpJhmBjAKWNlc48yck5kNmdkwbNiwToQhSZLUs0XEnhFxLrCU0hT8j2Xm7sX2uTUNTpIk1b3OJIf+AkyKiM0iIoCDgAeB24AjijonANd3LkRJkqRe7z+A+4DxmXl6Zt4HkJlPUBpNJEmSVDOdWXPoLkoLT98HLC7ONYfSUOnPR8QyYFvgki6IU5IkqTebAlyZma8CRMQmEbEZQGZe3lrDiOgXEX+MiBuK/TERcVdELIuIayJi06J8YLG/rDg+uqo9kiRJfUannlaWmV/LzN0yc2xmfiozX8vMxzJzYma+JzOPzMzXuipYSZKkXurXwOCy/c2KskqcSWk62nrfAs7NzPcAzwEnF+UnA88V5ecW9SRJktrU2UfZS5IkqW2DMvOl9TvF9mZtNYqIUZRGHV1c7AeldYrmFVXmAocX21OLfYrjBxX1JUmSWmVySJIkqfpejoh91u9ExL7AqxW0+x7wReCtYn9b4PnMXFfsrwBGFtsjgeUAxfE1RX1JkqRW9W+7iiRJkjrps8BPIuIJIIB3AZ9orUFEHAo8k5n3RsTkrgokImYAMwB22GGHrjqtJEnqxUwOSZIkVVlm3hMRuwG7FkUPZeYbbTTbHzgsIj4KDAK2As4DhkRE/2J00ChgZVF/JbA9sCIi+gNbA6ubiWUOpYeI0NDQkJ3rmSRJ
6gucViZJktQ9JgB7AvsAn4yI41urnJlfysxRmTkaOBq4NTOPBW4DjiiqnQBcX2zPL/Ypjt+amSZ/JElSmxw5JEmSVGURcTmwE7AQeLMoTuCyDpzubODqiPgG8EfgkqL8EuDyiFgGPEspoSRJktQmk0OSJEnV1wDs0dGRPJl5O3B7sf0YMLGZOmuBIzseoiRJqldOK5MkSaq+JZQWoZYkSepxHDkkSZJUfUOBByPibuC19YWZeVjtQpIkSSoxOSRJklR9s2odgCRJUktMDkmSJFVZZv4mIt4N7JyZv46IzYB+tY5LkiQJXHNIkiSp6iJiOjAP+GFRNBK4rnYRSZIk/Y3JIUmSpOo7HdgfeAEgMx8B3lnTiCRJkgomhyRJkqrvtcx8ff1ORPQHOvRYe0mSpK5mckiSJKn6fhMR/wIMjogPAT8B/qfGMUmSJAEmhyRJkrrDTGAVsBj4NHAT8JWaRiRJklTwaWWSJElVlplvARcVL0mSpB7F5JAkSVKVRcSfaWaNoczcsQbhSJIkbcTkkCRJUvU1lG0PAo4EtqlRLJIkSRtxzSFJkqQqy8zVZa+Vmfk9YEqt45IkSQJHDkmSJFVdROxTtrsJpZFE3odJkqQewZsSSZKk6vv3su11QCNwVG1CkSRJ2pjJIUmSpCrLzPfXOgZJkqSWmBySJEmqsoj4fGvHM/O73RWLJElSUyaHJEmSqq8BmADML/Y/BtwNPFKziCRJkgomhyRJkqpvFLBPZr4IEBGzgBsz87iaRiVJkoSPspckSeoOw4HXy/ZfL8okSZJqzpFDkiRJ1XcZcHdE/KzYPxyYW8N4JEmSNjA5JEmSVGWZ+c2I+DlwQFF0Ymb+sZYxSZIkree0MkmSpO6xGfBCZp4HrIiIMbUOSJIkCTqZHIqIIRExLyL+FBFLI+LvImKbiLg5Ih4pfr6jq4KVJEnqjSLia8DZwJeKogHAj2sXkSRJ0t90duTQecAvMnM3YDywFJgJ3JKZOwO3FPuSJEn17P8AhwEvA2TmE8CWNY1IkiSp0OHkUERsDRwIXAKQma9n5vPAVP62wOJcSgsuSpIk1bPXMzOBBIiIzWscjyRJ0gadGTk0BlgF/FdE/DEiLi5udIZn5pNFnafwMa2SJEnXRsQPgSERMR34NXBRjWOSJEkCOpcc6g/sA1yYmXtTGia90RSy8v8hayoiZkTEgohYsGrVqk6EIUmS1LNl5neAecBPgV2Br2bmf9Q2KkmSpJLOPMp+BbAiM+8q9udRSg49HREjMvPJiBgBPNNc48ycA8wBaGhoaDaBJEmS1NtFRD/g15n5fuDmWscjSZLUVIeTQ5n5VEQsj4hdM/Mh4CDgweJ1AjC7+Hl9l0Sqmhs988Y26zTOntINkUiS1Htk5psR8VZEbJ2ZayptFxGDgN8CAynds83LzK9FxBjgamBb4F7gU5n5ekQMBC4D9gVWA5/IzMYu7o4kSeqDOjNyCOCfgCsiYlPgMeBESlPVro2Ik4HHgaM6eQ1JkqTe7iVgcUTcTPHEMoDM/EwrbV4DPpCZL0XEAOD3EfFz4PPAuZl5dUT8ADgZuLD4+Vxmvicijga+BXyiSv2RJEl9SKeSQ5m5EGho5tBBnTmvJElSH/PfxatixdqNLxW7A4pXAh8AjinK5wKzKCWHphbbUJru/58REcV5JEmSWtTZkUOSJElqQUTskJl/ycy5HWzfj9LUsfcAFwCPAs9n5rqiygpgZLE9ElgOkJnrImINpalnf21yzhnADIAddtihI2FJkqQ+pjNPK5MkSVLrrlu/ERE/bW/jzHwzM/cCRgETgd06G1BmzsnMhsxsGDZsWGdPJ0mS+gCTQ5IkSdUTZds7dvQkmfk8cBvwd8CQiFg/+nsUsLLYXglsD1Ac35rSwtSSJEmtMjkkSZJUPdnCdpsiYlhEDCm2BwMfApZSShIdUVQrfzLs/GKf4vitrjckSZIq4ZpDkiRJ1TM+Il6gNIJocLFNsZ+ZuVUrbUcAc4t1
hzYBrs3MGyLiQeDqiPgG8EfgkqL+JcDlEbEMeBY4ugr9kSRJfZDJIUmSpCrJzH6daHs/sHcz5Y9RWn+oafla4MiOXk+SJNUvp5VJkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVMZNDkiRJkiRJdczkkCRJkiRJUh0zOSRJkiRJklTHTA5JkiRJkiTVsf6dPUFE9AMWACsz89CIGANcDWwL3At8KjNf7+x11DuMnnljRfUaZ0+pciSSJEmSJKkSXTFy6Exgadn+t4BzM/M9wHPAyV1wDUmSJEmSJFVBp5JDETEKmAJcXOwH8AFgXlFlLnB4Z64hSZIkSZKk6unsyKHvAV8E3ir2twWez8x1xf4KYGQnryFJklR3ImL7iLgtIh6MiAci4syifJuIuDkiHil+vqMoj4g4PyKWRcT9EbFPbXsgSZJ6iw4nhyLiUOCZzLy3g+1nRMSCiFiwatWqjoYhSZLUV60D/jkz9wAmAadHxB7ATOCWzNwZuKXYB/gIsHPxmgFc2P0hS5Kk3qgzI4f2Bw6LiEZKC1B/ADgPGBIR6xe6HgWsbK5xZs7JzIbMbBg2bFgnwpAkSep7MvPJzLyv2H6R0hqPI4GplKbuw8ZT+KcCl2XJnZTuyUZ0c9iSJKkX6vDTyjLzS8CXACJiMvCFzDw2In4CHEEpYXQCcH0XxClJklS3ImI0sDdwFzA8M58sDj0FDC+2RwLLy5qtn97/JD3VrK0rqLOm+nFIklTnuuJpZU2dDXw+IpZRWoPokipcQ5IkqS5ExBbAT4HPZuYL5ccyM4Fs5/mc2i9JkjbS4ZFD5TLzduD2YvsxYGJXnFeSJKmeRcQASomhKzLzv4vipyNiRGY+WUwbe6YoXwlsX9a82en9mTkHmAPQ0NDQrsSSJEnqm6oxckiSJEmdFBFBaQT20sz8btmh+ZSm7sPGU/jnA8cXTy2bBKwpm34mSZLUoi4ZOSRJkqQutz/wKWBxRCwsyv4FmA1cGxEnA48DRxXHbgI+CiwDXgFO7N5wJUlSb2VySJIkqQfKzN8D0cLhg5qpn8DpVQ1KkiT1SU4rkyRJkiRJqmMmhyRJkiRJkuqYySFJkiRJkqQ6ZnJIkiRJkiSpjpkckiRJkiRJqmMmhyRJkiRJkuqYySFJkiRJkqQ6ZnJIkiRJkiSpjpkckiRJkiRJqmMmhyRJkiRJkuqYySFJkiRJkqQ6ZnJIkiRJkiSpjvWvdQCSJElSp83auoI6a6ofhyRJvZAjhyRJkiRJkuqYySFJkiRJkqQ6ZnJIkiRJkiSpjpkckiRJkiRJqmMuSN0Oo2feWOsQJEmS1FEuWi1JUrNMDkmSJKnXG732yjbrNA46phsikSSp9zE5pJroqlFYjbOndMl5JEmSJEmqVyaHJEmSVBcqGl1U/TAkSepxXJBakiRJkiSpjpkckiRJkiRJqmMmhyRJkiRJkuqYySFJkiRJkqQ65oLUkiRJ0nqztq6gzpruP5ckSVVkckiSJEmqAp+OJknqLTqcHIqI7YHLgOFAAnMy87yI2Aa4BhhN6e+7ozLzuc6HKkmSJPUAlYwIAqDt5JAkST1BZ0YOrQP+OTPvi4gtgXsj4mZgGnBLZs6OiJnATODszocqSZIkVVdFo30GHdMNkUiS1H06nBzKzCeBJ4vtFyNiKTASmApMLqrNBW7H5JAkSVK7RMSPgEOBZzJzbFHW7AjtiAjgPOCjwCvAtMy8rxZx14NKEkiSJPUmXfK0sogYDewN3AUMLxJHAE9RmnYmSZKk
9rkUOKRJ2UxKI7R3Bm4p9gE+AuxcvGYAF3ZTjJIkqQ/odHIoIrYAfgp8NjNfKD+WmUlpPaLm2s2IiAURsWDVqlWdDUOSJKlPyczfAs82KZ5KaWQ2xc/Dy8ovy5I7gSERMaJ7IpUkSb1dp5JDETGAUmLoisz876L46fU3I8XPZ5prm5lzMrMhMxuGDRvWmTAkSZLqRUsjtEcCy8vqrSjKJEmS2tTh5FAxt/0SYGlmfrfs0HzghGL7BOD6jocnSZKk5rQ2Qrs1jt6WJElNdWbk0P7Ap4APRMTC4vVRYDbwoYh4BPhgsS9JkqTOa2mE9kpg+7J6o4qyt3H0tiRJaqozTyv7PRAtHD6oo+eVJElSi9aP0J7NxiO05wNnRMTVwH7AmrLpZ5IkSa3qcHJIkiRJ1RMRVwGTgaERsQL4GqWk0LURcTLwOHBUUf0mSo+xX0bpUfYndnvAkiSp1zI5JEmS1ANl5idbOPS2EdrF+kOnVzciSZLUV3X6UfaSJEmSJEnqvRw5pF5t9Mwb26zTOHtKN0QiSZIkSVLvZHJIkiRJPdesrSuseGVVw5AkqS8zOSS1gyOVJEmSJEl9jWsOSZIkSZIk1TGTQ5IkSZIkSXXMaWWSJElSrVSyptKsNdWPQ5JU10wOSZIkSTUyem3bC2k3Vj8MSVKdc1qZJEmSJElSHXPkkPq8Sp4wBj5lTJIkSZJUnxw5JEmSJEmSVMdMDkmSJEmSJNUxk0OSJEmSJEl1zDWHpEKlaxNJkiRJktSXmBySJElSj1XJo94lSVLnmBySaqCSUUo+PU2SJAEwa+sK6qypfhySpD7LNYckSZIkSZLqmCOHcK0ZSZIk9XKOLpIkdYLJIUmSJKkHq2TdpcZBx3RDJJKkvsrkkNTFHIkmSZK6W0UJpOqHIUnqpVxzSJIkSZIkqY45ckiS6oBPyJMkSZLUEkcOSZIkSZIk1TFHDkk9VKVrF1Uy2qO7R404SkWSpF7MJ59JUt0xOSRJkiSp65lkkqRew+SQJKkuOKJNUt2rJFmDTz6TpHpkckiSJElS+1SQaDLJJEm9h8khSUDPHFXRE2OSKl0PrBJ+fyV1p0qSNZKk+mRySJIkSVK7mGiSpL6lasmhiDgEOA/oB1ycmbOrdS2pnnXlKIbuvFZ3xl3p9XrrKI7e+l5253kq1RPfy0r0xKcWqjZ62/2XCQZV/HTWQce0XcnFrSWpw6qSHIqIfsAFwIeAFcA9ETE/Mx+sxvUkSZLqnfdf6stcv6iX8ol1Uq9RrZFDE4FlmfkYQERcDUwFvDmR1KV686iRvsz3snv5fqvg/ZfqWk8bjQmVjXiqKPHVlTF14yjRnpjU68qRtD1tVG5P/A6o96hWcmgksLxsfwWwX5WuJUmSpB52/+WUMfVWXfofT130e9Cl/wnQVU+a66IEQ0/8D46+/p+PPTGmSnTZNPounKbanUnpaqvZgtQRMQOYUey+FBEPVelSQ4G/VuncPYn97DvqoY9QH/3sVX2Mb3W4adX62YmYqqFXfZ4dEd+qah/fXaXzqp28B+ty9dDPeugj1Ek/o6J+Htr2eXrW39FN1cVnSX30c0Mfu+o7F5VU+npFtbpMT7gHq1ZyaCWwfdn+qKJsg8ycA8yp0vU3iIgFmdlQ7evUmv3sO+qhj1Af/ayHPoL97EvqoY99XJv3X+A9WFerh37WQx/BfvYl9dBHqI9+1kMfoWf0c5MqnfceYOeIGBMRmwJHA/OrdC1JkiR5/yVJkjqoKiOHMnNdRJwB/JLSo1R/lJkPVONakiRJ8v5LkiR1XNXWHMrMm4CbqnX+dqj6sOkewn72HfXQR6iPftZDH8F+9iX10Mc+rQfdf0H9fJ/qoZ/10Eewn31JPfQR6qOf9dBH6AH9jMysdQySJEmSJEmqkWqtOSRJkiRJkqReoE8nhyLikIh4KCKWRcTMWsfTURGxfUTcFhEPRsQDEXFmUT4rIlZGxMLi9dGyNl8q+v1QRHy4dtG3T0Q0RsTioj8L
irJtIuLmiHik+PmOojwi4vyin/dHxD61jb4yEbFr2We2MCJeiIjP9vbPMyJ+FBHPRMSSsrJ2f3YRcUJR/5GIOKEWfWlNC/38dkT8qejLzyJiSFE+OiJeLftMf1DWZt/iu76seC+693mZbWihn+3+jvbkP4db6OM1Zf1rjIiFRXlv/ixb+jukz/1+qmfoyb/37dXK70+v/ju7OdHH78Gij95/gfdg0YfuwVroY5+6/4L6uAdr5e+Pnvu7mZl98kVpIcZHgR2BTYFFwB61jquDfRkB7FNsbwk8DOwBzAK+0Ez9PYr+DgTGFO9Dv1r3o8K+NgJDm5T9GzCz2J4JfKvY/ijwcyCAScBdtY6/A/3tBzwFvLu3f57AgcA+wJKOfnbANsBjxc93FNvvqHXfKujnwUD/YvtbZf0cXV6vyXnuLvoexXvxkVr3rYJ+tus72tP/HG6uj02O/zvw1T7wWbb0d0if+/30VftXT/+970B/vAfrg/dg9KH7ryJW78Gyb9yDtdDHdn1He8Ofw831s8nxXn8P1srfHz32d7MvjxyaCCzLzMcy83XgamBqjWPqkMx8MjPvK7ZfBJYCI1tpMhW4OjNfy8w/A8sovR+91VRgbrE9Fzi8rPyyLLkTGBIRI2oRYCccBDyamY+3UqdXfJ6Z+Vvg2SbF7f3sPgzcnJnPZuZzwM3AIdWPvnLN9TMzf5WZ64rdO4FRrZ2j6OtWmXlnlv7Uv+z/b+9uXvso4jiOv7/4dKgPqEiRqphKPVvpoYfWkxQrWlBBKkJ9AhH0IB685H/wpCiIUpAqIirmpujBo0hrtYoPrV60xBR6sAcvPoyHnU02Ib+0v9Q2OzPvFyzZTH9Jf9/MzO6nk90tSz+bUZjQn5NMGqOjPg6vVWP+zdPDwDtrfY9C+nLSOaS6+alRGPW8n5YZrNoMVk3+AjNYTRmshfwFbWSwEvNXzYtDW4BfB5//xton8yJExK3AduCL3PRcvuzszf6SNMquPQGfRMThiHg6t21OKc3n/d+BzXm/5Dp7+1l+4KutP6ftu5Jr7T1Jt+rfm4mIryLi84jYndu20NXWK6nOacZoyf25G1hIKR0ftBXflyvOIS3OT1141Y4TM1hVGaz2/AVtHuNrzmCt5C+oMIOVkr9qXhyqTkRcCbwPPJ9SOgO8CtwG3AHM011+V7pdKaU7gb3AsxFx1/AP86pwFf/FXkRcDuwD3stNNfbnopr6bpKImAX+Bg7lpnnglpTSduAF4O2IuHqj3t//oOoxusIjLP+HQ/F9uco5ZFEL81M6H2aweo4TreUvqKfv1lJ5Bqt+jK5QVQYrKX/VvDh0Erh58PlNua1IEXEZ3aA6lFL6ACCltJBS+iel9C/wOkuXuhZbe0rpZP54CviQrqaF/lLl/PFUfnmxdWZ7gSMppQWosz+Zvu+KrTUiHgfuAx7NB3ryZb6n8/5huvu/b6eraXjZcxF1rmOMFtmfEXEp8CDwbt9Wel+udg6hofmpi6q6cWIGqy6DtZC/oKFjfO0ZrJX8BfVlsNLyV82LQ18C2yJiJv+GYD8wt8HvaV3yfZdvAN+nlF4atA/v7X4A6J/2Pgfsj4grImIG2Eb3sK5Ri4hNEXFVv0/3gLlv6erpn8r+GPBR3p8DDuQnu+8E/hhcoleCZavitfVnNm3ffQzsiYhr8yWze3LbqEXEPcCLwL6U0p+D9hsi4pK8v5Wu737JtZ6JiJ15fh9g6WczWusYo6Ueh+8GfkgpLV6qXHJfTjqH0Mj81EVX6rxflRmsygzWQv6CRo7xLWSwhvIXVJTBisxfaQRP8r5QG90Tv3+iW12c3ej3cx517KK73Owb4Gje7gXeAo7l9jngxsHXzOa6f2RET20/S51b6Z6m/zXwXd9nwPXAZ8Bx4FPgutwewCu5zmPAjo2uYYpaNwGngWsGbUX3J13Qmgf+orsX9qn19B3d/eIn8vbERtd1jnWeoLsXuJ+fr+XXPpTH8lHgCHD/4PvsoDu5/wy8
DMRG13YOdU49Rsd8HF6txtx+EHhmxWtL7stJ55Dq5qfbOLYxz/t11GIGqyiDUWH+yu/TDFZJBptQY1X5a1Kduf0glWQwCsxfkf8ySZIkSZIkNajm28okSZIkSZJ0Fi4OSZIkSZIkNczFIUmSJEmSpIa5OCRJkiRJktQwF4ckSZIkSZIa5uKQJEmSJElSw1wckiRJkiRJapiLQ5IkSZIkSQ37D8cKgMxC5dvHAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1440x360 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# define functional effect of a mutation \n",
    "gdsc_truncating = ['frameshift','nonsense','stop_lost','ess_splice']\n",
    "gdsc_point = ['missense','Missense','inframe']\n",
    "gdsc.loc[gdsc[\"Classification\"].isin(gdsc_truncating),\"function\"] = \"truncating_mutation\"\n",
    "gdsc.loc[gdsc[\"Classification\"].isin(gdsc_point),\"function\"] = \"point_mutation\"\n",
    "gdsc = gdsc.loc[gdsc[\"Classification\"].isin(gdsc_truncating+gdsc_point),:]\n",
    "\n",
    "plt.figure(figsize=(20,5))\n",
    "plt.subplot(1,2,1)\n",
    "n_mutations_gdsc = gdsc.groupby(\"COSMIC_ID\").size().sort_values(ascending = False)\n",
    "tmp = plt.hist(n_mutations_gdsc, bins=50,range = (0,2000))\n",
    "tmp = plt.title(\"number of mutations per sample in GDSC\")\n",
    "ax = plt.subplot(1,2,2)\n",
    "n_LoF = gdsc.loc[gdsc[\"Classification\"].isin(gdsc_truncating ),:].groupby(\"COSMIC_ID\").size().sort_values(ascending = False)\n",
    "n_GoF = gdsc.loc[gdsc[\"Classification\"].isin(gdsc_point),:].groupby(\"COSMIC_ID\").size().sort_values(ascending = False)\n",
    "df = pd.DataFrame.from_dict({\"truncating_mutations\":n_LoF,\"point_mutations\":n_GoF})\n",
    "#tmp = plt.hist(n_LoF, bins=50)\n",
    "#tmp = plt.title(\"LoF mutations per sample in GDSC\")\n",
    "df.plot.hist(stacked = True, bins = 50,ax =ax,range = (0,2000))\n",
    "print(\"mean\",round(np.mean(n_mutations_gdsc),1), \"median\",np.median(n_mutations_gdsc))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def binarize(x):\n",
    "    if x==0:\n",
    "        return x\n",
    "    else:\n",
    "        return 1\n",
    "\n",
    "def get_aa_pos_GDSC(x):\n",
    "    l = re.findall(r'\\d+', x)\n",
    "    if len(l) == 0:\n",
    "        #print(x, \"positions not found.\",file = sys.stderr)\n",
    "        return (\"NA\",\"NA\")\n",
    "    elif len(l) == 1:\n",
    "        return (int(l[0]), int(l[0]))\n",
    "    else:\n",
    "        return (int(l[0]), int(l[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>COSMIC_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>AA</th>\n",
       "      <th>Classification</th>\n",
       "      <th>function</th>\n",
       "      <th>aa_pos</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.Y281C</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(281, 281)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>998184</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A197T</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(197, 197)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A191T</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(191, 191)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>907289</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.D486N</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(486, 486)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>905989</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.T257N</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(257, 257)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   COSMIC_ID  Gene       AA Classification        function      aa_pos\n",
       "0     907272  A1BG  p.Y281C       missense  point_mutation  (281, 281)\n",
       "1     998184  A1BG  p.A197T       missense  point_mutation  (197, 197)\n",
       "2     907272  A1BG  p.A191T       missense  point_mutation  (191, 191)\n",
       "3     907289  A1BG  p.D486N       missense  point_mutation  (486, 486)\n",
       "4     905989  A1BG  p.T257N       missense  point_mutation  (257, 257)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "gdsc[\"aa_pos\"] = gdsc[\"AA\"].apply(lambda x : get_aa_pos_GDSC(x))\n",
    "#gdsc[\"start\"] = gdsc[\"aa_pos\"].apply(lambda x : x.split(\",\")[0])\n",
    "#gdsc[\"end\"] = gdsc[\"aa_pos\"].apply(lambda x : x.split(\",\")[1])\n",
    "gdsc.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ENSEMBL gene IDs found: 101\n",
      "Gene symbols: 18989\n"
     ]
    }
   ],
   "source": [
    "ensg_genes = []\n",
    "symbols = []\n",
    "for gene in set(gdsc[\"Gene\"].values):\n",
    "    if gene.startswith(\"ENSG\"):\n",
    "        ensg_genes.append(gene)\n",
    "        \n",
    "    else:\n",
    "        symbols.append(gene)\n",
    "print(\"ENSEMBL gene IDs found:\",len(ensg_genes))\n",
    "ensg_genes = gdsc.loc[gdsc[\"Gene\"].isin(ensg_genes), :]\n",
    "gdsc_hugo =  gdsc.loc[gdsc[\"Gene\"].isin(symbols), :]\n",
    "print(\"Gene symbols:\",len(set(gdsc_hugo[\"Gene\"].values)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mapped: 18509 \n",
      "\tdirectly via main_mapper 17003 \n",
      "\tvia alternative mapper 452 \n",
      "\tvia one of multiple synonyms in alternative mapper 1029 \n",
      "\tLOC 25 \n",
      "Unmapped: 480 \n",
      "\trecognized symbols without Entrez ID 0 \n",
      "\tmultiple query_ids map to the same target_id 0 \n",
      "\tquery_ids map to multiple target_ids in the main mapper 0 \n",
      "\tquery_ids map to multiple target_ids in the alternative mapper 51 \n",
      "\tLOC not found in Entrez 11 \n",
      "\tNot found at all: 418\n",
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 174\n",
      "Warning: query IDs not mapped to any target IDs excluded: 480\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/frame.py:3781: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  return super(DataFrame, self).rename(**kwargs)\n",
      "IDs mapped to multiple target IDs are kept:\n",
      " [143872, 23272, 341019, 27177, 387119, 84530, 26165, 11321, 122945, 150082, 2122, 442444, 79949, 157777, 340578, 100289124, 8805, 127602, 387707, 728194, 345222, 84108, 91283, 84631, 388761, 4253, 163486, 54438, 92017, 283310, 84148, 57018, 284352, 5315, 283849, 339669, 119016, 340204, 64753, 64760, 25849, 286464, 129285, 4360, 10507, 4883, 23334, 114984, 256815, 23345, 57662, 165186, 93081, 54617, 84316, 200030, 10592, 57705, 246126, 126321, 10613, 374, 79741, 51586, 144535, 728642, 168850, 647060, 284565, 63895, 201625, 29086, 1954, 9127, 29099, 941, 155066, 140733, 85452, 139728, 84458, 55719, 23028, 245913, 54777, 79868]\n",
      "mapper.py:204: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  df.sort_index(inplace=True)\n"
     ]
    }
   ],
   "source": [
    "# mapping to Entrez\n",
    "df = gdsc_hugo[[\"Gene\"]].copy()\n",
    "df.set_index(\"Gene\",drop= False ,inplace=True)\n",
    "df.drop_duplicates(inplace = True)\n",
    "df, query2target,not_mapped  = apply_mappers(df, ncbi_symbols, ncbi_synonyms, verbose = True,handle_duplicates = \"keep\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>COSMIC_ID</th>\n",
       "      <th>Gene</th>\n",
       "      <th>AA</th>\n",
       "      <th>Classification</th>\n",
       "      <th>function</th>\n",
       "      <th>aa_pos</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.Y281C</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(281, 281)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>998184</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A197T</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(197, 197)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>907272</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.A191T</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(191, 191)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>907289</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.D486N</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(486, 486)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>905989</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.T257N</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(257, 257)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>909698</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.R90H</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(90, 90)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>905971</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.T46M</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(46, 46)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>909761</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.P41fs*33</td>\n",
       "      <td>frameshift</td>\n",
       "      <td>truncating_mutation</td>\n",
       "      <td>(41, 33)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1327769</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.E249D</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(249, 249)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1330931</td>\n",
       "      <td>A1BG</td>\n",
       "      <td>p.Y184*</td>\n",
       "      <td>nonsense</td>\n",
       "      <td>truncating_mutation</td>\n",
       "      <td>(184, 184)</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   COSMIC_ID  Gene          AA Classification             function  \\\n",
       "0     907272  A1BG     p.Y281C       missense       point_mutation   \n",
       "1     998184  A1BG     p.A197T       missense       point_mutation   \n",
       "2     907272  A1BG     p.A191T       missense       point_mutation   \n",
       "3     907289  A1BG     p.D486N       missense       point_mutation   \n",
       "4     905989  A1BG     p.T257N       missense       point_mutation   \n",
       "5     909698  A1BG      p.R90H       missense       point_mutation   \n",
       "6     905971  A1BG      p.T46M       missense       point_mutation   \n",
       "7     909761  A1BG  p.P41fs*33     frameshift  truncating_mutation   \n",
       "8    1327769  A1BG     p.E249D       missense       point_mutation   \n",
       "9    1330931  A1BG     p.Y184*       nonsense  truncating_mutation   \n",
       "\n",
       "       aa_pos  Entrez_gene_ID  \n",
       "0  (281, 281)               1  \n",
       "1  (197, 197)               1  \n",
       "2  (191, 191)               1  \n",
       "3  (486, 486)               1  \n",
       "4  (257, 257)               1  \n",
       "5    (90, 90)               1  \n",
       "6    (46, 46)               1  \n",
       "7    (41, 33)               1  \n",
       "8  (249, 249)               1  \n",
       "9  (184, 184)               1  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdsc_hugo = gdsc_hugo.loc[~gdsc_hugo[\"Gene\"].isin(not_mapped),:].copy()\n",
    "gdsc_hugo[\"Entrez_gene_ID\"] = gdsc_hugo[\"Gene\"].apply(lambda x : query2target[x])\n",
    "gdsc_hugo.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>683665</th>\n",
       "      <th>683667</th>\n",
       "      <th>684052</th>\n",
       "      <th>684055</th>\n",
       "      <th>684057</th>\n",
       "      <th>684059</th>\n",
       "      <th>684062</th>\n",
       "      <th>684072</th>\n",
       "      <th>684681</th>\n",
       "      <th>687448</th>\n",
       "      <th>...</th>\n",
       "      <th>1659818</th>\n",
       "      <th>1659819</th>\n",
       "      <th>1659823</th>\n",
       "      <th>1659928</th>\n",
       "      <th>1659929</th>\n",
       "      <th>1660034</th>\n",
       "      <th>1660035</th>\n",
       "      <th>1660036</th>\n",
       "      <th>1674021</th>\n",
       "      <th>1789883</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 1001 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      683665   683667   684052   684055   684057   684059   684062   684072   \\\n",
       "7157      0.0     -1.0     -1.0     -1.0     -1.0     -1.0     -1.0     -1.0   \n",
       "5290      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "3845      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "4893      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "      684681   687448    ...     1659818  1659819  1659823  1659928  1659929  \\\n",
       "7157      0.0      0.0   ...         0.0     -1.0     -1.0      0.0      0.0   \n",
       "5290      0.0      0.0   ...         0.0      0.0     -1.0      0.0      0.0   \n",
       "3845      0.0      0.0   ...         1.0      0.0      0.0      1.0      0.0   \n",
       "4893      0.0      0.0   ...         0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "      1660034  1660035  1660036  1674021  1789883  \n",
       "7157     -1.0     -1.0    -1.00     -1.0     -1.0  \n",
       "5290      1.0      0.0     0.05      1.0      0.0  \n",
       "3845      1.0      1.0     1.00      0.0      0.0  \n",
       "4893      0.0      0.0    -0.10      0.0      0.0  \n",
       "\n",
       "[4 rows x 1001 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score every cell line. groupby splits the table once, instead of re-scanning\n",
    "# the whole frame with a boolean mask for each COSMIC_ID.\n",
    "gdsc_scores = {}\n",
    "for sample, df in gdsc_hugo.groupby(\"COSMIC_ID\"):\n",
    "    gdsc_scores[sample] = mutations2score(df, TSG_entrez, OG_entrez, hotspots, w=w)\n",
    "# One column per sample; entries missing for a sample come out as NaN and are set to 0.\n",
    "gdsc_scores = pd.DataFrame.from_dict(gdsc_scores).fillna(0)\n",
    "# preprocessed_dir already ends with a separator, so join instead of concatenating \"/\".\n",
    "gdsc_scores.to_csv(os.path.join(preprocessed_dir, \"GDSC.non_bin_mutations.tsv\"), sep=\"\\t\")\n",
    "gdsc_scores.loc[[7157,5290,3845,4893],:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>683665</th>\n",
       "      <th>683667</th>\n",
       "      <th>684052</th>\n",
       "      <th>684055</th>\n",
       "      <th>684057</th>\n",
       "      <th>684059</th>\n",
       "      <th>684062</th>\n",
       "      <th>684072</th>\n",
       "      <th>684681</th>\n",
       "      <th>687448</th>\n",
       "      <th>...</th>\n",
       "      <th>1659818</th>\n",
       "      <th>1659819</th>\n",
       "      <th>1659823</th>\n",
       "      <th>1659928</th>\n",
       "      <th>1659929</th>\n",
       "      <th>1660034</th>\n",
       "      <th>1660035</th>\n",
       "      <th>1660036</th>\n",
       "      <th>1674021</th>\n",
       "      <th>1789883</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2312</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1001 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      683665   683667   684052   684055   684057   684059   684062   684072   \\\n",
       "7157      0.0      1.0      1.0      1.0      1.0      1.0      1.0      1.0   \n",
       "5290      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "3845      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "2312      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "4893      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "      684681   687448    ...     1659818  1659819  1659823  1659928  1659929  \\\n",
       "7157      0.0      0.0   ...         0.0      1.0      1.0      0.0      0.0   \n",
       "5290      0.0      0.0   ...         0.0      0.0      1.0      0.0      0.0   \n",
       "3845      0.0      0.0   ...         1.0      0.0      0.0      1.0      0.0   \n",
       "2312      0.0      0.0   ...         1.0      0.0      1.0      1.0      0.0   \n",
       "4893      0.0      0.0   ...         0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "      1660034  1660035  1660036  1674021  1789883  \n",
       "7157      1.0      1.0      1.0      1.0      1.0  \n",
       "5290      1.0      0.0      1.0      1.0      0.0  \n",
       "3845      1.0      1.0      1.0      0.0      0.0  \n",
       "2312      1.0      1.0      1.0      0.0      0.0  \n",
       "4893      0.0      0.0      1.0      0.0      0.0  \n",
       "\n",
       "[5 rows x 1001 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# binarize is defined outside this cell; it is applied to every score and the result replaces gdsc_scores.\n",
    "gdsc_scores = gdsc_scores.applymap(binarize)\n",
    "gdsc_scores.to_csv(preprocessed_dir+\"/GDSC.binary_mutations.tsv\", sep=\"\\t\")\n",
    "gdsc_scores.loc[[7157,5290,3845,2312,4893],:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PDX\n",
    "\n",
    "* gene amplifications, deletions and mutations are mixed in one file\n",
    "* gene IDs are symbols\n",
    "* some gene IDs were converted to datetime format\n",
    "* mutation types: MutKnownFunctional, MutLikelyFunctional, MutNovel (we failed to find an explanation of these categories)\n",
    "* 146 duplicated mutation records were found and removed\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(262091, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Entrez</th>\n",
       "      <th>Category</th>\n",
       "      <th>Details</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>A2LD1</td>\n",
       "      <td>87769</td>\n",
       "      <td>Amp5</td>\n",
       "      <td>5.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ABCA4</td>\n",
       "      <td>24</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>K1831N,0.550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ABCC13</td>\n",
       "      <td>150000</td>\n",
       "      <td>Amp8</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ABHD13</td>\n",
       "      <td>84945</td>\n",
       "      <td>Amp5</td>\n",
       "      <td>6.06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ABTB1</td>\n",
       "      <td>80325</td>\n",
       "      <td>Amp5</td>\n",
       "      <td>6.82</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sample    Gene  Entrez  Category       Details\n",
       "0  X-1004   A2LD1   87769      Amp5          5.74\n",
       "1  X-1004   ABCA4      24  MutNovel  K1831N,0.550\n",
       "2  X-1004  ABCC13  150000      Amp8             8\n",
       "3  X-1004  ABHD13   84945      Amp5          6.06\n",
       "4  X-1004   ABTB1   80325      Amp5          6.82"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the PDX alteration table from the named sheet of the workbook.\n",
    "pdx = pd.read_excel(root_dir + \"../PDX/nm.3954-S2.xlsx\", sheet_name=\"pdxe_mut_and_cn2\")\n",
    "print(pdx.shape)\n",
    "pdx.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Category\n",
       "Amp5                    51450\n",
       "Amp8                     9629\n",
       "Del0.8                  77022\n",
       "MutKnownFunctional      11002\n",
       "MutLikelyFunctional       172\n",
       "MutNovel               112816\n",
       "dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of records in each alteration category.\n",
    "pdx.groupby(by=\"Category\").size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(123844, 5)\n",
      "67 gene IDs were converted to datetimes Excel.\n"
     ]
    }
   ],
   "source": [
    "# remove CNA, drop duplicated records\n",
    "pdx = pdx.loc[~pdx[\"Category\"].isin([\"Amp5\",\"Amp8\",\"Del0.8\"]),:]\n",
    "pdx = pdx.drop_duplicates()\n",
    "print(pdx.shape)\n",
    "df_size = pdx.shape[0]\n",
    "# datetime genes\n",
    "datetimes = pdx[\"Gene\"].apply(lambda x : type(x) == unicode or type(x) == str)\n",
    "datetimes = datetimes[~datetimes].index\n",
    "pdx_ = pdx.loc[datetimes,:]\n",
    "pdx_ = pdx_.loc[pdx_[\"Entrez\"]!=\"None\",:]\n",
    "pdx_[\"Entrez_gene_ID\"] = pdx_[\"Entrez\"].apply(int)\n",
    "pdx_[\"Gene\"] = pdx_[\"Gene\"].apply(str)\n",
    "pdx = pdx.loc[~pdx.index.isin(datetimes),:]\n",
    "print(df_size - pdx.shape[0],\"gene IDs were converted to datetimes Excel.\")\n",
    "#pdx.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mutated genes in PDX cohort 15214\n",
      "Mapped: 15062 \n",
      "\tdirectly via main_mapper 14107 \n",
      "\tvia alternative mapper 249 \n",
      "\tvia one of multiple synonyms in alternative mapper 624 \n",
      "\tLOC 82 \n",
      "Unmapped: 152 \n",
      "\trecognized symbols without Entrez ID 0 \n",
      "\tmultiple query_ids map to the same target_id 0 \n",
      "\tquery_ids map to multiple target_ids in the main mapper 0 \n",
      "\tquery_ids map to multiple target_ids in the alternative mapper 31 \n",
      "\tLOC not found in Entrez 115 \n",
      "\tNot found at all: 6\n",
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 34\n",
      "Warning: query IDs not mapped to any target IDs excluded: 152\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "IDs mapped to multiple target IDs are kept:\n",
      " [143872, 84458, 51236, 119016, 6218, 653067, 286464, 79741, 644019, 284565, 374, 144535, 341019, 84316, 4253, 200030]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Entrez</th>\n",
       "      <th>Category</th>\n",
       "      <th>Details</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ABCA4</td>\n",
       "      <td>24</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>K1831N,0.550</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ADAM17</td>\n",
       "      <td>6868</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P799T,1.000</td>\n",
       "      <td>6868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ADCK1</td>\n",
       "      <td>57143</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>D52G,1.000</td>\n",
       "      <td>57143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ADNP</td>\n",
       "      <td>23394</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R784T,0.223</td>\n",
       "      <td>23394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>AK1</td>\n",
       "      <td>203</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>K27N,0.430</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>AMOTL2</td>\n",
       "      <td>51421</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A578P,0.625</td>\n",
       "      <td>51421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ANKRD36</td>\n",
       "      <td>375248</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>T998S,0.278</td>\n",
       "      <td>375248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ANKRD62</td>\n",
       "      <td>None</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>N547T,1.000</td>\n",
       "      <td>342850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ANXA7</td>\n",
       "      <td>310</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>I283V,0.565</td>\n",
       "      <td>310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>APLF</td>\n",
       "      <td>200558</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>I237V,0.556</td>\n",
       "      <td>200558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ARMC2</td>\n",
       "      <td>84071</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>E283G,1.000</td>\n",
       "      <td>84071</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ASTN1</td>\n",
       "      <td>460</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>S1091C,0.443</td>\n",
       "      <td>460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>ATRX</td>\n",
       "      <td>546</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P711A,1.000</td>\n",
       "      <td>546</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>AVIL</td>\n",
       "      <td>10677</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A455T,1.000</td>\n",
       "      <td>10677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>BAG6</td>\n",
       "      <td>7917</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>L733I,0.547</td>\n",
       "      <td>7917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>BIRC6</td>\n",
       "      <td>57448</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A1405V,0.688</td>\n",
       "      <td>57448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>C16orf79</td>\n",
       "      <td>283870</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>I82M,1.000</td>\n",
       "      <td>283870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>C1orf182</td>\n",
       "      <td>128229</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A80T,1.000</td>\n",
       "      <td>128229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CARM1</td>\n",
       "      <td>10498</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>K81N,0.280</td>\n",
       "      <td>10498</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CBWD1</td>\n",
       "      <td>55871</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>N363I,0.893</td>\n",
       "      <td>55871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CCDC66</td>\n",
       "      <td>285331</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R538P,1.000</td>\n",
       "      <td>285331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CCDC88A</td>\n",
       "      <td>55704</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>E217A,0.263</td>\n",
       "      <td>55704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CCT7</td>\n",
       "      <td>10574</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A162T,0.384</td>\n",
       "      <td>10574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CDC42EP1</td>\n",
       "      <td>11135</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>PAPAANPS253P,0.833</td>\n",
       "      <td>11135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CDON</td>\n",
       "      <td>50937</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>S483R,1.000</td>\n",
       "      <td>50937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CENPV</td>\n",
       "      <td>201161</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P24S,0.700</td>\n",
       "      <td>201161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CEP350</td>\n",
       "      <td>9857</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R214M,0.286</td>\n",
       "      <td>9857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CHORDC1</td>\n",
       "      <td>26973</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P31L,0.672</td>\n",
       "      <td>26973</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CHRNB4</td>\n",
       "      <td>1143</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G226D,1.000</td>\n",
       "      <td>1143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>CLUAP1</td>\n",
       "      <td>23059</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P348S,1.000</td>\n",
       "      <td>23059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125656</th>\n",
       "      <td>X-1726</td>\n",
       "      <td>2014-03-02 00:00:00</td>\n",
       "      <td>54996</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>F151S,0.442</td>\n",
       "      <td>54996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141280</th>\n",
       "      <td>X-3029</td>\n",
       "      <td>2014-03-06 00:00:00</td>\n",
       "      <td>10299</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>D142N,0.288</td>\n",
       "      <td>10299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141455</th>\n",
       "      <td>X-3029</td>\n",
       "      <td>2014-09-01 00:00:00</td>\n",
       "      <td>1731</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R251H,0.364</td>\n",
       "      <td>1731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146922</th>\n",
       "      <td>X-4819</td>\n",
       "      <td>2014-03-04 00:00:00</td>\n",
       "      <td>57574</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P368T,0.583</td>\n",
       "      <td>57574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>151460</th>\n",
       "      <td>X-2602</td>\n",
       "      <td>2014-09-09 00:00:00</td>\n",
       "      <td>10801</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A245V,0.528</td>\n",
       "      <td>10801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176127</th>\n",
       "      <td>X-5643</td>\n",
       "      <td>2014-03-02 00:00:00</td>\n",
       "      <td>51257</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P34L,0.397</td>\n",
       "      <td>51257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176128</th>\n",
       "      <td>X-5643</td>\n",
       "      <td>2014-03-06 00:00:00</td>\n",
       "      <td>10299</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P902S,0.297</td>\n",
       "      <td>10299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180164</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-02 00:00:00</td>\n",
       "      <td>54996</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>D131N,0.261</td>\n",
       "      <td>54996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180165</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-01 00:00:00</td>\n",
       "      <td>55016</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>T244I,0.267</td>\n",
       "      <td>55016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180166</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-05 00:00:00</td>\n",
       "      <td>54708</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G80D,0.394</td>\n",
       "      <td>54708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180167</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-05 00:00:00</td>\n",
       "      <td>54708</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G242E,0.442</td>\n",
       "      <td>54708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180168</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-06 00:00:00</td>\n",
       "      <td>10299</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>V161M,1.000</td>\n",
       "      <td>10299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180169</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-06 00:00:00</td>\n",
       "      <td>10299</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G352S,0.568</td>\n",
       "      <td>10299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180170</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-03-09 00:00:00</td>\n",
       "      <td>92979</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G20C,0.304</td>\n",
       "      <td>92979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181635</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-09-01 00:00:00</td>\n",
       "      <td>1731</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>D342N,0.352</td>\n",
       "      <td>1731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181636</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-09-02 00:00:00</td>\n",
       "      <td>4735</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R229K,0.324</td>\n",
       "      <td>4735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181637</th>\n",
       "      <td>X-5696</td>\n",
       "      <td>2014-09-02 00:00:00</td>\n",
       "      <td>4735</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>G307D,0.321</td>\n",
       "      <td>4735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186444</th>\n",
       "      <td>X-5959</td>\n",
       "      <td>2014-09-09 00:00:00</td>\n",
       "      <td>10801</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P256L,0.799</td>\n",
       "      <td>10801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191753</th>\n",
       "      <td>X-2607</td>\n",
       "      <td>2014-09-11 00:00:00</td>\n",
       "      <td>55752</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>V102M,0.266</td>\n",
       "      <td>55752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191754</th>\n",
       "      <td>X-2607</td>\n",
       "      <td>2014-09-07 00:00:00</td>\n",
       "      <td>989</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>Q393H,0.451</td>\n",
       "      <td>989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203713</th>\n",
       "      <td>X-4412</td>\n",
       "      <td>2014-03-07 00:00:00</td>\n",
       "      <td>64844</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>E599K,0.467</td>\n",
       "      <td>64844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206247</th>\n",
       "      <td>X-4852</td>\n",
       "      <td>2014-03-09 00:00:00</td>\n",
       "      <td>92979</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>C134S,0.412</td>\n",
       "      <td>92979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>209729</th>\n",
       "      <td>X-1199</td>\n",
       "      <td>2014-09-08 00:00:00</td>\n",
       "      <td>23176</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P16S,0.542</td>\n",
       "      <td>23176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219117</th>\n",
       "      <td>X-2633</td>\n",
       "      <td>2014-03-10 00:00:00</td>\n",
       "      <td>162333</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>A117G,0.667</td>\n",
       "      <td>162333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226988</th>\n",
       "      <td>X-4377</td>\n",
       "      <td>2014-03-09 00:00:00</td>\n",
       "      <td>92979</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>T107I,1.000</td>\n",
       "      <td>92979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227316</th>\n",
       "      <td>X-4378</td>\n",
       "      <td>2014-03-09 00:00:00</td>\n",
       "      <td>92979</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>T107I,0.400</td>\n",
       "      <td>92979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>253499</th>\n",
       "      <td>X-4597</td>\n",
       "      <td>2014-09-07 00:00:00</td>\n",
       "      <td>989</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>L199F,0.484</td>\n",
       "      <td>989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>257519</th>\n",
       "      <td>X-4979</td>\n",
       "      <td>2014-09-10 00:00:00</td>\n",
       "      <td>151011</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>R278C,0.524</td>\n",
       "      <td>151011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259068</th>\n",
       "      <td>X-5273</td>\n",
       "      <td>2014-09-08 00:00:00</td>\n",
       "      <td>23176</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>V384M,0.488</td>\n",
       "      <td>23176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259069</th>\n",
       "      <td>X-5273</td>\n",
       "      <td>2014-09-08 00:00:00</td>\n",
       "      <td>23176</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>P85F,0.564</td>\n",
       "      <td>23176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>122412 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Sample                 Gene  Entrez  Category             Details  \\\n",
       "1       X-1004                ABCA4      24  MutNovel        K1831N,0.550   \n",
       "8       X-1004               ADAM17    6868  MutNovel         P799T,1.000   \n",
       "16      X-1004                ADCK1   57143  MutNovel          D52G,1.000   \n",
       "17      X-1004                 ADNP   23394  MutNovel         R784T,0.223   \n",
       "21      X-1004                  AK1     203  MutNovel          K27N,0.430   \n",
       "32      X-1004               AMOTL2   51421  MutNovel         A578P,0.625   \n",
       "37      X-1004              ANKRD36  375248  MutNovel         T998S,0.278   \n",
       "38      X-1004              ANKRD62    None  MutNovel         N547T,1.000   \n",
       "41      X-1004                ANXA7     310  MutNovel         I283V,0.565   \n",
       "43      X-1004                 APLF  200558  MutNovel         I237V,0.556   \n",
       "48      X-1004                ARMC2   84071  MutNovel         E283G,1.000   \n",
       "52      X-1004                ASTN1     460  MutNovel        S1091C,0.443   \n",
       "57      X-1004                 ATRX     546  MutNovel         P711A,1.000   \n",
       "59      X-1004                 AVIL   10677  MutNovel         A455T,1.000   \n",
       "62      X-1004                 BAG6    7917  MutNovel         L733I,0.547   \n",
       "69      X-1004                BIRC6   57448  MutNovel        A1405V,0.688   \n",
       "88      X-1004             C16orf79  283870  MutNovel          I82M,1.000   \n",
       "92      X-1004             C1orf182  128229  MutNovel          A80T,1.000   \n",
       "106     X-1004                CARM1   10498  MutNovel          K81N,0.280   \n",
       "110     X-1004                CBWD1   55871  MutNovel         N363I,0.893   \n",
       "115     X-1004               CCDC66  285331  MutNovel         R538P,1.000   \n",
       "116     X-1004              CCDC88A   55704  MutNovel         E217A,0.263   \n",
       "119     X-1004                 CCT7   10574  MutNovel         A162T,0.384   \n",
       "124     X-1004             CDC42EP1   11135  MutNovel  PAPAANPS253P,0.833   \n",
       "126     X-1004                 CDON   50937  MutNovel         S483R,1.000   \n",
       "133     X-1004                CENPV  201161  MutNovel          P24S,0.700   \n",
       "135     X-1004               CEP350    9857  MutNovel         R214M,0.286   \n",
       "143     X-1004              CHORDC1   26973  MutNovel          P31L,0.672   \n",
       "145     X-1004               CHRNB4    1143  MutNovel         G226D,1.000   \n",
       "150     X-1004               CLUAP1   23059  MutNovel         P348S,1.000   \n",
       "...        ...                  ...     ...       ...                 ...   \n",
       "125656  X-1726  2014-03-02 00:00:00   54996  MutNovel         F151S,0.442   \n",
       "141280  X-3029  2014-03-06 00:00:00   10299  MutNovel         D142N,0.288   \n",
       "141455  X-3029  2014-09-01 00:00:00    1731  MutNovel         R251H,0.364   \n",
       "146922  X-4819  2014-03-04 00:00:00   57574  MutNovel         P368T,0.583   \n",
       "151460  X-2602  2014-09-09 00:00:00   10801  MutNovel         A245V,0.528   \n",
       "176127  X-5643  2014-03-02 00:00:00   51257  MutNovel          P34L,0.397   \n",
       "176128  X-5643  2014-03-06 00:00:00   10299  MutNovel         P902S,0.297   \n",
       "180164  X-5696  2014-03-02 00:00:00   54996  MutNovel         D131N,0.261   \n",
       "180165  X-5696  2014-03-01 00:00:00   55016  MutNovel         T244I,0.267   \n",
       "180166  X-5696  2014-03-05 00:00:00   54708  MutNovel          G80D,0.394   \n",
       "180167  X-5696  2014-03-05 00:00:00   54708  MutNovel         G242E,0.442   \n",
       "180168  X-5696  2014-03-06 00:00:00   10299  MutNovel         V161M,1.000   \n",
       "180169  X-5696  2014-03-06 00:00:00   10299  MutNovel         G352S,0.568   \n",
       "180170  X-5696  2014-03-09 00:00:00   92979  MutNovel          G20C,0.304   \n",
       "181635  X-5696  2014-09-01 00:00:00    1731  MutNovel         D342N,0.352   \n",
       "181636  X-5696  2014-09-02 00:00:00    4735  MutNovel         R229K,0.324   \n",
       "181637  X-5696  2014-09-02 00:00:00    4735  MutNovel         G307D,0.321   \n",
       "186444  X-5959  2014-09-09 00:00:00   10801  MutNovel         P256L,0.799   \n",
       "191753  X-2607  2014-09-11 00:00:00   55752  MutNovel         V102M,0.266   \n",
       "191754  X-2607  2014-09-07 00:00:00     989  MutNovel         Q393H,0.451   \n",
       "203713  X-4412  2014-03-07 00:00:00   64844  MutNovel         E599K,0.467   \n",
       "206247  X-4852  2014-03-09 00:00:00   92979  MutNovel         C134S,0.412   \n",
       "209729  X-1199  2014-09-08 00:00:00   23176  MutNovel          P16S,0.542   \n",
       "219117  X-2633  2014-03-10 00:00:00  162333  MutNovel         A117G,0.667   \n",
       "226988  X-4377  2014-03-09 00:00:00   92979  MutNovel         T107I,1.000   \n",
       "227316  X-4378  2014-03-09 00:00:00   92979  MutNovel         T107I,0.400   \n",
       "253499  X-4597  2014-09-07 00:00:00     989  MutNovel         L199F,0.484   \n",
       "257519  X-4979  2014-09-10 00:00:00  151011  MutNovel         R278C,0.524   \n",
       "259068  X-5273  2014-09-08 00:00:00   23176  MutNovel         V384M,0.488   \n",
       "259069  X-5273  2014-09-08 00:00:00   23176  MutNovel          P85F,0.564   \n",
       "\n",
       "       Entrez_gene_ID  \n",
       "1                  24  \n",
       "8                6868  \n",
       "16              57143  \n",
       "17              23394  \n",
       "21                203  \n",
       "32              51421  \n",
       "37             375248  \n",
       "38             342850  \n",
       "41                310  \n",
       "43             200558  \n",
       "48              84071  \n",
       "52                460  \n",
       "57                546  \n",
       "59              10677  \n",
       "62               7917  \n",
       "69              57448  \n",
       "88             283870  \n",
       "92             128229  \n",
       "106             10498  \n",
       "110             55871  \n",
       "115            285331  \n",
       "116             55704  \n",
       "119             10574  \n",
       "124             11135  \n",
       "126             50937  \n",
       "133            201161  \n",
       "135              9857  \n",
       "143             26973  \n",
       "145              1143  \n",
       "150             23059  \n",
       "...               ...  \n",
       "125656          54996  \n",
       "141280          10299  \n",
       "141455           1731  \n",
       "146922          57574  \n",
       "151460          10801  \n",
       "176127          51257  \n",
       "176128          10299  \n",
       "180164          54996  \n",
       "180165          55016  \n",
       "180166          54708  \n",
       "180167          54708  \n",
       "180168          10299  \n",
       "180169          10299  \n",
       "180170          92979  \n",
       "181635           1731  \n",
       "181636           4735  \n",
       "181637           4735  \n",
       "186444          10801  \n",
       "191753          55752  \n",
       "191754            989  \n",
       "203713          64844  \n",
       "206247          92979  \n",
       "209729          23176  \n",
       "219117         162333  \n",
       "226988          92979  \n",
       "227316          92979  \n",
       "253499            989  \n",
       "257519         151011  \n",
       "259068          23176  \n",
       "259069          23176  \n",
       "\n",
       "[122412 rows x 6 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# annotate genes \n",
    "PDX_genes = pdx[[\"Gene\"]].drop_duplicates()\n",
    "print(\"mutated genes in PDX cohort\",PDX_genes.shape[0])\n",
    "PDX_genes = PDX_genes.set_index(\"Gene\",drop=False)\n",
    "# rename with mapper \n",
    "PDX_genes, query2target, not_mapped = apply_mappers(PDX_genes, ncbi_symbols, ncbi_synonyms, verbose = True,handle_duplicates = \"keep\")\n",
    "\n",
    "# Both subsets are selected from the same, still unfiltered frame.\n",
    "# Selecting the not-mapped rows after pdx has been reduced to mapped rows\n",
    "# always yields an empty frame, so those genes would be lost.\n",
    "is_not_mapped = pdx[\"Gene\"].isin(not_mapped)\n",
    "\n",
    "# keep not mapped genes with Entrez IDs provided\n",
    "pdx_not_mapped = pdx.loc[is_not_mapped & (pdx[\"Entrez\"]!=\"None\"),:].copy()\n",
    "pdx_not_mapped[\"Entrez_gene_ID\"] = pdx_not_mapped[\"Entrez\"].apply(int)\n",
    "\n",
    "# mapped genes get the ID chosen by the mapper\n",
    "pdx_mapped = pdx.loc[~is_not_mapped,:].copy()\n",
    "pdx_mapped[\"Entrez_gene_ID\"] = pdx_mapped[\"Gene\"].apply(lambda x : query2target[x])\n",
    "\n",
    "# concatenate \n",
    "# NOTE(review): pdx_ is not defined in this cell. If its rows are also still present\n",
    "# in pdx they will now appear twice (via pdx_not_mapped) - confirm against the cell that builds pdx_.\n",
    "pdx = pd.concat([pdx_mapped,pdx_,pdx_not_mapped],axis=0)\n",
    "pdx "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "      <th>Details</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>24</td>\n",
       "      <td>K1831N,0.550</td>\n",
       "      <td>ABCA4</td>\n",
       "      <td>MutNovel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>203</td>\n",
       "      <td>K27N,0.430</td>\n",
       "      <td>AK1</td>\n",
       "      <td>MutNovel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>310</td>\n",
       "      <td>I283V,0.565</td>\n",
       "      <td>ANXA7</td>\n",
       "      <td>MutNovel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>460</td>\n",
       "      <td>S1091C,0.443</td>\n",
       "      <td>ASTN1</td>\n",
       "      <td>MutNovel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>546</td>\n",
       "      <td>P711A,1.000</td>\n",
       "      <td>ATRX</td>\n",
       "      <td>MutNovel</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Sample Entrez_gene_ID       Details   Gene  Category\n",
       "1   X-1004             24  K1831N,0.550  ABCA4  MutNovel\n",
       "21  X-1004            203    K27N,0.430    AK1  MutNovel\n",
       "41  X-1004            310   I283V,0.565  ANXA7  MutNovel\n",
       "52  X-1004            460  S1091C,0.443  ASTN1  MutNovel\n",
       "57  X-1004            546   P711A,1.000   ATRX  MutNovel"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# order rows by sample, then gene ID, and keep five columns in a fixed order\n",
    "pdx = (pdx\n",
    "       .sort_values(by=[\"Sample\",\"Entrez_gene_ID\"])\n",
    "       .loc[:,[\"Sample\",\"Entrez_gene_ID\",\"Details\",\"Gene\",\"Category\"]])\n",
    "pdx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "#pdx.loc[pdx[\"Details\"].str.contains(\"\\-\"),:].sort_values(\"Sample\").shape\n",
    "#pdx.loc[pdx[\"Gene\"] == \"TTN\",:].sort_values(\"Sample\")\n",
    "#pdx.loc[pdx[\"Category\"]==\"MutLikelyFunctional\",:].sort_values(\"Sample\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Classification\n",
       "missense                   99349\n",
       "inframe_indel               8559\n",
       "unrecognized_truncating     4624\n",
       "stop_gain_or_fs             3788\n",
       "unrecognized_point2         2245\n",
       "ess_splice                  2204\n",
       "unrecognized_point          1643\n",
       "dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# guess mutation type from \"Details\" field\n",
    "def guess_mut_type(x):\n",
    "    \"\"\"Label a \"Details\" string by the notation used before its first comma.\n",
    "\n",
    "    Rules are tried in a fixed order and the first match wins; a value that\n",
    "    matches no rule is labelled \"unrecognized\".\n",
    "    \"\"\"\n",
    "    change = x.partition(\",\")[0]\n",
    "    if re.match(\"^[A-Z][0-9]+[A-Z]$\", change): # one letter, digits, one letter (e.g. K27N)\n",
    "        return \"missense\"\n",
    "    if re.match(\"^[A-Z]+[0-9]+[A-Z]+$\", change): # letters, digits, letters (e.g. PAPAANPS253P)\n",
    "        return \"inframe_indel\"\n",
    "    if \"*\" in change:\n",
    "        return \"stop_gain_or_fs\"\n",
    "    if \"?\" in change: # splice_site\n",
    "        return \"ess_splice\"\n",
    "    # notations where a dash stands in for one side of the change\n",
    "    dash_rules = (\n",
    "        (\"^\\-[0-9]+[A-Z]+$\", \"unrecognized_point\"), # -64Q - always MutNovel\n",
    "        (\"^\\-[0-9]+$\", \"unrecognized_truncating\"), # -509 mostly MutKnownFunctional\n",
    "        (\"^[A-Z]+[0-9]+\\-$\", \"unrecognized_point2\"), # Q72- mutNovel\n",
    "    )\n",
    "    for pattern, label in dash_rules:\n",
    "        if re.match(pattern, change):\n",
    "            return label\n",
    "    return \"unrecognized\"\n",
    "\n",
    "pdx[\"Classification\"] = pdx[\"Details\"].apply(guess_mut_type)\n",
    "#pdx.loc[pdx[\"Classification\"].str.contains(\"unrecognized\"),:].groupby([\"Category\",\"Classification\"]).size()\n",
    "pdx.groupby(\"Classification\").size().sort_values(ascending = False)\n",
    "#pdx.loc[pdx[\"Classification\"]==\"unrecognized\",:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "function\n",
       "point_mutation         111796\n",
       "truncating_mutation     10616\n",
       "dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdx_truncating = [\"stop_gain_or_fs\",\"ess_splice\",\"unrecognized_truncating\"]\n",
    "pdx_point = [\"missense\",\"inframe_indel\",\"unrecognized_point\",\"unrecognized_point2\"]\n",
    "# collapse the detailed classes into two groups; rows whose class is in neither list are not assigned\n",
    "for pdx_function, pdx_classes in ((\"truncating_mutation\",pdx_truncating),(\"point_mutation\",pdx_point)):\n",
    "    pdx.loc[pdx[\"Classification\"].isin(pdx_classes),\"function\"] = pdx_function\n",
    "pdx.groupby(\"function\").size().sort_values(ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "      <th>Details</th>\n",
       "      <th>Gene</th>\n",
       "      <th>Category</th>\n",
       "      <th>Classification</th>\n",
       "      <th>function</th>\n",
       "      <th>aa_change</th>\n",
       "      <th>aa_pos</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>24</td>\n",
       "      <td>K1831N,0.550</td>\n",
       "      <td>ABCA4</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>K1831N</td>\n",
       "      <td>(1831, 1831)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>203</td>\n",
       "      <td>K27N,0.430</td>\n",
       "      <td>AK1</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>K27N</td>\n",
       "      <td>(27, 27)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>310</td>\n",
       "      <td>I283V,0.565</td>\n",
       "      <td>ANXA7</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>I283V</td>\n",
       "      <td>(283, 283)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>460</td>\n",
       "      <td>S1091C,0.443</td>\n",
       "      <td>ASTN1</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>S1091C</td>\n",
       "      <td>(1091, 1091)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>X-1004</td>\n",
       "      <td>546</td>\n",
       "      <td>P711A,1.000</td>\n",
       "      <td>ATRX</td>\n",
       "      <td>MutNovel</td>\n",
       "      <td>missense</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>P711A</td>\n",
       "      <td>(711, 711)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Sample Entrez_gene_ID       Details   Gene  Category Classification  \\\n",
       "1   X-1004             24  K1831N,0.550  ABCA4  MutNovel       missense   \n",
       "21  X-1004            203    K27N,0.430    AK1  MutNovel       missense   \n",
       "41  X-1004            310   I283V,0.565  ANXA7  MutNovel       missense   \n",
       "52  X-1004            460  S1091C,0.443  ASTN1  MutNovel       missense   \n",
       "57  X-1004            546   P711A,1.000   ATRX  MutNovel       missense   \n",
       "\n",
       "          function aa_change        aa_pos  \n",
       "1   point_mutation    K1831N  (1831, 1831)  \n",
       "21  point_mutation      K27N      (27, 27)  \n",
       "41  point_mutation     I283V    (283, 283)  \n",
       "52  point_mutation    S1091C  (1091, 1091)  \n",
       "57  point_mutation     P711A    (711, 711)  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# protein change = the text of \"Details\" before its first comma\n",
    "pdx[\"aa_change\"] = pdx[\"Details\"].apply(lambda details: details.partition(\",\")[0])\n",
    "# get_aa_pos_GDSC is defined outside this cell\n",
    "pdx[\"aa_pos\"] = pdx[\"aa_change\"].apply(get_aa_pos_GDSC)\n",
    "pdx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X-1004</th>\n",
       "      <th>X-1008</th>\n",
       "      <th>X-1027</th>\n",
       "      <th>X-1095</th>\n",
       "      <th>X-1119</th>\n",
       "      <th>X-1156</th>\n",
       "      <th>X-1167</th>\n",
       "      <th>X-1169</th>\n",
       "      <th>X-1172</th>\n",
       "      <th>X-1173</th>\n",
       "      <th>...</th>\n",
       "      <th>X-5713</th>\n",
       "      <th>X-5717</th>\n",
       "      <th>X-5727</th>\n",
       "      <th>X-5739</th>\n",
       "      <th>X-5808</th>\n",
       "      <th>X-5959</th>\n",
       "      <th>X-5974</th>\n",
       "      <th>X-5975</th>\n",
       "      <th>X-6030</th>\n",
       "      <th>X-6047</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-0.5</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.055556</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2312</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 399 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      X-1004  X-1008  X-1027  X-1095  X-1119  X-1156  X-1167  X-1169  X-1172  \\\n",
       "7157    -1.0   -1.00    -1.0     0.0     0.0    -1.0    -0.5    -1.0   -1.00   \n",
       "5290     0.0    0.00     0.0     0.0     0.0     0.0     0.0     0.0    0.00   \n",
       "3845     0.0    0.00     1.0     1.0     0.0     1.0     0.0     0.0    1.00   \n",
       "2312     0.0   -0.05     0.0     0.0     0.0     0.0     0.0     0.0   -0.05   \n",
       "4893     0.0    0.00     0.0     0.0     0.0     0.0     0.0     0.0    0.00   \n",
       "\n",
       "      X-1173   ...    X-5713  X-5717  X-5727  X-5739    X-5808  X-5959  \\\n",
       "7157     0.0   ...       0.0    -1.0     0.0     0.0 -1.000000     0.0   \n",
       "5290     0.0   ...       0.0     0.0     0.0     0.0  0.055556     0.0   \n",
       "3845     1.0   ...       0.0     1.0     0.0     0.0  1.000000     0.0   \n",
       "2312     0.0   ...       0.0     0.0     0.0     0.0  0.000000     0.0   \n",
       "4893    -1.0   ...       0.0     0.0    -1.0     0.0  0.000000     0.0   \n",
       "\n",
       "      X-5974  X-5975  X-6030  X-6047  \n",
       "7157    -1.0     0.0     0.0    -1.0  \n",
       "5290     0.0     0.0     0.0     0.0  \n",
       "3845     0.0     0.0     1.0     0.0  \n",
       "2312     0.0     0.0     0.0     0.0  \n",
       "4893     0.0     0.0     0.0     0.0  \n",
       "\n",
       "[5 rows x 399 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# calculate per gene scores: \n",
    "pdx_scores = {}\n",
    "for sample in list(set(pdx[\"Sample\"].values)):\n",
    "    df = pdx.loc[pdx[\"Sample\"] == sample,:]\n",
    "    pdx_scores[sample] = mutations2score(df, TSG_entrez,OG_entrez, hotspots, w=w)\n",
    "pdx_scores = pd.DataFrame.from_dict(pdx_scores)\n",
    "pdx_scores.fillna(0,inplace=True)\n",
    "\n",
    "pdx_scores.to_csv(preprocessed_dir+\"/PDX.non_bin_mutations.tsv\",sep = \"\\t\")\n",
    "pdx_scores.loc[[7157,5290,3845,2312,4893],:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X-1004</th>\n",
       "      <th>X-1008</th>\n",
       "      <th>X-1027</th>\n",
       "      <th>X-1095</th>\n",
       "      <th>X-1119</th>\n",
       "      <th>X-1156</th>\n",
       "      <th>X-1167</th>\n",
       "      <th>X-1169</th>\n",
       "      <th>X-1172</th>\n",
       "      <th>X-1173</th>\n",
       "      <th>...</th>\n",
       "      <th>X-5713</th>\n",
       "      <th>X-5717</th>\n",
       "      <th>X-5727</th>\n",
       "      <th>X-5739</th>\n",
       "      <th>X-5808</th>\n",
       "      <th>X-5959</th>\n",
       "      <th>X-5974</th>\n",
       "      <th>X-5975</th>\n",
       "      <th>X-6030</th>\n",
       "      <th>X-6047</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2312</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 399 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      X-1004  X-1008  X-1027  X-1095  X-1119  X-1156  X-1167  X-1169  X-1172  \\\n",
       "7157     1.0     1.0     1.0     0.0     0.0     1.0     1.0     1.0     1.0   \n",
       "5290     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   \n",
       "3845     0.0     0.0     1.0     1.0     0.0     1.0     0.0     0.0     1.0   \n",
       "2312     0.0     1.0     0.0     0.0     0.0     0.0     0.0     0.0     1.0   \n",
       "4893     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   \n",
       "\n",
       "      X-1173   ...    X-5713  X-5717  X-5727  X-5739  X-5808  X-5959  X-5974  \\\n",
       "7157     0.0   ...       0.0     1.0     0.0     0.0     1.0     0.0     1.0   \n",
       "5290     0.0   ...       0.0     0.0     0.0     0.0     1.0     0.0     0.0   \n",
       "3845     1.0   ...       0.0     1.0     0.0     0.0     1.0     0.0     0.0   \n",
       "2312     0.0   ...       0.0     0.0     0.0     0.0     0.0     0.0     0.0   \n",
       "4893     1.0   ...       0.0     0.0     1.0     0.0     0.0     0.0     0.0   \n",
       "\n",
       "      X-5975  X-6030  X-6047  \n",
       "7157     0.0     0.0     1.0  \n",
       "5290     0.0     0.0     0.0  \n",
       "3845     0.0     1.0     0.0  \n",
       "2312     0.0     0.0     0.0  \n",
       "4893     0.0     0.0     0.0  \n",
       "\n",
       "[5 rows x 399 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdx_scores = pdx_scores.applymap(lambda x: binarize(x))\n",
    "pdx_scores.to_csv(preprocessed_dir+\"/PDX.binary_mutations.tsv\",sep = \"\\t\")\n",
    "pdx_scores.loc[[7157,5290,3845,2312,4893],:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TCGA and CCLE \n",
    "    * in CCLE only 1651 genes were profiled. We used the file \"CCLE_hybrid_capture1650_hg19_NoCommonSNPs_CDS_2012.05.07.maf.gz\" marked as recommended. It contains a list of mutations across CCLE cell lines, with VAF > 10%, and common SNP excluded. \n",
    "    wget https://data.broadinstitute.org/ccle_legacy_data/hybrid_capture_sequencing/CCLE_hybrid_capture1650_hg19_NoCommonSNPs_CDS_2012.05.07.maf.gz\n",
    "    \n",
    "    Other source files:\n",
    "    * \"List of mutations in 1651 genes, determined by targeted massively parallel sequencing combined with Raindance technology\":\n",
    "    wget https://data.broadinstitute.org/ccle_legacy_data/hybrid_capture_sequencing/1650_HC_plus_RD_muts.maf.annotated\n",
    "    wget https://data.broadinstitute.org/ccle_legacy_data/hybrid_capture_sequencing/1650_HC_plus_RD_indels.maf.annotated\n",
    "  \n",
    "  \n",
    "    Union contains 120 OG and 182 TSG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_broad_maf(df, point, truncating, protein_change = \"Protein_Change\"):\n",
    "    df = df[[\"Hugo_Symbol\",\"Entrez_Gene_Id\",\"Variant_Classification\",\n",
    "                       protein_change,'Tumor_Sample_Barcode']].copy()\n",
    "    #                    'Chromosome', u'Start_position', u'End_position']].copy()\n",
    "    if \"-Tumor\" in df[\"Tumor_Sample_Barcode\"].values[0]:\n",
    "        df[\"Tumor_Sample_Barcode\"] = df[\"Tumor_Sample_Barcode\"].apply(lambda x : x.replace(\"-Tumor\",\"\") )\n",
    "    df.sort_values([\"Tumor_Sample_Barcode\",\"Hugo_Symbol\"], inplace=True)\n",
    "    df = df.loc[~df[\"Variant_Classification\"].isin([\"3'UTR\",\"5'Flank\",\"5'UTR\",\"Intron\",\"Silent\",\"RNA\",\n",
    "                                                    \"De_novo_Start_OutOfFrame\",'De_novo_Start_InFrame']),:]\n",
    "    \n",
    "    if df.shape[0] == 0:\n",
    "        return df.rename({\"Entrez_Gene_Id\":\"Entrez_gene_ID\",\"Tumor_Sample_Barcode\":\"Sample\"},axis =\"columns\")\n",
    "    df.loc[df[\"Variant_Classification\"].isin(truncating),\"function\"] = \"truncating_mutation\"\n",
    "    df.loc[df[\"Variant_Classification\"].isin(point),\"function\"] = \"point_mutation\"\n",
    "    df.loc[df[\"Variant_Classification\"].isin(['Splice_Site_DNP','Splice_Site_Ins','Splice_Site_SNP',\n",
    "                                              'Start_Codon_Del','Stop_Codon_DNP','Stop_Codon_Ins']),\"Protein_Change\"] = \"NA\"\n",
    "    \n",
    "    df[\"aa_pos\"] =  df[protein_change].apply(lambda x : get_aa_pos_GDSC(x))\n",
    "    df.rename({\"Entrez_Gene_Id\":\"Entrez_gene_ID\",\"Tumor_Sample_Barcode\":\"Sample\"},axis =\"columns\",inplace=True)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/olya/miniconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2714: DtypeWarning: Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hugo_Symbol</th>\n",
       "      <th>Entrez_gene_ID</th>\n",
       "      <th>Variant_Classification</th>\n",
       "      <th>Protein_Change</th>\n",
       "      <th>Sample</th>\n",
       "      <th>function</th>\n",
       "      <th>aa_pos</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>76694</th>\n",
       "      <td>AAK1</td>\n",
       "      <td>22848</td>\n",
       "      <td>Missense_Mutation</td>\n",
       "      <td>p.R634H</td>\n",
       "      <td>22RV1_PROSTATE</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(634, 634)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37856</th>\n",
       "      <td>ABCC4</td>\n",
       "      <td>10257</td>\n",
       "      <td>Frame_Shift_Del</td>\n",
       "      <td>p.L883fs</td>\n",
       "      <td>22RV1_PROSTATE</td>\n",
       "      <td>truncating_mutation</td>\n",
       "      <td>(883, 883)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27083</th>\n",
       "      <td>ACACB</td>\n",
       "      <td>32</td>\n",
       "      <td>Missense_Mutation</td>\n",
       "      <td>p.R2360H</td>\n",
       "      <td>22RV1_PROSTATE</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(2360, 2360)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66125</th>\n",
       "      <td>ACVR2A</td>\n",
       "      <td>92</td>\n",
       "      <td>Frame_Shift_Del</td>\n",
       "      <td>p.K435fs</td>\n",
       "      <td>22RV1_PROSTATE</td>\n",
       "      <td>truncating_mutation</td>\n",
       "      <td>(435, 435)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46329</th>\n",
       "      <td>ADAMTSL3</td>\n",
       "      <td>57188</td>\n",
       "      <td>Missense_Mutation</td>\n",
       "      <td>p.G287D</td>\n",
       "      <td>22RV1_PROSTATE</td>\n",
       "      <td>point_mutation</td>\n",
       "      <td>(287, 287)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Hugo_Symbol  Entrez_gene_ID Variant_Classification Protein_Change  \\\n",
       "76694        AAK1           22848      Missense_Mutation        p.R634H   \n",
       "37856       ABCC4           10257        Frame_Shift_Del       p.L883fs   \n",
       "27083       ACACB              32      Missense_Mutation       p.R2360H   \n",
       "66125      ACVR2A              92        Frame_Shift_Del       p.K435fs   \n",
       "46329    ADAMTSL3           57188      Missense_Mutation        p.G287D   \n",
       "\n",
       "               Sample             function        aa_pos  \n",
       "76694  22RV1_PROSTATE       point_mutation    (634, 634)  \n",
       "37856  22RV1_PROSTATE  truncating_mutation    (883, 883)  \n",
       "27083  22RV1_PROSTATE       point_mutation  (2360, 2360)  \n",
       "66125  22RV1_PROSTATE  truncating_mutation    (435, 435)  \n",
       "46329  22RV1_PROSTATE       point_mutation    (287, 287)  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ccle_point = [\"Missense_Mutation\",\"In_Frame_Del\",\"In_Frame_Ins\"]\n",
    "ccle_truncating = [\"Frame_Shift_Del\",\"Frame_Shift_Ins\",\"Nonsense_Mutation\",\n",
    "                   \"Splice_Site_Ins\",\"Splice_Site_Del\",\"Splice_Site_SNP\",\"Splice_Site_DNP\",\n",
    "                   \"Start_Codon_Del\",\"Stop_Codon_DNP\"]\n",
    "\n",
    "ccle = pd.read_csv(\"../../CCLE/CCLE_hybrid_capture1650_hg19_NoCommonSNPs_CDS_2012.05.07.maf\",sep=\"\\t\")\n",
    "ccle = read_broad_maf(ccle, ccle_point, ccle_truncating)\n",
    "ccle.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "function\n",
       "point_mutation         70503\n",
       "truncating_mutation    18486\n",
       "dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ccle.groupby(\"Variant_Classification\").size().sort_values(ascending = False)\n",
    "ccle.groupby(\"function\").size().sort_values(ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>22RV1_PROSTATE</th>\n",
       "      <th>2313287_STOMACH</th>\n",
       "      <th>253JBV_URINARY_TRACT</th>\n",
       "      <th>253J_URINARY_TRACT</th>\n",
       "      <th>42MGBA_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>5637_URINARY_TRACT</th>\n",
       "      <th>59M_OVARY</th>\n",
       "      <th>639V_URINARY_TRACT</th>\n",
       "      <th>647V_URINARY_TRACT</th>\n",
       "      <th>697_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE</th>\n",
       "      <th>...</th>\n",
       "      <th>WSUDLCL2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE</th>\n",
       "      <th>YAPC_PANCREAS</th>\n",
       "      <th>YD10B_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YD15_SALIVARY_GLAND</th>\n",
       "      <th>YD38_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YD8_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YH13_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>YKG1_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>ZR751_BREAST</th>\n",
       "      <th>ZR7530_BREAST</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.142857</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 905 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      22RV1_PROSTATE  2313287_STOMACH  253JBV_URINARY_TRACT  \\\n",
       "7157            -1.0              0.0                   0.0   \n",
       "5290             1.0              0.0                   1.0   \n",
       "3845             0.0              0.0                   0.0   \n",
       "4893             0.0              0.0                   0.0   \n",
       "\n",
       "      253J_URINARY_TRACT  42MGBA_CENTRAL_NERVOUS_SYSTEM  5637_URINARY_TRACT  \\\n",
       "7157                 0.0                           -1.0                -1.0   \n",
       "5290                 1.0                            0.0                 0.0   \n",
       "3845                 0.0                            0.0                 0.0   \n",
       "4893                 0.0                            0.0                 0.0   \n",
       "\n",
       "      59M_OVARY  639V_URINARY_TRACT  647V_URINARY_TRACT  \\\n",
       "7157       -1.0                -1.0                -1.0   \n",
       "5290        0.0                 0.5                 0.0   \n",
       "3845        0.0                 1.0                 0.0   \n",
       "4893        0.0                -0.1                 0.0   \n",
       "\n",
       "      697_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE      ...        \\\n",
       "7157                                     0.0      ...         \n",
       "5290                                     0.0      ...         \n",
       "3845                                     0.0      ...         \n",
       "4893                                    -1.0      ...         \n",
       "\n",
       "      WSUDLCL2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE  YAPC_PANCREAS  \\\n",
       "7157                                         -1.0           -1.0   \n",
       "5290                                          0.0            0.0   \n",
       "3845                                          0.0            1.0   \n",
       "4893                                          0.0            0.0   \n",
       "\n",
       "      YD10B_UPPER_AERODIGESTIVE_TRACT  YD15_SALIVARY_GLAND  \\\n",
       "7157                             -1.0                 -1.0   \n",
       "5290                              0.0                  0.0   \n",
       "3845                              0.0                  0.0   \n",
       "4893                              0.0                  0.0   \n",
       "\n",
       "      YD38_UPPER_AERODIGESTIVE_TRACT  YD8_UPPER_AERODIGESTIVE_TRACT  \\\n",
       "7157                            -1.0                      -1.000000   \n",
       "5290                             0.0                       0.000000   \n",
       "3845                             0.0                       0.111111   \n",
       "4893                             0.0                       0.000000   \n",
       "\n",
       "      YH13_CENTRAL_NERVOUS_SYSTEM  YKG1_CENTRAL_NERVOUS_SYSTEM  ZR751_BREAST  \\\n",
       "7157                     0.000000                         -1.0           0.0   \n",
       "5290                     0.142857                          1.0           0.0   \n",
       "3845                     0.000000                          0.0           0.0   \n",
       "4893                     0.000000                          0.0           0.0   \n",
       "\n",
       "      ZR7530_BREAST  \n",
       "7157            0.0  \n",
       "5290            0.0  \n",
       "3845            0.0  \n",
       "4893            0.0  \n",
       "\n",
       "[4 rows x 905 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ccle_scores = {}\n",
    "for sample in list(set(ccle[\"Sample\"].values)):\n",
    "    df = ccle.loc[ccle[\"Sample\"] == sample,:]\n",
    "    ccle_scores[sample] = mutations2score(df, TSG_entrez,OG_entrez, hotspots, w=w)\n",
    "ccle_scores = pd.DataFrame.from_dict(ccle_scores)\n",
    "ccle_scores.fillna(0,inplace=True)\n",
    "\n",
    "ccle_scores.to_csv(preprocessed_dir+\"/CCLE.non_bin_mutations.tsv\",sep = \"\\t\")\n",
    "ccle_scores.loc[[7157,5290,3845,4893],:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/indexing.py:1472: FutureWarning: \n",
      "Passing list-likes to .loc or [] with any missing label will raise\n",
      "KeyError in the future, you can use .reindex() as an alternative.\n",
      "\n",
      "See the documentation here:\n",
      "https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n",
      "  return self._getitem_tuple(key)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>22RV1_PROSTATE</th>\n",
       "      <th>2313287_STOMACH</th>\n",
       "      <th>253JBV_URINARY_TRACT</th>\n",
       "      <th>253J_URINARY_TRACT</th>\n",
       "      <th>42MGBA_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>5637_URINARY_TRACT</th>\n",
       "      <th>59M_OVARY</th>\n",
       "      <th>639V_URINARY_TRACT</th>\n",
       "      <th>647V_URINARY_TRACT</th>\n",
       "      <th>697_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE</th>\n",
       "      <th>...</th>\n",
       "      <th>WSUDLCL2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE</th>\n",
       "      <th>YAPC_PANCREAS</th>\n",
       "      <th>YD10B_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YD15_SALIVARY_GLAND</th>\n",
       "      <th>YD38_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YD8_UPPER_AERODIGESTIVE_TRACT</th>\n",
       "      <th>YH13_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>YKG1_CENTRAL_NERVOUS_SYSTEM</th>\n",
       "      <th>ZR751_BREAST</th>\n",
       "      <th>ZR7530_BREAST</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7157</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5290</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3845</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2312</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4893</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 905 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      22RV1_PROSTATE  2313287_STOMACH  253JBV_URINARY_TRACT  \\\n",
       "7157             1.0              0.0                   0.0   \n",
       "5290             1.0              0.0                   1.0   \n",
       "3845             0.0              0.0                   0.0   \n",
       "2312             NaN              NaN                   NaN   \n",
       "4893             0.0              0.0                   0.0   \n",
       "\n",
       "      253J_URINARY_TRACT  42MGBA_CENTRAL_NERVOUS_SYSTEM  5637_URINARY_TRACT  \\\n",
       "7157                 0.0                            1.0                 1.0   \n",
       "5290                 1.0                            0.0                 0.0   \n",
       "3845                 0.0                            0.0                 0.0   \n",
       "2312                 NaN                            NaN                 NaN   \n",
       "4893                 0.0                            0.0                 0.0   \n",
       "\n",
       "      59M_OVARY  639V_URINARY_TRACT  647V_URINARY_TRACT  \\\n",
       "7157        1.0                 1.0                 1.0   \n",
       "5290        0.0                 1.0                 0.0   \n",
       "3845        0.0                 1.0                 0.0   \n",
       "2312        NaN                 NaN                 NaN   \n",
       "4893        0.0                 1.0                 0.0   \n",
       "\n",
       "      697_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE      ...        \\\n",
       "7157                                     0.0      ...         \n",
       "5290                                     0.0      ...         \n",
       "3845                                     0.0      ...         \n",
       "2312                                     NaN      ...         \n",
       "4893                                     1.0      ...         \n",
       "\n",
       "      WSUDLCL2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE  YAPC_PANCREAS  \\\n",
       "7157                                          1.0            1.0   \n",
       "5290                                          0.0            0.0   \n",
       "3845                                          0.0            1.0   \n",
       "2312                                          NaN            NaN   \n",
       "4893                                          0.0            0.0   \n",
       "\n",
       "      YD10B_UPPER_AERODIGESTIVE_TRACT  YD15_SALIVARY_GLAND  \\\n",
       "7157                              1.0                  1.0   \n",
       "5290                              0.0                  0.0   \n",
       "3845                              0.0                  0.0   \n",
       "2312                              NaN                  NaN   \n",
       "4893                              0.0                  0.0   \n",
       "\n",
       "      YD38_UPPER_AERODIGESTIVE_TRACT  YD8_UPPER_AERODIGESTIVE_TRACT  \\\n",
       "7157                             1.0                            1.0   \n",
       "5290                             0.0                            0.0   \n",
       "3845                             0.0                            1.0   \n",
       "2312                             NaN                            NaN   \n",
       "4893                             0.0                            0.0   \n",
       "\n",
       "      YH13_CENTRAL_NERVOUS_SYSTEM  YKG1_CENTRAL_NERVOUS_SYSTEM  ZR751_BREAST  \\\n",
       "7157                          0.0                          1.0           0.0   \n",
       "5290                          1.0                          1.0           0.0   \n",
       "3845                          0.0                          0.0           0.0   \n",
       "2312                          NaN                          NaN           NaN   \n",
       "4893                          0.0                          0.0           0.0   \n",
       "\n",
       "      ZR7530_BREAST  \n",
       "7157            0.0  \n",
       "5290            0.0  \n",
       "3845            0.0  \n",
       "2312            NaN  \n",
       "4893            0.0  \n",
       "\n",
       "[5 rows x 905 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ccle_scores = ccle_scores.applymap(lambda x: binarize(x))\n",
    "ccle_scores.to_csv(preprocessed_dir+\"/CCLE.binary_mutations.tsv\",sep = \"\\t\")\n",
    "ccle_scores.loc[[7157,5290,3845,2312,4893],:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TCGA\n",
    "\n",
    "### detected problems:\n",
    "* no protein change field in 'KIPAN', 'OV', 'GBMLGG','COADREAD','LGG','KICH','KIRC' cohorts\n",
    "* no input files for MESO\n",
    "* protein change may be in fields \"Protein_Change\",\"amino_acid_change_WU\",\"AAChange\",\"amino_acid_change\" \n",
    "* some missense mutations have no protein change.\n",
    "* not clear what is 'R' protein change; these mutations excluded\n",
    "* in some cohorts e.g. BRCA, Entrez_Gene_Id is 0 for all genes. We remapped HGNC symbols to Entrez Gene Id.\n",
    "We skip mutations with Ensembl gene ID appeared in the column \"Hugo_Symbol\".\n",
    "\n",
    "Cohorts and mutaions without protein change were excluded."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Ok: no empty rows detected\n",
      "Ok: no duplicated pairs detected\n",
      "Ok: All Symbol rows are not empty.\n",
      "Ok: All Symbol are mapped to GeneID\n",
      "16 Symbol mapped to multiple GeneID\n",
      "Ok: All GeneID are unique\n",
      "59266 Symbol can be mapped directly to GeneID\n",
      "Ok: no empty rows detected\n",
      "Ok: no duplicated pairs detected\n",
      "Ok: All Synonyms rows are not empty.\n",
      "Ok: All Synonyms are mapped to GeneID\n",
      "3145 Synonyms mapped to multiple GeneID\n",
      "49179 different Synonyms mapped to the same GeneID\n",
      "10839 Synonyms can be mapped directly to GeneID\n"
     ]
    }
   ],
   "source": [
    "# remapping Hugo symbols to Entreze gene ids if necessary \n",
    "NCBI = pd.read_csv(root_dir+\"Homo_sapiens.gene_info\",sep = \"\\t\")\n",
    "NCBI = NCBI[[\"#tax_id\",\"GeneID\",\"Symbol\",\"Synonyms\",\"type_of_gene\"]]\n",
    "NCBI = NCBI.loc[NCBI[\"#tax_id\"] == 9606]\n",
    "NCBI = NCBI.loc[NCBI[\"type_of_gene\"] != \"unknown\"]\n",
    "ncbi_symbols = parse_mapping_table(NCBI, \"Symbol\",\"GeneID\")\n",
    "ncbi_synonyms = expand(NCBI[[\"Synonyms\",\"GeneID\"]],column=\"Synonyms\",sep=\"|\") \n",
    "ncbi_synonyms = parse_mapping_table(ncbi_synonyms, \"Synonyms\",\"GeneID\")\n",
    "def hgnc2entrez_mapper(df,ncbi_symbols,ncbi_synonyms):\n",
    "    remapped_df = df[[\"Entrez_gene_ID\",\"Hugo_Symbol\"]].copy()\n",
    "    remapped_df.drop_duplicates(inplace = True)\n",
    "    remapped_df.set_index(\"Hugo_Symbol\",inplace=True)\n",
    "    remapped_df,query2target,not_mapped  = apply_mappers(remapped_df, ncbi_symbols, ncbi_synonyms,\n",
    "                                                         verbose = False,handle_duplicates = \"keep\")\n",
    "    if len(not_mapped) > 0 :\n",
    "        print(len(not_mapped),\"Hugo symbols not mapped to Entrez gene ID\",not_mapped, file = sys.stderr)\n",
    "        df = df.loc[~df[\"Hugo_Symbol\"].isin(not_mapped),:].copy()\n",
    "    df[\"Entrez_gene_ID\"] = df[\"Hugo_Symbol\"].apply(lambda x:query2target[x])\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['ACC', 'TGCT', 'PCPG', 'BRCA', 'CHOL', 'THYM', 'UCEC', 'SKCM', 'PRAD', 'LIHC', 'ESCA', 'CESC', 'THCA', 'DLBC', 'GBM', 'UCS', 'LAML', 'HNSC', 'PAAD', 'LUAD', 'UVM', 'SARC', 'LGG', 'STAD', 'READ', 'KICH', 'KIRP', 'LUSC', 'STES', 'COAD', 'BLCA']\n"
     ]
    }
   ],
   "source": [
    "cohorts = []\n",
    "for f in os.listdir(\"../../TCGA/mutations/data/\"):\n",
    "    if f.endswith(\"tar.gz\"):\n",
    "        fp = f.replace(\".tar.gz\",\"\")\n",
    "        cohort = fp.split(\".\")[2].replace(\"org_\",\"\")\n",
    "        if not cohort in [\"KIPAN\",\"OV\",'GBMLGG','COADREAD','KIRC']: # 'LGG','KICH'\n",
    "            cohorts.append(cohort)\n",
    "print(cohorts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ACC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 91 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort ACC\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_ACC.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "583       TINAG           27283      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "583        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "93        GPX1            2876      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "93        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "369      YTHDF3          253943      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "369        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "106        GPX1            2876      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "106        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "99      GPR111          222611      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "99        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "123        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "123        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "78        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "78        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "137      BAHCC1           57597           In_Frame_Del  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "137        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "96       MROH8          140699      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "96        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "2       NBPF1           55672      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "2  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "50        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "50        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "52        DIO3            1735      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "52        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "47      THSD7B           80731      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "47        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "274      BAHCC1           57597           In_Frame_Del  point_mutation   \n",
      "333       LTBP4            8425      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "274        .  (NA, NA)  \n",
      "333        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "186      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "186        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "78      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "78        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "180     C6orf10           10665      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "180        .  (NA, NA)  \n",
      "     Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "907       INPP5D            3635      Missense_Mutation  point_mutation   \n",
      "929        MROH8          140699      Missense_Mutation  point_mutation   \n",
      "36         NBPF1           55672      Missense_Mutation  point_mutation   \n",
      "202       PCDH15           65217      Missense_Mutation  point_mutation   \n",
      "852       THSD7B           80731      Missense_Mutation  point_mutation   \n",
      "1128     UGT2B10            7365      Missense_Mutation  point_mutation   \n",
      "\n",
      "     AAChange    aa_pos  \n",
      "907         .  (NA, NA)  \n",
      "929         .  (NA, NA)  \n",
      "36          .  (NA, NA)  \n",
      "202         .  (NA, NA)  \n",
      "852         .  (NA, NA)  \n",
      "1128        .  (NA, NA)  \n",
      "TCGA-PK-A5HB-01A-11D-A29I-10 ACC 6 point mutations without a position 1176 remain\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "171        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "171        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "159      GTPBP6            8225      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "159        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "429       PRKDC            5591      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "429        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "81        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "81        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "591      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "64         DRGX          644168      Missense_Mutation  point_mutation   \n",
      "475        GPX1            2876      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "591        .  (NA, NA)  \n",
      "64         .  (NA, NA)  \n",
      "475        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "130       KRBA1           84626      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "130        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "100        EPT1           85465      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "100        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "138         RYK            6259      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "138        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "67        GPX1            2876           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "67        .  (NA, NA)  \n",
      "     Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2001      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "1058        OAZ1            4946      Missense_Mutation  point_mutation   \n",
      "1472       RIBC2           26150      Missense_Mutation  point_mutation   \n",
      "1228      THSD7B           80731      Missense_Mutation  point_mutation   \n",
      "2160      YTHDF3          253943      Missense_Mutation  point_mutation   \n",
      "\n",
      "     AAChange    aa_pos  \n",
      "2001        .  (NA, NA)  \n",
      "1058        .  (NA, NA)  \n",
      "1472        .  (NA, NA)  \n",
      "1228        .  (NA, NA)  \n",
      "2160        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "134        TAP2            6891      Missense_Mutation  point_mutation   \n",
      "141         ZAN            7455      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "134        .  (NA, NA)  \n",
      "141        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "20      MIR662          724032      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "20        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "26      SHANK2           22941      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "26        .  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TGCT\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 156 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort TGCT\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_TGCT.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "107      BCYRN1            6872      Missense_Mutation  point_mutation   \n",
      "31        NPRL3            8131      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "107        .  (NA, NA)  \n",
      "31         .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "62      TXNRD2           10587      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "62        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "47       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "47        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "49       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "49        .  (NA, NA)  \n",
      "      Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2               .          284498      Missense_Mutation  point_mutation   \n",
      "3               .          284498      Missense_Mutation  point_mutation   \n",
      "5               .          284498      Missense_Mutation  point_mutation   \n",
      "7               .          284498      Missense_Mutation  point_mutation   \n",
      "42              .          388761      Missense_Mutation  point_mutation   \n",
      "43              .          388761      Missense_Mutation  point_mutation   \n",
      "48              .          653365      Missense_Mutation  point_mutation   \n",
      "55              .            4586      Missense_Mutation  point_mutation   \n",
      "57              .          340990      Missense_Mutation  point_mutation   \n",
      "58              .          340990      Missense_Mutation  point_mutation   \n",
      "77              .          283463      Missense_Mutation  point_mutation   \n",
      "79              .          283463      Missense_Mutation  point_mutation   \n",
      "89              .          144535      Missense_Mutation  point_mutation   \n",
      "168             .           23666      Missense_Mutation  point_mutation   \n",
      "175             .           85354      Missense_Mutation  point_mutation   \n",
      "181             .            5273      Missense_Mutation  point_mutation   \n",
      "208             .           10130      Missense_Mutation  point_mutation   \n",
      "261             .          339766      Missense_Mutation  point_mutation   \n",
      "265             .          388795      Missense_Mutation  point_mutation   \n",
      "293             .          152110      Missense_Mutation  point_mutation   \n",
      "356             .            9425      Missense_Mutation  point_mutation   \n",
      "357             .            9425      Missense_Mutation  point_mutation   \n",
      "358             .            9425      Missense_Mutation  point_mutation   \n",
      "359             .            9425      Missense_Mutation  point_mutation   \n",
      "365             .          729583      Missense_Mutation  point_mutation   \n",
      "466             .          286423      Missense_Mutation  point_mutation   \n",
      "192        CAPNS1             826           In_Frame_Del  point_mutation   \n",
      "286        GAS2L1           10634      Missense_Mutation  point_mutation   \n",
      "252  LOC100507443            1419      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "2          .  (NA, NA)  \n",
      "3          .  (NA, NA)  \n",
      "5          .  (NA, NA)  \n",
      "7          .  (NA, NA)  \n",
      "42         .  (NA, NA)  \n",
      "43         .  (NA, NA)  \n",
      "48         .  (NA, NA)  \n",
      "55         .  (NA, NA)  \n",
      "57         .  (NA, NA)  \n",
      "58         .  (NA, NA)  \n",
      "77         .  (NA, NA)  \n",
      "79         .  (NA, NA)  \n",
      "89         .  (NA, NA)  \n",
      "168        .  (NA, NA)  \n",
      "175        .  (NA, NA)  \n",
      "181        .  (NA, NA)  \n",
      "208        .  (NA, NA)  \n",
      "261        .  (NA, NA)  \n",
      "265        .  (NA, NA)  \n",
      "293        .  (NA, NA)  \n",
      "356        .  (NA, NA)  \n",
      "357        .  (NA, NA)  \n",
      "358        .  (NA, NA)  \n",
      "359        .  (NA, NA)  \n",
      "365        .  (NA, NA)  \n",
      "466        .  (NA, NA)  \n",
      "192        .  (NA, NA)  \n",
      "286        .  (NA, NA)  \n",
      "252        .  (NA, NA)  \n",
      "TCGA-W4-A7U3-01A-12D-A435-10 TGCT 29 point mutations without a position 67 remain\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "11           .          256815      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "11        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "51      FBXL12           54850      Missense_Mutation  point_mutation   \n",
      "53        NFIX            4784      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "51        .  (NA, NA)  \n",
      "53        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "74      BCYRN1            6872      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "74        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "63           .          375748      Missense_Mutation  point_mutation   \n",
      "41       RRBP1            6238      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "63        .  (NA, NA)  \n",
      "41        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "63           .           29801      Missense_Mutation  point_mutation   \n",
      "13        GRM5            2915      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "63        .  (NA, NA)  \n",
      "13        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "35     ARHGEF4           50649      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "35        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "76     MAP3K14            9020      Missense_Mutation  point_mutation   \n",
      "72        SAT1            6303      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "76        .  (NA, NA)  \n",
      "72        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "103      INPP5D            3635      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "103        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "60       LTBP4            8425      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "60        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "7      NBPF10       100132406      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "7  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "88       ATG9B          285973      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "88        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "69       CECR2           27443      Missense_Mutation  point_mutation   \n",
      "95       KRBA1           84626      Missense_Mutation  point_mutation   \n",
      "90       ULBP3           79465      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "69        .  (NA, NA)  \n",
      "95        .  (NA, NA)  \n",
      "90        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "60           .          339766      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "60        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "46           .          400986      Missense_Mutation  point_mutation   \n",
      "50       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "46        .  (NA, NA)  \n",
      "50        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "114       KRBA1           84626      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "114        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "39       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "39        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "72           .          378108      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "72        .  (NA, NA)  \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "72       FOLR3            2352      Missense_Mutation  point_mutation   \n",
      "48       RRBP1            6238      Missense_Mutation  point_mutation   \n",
      "73      SEPHS2           22928      Missense_Mutation  point_mutation   \n",
      "74     TSPAN10           83882      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "72        .  (NA, NA)  \n",
      "48        .  (NA, NA)  \n",
      "73        .  (NA, NA)  \n",
      "74        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "84      ZNF274           10782      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "84        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "47       OLFM2           93145      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "47        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "23           .          283685           In_Frame_Ins  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "23        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "6           .          728841      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "6  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "104       LTBP4            8425      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "104        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "51         RYK            6259      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "51        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "52           .          388795      Missense_Mutation  point_mutation   \n",
      "84        DNM1            1759      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "52        .  (NA, NA)  \n",
      "84        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "58      NUDT10          170685      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "58        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "38    ADAMTS10           81794      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "38        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "56       LTBP4            8425      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "56        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "155      TXNRD1            7296      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "155        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16           .          283685      Missense_Mutation  point_mutation   \n",
      "41       THADA           63892      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "16        .  (NA, NA)  \n",
      "41        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "23      BAHCC1           57597      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "23        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "84       ATG9B          285973      Missense_Mutation  point_mutation   \n",
      "93        EPT1           85465      Missense_Mutation  point_mutation   \n",
      "94        GPX1            2876      Missense_Mutation  point_mutation   \n",
      "7       PFKFB3            5209      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "84        .  (NA, NA)  \n",
      "93        .  (NA, NA)  \n",
      "94        .  (NA, NA)  \n",
      "7         .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "55    MAPK8IP2           23542      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "55        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "25           .          283685           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "25        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "51       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "80    MAPK8IP2           23542      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "51        .  (NA, NA)  \n",
      "80        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "66       RRBP1            6238      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "66        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "85           .           30816      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "85        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "74      BAHCC1           57597      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "74        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "46           .          400986      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "46        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "17           .          283685      Missense_Mutation  point_mutation   \n",
      "0        MST1L           11223      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "17        .  (NA, NA)  \n",
      "0         .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "90       KRBA1           84626      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "90        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "35           .          283463      Missense_Mutation  point_mutation   \n",
      "51      ANKLE1          126549           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "35        .  (NA, NA)  \n",
      "51        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "60     FAM131A          131408      Missense_Mutation  point_mutation   \n",
      "85        TAP2            6891      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "60        .  (NA, NA)  \n",
      "85        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "59       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "0        NBPF1           55672      Missense_Mutation  point_mutation   \n",
      "55      THSD7B           80731      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "59        .  (NA, NA)  \n",
      "0         .  (NA, NA)  \n",
      "55        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "14        NACA            4666      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "14        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "11           .          388761      Missense_Mutation  point_mutation   \n",
      "60           .          200958      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "11        .  (NA, NA)  \n",
      "60        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "86       CPNE9          151835      Missense_Mutation  point_mutation   \n",
      "39       ENDOV          284131      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "86        .  (NA, NA)  \n",
      "39        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "36       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "36        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "39     FAM101B          359845      Missense_Mutation  point_mutation   \n",
      "65       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "30       PCSK6            5046      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "39        .  (NA, NA)  \n",
      "65        .  (NA, NA)  \n",
      "30        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "53       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "53        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "44           .          400986      Missense_Mutation  point_mutation   \n",
      "84           .           84443           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "44        .  (NA, NA)  \n",
      "84        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "1  FPGT-TNNI3K       100144878      Missense_Mutation  point_mutation   \n",
      "\n",
      "  AAChange    aa_pos  \n",
      "1        .  (NA, NA)  \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "8            .       100288142      Missense_Mutation  point_mutation   \n",
      "43           .          440348      Missense_Mutation  point_mutation   \n",
      "56           .          400986      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "8         .  (NA, NA)  \n",
      "43        .  (NA, NA)  \n",
      "56        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "46        EPT1           85465      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "46        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "83     FAM86B1           85002      Missense_Mutation  point_mutation   \n",
      "17        NACA            4666      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "83        .  (NA, NA)  \n",
      "17        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "14       PELI3          246330      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "14        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "65      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "66      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "14      TUBA1A            7846           In_Frame_Del  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "65        .  (NA, NA)  \n",
      "66        .  (NA, NA)  \n",
      "14        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "1      AGTRAP           57085      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "1  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "78     CYP21A2            1589      Missense_Mutation  point_mutation   \n",
      "33       UBFD1           56061      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "78        .  (NA, NA)  \n",
      "33        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "62           .          728441      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "62        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "37           .          400986      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "37        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "56       RRBP1            6238      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "56        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "42      ZNF525          170958      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "42        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "33      MAP2K3            5606      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "33        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "45           .            7617      Missense_Mutation  point_mutation   \n",
      "52       PDE6D           64708      Missense_Mutation  point_mutation   \n",
      "90      TXNRD2           10587      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "45        .  (NA, NA)  \n",
      "52        .  (NA, NA)  \n",
      "90        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "53           .          253143      Missense_Mutation  point_mutation   \n",
      "54           .          253143      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "53        .  (NA, NA)  \n",
      "54        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "38     FAM101B          359845      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "38        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "54           .          400986      Missense_Mutation  point_mutation   \n",
      "36     FAM101B          359845      Missense_Mutation  point_mutation   \n",
      "33       PSKH1            5681      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "54        .  (NA, NA)  \n",
      "36        .  (NA, NA)  \n",
      "33        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "65       RIBC2           26150      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "65        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "33       EPCAM            4072      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "33        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "57           .          400986      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "57        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "0        MACF1           23499      Missense_Mutation  point_mutation   \n",
      "22      MRPS11           64963      Missense_Mutation  point_mutation   \n",
      "62     UGT2B10            7365      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "0         .  (NA, NA)  \n",
      "22        .  (NA, NA)  \n",
      "62        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "56           .          140731      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "56        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "26       SKOR1          390598      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "26        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "24           .          283685           In_Frame_Ins  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "24        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "119    MAPK8IP2           23542      Missense_Mutation  point_mutation   \n",
      "65       NEURL2          140825      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "119        .  (NA, NA)  \n",
      "65         .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "65      BAHCC1           57597      Missense_Mutation  point_mutation   \n",
      "41       RRBP1            6238      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "65        .  (NA, NA)  \n",
      "41        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "47       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "48       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "47        .  (NA, NA)  \n",
      "48        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "25    C13orf35          400165      Missense_Mutation  point_mutation   \n",
      "53       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "25        .  (NA, NA)  \n",
      "53        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "36           .            9425      Missense_Mutation  point_mutation   \n",
      "28        GYPC            2995      Missense_Mutation  point_mutation   \n",
      "13        NACA            4666      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "36        .  (NA, NA)  \n",
      "28        .  (NA, NA)  \n",
      "13        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "0    C1orf170           84808      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "0  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "66           .          347265      Missense_Mutation  point_mutation   \n",
      "72      BAHCC1           57597      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "66        .  (NA, NA)  \n",
      "72        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "94       FOXP3           50943      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "94        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "88        DNM1            1759      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "88        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "4             .          728841      Missense_Mutation  point_mutation   \n",
      "24        DOCK9           23348      Missense_Mutation  point_mutation   \n",
      "109       PRKDC            5591      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "4          .  (NA, NA)  \n",
      "24         .  (NA, NA)  \n",
      "109        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "24            .          283463      Missense_Mutation  point_mutation   \n",
      "25            .          283463      Missense_Mutation  point_mutation   \n",
      "29            .          144453      Missense_Mutation  point_mutation   \n",
      "70            .          339766      Missense_Mutation  point_mutation   \n",
      "101           .            6960      Missense_Mutation  point_mutation   \n",
      "106           .          347265      Missense_Mutation  point_mutation   \n",
      "122           .          286423      Missense_Mutation  point_mutation   \n",
      "17    C10orf105          414152      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "24         .  (NA, NA)  \n",
      "25         .  (NA, NA)  \n",
      "29         .  (NA, NA)  \n",
      "70         .  (NA, NA)  \n",
      "101        .  (NA, NA)  \n",
      "106        .  (NA, NA)  \n",
      "122        .  (NA, NA)  \n",
      "17         .  (NA, NA)  \n",
      "TCGA-VF-A8AA-01A-11D-A435-10 TGCT 8 point mutations without a position 27 remain\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2             .          284498      Missense_Mutation  point_mutation   \n",
      "3             .          284498      Missense_Mutation  point_mutation   \n",
      "10            .          780789      Missense_Mutation  point_mutation   \n",
      "52            .          283463      Missense_Mutation  point_mutation   \n",
      "54            .          283463      Missense_Mutation  point_mutation   \n",
      "55            .          283463      Missense_Mutation  point_mutation   \n",
      "56            .          283463      Missense_Mutation  point_mutation   \n",
      "58            .          283463      Missense_Mutation  point_mutation   \n",
      "63            .          144453      Missense_Mutation  point_mutation   \n",
      "129           .            1550      Missense_Mutation  point_mutation   \n",
      "149           .          339766      Missense_Mutation  point_mutation   \n",
      "160           .          373856      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "2          .  (NA, NA)  \n",
      "3          .  (NA, NA)  \n",
      "10         .  (NA, NA)  \n",
      "52         .  (NA, NA)  \n",
      "54         .  (NA, NA)  \n",
      "55         .  (NA, NA)  \n",
      "56         .  (NA, NA)  \n",
      "58         .  (NA, NA)  \n",
      "63         .  (NA, NA)  \n",
      "129        .  (NA, NA)  \n",
      "149        .  (NA, NA)  \n",
      "160        .  (NA, NA)  \n",
      "TCGA-VF-A8A9-01A-11D-A435-10 TGCT 12 point mutations without a position 43 remain\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "89      SEPHS2           22928      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "89        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "61        CHIT1            1118      Missense_Mutation  point_mutation   \n",
      "519       PRKDC            5591      Missense_Mutation  point_mutation   \n",
      "52     RABGAP1L            9910      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "61         .  (NA, NA)  \n",
      "519        .  (NA, NA)  \n",
      "52         .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "17         C1R             715      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "17        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "3            .          343170      Missense_Mutation  point_mutation   \n",
      "76       SIMC1          375484      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "3         .  (NA, NA)  \n",
      "76        .  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function AAChange  \\\n",
      "9           .          646309      Missense_Mutation  point_mutation        .   \n",
      "\n",
      "     aa_pos  \n",
      "9  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "33           .          440348      Missense_Mutation  point_mutation   \n",
      "71       PRKDC            5591      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "33        .  (NA, NA)  \n",
      "71        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "45       LMCD1           29995      Missense_Mutation  point_mutation   \n",
      "73       NPRL3            8131      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "45        .  (NA, NA)  \n",
      "73        .  (NA, NA)  \n",
      "      Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "0               .          339457      Missense_Mutation  point_mutation   \n",
      "1               .          339457      Missense_Mutation  point_mutation   \n",
      "2               .          284498      Missense_Mutation  point_mutation   \n",
      "3               .          284498      Missense_Mutation  point_mutation   \n",
      "6               .          284498      Missense_Mutation  point_mutation   \n",
      "38              .          388761      Missense_Mutation  point_mutation   \n",
      "39              .          388761      Missense_Mutation  point_mutation   \n",
      "44              .          653365      Missense_Mutation  point_mutation   \n",
      "45              .          653045      Missense_Mutation  point_mutation   \n",
      "52              .          340990      Missense_Mutation  point_mutation   \n",
      "53              .          340990      Missense_Mutation  point_mutation   \n",
      "55              .          340990      Missense_Mutation  point_mutation   \n",
      "68              .          283463      Missense_Mutation  point_mutation   \n",
      "70              .          283463      Missense_Mutation  point_mutation   \n",
      "71              .          283463      Missense_Mutation  point_mutation   \n",
      "94              .          319089      Missense_Mutation  point_mutation   \n",
      "126             .          400555      Missense_Mutation  point_mutation   \n",
      "127             .          400555      Missense_Mutation  point_mutation   \n",
      "149             .           10130      Missense_Mutation  point_mutation   \n",
      "151             .           10130      Missense_Mutation  point_mutation   \n",
      "234             .          200958      Missense_Mutation  point_mutation   \n",
      "255             .            9425      Missense_Mutation  point_mutation   \n",
      "262             .          729583      Missense_Mutation  point_mutation   \n",
      "338             .           84443           In_Frame_Del  point_mutation   \n",
      "340             .          286423      Missense_Mutation  point_mutation   \n",
      "146        CAPNS1             826           In_Frame_Del  point_mutation   \n",
      "186         FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "178  LOC100507443            1419      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "0          .  (NA, NA)  \n",
      "1          .  (NA, NA)  \n",
      "2          .  (NA, NA)  \n",
      "3          .  (NA, NA)  \n",
      "6          .  (NA, NA)  \n",
      "38         .  (NA, NA)  \n",
      "39         .  (NA, NA)  \n",
      "44         .  (NA, NA)  \n",
      "45         .  (NA, NA)  \n",
      "52         .  (NA, NA)  \n",
      "53         .  (NA, NA)  \n",
      "55         .  (NA, NA)  \n",
      "68         .  (NA, NA)  \n",
      "70         .  (NA, NA)  \n",
      "71         .  (NA, NA)  \n",
      "94         .  (NA, NA)  \n",
      "126        .  (NA, NA)  \n",
      "127        .  (NA, NA)  \n",
      "149        .  (NA, NA)  \n",
      "151        .  (NA, NA)  \n",
      "234        .  (NA, NA)  \n",
      "255        .  (NA, NA)  \n",
      "262        .  (NA, NA)  \n",
      "338        .  (NA, NA)  \n",
      "340        .  (NA, NA)  \n",
      "146        .  (NA, NA)  \n",
      "186        .  (NA, NA)  \n",
      "178        .  (NA, NA)  \n",
      "TCGA-W4-A7U2-01A-11D-A435-10 TGCT 28 point mutations without a position 39 remain\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "64      BAHCC1           57597      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "64        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "74       MMGT1           93380      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "74        .  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PCPG\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 185 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort PCPG\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_PCPG.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BRCA\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 983 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort BRCA\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_BRCA.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['EFTUD1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000254553']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C2orf47']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 10\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'CSRP2BP', 'DBC1', 'TCEB3C', 'ENSG00000249624', 'MT-ATP6', 'MT-CO3', 'MT-CYB', 'MT-ND3', 'MT-ND4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173366']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM18B2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DBC1', 'ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000184909']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214397']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000176593', 'FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173366']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['ADC', 'AIM1', 'C15orf38', 'CCBP2', 'CD97', 'EFTUD1', 'MLL2', 'TCEB3CL', 'ENSG00000235434']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000184909']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267261']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['DBC1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ADC', 'ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CYB', 'MT-ND1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000248710']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000235041']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CXXC11']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['PPYR1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['APITD1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CCBP2', 'FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'CSRP2BP', 'MLL4', 'ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['TCEB3C']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000256349']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['TCEB3C']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['PPYR1', 'TRAPPC2P1', 'FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['MLL4', 'ODZ3', 'ENSG00000257950']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DBC1', 'MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C2orf47']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['EFTUD1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['FLJ43860']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['HEATR8', 'MLL2', 'ENSG00000251184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000248710']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['DBC1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CD97', 'FAM18B2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'DBC1', 'ODZ3', 'ENSG00000233389', 'ENSG00000237240']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257198']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'FLJ27352']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214970']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000259288']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000183470']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'C2orf47', 'CSRP2BP', 'MLL2', 'ODZ3', 'PPYR1', 'ENSG00000184909', 'ENSG00000187461', 'ENSG00000248710']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n",
      "1 "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257921']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['MLL2', 'ENSG00000262323']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000263424']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['SLC35E2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258472']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'SLC35E2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['DBC1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'C11orf48', 'FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['DBC1', 'FAM18B2', 'MLL2', 'MLL4', 'ENSG00000256349', 'FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000254756']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'MLL2', 'MLL4', 'ENSG00000258466']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'RPL17-C18ORF32']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM18B2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000247570']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000260175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HN1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['SLC35E2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['FAM21A', 'ODZ3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['HEATR8', 'ENSG00000248710']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['HN1L', 'FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['TCEB3C']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267385']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000248710']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258852']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257921']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000254553']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HEATR8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CCBP2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['FAM21A', 'ENSG00000260861']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['B3GNT1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['DBC1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ43860']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000261884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C2orf47', 'CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL4']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AGPAT9', 'ODZ3', 'ENSG00000260175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['HN1', 'ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MLL2', 'TRAPPC2P1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CCRL1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000232637']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MLL2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'AZI1', 'MLL2', 'ENSG00000254553']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ADC']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000249209']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['ABP1', 'AIM1', 'C11orf48', 'C2orf47', 'MLL4', 'ENSG00000187461', 'ENSG00000240127', 'ENSG00000260175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['PPYR1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CXXC11']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257033']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000251357']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226232']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CHOL\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 36 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort CHOL\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_CHOL.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "226           .            1550      Missense_Mutation  point_mutation   \n",
      "215       PRSS1            5644      Missense_Mutation  point_mutation   \n",
      "214         ZAN            7455      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "226        .  (NA, NA)  \n",
      "215        .  (NA, NA)  \n",
      "214        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "211       FOLR3            2352      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "211        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "118           .          728741      Missense_Mutation  point_mutation   \n",
      "132           .           30816      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "118        .  (NA, NA)  \n",
      "132        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "115           .           28891      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "115        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "129           .           50649      Missense_Mutation  point_mutation   \n",
      "136       SH2B2           10603      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "129        .  (NA, NA)  \n",
      "136        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "142    ANKRD13D          338692      Missense_Mutation  point_mutation   \n",
      "129       ITGB3            3690      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "142        .  (NA, NA)  \n",
      "129        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "175     MAP3K14            9020      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "175        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "96        TTC3            7267      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "96        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "146           .          388677      Missense_Mutation  point_mutation   \n",
      "192      PCDH15           65217      Missense_Mutation  point_mutation   \n",
      "178       TTYH3           80727      Missense_Mutation  point_mutation   \n",
      "193       UHRF1           29128      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "146        .  (NA, NA)  \n",
      "192        .  (NA, NA)  \n",
      "178        .  (NA, NA)  \n",
      "193        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "165       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "166       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "155        IWS1           55677      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "165        .  (NA, NA)  \n",
      "166        .  (NA, NA)  \n",
      "155        .  (NA, NA)  \n",
      "     Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "952            .          144535      Missense_Mutation  point_mutation   \n",
      "956            .          220416      Missense_Mutation  point_mutation   \n",
      "1000           .          285311      Missense_Mutation  point_mutation   \n",
      "1116    C11orf31          280636      Missense_Mutation  point_mutation   \n",
      "1121       COPG2           26958      Missense_Mutation  point_mutation   \n",
      "1087      DNAH10          642797      Missense_Mutation  point_mutation   \n",
      "1117        GPX2            2877      Missense_Mutation  point_mutation   \n",
      "1118      ZNF274           10782      Missense_Mutation  point_mutation   \n",
      "\n",
      "     AAChange    aa_pos  \n",
      "952         .  (NA, NA)  \n",
      "956         .  (NA, NA)  \n",
      "1000        .  (NA, NA)  \n",
      "1116        .  (NA, NA)  \n",
      "1121        .  (NA, NA)  \n",
      "1087        .  (NA, NA)  \n",
      "1117        .  (NA, NA)  \n",
      "1118        .  (NA, NA)  \n",
      "TCGA-W5-AA39-01A-11D-A417-09 CHOL 8 point mutations without a position 736 remain\n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "161           .            7203      Missense_Mutation  point_mutation   \n",
      "174      INO80E            8479      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "161        .  (NA, NA)  \n",
      "174        .  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "90       CPNE9          151835      Missense_Mutation  point_mutation   \n",
      "\n",
      "   AAChange    aa_pos  \n",
      "90        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "273           .          400986      Missense_Mutation  point_mutation   \n",
      "304     PRPF40A           55660      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "273        .  (NA, NA)  \n",
      "304        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "125        ADIG          149685      Missense_Mutation  point_mutation   \n",
      "116        GRK4            2868           In_Frame_Del  point_mutation   \n",
      "117       SMAD5            4090      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "125        .  (NA, NA)  \n",
      "116        .  (NA, NA)  \n",
      "117        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "116           .       100129543      Missense_Mutation  point_mutation   \n",
      "163      THSD7B           80731      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "116        .  (NA, NA)  \n",
      "163        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "134           .           84443      Missense_Mutation  point_mutation   \n",
      "137    ARHGAP25            9938      Missense_Mutation  point_mutation   \n",
      "160       CECR2           27443      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "134        .  (NA, NA)  \n",
      "137        .  (NA, NA)  \n",
      "160        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "124           .          342850      Missense_Mutation  point_mutation   \n",
      "112       CRTAM           56253      Missense_Mutation  point_mutation   \n",
      "153      LILRB5           10990      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "124        .  (NA, NA)  \n",
      "112        .  (NA, NA)  \n",
      "153        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "185           .           91646      Missense_Mutation  point_mutation   \n",
      "221      DNAH11            8701      Missense_Mutation  point_mutation   \n",
      "219       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "220       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "185        .  (NA, NA)  \n",
      "221        .  (NA, NA)  \n",
      "219        .  (NA, NA)  \n",
      "220        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "105       MACF1           23499      Missense_Mutation  point_mutation   \n",
      "148      PCDH15           65217      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "105        .  (NA, NA)  \n",
      "148        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "142           .          400986      Missense_Mutation  point_mutation   \n",
      "139   ANKRD30BL          554226      Missense_Mutation  point_mutation   \n",
      "129       NBPF1           55672      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "142        .  (NA, NA)  \n",
      "139        .  (NA, NA)  \n",
      "129        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "183       FRG1B          284802      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "183        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "145    MIR548F5           26960      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "145        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "189       PRKDC            5591      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "189        .  (NA, NA)  \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "190       PCSK6            5046      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "190        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "111           .          131034      Missense_Mutation  point_mutation   \n",
      "142      METTL8           79828      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "111        .  (NA, NA)  \n",
      "142        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "158       MAPK6            5597      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "158        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "101           .          319089      Missense_Mutation  point_mutation   \n",
      "94      FOXRED1           55572      Missense_Mutation  point_mutation   \n",
      "117       SPEF2           79925      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "101        .  (NA, NA)  \n",
      "94         .  (NA, NA)  \n",
      "117        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "115           .          728841      Missense_Mutation  point_mutation   \n",
      "128           .          400986      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "115        .  (NA, NA)  \n",
      "128        .  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "88            .           80167      Missense_Mutation  point_mutation   \n",
      "108           .          400986      Missense_Mutation  point_mutation   \n",
      "98      TSPAN14           81619      Missense_Mutation  point_mutation   \n",
      "\n",
      "    AAChange    aa_pos  \n",
      "88         .  (NA, NA)  \n",
      "108        .  (NA, NA)  \n",
      "98         .  (NA, NA)  \n",
      "preprocessed maf 124 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort THYM\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_THYM.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "THYM\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-ATP6', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000260415']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC101930102', 'MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000263264']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000176984']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND6']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'ENSG00000243501', 'ENSG00000269089']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND4L']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000256530']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UCEC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 249 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort UCEC\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_UCEC.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SKCM\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 346 fies;\t\traw maf: 369 files\n",
      "select raw maf file for cohort SKCM\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_SKCM.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n",
      "/home/olya/miniconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2714: DtypeWarning: Columns (38,50,67,81,82,83,84) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n",
      "/home/olya/miniconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2714: DtypeWarning: Columns (38,50,81,82,83,84) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRAD\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 333 fies;\t\traw maf: 500 files\n",
      "select raw maf file for cohort PRAD\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_PRAD.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n",
      "/home/olya/miniconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2714: DtypeWarning: Columns (38,67,81,82,83,84) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LIHC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 199 fies;\t\traw maf: 374 files\n",
      "select raw maf file for cohort LIHC\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_LIHC.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ESCA\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 186 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort ESCA\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_ESCA.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000237452', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000268194']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100509091']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000197182', 'ENSG00000228768', 'ENSG00000232748']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000145965', 'ENSG00000242628', 'ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000205018', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000167765']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255537', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['AGPAT9', 'AZI1', 'ENSG00000212999', 'ENSG00000249428', 'ENSG00000260415', 'ENSG00000267848', 'ENSG00000268241']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-ATP6', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000167765']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000232866']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'ENSG00000212857', 'ENSG00000268059', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC101928664']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000187811', 'ENSG00000267848', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234719']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'ENSG00000231171', 'ENSG00000263020']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212857', 'ENSG00000228532']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255622', 'ENSG00000257743', 'FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212857', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000254469', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255622']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000229729']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743', 'ENSG00000268948']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000189275', 'MT-ND5', 'MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000229729']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884', 'ENSG00000254614', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212857', 'ENSG00000233539', 'ENSG00000268948']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267848', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000240291']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100128374']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['NOTCH2NL', 'ENSG00000179755', 'ENSG00000267848', 'FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CD97', 'ENSG00000255641', 'ENSG00000268241']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CXXC11', 'ENSG00000183154']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000197182', 'ENSG00000205018']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['101928757']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['STRA13', 'LOC101928102']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234719', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000189275', 'ENSG00000215642']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267270', 'FLJ22184']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND4L']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 12\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "12 Hugo symbols not mapped to Entrez gene ID ['ADC', 'ENSG00000183154', 'ENSG00000212857', 'ENSG00000215428', 'ENSG00000225996', 'ENSG00000237102', 'ENSG00000255622', 'ENSG00000257743', 'ENSG00000259455', 'ENSG00000267270', 'ENSG00000268467', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215304', 'ENSG00000232274', 'MT-ATP8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234677']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000237452', 'ENSG00000268467']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215642']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'ENSG00000259455']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000222031', 'ENSG00000268864', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212857']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000222031']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884', 'MT-ATP6', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000249034']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CXXC11', 'ENSG00000173213', 'ENSG00000255835']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000176593', 'ENSG00000225996', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000268400', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000259357', 'ENSG00000272268']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267954']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['TCEB3C', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC101927810']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255154', 'ENSG00000259471']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000272268']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000249034']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['B3GNT1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000181495', 'ENSG00000255537']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000247925']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'ENSG00000183562', 'ENSG00000217825', 'MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000268412', 'ENSG00000273398']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CD97', 'LOC100133301', 'ENSG00000205018']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000217702', 'ENSG00000234222', 'MT-ATP6', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000188474']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214999']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214305']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'ENSG00000212928', 'ENSG00000259966']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'ENSG00000249428', 'ENSG00000255622']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC101927016', 'ENSG00000169203', 'ENSG00000212857', 'ENSG00000253251']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000248801', 'ENSG00000257057', 'ENSG00000264222']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000253172', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226524', 'ENSG00000259455', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HN1L']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000176593']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000217825']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212857', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000269490']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ46361']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC101930611']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC100996779', 'ENSG00000217825', 'ENSG00000267848', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CESC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 195 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort CESC\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_CESC.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "THCA\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 405 fies;\t\traw maf: 505 files\n",
      "select raw maf file for cohort THCA\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_THCA.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['RP11-113D6.10']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DLBC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 49 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort DLBC\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_DLBC.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GBM\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 291 fies;\t\traw maf: 291 files\n",
      "select processed maf file for cohort GBM\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_GBM.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UCS\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 58 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort UCS\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_UCS.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LAML\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 198 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort LAML\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_LAML.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100132232']\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "18        FLT3            2322           In_Frame_Ins  point_mutation   \n",
      "\n",
      "   amino_acid_change    aa_pos  \n",
      "18      in_frame_ins  (NA, NA)  \n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['uc004amh.1']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC100130734', 'LOC100132903', 'LOC387761', 'LOC727878']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100131040']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216522']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100128077']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['uc002rsf.1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC728896']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC727895']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['MT-ND1', 'MT-ND4', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CYB', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['PRG-3', 'ENSG00000211619']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['NOTCH2NL', 'LOC200493']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100129901']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['MLL2', 'LOC100132800', 'LOC100133684', 'MT-CO3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CYB']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'LOC730032']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC100129218', 'ENSG00000103832']\n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2        FLT3            2322           In_Frame_Ins  point_mutation   \n",
      "\n",
      "  amino_acid_change    aa_pos  \n",
      "2      in_frame_ins  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC644504', 'ENSG00000214135']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC100132713', 'LOC100134687']\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16        FLT3            2322           In_Frame_Ins  point_mutation   \n",
      "\n",
      "   amino_acid_change    aa_pos  \n",
      "16      in_frame_ins  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['FLJ43860', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-ND5', 'uc001vvs.1']\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16        FLT3            2322           In_Frame_Ins  point_mutation   \n",
      "\n",
      "   amino_acid_change    aa_pos  \n",
      "16      in_frame_ins  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000198229']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000211894']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100133915']\n",
      "2"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['LOC100133718', 'LOC730167']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100133292']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215720', 'ENSG00000220279']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100128371']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000124399']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ22167']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['MT-ATP6']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ODZ3', 'LOC100128989', 'LOC100129218']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC257039']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100133655']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100132495']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100131699']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC100132800']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000163098']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['LOC644992']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO3', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['uc004ewl.1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000204989']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['PRAMEF16']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HNSC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['LOC100128744']\n",
      "preprocessed maf 280 fies;\t\traw maf: 513 files\n",
      "select raw maf file for cohort HNSC\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_HNSC.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PAAD\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 151 fies;\t\traw maf: 186 files\n",
      "select raw maf file for cohort PAAD\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_PAAD.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LUAD\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 231 fies;\t\traw maf: 544 files\n",
      "select raw maf file for cohort LUAD\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_LUAD.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UVM\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 81 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort UVM\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_UVM.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "10        REC8            9985      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "10             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "17        TEP1            7011      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "17             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16       ABCB4            5244      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "16             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "7       AP1G2            8906      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "7             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16        E2F7          144455      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "16             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "8       P2RX1            5023      Missense_Mutation  point_mutation   \n",
      "9        RGL4          266747      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "8             NA  (NA, NA)  \n",
      "9             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "14        RYR2            6262      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "14             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "3        NEBL           10529      Missense_Mutation  point_mutation   \n",
      "8        RSG1           79363      Missense_Mutation  point_mutation   \n",
      "0       ZMYM3            9203      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "3             NA  (NA, NA)  \n",
      "8             NA  (NA, NA)  \n",
      "0             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "3       OLFM1           10439      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "3             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "1        MRAS           22808      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "1             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6        BAP1            8314      Missense_Mutation  point_mutation   \n",
      "8      VSIG10           54621      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "6             NA  (NA, NA)  \n",
      "8             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6      DLGAP5            9787      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "6             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "18      COMMD4           54939      Missense_Mutation  point_mutation   \n",
      "13      MRPS24           64951      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "18             NA  (NA, NA)  \n",
      "13             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "14       MYO1C            4641      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "14             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2       ITPR2            3709      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "2             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "15       ANO10           55129      Missense_Mutation  point_mutation   \n",
      "9     MAPK8IP3           23162      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "15             NA  (NA, NA)  \n",
      "9              NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "14      ZNF654           55279      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "14             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "3       PRDM2            7799      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "3             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2        RTTN           25914      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "2             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "1       ATG9A           79065      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "1             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "2      ZNF525          170958      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "2             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "10        RPS9            6203      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "10             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "21      ZNF839           55778      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "21             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "13        LRP1            4035      Missense_Mutation  point_mutation   \n",
      "6         MDN1           23195      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "13             NA  (NA, NA)  \n",
      "6              NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "1        HTR1E            3354      Missense_Mutation  point_mutation   \n",
      "13       SYTL3           94120      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "1              NA  (NA, NA)  \n",
      "13             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16      MYO15A           51168      Missense_Mutation  point_mutation   \n",
      "9       PTCHD1          139411      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "16             NA  (NA, NA)  \n",
      "9              NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6     RAD54L2           23132      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "6             NA  (NA, NA)  \n",
      "    Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "530        DPP4            1803      Missense_Mutation  point_mutation   \n",
      "403       GABRD            2563      Missense_Mutation  point_mutation   \n",
      "46         ROR2            4920      Missense_Mutation  point_mutation   \n",
      "570        XAB2           56949      Missense_Mutation  point_mutation   \n",
      "\n",
      "    Protein_Change    aa_pos  \n",
      "530             NA  (NA, NA)  \n",
      "403             NA  (NA, NA)  \n",
      "46              NA  (NA, NA)  \n",
      "570             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6        MYO3B          140469      Missense_Mutation  point_mutation   \n",
      "15       OBSCN           84033      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "6              NA  (NA, NA)  \n",
      "15             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "16       PPM1J          333926      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "16             NA  (NA, NA)  \n",
      "     Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6           COQ2           27235      Missense_Mutation  point_mutation   \n",
      "8  RP11-1084J3.4          114899      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "6             NA  (NA, NA)  \n",
      "8             NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "5       ANKAR          150709      Missense_Mutation  point_mutation   \n",
      "7       ANKAR          150709      Missense_Mutation  point_mutation   \n",
      "1         BBX           56987      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "5             NA  (NA, NA)  \n",
      "7             NA  (NA, NA)  \n",
      "1             NA  (NA, NA)  \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "13      KATNB1           10300      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "13             NA  (NA, NA)  \n",
      "   Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "21      SEMA5A            9037      Missense_Mutation  point_mutation   \n",
      "3        TNPO3           23534      Missense_Mutation  point_mutation   \n",
      "\n",
      "   Protein_Change    aa_pos  \n",
      "21             NA  (NA, NA)  \n",
      "3              NA  (NA, NA)  \n",
      "  Hugo_Symbol  Entrez_gene_ID Variant_Classification        function  \\\n",
      "6        PKP3           11187      Missense_Mutation  point_mutation   \n",
      "\n",
      "  Protein_Change    aa_pos  \n",
      "6             NA  (NA, NA)  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SARC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 248 fies;\t\traw maf: 0 files\n",
      "select processed maf file for cohort SARC\n",
      "use path ../../TCGA/mutations/data/gdac.broadinstitute.org_SARC.Mutation_Packager_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234978', 'ENSG00000269688', 'ENSG00000272231']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000205821', 'ENSG00000221136', 'ENSG00000222717', 'ENSG00000261616', 'ENSG00000270726', 'MT-CO1', 'MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000250992']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000203849']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['DH17', 'ENSG00000215958', 'ENSG00000237357', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221704', 'ENSG00000258727']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000230850']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CNTP5', 'DH7', 'ENSG00000185710']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'T10']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CAC2D1', 'DH11', 'ENSG00000215398', 'ENSG00000234719']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CHR9', 'ENSG00000215958', 'ENSG00000237452']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['DJA4', 'EM', 'ENSG00000268673', 'PSB', 'SP91']\n",
      "6 Hugo symbols not mapped to Entrez gene ID ['LOC100507033', 'ENSG00000267219', 'ENSG00000269939', 'ENSG00000273433', 'MT-ATP8', 'PEPLD']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000206567', 'ENSG00000221280', 'ENSG00000226716', 'ENSG00000265803', 'ENSG00000268823']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000262712']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221741', 'ENSG00000222586', 'T10']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC101929847', 'ENSG00000221295']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ALAD2', 'CNTP3', 'ENSG00000215979', 'ENSG00000221659', 'ENSG00000259241']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000252677']\n",
      "3 "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hugo symbols not mapped to Entrez gene ID ['ENSG00000214614', 'MT-CO1', 'MT-RNR2']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['CAC1A', 'DJC1', 'ENSG00000225946']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221704']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['DH2', 'ENSG00000201913', 'ENSG00000214581', 'ENSG00000215022', 'ENSG00000221295', 'ENSG00000221388', 'ENSG00000272667']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC101930494', 'DH2', 'DJB3', 'KC5', 'ST6GALC4']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'ENSG00000237281', 'ENSG00000265002']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['EB1BP2', 'ENSG00000223697', 'ENSG00000256616', 'ENSG00000270726', 'PLX2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['DH10', 'ENSG00000221684', 'ENSG00000264299', 'ENSG00000266657']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000196589', 'ENSG00000221235', 'ENSG00000254551', 'ENSG00000255200']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['GT1', 'CAC1S', 'ENSG00000232274', 'ENSG00000253200']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC100293748', 'ENSG00000161103', 'ENSG00000227175', 'ENSG00000233207', 'IFR1']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['DH2', 'FLJ46361', 'RG2']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['APC4', 'ENSG00000251199', 'ENSG00000265194']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221684', 'ENSG00000256616']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['AHK2', 'CAC1A', 'ENSG00000203849', 'ENSG00000215923', 'ENSG00000215954', 'ENSG00000224643', 'ENSG00000233825', 'ENSG00000258437', 'ENSG00000266999']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['CSGALCT1', 'DJB3', 'RG2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC101928102', 'DH11', 'ENSG00000221011', 'ENSG00000234978', 'ENSG00000267260']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['DJB8-AS1', 'ENSG00000223063', 'ENSG00000267045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CAC2D4', 'ENSG00000235852', 'ENSG00000236911', 'MT-RNR1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['CAC1E', 'ENSG00000205745', 'ENSG00000221280', 'ENSG00000259455', 'ENSG00000269939', 'FLJ26850', 'SI3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['AHK', 'CNTP3B', 'ENSG00000204398', 'ENSG00000244306', 'ENSG00000256982', 'ENSG00000263378']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 21\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "21 Hugo symbols not mapped to Entrez gene ID ['CAC1S', 'DH10', 'DH17', 'DH6', 'DH7', 'ENSG00000188477', 'ENSG00000189275', 'ENSG00000205663', 'ENSG00000215958', 'ENSG00000221137', 'ENSG00000221379', 'ENSG00000234722', 'ENSG00000235772', 'ENSG00000237265', 'ENSG00000256982', 'ENSG00000257057', 'ENSG00000257829', 'ENSG00000260628', 'ENSG00000267943', 'KP2', 'RSEL']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['LOC101928703', 'ENSG00000216020', 'ENSG00000221279']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['CAC1A', 'CAC1C', 'ENSG00000200485']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000196096', 'ENSG00000244327']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['DH7', 'RSE8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258727']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CAC1A', 'CNTP3', 'ENSG00000221395', 'ENSG00000266885', 'ENSG00000267040']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216194']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['AHK', 'ENSG00000228436', 'ENSG00000228532', 'ENSG00000229743', 'ENSG00000260628', 'ENSG00000260973']\n",
      "2"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['ENSG00000216089', 'LINC00846']\n",
      "5 Hugo symbols not mapped to Entrez gene ID ['CNTP5', 'DH2', 'ENSG00000161103', 'ENSG00000221544', 'ENSG00000269690']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 8\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221664', 'ENSG00000227175', 'R5-8SP6']\n",
      "8 Hugo symbols not mapped to Entrez gene ID ['TCEB3C', 'ENSG00000171658', 'ENSG00000212939', 'ENSG00000215527', 'ENSG00000249149', 'ENSG00000257769', 'ENSG00000257931', 'ENSG00000261759']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['APC1']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['CAC1B', 'DH5', 'ENSG00000255487', 'GZ']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DJC11', 'RSE2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216004', 'ENSG00000233434', 'ENSG00000240253']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258704']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000167765', 'ENSG00000231345', 'ENSG00000250910', 'ENSG00000267075']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['CAC1H', 'ENSG00000188971']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['LOC101928729', 'ENSG00000263120', 'ENSG00000267353']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221492', 'ENSG00000244227']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CGB', 'ENSG00000239179', 'MT-RNR2', 'MT-TL1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CHR4', 'ENSG00000226668', 'ENSG00000261546']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC101928780', 'DJC13', 'ENSG00000248115', 'ENSG00000268032']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['ADC', 'CTN2', 'ENSG00000242474', 'ENSG00000254665', 'ENSG00000255384', 'ENSG00000260213', 'ENSG00000267387', 'FLJ22184', 'TIG']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000225241']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212768', 'ENSG00000221121', 'ENSG00000233836', 'R5-8SP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DH3', 'DH5', 'NMT3']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['CAC1G', 'ENSG00000203849', 'MT-RNR2']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216020', 'ENSG00000226145', 'ENSG00000258364']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AHK', 'AHK2']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000211996', 'ENSG00000214581', 'ENSG00000234222']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DJC13', 'ENSG00000221810', 'ENSG00000273192']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216191', 'ENSG00000218416', 'ENSG00000221280', 'ENSG00000233863', 'ENSG00000260411', 'PLX2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221118', 'ENSG00000226232', 'ENSG00000268301']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['AHK2']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221684', 'P1L3']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000236233', 'ENSG00000270012']\n",
      "5 Hugo symbols not mapped to Entrez gene ID ['APC1', 'ENSG00000216191', 'ENSG00000228302', 'MT-CO1', 'MT-TS2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC101929847', 'ENSG00000205746', 'ENSG00000216113', 'ENSG00000269243', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['TCEB3CL', 'CAC1G', 'DH8', 'DJC22', 'ENSG00000221104', 'ENSG00000226738', 'ENSG00000242111', 'ENSG00000259037', 'KC1']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['CAC1A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 51\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "51 Hugo symbols not mapped to Entrez gene ID ['STRA13', 'LOC101930008', 'LOC440157', 'CA', 'CAC1C-AS1', 'CAC1D', 'CAC1S', 'CAC2D1', 'CNTP2', 'CTN2', 'DH14', 'DH3', 'DH5', 'DH6', 'DH7', 'DJB13', 'DJB3', 'DJC13', 'EH', 'ENSG00000188477', 'ENSG00000203849', 'ENSG00000219926', 'ENSG00000221653', 'ENSG00000225261', 'ENSG00000227175', 'ENSG00000228980', 'ENSG00000231171', 'ENSG00000231512', 'ENSG00000232274', 'ENSG00000234810', 'ENSG00000234921', 'ENSG00000235772', 'ENSG00000235881', 'ENSG00000250026', 'ENSG00000251273', 'ENSG00000251429', 'ENSG00000258559', 'ENSG00000260874', 'ENSG00000261720', 'ENSG00000267575', 'ENSG00000269352', 'ENSG00000269495', 'ENSG00000269533', 'ENSG00000272084', 'ENSG00000273189', 'FLJ33360', 'MGC34034', 'PLX2', 'SERPI3', 'SP91', 'V3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['DH7', 'ENSG00000224363', 'ENSG00000237452', 'ENSG00000245156', 'ENSG00000252258', 'ENSG00000257434', 'KP3', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000271894']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['CAC1H', 'CHR9', 'DJC8', 'ENSG00000187695', 'ENSG00000230333', 'ENSG00000257057', 'ENSG00000272231']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CAC1F', 'ENSG00000254553', 'ENSG00000257743', 'ENSG00000271959']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['ENSG00000205918', 'ENSG00000216054', 'ENSG00000226145', 'ENSG00000264576', 'GI2', 'MT-RNR1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'ENSG00000215067', 'ENSG00000215933', 'ENSG00000221096', 'ENSG00000238737', 'ENSG00000251226']\n",
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC100128374', 'DH14', 'ENSG00000215941', 'ENSG00000236295', 'ENSG00000272356']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['101928757', 'ENSG00000216073', 'ENSG00000221281']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000213132', 'ENSG00000234722']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC100293748', 'ENSG00000216113', 'ENSG00000221684', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215972', 'ENSG00000227082', 'ENSG00000250186', 'ENSG00000273433', 'IF16']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000226113', 'ENSG00000266060', 'MT-RNR1']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000203849', 'MT-ND5', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['DH5', 'ENSG00000223291', 'ENSG00000243175', 'ENSG00000261393', 'ENSG00000272885']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC101060604', 'DJC13', 'EH', 'ENSG00000249149', 'ENSG00000264057']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215954', 'ENSG00000221280', 'ENSG00000222732', 'ENSG00000256249', 'ENSG00000265289', 'ENSG00000271959', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 11\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "11 Hugo symbols not mapped to Entrez gene ID ['CTN2', 'DH8', 'ENSG00000180458', 'ENSG00000215933', 'ENSG00000221279', 'ENSG00000221317', 'ENSG00000221388', 'ENSG00000221664', 'ENSG00000251966', 'ENSG00000257395', 'ENSG00000259241']\n",
      "5 Hugo symbols not mapped to Entrez gene ID"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " ['ENSG00000221319', 'ENSG00000237234', 'ENSG00000238411', 'ENSG00000268985', 'GLU']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['CAC1S', 'ENSG00000215953', 'ENSG00000268032']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CNTP4', 'ENSG00000227082', 'ENSG00000259605', 'ENSG00000268573', 'MT-CO1']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['CTN2', 'ENSG00000262903']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['DH12', 'ENSG00000216191', 'ENSG00000229481', 'ENSG00000259455', 'ENSG00000259966', 'I']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['LOC100996870', '101928757', 'DH10']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['CAC1F', 'DH11', 'ENSG00000224631', 'ENSG00000230615', 'ENSG00000232274', 'ENSG00000245330', 'ENSG00000268032', 'ST6GALC3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['LOC100507033', 'DH7', 'ENSG00000222033', 'ENSG00000260628', 'ENSG00000272913']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000263120', 'ENSG00000268812']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000244227']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216127', 'ENSG00000221395', 'ENSG00000265209', 'ENSG00000267801']\n",
      "3"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 11\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['DH14', 'ENSG00000249237', 'FLJ40448']\n",
      "11 Hugo symbols not mapped to Entrez gene ID ['GL', 'DF5', 'ENSG00000184774', 'ENSG00000203849', 'ENSG00000216173', 'ENSG00000221279', 'ENSG00000221388', 'ENSG00000224516', 'ENSG00000251273', 'MT-TY', 'TSRE1']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234613', 'ENSG00000237452', 'SERPI10']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DH5', 'ENSG00000273199']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CAC1D', 'ENSG00000215958']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC101928787', 'MT-RNR1']\n",
      "2 "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hugo symbols not mapped to Entrez gene ID ['DH9', 'FLJ41200']\n",
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221810', 'ENSG00000259455', 'ENSG00000269890', 'ENSG00000273312', 'KIAA1045']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC101928921', 'DH6', 'ENSG00000221145', 'ENSG00000248115']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234978']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CAC1F', 'ENSG00000183154', 'ENSG00000233002', 'ENSG00000254990', 'ENSG00000259668']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['DH5', 'ENSG00000197210', 'ENSG00000226145', 'ENSG00000231611', 'ENSG00000255384', 'FLJ45139', 'IF8']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000245482']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['LOC101927648', 'LOC101930657', 'AHK2', 'CNTP2', 'ENSG00000215933', 'ENSG00000215953', 'ENSG00000259521', 'ENSG00000267865']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216166', 'ENSG00000232274', 'ENSG00000261351']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'DH2', 'DI2', 'ENSG00000268366']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['CHR4']\n",
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000205215', 'ENSG00000268081', 'ENSG00000269688', 'ENSG00000272485', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CNTP5', 'ENSG00000256616']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['A40']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000229047', 'ENSG00000238009', 'ENSG00000244151', 'ENSG00000255641', 'KC5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FLJ20444']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['LOC101928921', 'DAF3', 'DJA3', 'ENSG00000204398', 'ENSG00000214105', 'ENSG00000229953', 'ENSG00000249430', 'MT-CO1']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212952', 'ENSG00000221591', 'FLJ40448']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['DH9', 'ENSG00000272231']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000232274', 'ENSG00000235999']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['CAC1C', 'CNTP5', 'ENSG00000166104', 'ENSG00000177699', 'ENSG00000215346', 'ENSG00000272231']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221664', 'ENSG00000239012', 'MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['IF1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['CD97', 'DJB11', 'ENSG00000229852', 'ENSG00000271828', 'RSE10', 'V3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216089', 'ENSG00000237452', 'ENSG00000254592', 'ENSG00000257743', 'ENSG00000267660']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215954']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['CNTP4', 'ENSG00000228506', 'PLX3', 'RSET2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'DH5', 'ENSG00000221244', 'ENSG00000251226']\n",
      "5"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['GS', 'ENSG00000227175', 'ENSG00000232274', 'ENSG00000273000', 'ST6GALC5']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['AHK2', 'DH8', 'ENSG00000204792', 'ENSG00000226849']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000179253', 'ENSG00000232274', 'ENSG00000257434', 'ENSG00000267601']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CHR7', 'ENSG00000201913', 'ENSG00000227407']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 9\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9 Hugo symbols not mapped to Entrez gene ID ['LOC101929948', 'ALAD2', 'CAC1B', 'CAC1F', 'DH11', 'DH6', 'DH9', 'ENSG00000223164', 'IF14']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216089']"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC101927091', 'ENSG00000167765', 'ENSG00000253629', 'ENSG00000263120']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000264542', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257433']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['DH11', 'DH6', 'ENSG00000253381']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CAC1C', 'DH14', 'ENSG00000185710', 'ENSG00000215941', 'RSEK']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215941', 'IF17']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 12\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "12 Hugo symbols not mapped to Entrez gene ID ['LOC101929847', 'AHK', 'CSPG4P8', 'ENSG00000215941', 'ENSG00000221672', 'ENSG00000223200', 'ENSG00000228463', 'ENSG00000236233', 'ENSG00000261238', 'ENSG00000266535', 'ENSG00000268889', 'V3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['BP2', 'ENSG00000264151', 'ST6GALC3']\n",
      "3 Hugo symbols not mapped to Entrez gene ID ['DH14', 'DI2', 'SERPI11']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221415', 'ENSG00000270433']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DH9', 'ENSG00000226145', 'ENSG00000272231']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 18\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "18 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'AHK2', 'CAC1F', 'CSGALCT1', 'CTN2', 'DH9', 'DJC17', 'DJC24', 'DK2', 'ENSG00000188474', 'ENSG00000204038', 'ENSG00000221664', 'ENSG00000223579', 'ENSG00000232675', 'ENSG00000252176', 'ENSG00000272625', 'FLJ27255', 'KC5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000223730', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['AHK', 'DJC5B', 'ENSG00000221684', 'ENSG00000254553', 'MIR3118-6', 'MT-TF']\n",
      "2 Hugo symbols not mapped to Entrez gene ID "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "['CAC1B', 'ENSG00000272231']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000222032', 'ENSG00000234277', 'ENSG00000251273', 'RSEL']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215941', 'ENSG00000215976']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CNTP2', 'DJC13', 'ENSG00000236432', 'ENSG00000264164']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000184566', 'ENSG00000216154', 'ENSG00000230615', 'ENSG00000270726']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['LOC100128374', 'AHK2', 'CAC1B', 'CAC1I', 'ENSG00000226145', 'ENSG00000242288', 'ENSG00000254967', 'FLJ44006']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KC1']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214397', 'FLJ46361']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DH17', 'ENSG00000200294', 'ENSG00000221732']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000224505', 'PLX2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CNTP3']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC440157', 'DJB2', 'ENSG00000215933', 'ENSG00000221474']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['CAC1I', 'ENSG00000203849', 'ENSG00000221121', 'ENSG00000221281', 'ENSG00000232274', 'ENSG00000267943']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215954', 'ENSG00000230615']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DH17', 'ENSG00000265002']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['ENSG00000218739', 'ENSG00000221279', 'ENSG00000221280', 'ENSG00000221307', 'ENSG00000255193', 'KC1']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000272231', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215941', 'ENSG00000265865']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267075', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DH8', 'ENSG00000221684']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['DH3', 'ENSG00000221696', 'ENSG00000258944', 'ENSG00000268366']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['A60', 'CTN2', 'DH7']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CTN3', 'DH5', 'ENSG00000263120']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['CAC1C', 'DH8', 'ENSG00000211510', 'ENSG00000215941', 'ENSG00000221388', 'IF1', 'KC3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['DH5', 'DH6', 'ENSG00000251273', 'ENSG00000259241', 'ENSG00000267260']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221672', 'ENSG00000221704', 'R5-8SP6']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221280', 'ENSG00000233002', 'ENSG00000244306', 'MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['DH17-AS1', 'DH2', 'ENSG00000205018', 'ENSG00000215941', 'ENSG00000258661', 'ENSG00000264222']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000259241']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 11\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "11 Hugo symbols not mapped to Entrez gene ID ['AHK', 'CHR3', 'DH17', 'ENSG00000204038', 'ENSG00000232274', 'ENSG00000233145', 'ENSG00000242539', 'ENSG00000244306', 'ENSG00000257042', 'ENSG00000273433', 'PLX1']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000215941', 'ENSG00000221664', 'ENSG00000231486', 'ENSG00000259479']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CAC1A', 'CAC1G', 'CNTP3', 'ENSG00000222717', 'ENSG00000228980']\n",
      "6 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000221219', 'ENSG00000221307', 'ENSG00000250026', 'ENSG00000270726', 'MT-RNR1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000249275', 'KATL1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000227719', 'ENSG00000264299']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216116', 'ENSG00000254913', 'ENSG00000263606']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DH14', 'ENSG00000266490', 'ENSG00000269988']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 92\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "92 Hugo symbols not mapped to Entrez gene ID ['ADC', 'PA', 'PSA', 'STRA13', 'LOC100996735', 'LOC101929518', '101928757', 'AHK', 'AHK2', 'BACH1-IT1', 'CA', 'CAC1A', 'CAC1E', 'CAC1I', 'CAC1S', 'CAC2D3', 'CHR9', 'CTN3', 'DF5', 'DH10', 'DH11', 'DH17', 'DH5', 'DH8', 'DH9', 'DJB8', 'DSE1L2', 'EB1BP2', 'ENSG00000166104', 'ENSG00000173213', 'ENSG00000177553', 'ENSG00000182873', 'ENSG00000184566', 'ENSG00000187812', 'ENSG00000204038', 'ENSG00000205018', 'ENSG00000207187', 'ENSG00000215023', 'ENSG00000221335', 'ENSG00000224113', 'ENSG00000224631', 'ENSG00000225891', 'ENSG00000228998', 'ENSG00000229839', 'ENSG00000230578', 'ENSG00000230615', 'ENSG00000231437', 'ENSG00000234232', 'ENSG00000234978', 'ENSG00000235881', 'ENSG00000238129', 'ENSG00000239674', 'ENSG00000240401', 'ENSG00000247121', 'ENSG00000249502', 'ENSG00000250046', 'ENSG00000254551', 'ENSG00000254571', 'ENSG00000254815', 'ENSG00000254844', 'ENSG00000255200', 'ENSG00000256209', 'ENSG00000257494', 'ENSG00000258017', 'ENSG00000259426', 'ENSG00000260973', 'ENSG00000261709', 'ENSG00000262732', 'ENSG00000263738', 'ENSG00000263887', 'ENSG00000265899', 'ENSG00000267688', 'ENSG00000268650', 'ENSG00000268864', 'ENSG00000269636', 'ENSG00000269888', 'ENSG00000272017', 'ENSG00000272849', 'ENSG00000273106', 'FLJ27255', 'KC4', 'KP1', 'NIPSP3B', 'PLX4', 'RSE10', 'SERPI7', 'SP29', 'TIG', 'TIGL1', 'TRU1AP', 'TTLL10-AS1', 'V3']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000253334', 'LCN']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 25\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "25 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'GS', 'CAC1E', 'CNTP5', 'CTN1', 'DAF1', 'DH17', 'DJC10', 'ENSG00000187812', 'ENSG00000203496', 'ENSG00000203849', 'ENSG00000223804', 'ENSG00000233487', 'ENSG00000237265', 'ENSG00000249509', 'ENSG00000258908', 'ENSG00000261759', 'ENSG00000265002', 'ENSG00000266817', 'ENSG00000267561', 'ENSG00000269895', 'ENSG00000271959', 'FLJ46361', 'SERPI1', 'ST6GALC1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221734', 'ENSG00000255622', 'ENSG00000261200']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000216102', 'R5-8SP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n",
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['FAM21A', 'ENSG00000261720', 'ENSG00000267749']\n",
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000221280', 'ENSG00000267075']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['DH9', 'ENSG00000221281', 'ENSG00000265002', 'ENSG00000268845', 'LCN']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['LOC101929847', 'DH10', 'ENSG00000221388']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000237357', 'ENSG00000266156']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['LOC101927958', 'DJC8', 'ENSG00000262112', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CAC1H', 'CHR5', 'DH9', 'ENSG00000207207']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257434']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['LOC101930284', 'ENSG00000203849', 'ENSG00000221156']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['DH14', 'KC4', 'MT-RNR2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['CAC1H', 'DH8', 'ENSG00000204398', 'ENSG00000244227', 'ENSG00000258973', 'ENSG00000266008']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['GS', 'ENSG00000258364', 'ENSG00000260628', 'MT-RNR1', 'MT-RNR2', 'MT-TC', 'MT-TF', 'SP91']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['DH5', 'ENSG00000220256', 'ENSG00000229481', 'ENSG00000244227', 'ENSG00000260158', 'ENSG00000267260']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['DH3', 'ENSG00000235704']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 34\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "34 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'LOC101929676', 'AHK', 'CAC1C-AS1', 'CNTP4', 'DH2', 'DH3', 'DH5', 'DH7', 'DH8', 'DH9', 'DJB11', 'DJB3', 'DJC1', 'ENSG00000161103', 'ENSG00000162947', 'ENSG00000204044', 'ENSG00000204957', 'ENSG00000205830', 'ENSG00000222961', 'ENSG00000228318', 'ENSG00000228829', 'ENSG00000229481', 'ENSG00000231933', 'ENSG00000239636', 'ENSG00000242590', 'ENSG00000249494', 'ENSG00000252868', 'ENSG00000257434', 'ENSG00000259069', 'ENSG00000263826', 'ENSG00000272373', 'SERPI4', 'SI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6 Hugo symbols not mapped to Entrez gene ID ['DH9', 'ENSG00000207207', 'ENSG00000237031', 'ENSG00000237281', 'ENSG00000263272', 'ENSG00000267462']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['DH8', 'ENSG00000215923', 'ENSG00000221672', 'ENSG00000222043', 'ENSG00000225946', 'ENSG00000226145', 'ENSG00000254967', 'PLX2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n",
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['AHK', 'CHR4', 'ENSG00000221186', 'ENSG00000235837', 'ENSG00000270540']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000259241', 'MT-CO1', 'MT-ND5']\n",
      "4 Hugo symbols not mapped to Entrez gene ID ['AHK', 'ENSG00000206195', 'ENSG00000225411', 'ENSG00000273312']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LGG\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 287 fies;\t\traw maf: 531 files\n",
      "select raw maf file for cohort LGG\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_LGG.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "STAD\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 290 fies;\t\traw maf: 396 files\n",
      "select raw maf file for cohort STAD\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_STAD.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000214999', 'ENSG00000255641']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-ND5', 'MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C11orf48']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC101927016', 'ENSG00000181495']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000251606']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AGPAT9']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000269808']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255622']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000268412', 'ENSG00000269175', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000267882']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258027']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC100996735', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'ENSG00000212884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000180574']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000189332']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n",
      "1 Hugo symbols not mapped to Entrez gene ID ['101928757']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['101928757', 'ENSG00000180574']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'CXXC11', '101928757', 'ENSG00000269808', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'ENSG00000267360', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000241720']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ADC', 'ENSG00000205821', 'ENSG00000253917']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000249034']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000228532']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258027']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['NAT6', 'ENSG00000255622', 'ENSG00000273398', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000187461']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CXXC11']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000197604', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000267970']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884', 'ENSG00000219492']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000240040']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255641']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000180574']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000188474', 'ENSG00000255641']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000227091', 'ENSG00000255622', 'ENSG00000273398']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255641', 'ENSG00000259966']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['SLC35E2', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000184909']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000228532', 'MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'ENSG00000197604', 'ENSG00000232600']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ADC', '101928757']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AIM1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000255622', 'MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'CXXC11', 'ENSG00000260007', 'PRAMEF16']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C11orf48']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234719', 'ENSG00000249034']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000269657']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 10\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'EFTUD1', 'FAM21A', '101928757', 'ENSG00000181495', 'ENSG00000212884', 'ENSG00000259455', 'ENSG00000263065', 'ENSG00000267561', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'C10orf2', 'EFTUD1', 'FAM21A', 'TRAPPC2P1', 'ENSG00000228532', 'ENSG00000273398']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CD97', 'NOTCH2NL']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000236432']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'FAM21A', 'ENSG00000180574', 'ENSG00000219492', 'ENSG00000237568', 'ENSG00000269175', 'ENSG00000269657']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['C10orf2']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'CSRP2BP', 'ENSG00000273398']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['NAT6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['NAT6', 'ENSG00000141979', 'ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "8 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'CXXC11', 'SLC35E2', 'LOC101929271', 'ENSG00000173213', 'ENSG00000255622', 'ENSG00000268241', 'KIAA1804']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258654']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'ENSG00000267954', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['C11orf48', 'ENSG00000212884', 'ENSG00000234719', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'ENSG00000255470']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['ENSG00000180574', 'ENSG00000268412', 'KIAA1804', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs mapping to duplicated target IDs in mapping table: 2\n",
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000266956']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000257743']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234719', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'AZI1', 'CD97', 'ENSG00000254230']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000206532']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'FAM21A', 'TCEB3C']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000243696', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'ENSG00000180574', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258027']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000261711']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['LOC100996735', 'ENSG00000259455']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'C10orf2', 'ENSG00000243501', 'ENSG00000255168', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['101928757']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000250850', 'ENSG00000267976']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CD97', 'EFTUD1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['TRAPPC2P1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['MT-CO1', 'MT-ND4L']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000250692']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['EFTUD1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', 'LOC101927016', '101928503']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000180574']\n",
      "1"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " Hugo symbols not mapped to Entrez gene ID ['LOC101927016']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['HN1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000243501']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['B3GNT1', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-CO1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000269175', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['CXXC11', 'EFTUD1', '101928503', 'ENSG00000249034', 'ENSG00000268467']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000234719']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CXXC11']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000121388']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000269846']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000187811']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AZI1', 'MT-ND6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000180574']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CD97', 'CSRP2BP', 'ENSG00000259966', 'ENSG00000269175']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000273398', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000212884']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 7\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "7 Hugo symbols not mapped to Entrez gene ID ['AIM1', 'AZI1', 'CSRP2BP', 'NAT6', '101928757', 'ENSG00000255622', 'KIAA1045']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'ENSG00000255622']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000268412', 'ENSG00000269175', 'MT-ATP6']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['APITD1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['B3GNT1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['EFTUD1', '101928757']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ADC']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5 Hugo symbols not mapped to Entrez gene ID ['ADC', 'FAM21A', 'SLC35E2', 'ENSG00000234719', 'ENSG00000255168']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000273398']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['ENSG00000244558', 'ENSG00000255182']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4 Hugo symbols not mapped to Entrez gene ID ['CXXC11', 'ENSG00000255168', 'ENSG00000268412', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3 Hugo symbols not mapped to Entrez gene ID ['ENSG00000258654', 'MT-CO1', 'MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['C10orf2', 'ENSG00000255641']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000173213']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CD97']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['AZI1']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000189332']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['ENSG00000272822']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['MT-ND5']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP', 'ENSG00000243008']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2 Hugo symbols not mapped to Entrez gene ID ['AGPAT9', 'FAM21A']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: query IDs not mapped to any target IDs excluded: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1 Hugo symbols not mapped to Entrez gene ID ['CSRP2BP']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "READ\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "preprocessed maf 70 fies;\t\traw maf: 123 files\n",
      "select raw maf file for cohort READ\n",
      "use path ../../TCGA/mutations/data_raw/gdac.broadinstitute.org_READ.Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\n",
      "Protein change columns not found in TCGA-CI-6619-01.maf.txt READ\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "\"['AAChange'] not in index\"",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-183-bb4286da1065>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     42\u001b[0m                 \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Protein change columns not found in\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcohort\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     43\u001b[0m             \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mprotein_change\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\"NA\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m             \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_broad_maf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtcga_point\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtcga_truncating\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprotein_change\u001b[0m \u001b[0;34m=\u001b[0m  \u001b[0mprotein_change\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     45\u001b[0m             \u001b[0;31m# if all Entrez gene IDs are zeroes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     46\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Entrez_gene_ID\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-177-42df2c6b4575>\u001b[0m in \u001b[0;36mread_broad_maf\u001b[0;34m(df, point, truncating, protein_change)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mread_broad_maf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtruncating\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprotein_change\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Protein_Change\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     df = df[[\"Hugo_Symbol\",\"Entrez_Gene_Id\",\"Variant_Classification\",\n\u001b[0;32m----> 3\u001b[0;31m                        protein_change,'Tumor_Sample_Barcode']].copy()\n\u001b[0m\u001b[1;32m      4\u001b[0m     \u001b[0;31m#                    'Chromosome', u'Start_position', u'End_position']].copy()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;34m\"-Tumor\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Tumor_Sample_Barcode\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2680\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2681\u001b[0m             \u001b[0;31m# either boolean or fancy integer index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2682\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2683\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2684\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_array\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2724\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2725\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2726\u001b[0;31m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2727\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2728\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/olya/miniconda2/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[0;34m(self, obj, axis, is_setter)\u001b[0m\n\u001b[1;32m   1325\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1326\u001b[0m                     raise KeyError('{mask} not in index'\n\u001b[0;32m-> 1327\u001b[0;31m                                    .format(mask=objarr[mask]))\n\u001b[0m\u001b[1;32m   1328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1329\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values_from_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: \"['AAChange'] not in index\""
     ]
    }
   ],
   "source": [
    "tcga_point = ['In_Frame_Del','In_Frame_Ins','Missense_Mutation']\n",
    "tcga_truncating = ['Frame_Shift_Del','Frame_Shift_Ins','Nonsense_Mutation',\n",
    "                   'Splice_Site','Translation_Start_Site','Nonstop_Mutation']\n",
    "\n",
    "vatiant_types = set()\n",
    "for cohort in cohorts: \n",
    "    print(cohort)\n",
    "    tcga = {}\n",
    "    \n",
    "    # decide which mutation file to use\n",
    "    proc_maf_path = \"../../TCGA/mutations/data/gdac.broadinstitute.org_\"+cohort+\".Mutation_Packager_Calls.Level_3.2016012800.0.0/\"\n",
    "    raw_maf_path = \"../../TCGA/mutations/data_raw/gdac.broadinstitute.org_\"+cohort+\".Mutation_Packager_Raw_Calls.Level_3.2016012800.0.0/\"\n",
    "    proc_maf_files, raw_maf_files = 0, 0 \n",
    "    if os.path.exists(processed_maf_path):\n",
    "        proc_maf_files = len(os.listdir(proc_maf_path))\n",
    "    if os.path.exists(raw_maf_path):\n",
    "        raw_maf_files =  len(os.listdir(raw_maf_path))\n",
    "    # select the one with maximal number of mutations\n",
    "    print(\"preprocessed maf\",proc_maf_files,\"fies;\\t\\traw maf:\",raw_maf_files,\"files\",file = sys.stderr)\n",
    "    if raw_maf_files > proc_maf_files:\n",
    "        path = raw_maf_path\n",
    "        print(\"select raw maf file for cohort\", cohort, file =sys.stderr )\n",
    "    else:\n",
    "        path = proc_maf_path\n",
    "        print(\"select processed maf file for cohort\", cohort, file =sys.stderr )\n",
    "    print(\"use path\",path, file = sys.stderr)\n",
    "    for fname in os.listdir(path):\n",
    "        if fname.endswith(\".maf.txt\"):\n",
    "            #print(sname)\n",
    "            df = pd.read_csv(path+fname, sep = \"\\t\")\n",
    "            sname = df[\"Tumor_Sample_Barcode\"].values[0]\n",
    "            vatiant_types = vatiant_types | set(df[\"Variant_Classification\"].values)\n",
    "            if \"Protein_Change\" in df.columns:\n",
    "                protein_change = \"Protein_Change\"\n",
    "            elif \"amino_acid_change_WU\" in df.columns:\n",
    "                protein_change = \"amino_acid_change_WU\"\n",
    "            elif \"AAChange\" in df.columns:\n",
    "                protein_change = \"AAChange\"\n",
    "            elif \"amino_acid_change\" in df.columns:\n",
    "                protein_change = \"amino_acid_change\"\n",
    "            else:\n",
    "                print(\"Protein change columns not found in\", fname, cohort,file=sys.stderr)\n",
    "            df.fillna(value={protein_change:\"NA\"}, inplace=True)\n",
    "            df = read_broad_maf(df, tcga_point, tcga_truncating, protein_change =  protein_change)\n",
    "            # if all Entrez gene IDs are zeroes\n",
    "            if set(df[\"Entrez_gene_ID\"].values) == set([0]):\n",
    "                # remap Hugo symbols \n",
    "                df = hgnc2entrez_mapper(df,ncbi_symbols,ncbi_synonyms)\n",
    "            elif 0 in set(df[\"Entrez_gene_ID\"].values) :\n",
    "                # if zero in Entrez gene IDs, remove \n",
    "                df = df.loc[df[\"Entrez_gene_ID\"]!=0,:]\n",
    "            if df.shape[0] == 0:\n",
    "                tcga[sname] = {}\n",
    "            else:\n",
    "                # exlued point mutations with undefined postions \n",
    "                s = df.loc[df[\"aa_pos\"]== (\"NA\",\"NA\"),:].loc[df[\"function\"]== \"point_mutation\",:]\n",
    "                if s.shape[0] > 0:\n",
    "                    print(s[[\"Hugo_Symbol\",\"Entrez_gene_ID\",\"Variant_Classification\",\n",
    "                             \"function\",protein_change,\"aa_pos\"]],file = sys.stderr)\n",
    "                    df = df.loc[~df.index.isin(s.index.values),:]\n",
    "                    if s.shape[0] > 5:\n",
    "                        print(sname, cohort,s.shape[0],\"point mutations without a position\",df.shape[0],\"remain\",file = sys.stderr)\n",
    "                if df.shape[0] == 0:\n",
    "                    tcga[sname] = {}\n",
    "                else:\n",
    "                    tcga[sname] = mutations2score(df, TSG_entrez,OG_entrez, hotspots, w=w)\n",
    "    # write table\n",
    "    tcga = pd.DataFrame.from_dict(tcga)\n",
    "    tcga.fillna(0,inplace = True)\n",
    "    tcga.to_csv(preprocessed_dir+\"/TCGA-\"+cohort+\".non_bin_mutations.tsv\",sep = \"\\t\")\n",
    "    # binarize and write\n",
    "    #tcga= tcga.applymap(lambda x: binarize(x))\n",
    "    #tcga.to_csv(preprocessed_dir+\"/TCGA-\"+cohort+\".binary_mutations.tsv\",sep = \"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "vatiant_types "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "tcga.loc[[7157,5925,5290,4893],:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}