[6ac965]: / src / iterpretability / datasets / tumorp / prepare_ovarian.ipynb

Download this file

428 lines (427 with data), 13.7 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Load the semi-synthetic cohort table; the first CSV column becomes the row index.\n",
    "# Alternative inputs in the same directory (keep the save cell's paths in sync):\n",
    "#   ovarian_semi_synthetic_rf.csv\n",
    "#   ovarian_semi_synthetic_l1.csv\n",
    "# NOTE: the save cell at the end writes the stripped data table back to this same path.\n",
    "df = pd.read_csv(\n",
    "    \"/cluster/work/tumorp/analysis/wickilab/data/DepMap_24Q2/real/melanoma_semi_synthetic_l1.csv\",\n",
    "    index_col=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Split column-wise by position: the first two columns form the ground-truth table,\n",
    "# the remaining columns form the data table.\n",
    "# `.copy()` makes both frames independent of `df`: df_data is assigned into in the next\n",
    "# cell, and doing that on a bare iloc slice can raise SettingWithCopyWarning and may\n",
    "# write through to `df` depending on the pandas version.\n",
    "df_gt = df.iloc[:, :2].copy()\n",
    "df_data = df.iloc[:, 2:].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Threshold the first two columns of df_data at 0.5 and store them back as 0/1 flags.\n",
    "df_data.iloc[:, :2] = df_data.iloc[:, :2].gt(0.5).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1989795918367347"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of rows in which the first df_data column is strictly greater than the second.\n",
    "df_data.iloc[:, 0].gt(df_data.iloc[:, 1]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>immuno</th>\n",
       "      <th>titodeath_geq4</th>\n",
       "      <th>pred_a0_y2 (immuno)</th>\n",
       "      <th>pred_a1_y2 (immuno)</th>\n",
       "      <th>TuPro</th>\n",
       "      <th>treline</th>\n",
       "      <th>age</th>\n",
       "      <th>charlci</th>\n",
       "      <th>clinstage</th>\n",
       "      <th>pradiot</th>\n",
       "      <th>pio</th>\n",
       "      <th>psyst</th>\n",
       "      <th>ecog</th>\n",
       "      <th>brainmets</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>vajun-jinok</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.589392</td>\n",
       "      <td>0.552390</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pifop-balap</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.364219</td>\n",
       "      <td>0.448789</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mijud-dator</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.659857</td>\n",
       "      <td>0.453112</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fobus-fudor</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.209008</td>\n",
       "      <td>0.357861</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tabom-supum</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.832661</td>\n",
       "      <td>0.700153</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fizol-botad</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.283023</td>\n",
       "      <td>0.234100</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fofiz-tuvak</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.374240</td>\n",
       "      <td>0.286329</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>foniz-rijoj</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.427237</td>\n",
       "      <td>0.235907</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nugiz-bilin</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.684217</td>\n",
       "      <td>0.686841</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bamip-lumak</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.825362</td>\n",
       "      <td>0.366591</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>196 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             immuno  titodeath_geq4  pred_a0_y2 (immuno)  pred_a1_y2 (immuno)  \\\n",
       "ID                                                                              \n",
       "vajun-jinok       1               0             0.589392             0.552390   \n",
       "pifop-balap       1               0             0.364219             0.448789   \n",
       "mijud-dator       1               1             0.659857             0.453112   \n",
       "fobus-fudor       1               0             0.209008             0.357861   \n",
       "tabom-supum       1               0             0.832661             0.700153   \n",
       "...             ...             ...                  ...                  ...   \n",
       "fizol-botad       0               0             0.283023             0.234100   \n",
       "fofiz-tuvak       0               0             0.374240             0.286329   \n",
       "foniz-rijoj       0               0             0.427237             0.235907   \n",
       "nugiz-bilin       0               1             0.684217             0.686841   \n",
       "bamip-lumak       0               0             0.825362             0.366591   \n",
       "\n",
       "             TuPro  treline   age  charlci  clinstage  pradiot  pio  psyst  \\\n",
       "ID                                                                           \n",
       "vajun-jinok    1.0      2.0  50.0      6.0        4.0      1.0  1.0    0.0   \n",
       "pifop-balap    0.0      1.0  67.0      8.0        4.0      0.0  0.0    0.0   \n",
       "mijud-dator    0.0      1.0  53.0      7.0        4.0      0.0  1.0    0.0   \n",
       "fobus-fudor    1.0      3.0  70.0      9.0        4.0      0.0  1.0    1.0   \n",
       "tabom-supum    0.0      1.0  68.0      8.0        4.0      1.0  0.0    0.0   \n",
       "...            ...      ...   ...      ...        ...      ...  ...    ...   \n",
       "fizol-botad    0.0      2.0  75.0      9.0        4.0      0.0  1.0    0.0   \n",
       "fofiz-tuvak    0.0      5.0  41.0      6.0        4.0      1.0  1.0    1.0   \n",
       "foniz-rijoj    1.0      3.0  58.0      7.0        4.0      1.0  1.0    0.0   \n",
       "nugiz-bilin    0.0      2.0  60.0      8.0        4.0      1.0  1.0    0.0   \n",
       "bamip-lumak    1.0      4.0  69.0      8.0        4.0      0.0  1.0    1.0   \n",
       "\n",
       "             ecog  brainmets  \n",
       "ID                            \n",
       "vajun-jinok   0.0        0.0  \n",
       "pifop-balap   2.0        1.0  \n",
       "mijud-dator   1.0        1.0  \n",
       "fobus-fudor   2.0        0.0  \n",
       "tabom-supum   0.0        1.0  \n",
       "...           ...        ...  \n",
       "fizol-botad   1.0        0.0  \n",
       "fofiz-tuvak   3.0        1.0  \n",
       "foniz-rijoj   1.0        0.0  \n",
       "nugiz-bilin   0.0        0.0  \n",
       "bamip-lumak   0.0        0.0  \n",
       "\n",
       "[196 rows x 14 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the loaded frame (pandas abbreviates the repr of long tables).\n",
    "# NOTE(review): the stored output has an older execution count than the load cell, so it\n",
    "# may not correspond to the file currently being read -- re-run before relying on it.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Write the ground-truth columns and the remaining data columns to separate CSVs.\n",
    "# Alternative cohorts (swap the file stem in BOTH paths, matching the load cell):\n",
    "#   ovarian_semi_synthetic_rf\n",
    "#   ovarian_semi_synthetic_l1\n",
    "gt_csv = Path(\"/cluster/work/tumorp/analysis/wickilab/data/DepMap_24Q2/real/melanoma_semi_synthetic_l1_groundtruth.csv\")\n",
    "data_csv = Path(\"/cluster/work/tumorp/analysis/wickilab/data/DepMap_24Q2/real/melanoma_semi_synthetic_l1.csv\")\n",
    "\n",
    "# data_csv is the same file the load cell reads, so this cell replaces the combined input\n",
    "# with its column-stripped version. Running load -> split -> save again on the stripped\n",
    "# file would peel off two more columns and overwrite the ground truth with them, so\n",
    "# refuse to run once a ground-truth file is already present.\n",
    "if gt_csv.exists():\n",
    "    raise FileExistsError(\n",
    "        f\"{gt_csv} already exists, so {data_csv} has probably been split already. \"\n",
    "        \"Restore the original combined CSV and remove the ground-truth file before re-running.\"\n",
    "    )\n",
    "\n",
    "df_gt.to_csv(gt_csv)\n",
    "df_data.to_csv(data_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv_tumorp",
   "language": "python",
   "name": ".venv_tumorp"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}