Machine-Learning-for-Dise / Git / Diff of /FinalTestPCR.ipynb

Models:

joseph-gordon/

Machine-Learning-for-Dise

Downloads: 1

Diff of /FinalTestPCR.ipynb [000000] .. [4bdf3e]

Switch to unified view

 b/FinalTestPCR.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "09f2ef64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# importing the model\n",
+    "\n",
+    "import joblib\n",
+    "model = joblib.load('XGBoost_final.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "d4325fde",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# loading the test dataset\n",
+    "\n",
+    "import pandas as pd\n",
+    "test_Df = pd.read_excel('TestDatasetExample (1).xls') # change this to the required file name/path."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "bca2827b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "# preprocessing and null value removal.\n",
+    "\n",
+    "test_Df=test_Df.replace(999, None)\n",
+    "\n",
+    "#imputation using mode\n",
+    "for col in test_Df.columns:\n",
+    "    test_Df[col].fillna(test_Df[col].mode()[0], inplace=True)\n",
+    "\n",
+    "# only column 11 onwards taken for pca\n",
+    "test_Df_forPCA = test_Df.iloc[:,11:]\n",
+    "\n",
+    "# Standardize the features\n",
+    "scaler = StandardScaler()\n",
+    "test_Df_forPCA = scaler.fit_transform(test_Df_forPCA)\n",
+    "\n",
+    "# Perform PCA\n",
+    "pca = PCA(n_components=6)  # Reduce to 6 principal components\n",
+    "test_Df_afterPCA = pca.fit_transform(test_Df_forPCA)\n",
+    "\n",
+    "test_Df_afterPCA = pd.DataFrame(test_Df_afterPCA)\n",
+    "test_Df = test_Df.iloc[:,0:11].merge(test_Df_afterPCA, left_index = True, right_index = True, how = 'right')\n",
+    "\n",
+    "new_column_names = {0: 'COMP0', 1: 'COMP1', 2: 'COMP2',3: 'COMP3',4:'COMP4',5:'COMP5',}\n",
+    "test_Df = test_Df.rename(columns=new_column_names)\n",
+    "\n",
+    "# scaling all the values\n",
+    "test_Df[['Age']] = StandardScaler().fit_transform(test_Df[['Age']])\n",
+    "\n",
+    "#drop proliferation as its not needed.\n",
+    "test_Df = test_Df.drop(columns=['Proliferation'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "42d4a3c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Making predictions on data\n",
+    "test_predictions = model.predict(test_Df.drop(columns=['ID']))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "da1931da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_predictions = pd.DataFrame(test_predictions, columns=['pCR(result)'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "3fba86a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_excel_Df = pd.DataFrame(test_Df['ID'])\n",
+    "#data after handling missing values\n",
+    "to_excel_Df = pd.concat((to_excel_Df, test_predictions), axis=1)\n",
+    "to_excel_Df.to_excel('FinalTestPCR.xlsx', index=False, header=True)\n",
+    "\n",
+    "                        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45827c84",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77571afa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}