Diff of /FinalTestPCR.ipynb [000000] .. [4bdf3e]

Switch to unified view

a b/FinalTestPCR.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 65,
6
   "id": "09f2ef64",
7
   "metadata": {},
8
   "outputs": [],
9
   "source": [
10
    "# Load the persisted model. joblib.load unpickles the file, so only point it at a trusted artefact.\n",
11
    "import joblib\n",
12
    "MODEL_PATH = 'XGBoost_final.pkl'\n",
13
    "model = joblib.load(MODEL_PATH)"
14
   ]
15
  },
16
  {
17
   "cell_type": "code",
18
   "execution_count": 60,
19
   "id": "d4325fde",
20
   "metadata": {
21
    "scrolled": false
22
   },
23
   "outputs": [],
24
   "source": [
25
    "# Read the test spreadsheet into test_Df.\n",
26
    "import pandas as pd\n",
27
    "TEST_FILE = 'TestDatasetExample (1).xls'  # change this to the required file name/path.\n",
28
    "test_Df = pd.read_excel(TEST_FILE)"
29
   ]
30
  },
31
  {
32
   "cell_type": "code",
33
   "execution_count": 61,
34
   "id": "bca2827b",
35
   "metadata": {},
36
   "outputs": [],
37
   "source": [
38
    "from sklearn.decomposition import PCA\n",
39
    "from sklearn.preprocessing import StandardScaler\n",
40
    "\n",
41
    "# Turn the raw test_Df into the feature table handed to the model (the ID column is still kept).\n",
42
    "# 999 marks a missing value. Map it to NaN rather than None: an explicit None value has behaved\n",
43
    "# differently across pandas versions and can leave object-dtype columns behind.\n",
44
    "test_Df = test_Df.replace(999, float('nan'))\n",
45
    "\n",
46
    "# Impute every column with its mode. Assign the result back: fillna(inplace=True) on a selected\n",
47
    "# column is chained assignment, which pandas deprecates and which fails under Copy-on-Write.\n",
48
    "for col in test_Df.columns:\n",
49
    "    test_Df[col] = test_Df[col].fillna(test_Df[col].mode()[0])\n",
50
    "\n",
51
    "# Only columns 11 onwards go through PCA; they are standardised first.\n",
52
    "# NOTE(review): the scalers and the PCA below are fitted on this test data, so the components may\n",
53
    "# not match the ones the model was trained on - TODO confirm and reuse the training-time fits.\n",
54
    "scaler = StandardScaler()\n",
55
    "test_Df_forPCA = scaler.fit_transform(test_Df.iloc[:, 11:])\n",
56
    "\n",
57
    "pca = PCA(n_components=6)  # Reduce to 6 principal components\n",
58
    "test_Df_afterPCA = pca.fit_transform(test_Df_forPCA)\n",
59
    "\n",
60
    "# Reuse test_Df's index so the components line up row for row with the first 11 columns.\n",
61
    "test_Df_afterPCA = pd.DataFrame(test_Df_afterPCA, index=test_Df.index)\n",
62
    "test_Df = test_Df.iloc[:, 0:11].merge(test_Df_afterPCA, left_index=True, right_index=True, how='right')\n",
63
    "new_column_names = {i: f'COMP{i}' for i in range(6)}\n",
64
    "test_Df = test_Df.rename(columns=new_column_names)\n",
65
    "\n",
66
    "# Standardise Age (also fitted on the test data - see the note above).\n",
67
    "test_Df[['Age']] = StandardScaler().fit_transform(test_Df[['Age']])\n",
68
    "\n",
69
    "# Proliferation is not needed, so drop it.\n",
70
    "test_Df = test_Df.drop(columns=['Proliferation'])"
71
   ]
72
  },
73
  {
74
   "cell_type": "code",
75
   "execution_count": 62,
76
   "id": "42d4a3c7",
77
   "metadata": {},
78
   "outputs": [],
79
   "source": [
80
    "# Run the model on every column of test_Df except ID.\n",
81
    "test_predictions = model.predict(test_Df.drop('ID', axis=1))"
82
   ]
83
  },
84
  {
85
   "cell_type": "code",
86
   "execution_count": 63,
87
   "id": "da1931da",
88
   "metadata": {},
89
   "outputs": [],
90
   "source": [
91
    "test_predictions = pd.DataFrame(data=test_predictions, columns=['pCR(result)'])  # one-column frame named pCR(result)"
92
   ]
93
  },
94
  {
95
   "cell_type": "code",
96
   "execution_count": 64,
97
   "id": "3fba86a0",
98
   "metadata": {},
99
   "outputs": [],
100
   "source": [
101
    "# Put each ID next to its prediction and write the table to an Excel workbook.\n",
102
    "id_column = test_Df[['ID']]\n",
103
    "to_excel_Df = pd.concat([id_column, test_predictions], axis=1)\n",
104
    "# index=False leaves the row index out of the sheet; header=True writes the column names.\n",
105
    "OUTPUT_FILE = 'FinalTestPCR.xlsx'\n",
106
    "to_excel_Df.to_excel(OUTPUT_FILE, index=False, header=True)"
107
   ]
108
  },
109
  {
110
   "cell_type": "code",
111
   "execution_count": null,
112
   "id": "45827c84",
113
   "metadata": {},
114
   "outputs": [],
115
   "source": []
116
  },
117
  {
118
   "cell_type": "code",
119
   "execution_count": null,
120
   "id": "77571afa",
121
   "metadata": {},
122
   "outputs": [],
123
   "source": []
124
  }
125
 ],
126
 "metadata": {
127
  "kernelspec": {
128
   "display_name": "Python 3 (ipykernel)",
129
   "language": "python",
130
   "name": "python3"
131
  },
132
  "language_info": {
133
   "codemirror_mode": {
134
    "name": "ipython",
135
    "version": 3
136
   },
137
   "file_extension": ".py",
138
   "mimetype": "text/x-python",
139
   "name": "python",
140
   "nbconvert_exporter": "python",
141
   "pygments_lexer": "ipython3",
142
   "version": "3.10.11"
143
  }
144
 },
145
 "nbformat": 4,
146
 "nbformat_minor": 5
147
}