b/model.py

import pandas as pd
import matplotlib.pyplot as plt
import pickle

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense

df = pd.read_csv('cancer patient data sets.csv')

level_mapping = {'Low': 0.0, 'Medium': 1.0, 'High': 2.0}

df['Level'] = df['Level'].replace(level_mapping)

df.to_csv('cancer patient data sets.csv', index=False)

X = df.iloc[:, 2:-1].values  # Select all rows and columns from index 2 up to the last column (excluding the index, Patient Id and Level columns)
y = df.iloc[:, -1].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

model = Sequential()
model.add(Dense(32, activation='relu', input_dim=23))
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax'))  # softmax over the 3 one-hot encoded Level classes

# Compile the model
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',  # multi-class loss matching the 3-class softmax output
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, keras.utils.to_categorical(y_train, 3),
                    epochs=10, batch_size=32,
                    validation_data=(X_test, keras.utils.to_categorical(y_test, 3)),
                    verbose=2)

# Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, keras.utils.to_categorical(y_test, 3))
print('Test accuracy:', round(accuracy * 100, 2), '%')

# Plot the training and validation loss and accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Save the model
model.save('my_model.h5')

# Save the scaler object to a file
filename = 'scaler.pkl'
with open(filename, 'wb') as f:
    pickle.dump(scaler, f)
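
# Example usage (a minimal sketch, not part of the training run above): the saved model
# and scaler can be reloaded later for inference. `new_sample` is a hypothetical 2-D
# array with the same 23 feature columns, in the same order, as the training data.
#
#     import numpy as np
#     from keras.models import load_model
#
#     model = load_model('my_model.h5')
#     with open('scaler.pkl', 'rb') as f:
#         scaler = pickle.load(f)
#
#     new_sample_scaled = scaler.transform(new_sample)
#     probabilities = model.predict(new_sample_scaled)    # one probability per class
#     predicted_level = np.argmax(probabilities, axis=1)  # 0 = Low, 1 = Medium, 2 = High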