|
a |
|
b/diabatics.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as pd |
|
|
3 |
from sklearn.preprocessing import StandardScaler |
|
|
4 |
from sklearn.model_selection import train_test_split |
|
|
5 |
from sklearn import svm |
|
|
6 |
from sklearn.metrics import accuracy_score |
|
|
7 |
|
|
|
8 |
# Train a linear SVM on the diabetes dataset and classify one sample.
#
# Reads 'diabetes.csv' from the current working directory; every column
# except 'Outcome' is used as a feature and 'Outcome' is the label.
dataset = pd.read_csv('diabetes.csv')

# print(dataset.head())
print(dataset.shape)
# print(dataset.describe())
# print(dataset['Outcome'].value_counts())
# print(dataset.groupby('Outcome').mean())

# Separating data and labels.
X = dataset.drop(columns='Outcome')
Y = dataset["Outcome"]
print(X)
# print(Y)

# Remember the feature column names: X is rebound to a plain array below,
# and the names are needed again to build the single-sample frame.
feature_names = list(X.columns)

# Standardisation.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test rows influence the scaling statistics. Kept
# as-is so the reported accuracies do not change; consider fitting on the
# training split only.
scaler = StandardScaler()
scaler.fit(X)
std_data = scaler.transform(X)
print(std_data)
X = std_data

# print(X)
# print(Y)

# Splitting into training and test sets (10% held out, stratified on the
# label, fixed seed for reproducibility).
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.1, stratify=Y, random_state=3
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Training the support vector machine classifier.
classifier = svm.SVC(kernel='linear')
classifier.fit(x_train, y_train)
# fit() returns the estimator itself, so this prints the same text as
# printing the return value of fit().
print(classifier)

# Model evaluation.
# These two names hold the predicted labels (not accuracies); the names are
# kept unchanged for compatibility. accuracy_score takes (y_true, y_pred).
x_train_accuracy = classifier.predict(x_train)
training_data_accuracy = accuracy_score(y_train, x_train_accuracy)
print("accuracy score of training data:", training_data_accuracy)

x_test_accuracy = classifier.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_accuracy)
print("accuracy score of test data:", test_data_accuracy)

# Making a prediction for a single sample; values are given in the same
# order as the feature columns of the dataset.
input_data = (10, 100, 88, 60, 110, 46.8, 0.942, 31)

# Change the input data to a NumPy array and reshape it to one row.
numpy_array = np.asarray(input_data)
reshaped = numpy_array.reshape(1, -1)

# The scaler was fitted on a DataFrame, so hand it a DataFrame with the same
# column names; transforming a bare array makes scikit-learn warn that the
# input lacks valid feature names.
input_frame = pd.DataFrame(reshaped, columns=feature_names)
std_data = scaler.transform(input_frame)
# print(std_data)
prediction = classifier.predict(std_data)
# print(prediction)

if prediction[0] == 0:
    print("the person is not diabetic")
else:
    print("the person is diabetic")
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 |
# import numpy as np |
|
|
72 |
# import pandas as pd |
|
|
73 |
# from sklearn.preprocessing import StandardScaler |
|
|
74 |
# from sklearn.model_selection import train_test_split |
|
|
75 |
# from sklearn import svm |
|
|
76 |
# from sklearn.metrics import accuracy_score |
|
|
77 |
|
|
|
78 |
# class DiabetesPredictor: |
|
|
79 |
# def __init__(self, dataset_path): |
|
|
80 |
# self.dataset_path = dataset_path |
|
|
81 |
# self.model = svm.SVC(kernel='linear') |
|
|
82 |
# self.scaler = StandardScaler() |
|
|
83 |
|
|
|
84 |
# def load_data(self): |
|
|
85 |
# dataset = pd.read_csv(self.dataset_path) |
|
|
86 |
# X = dataset.drop(columns='Outcome', axis=1) |
|
|
87 |
# Y = dataset['Outcome'] |
|
|
88 |
# return X, Y |
|
|
89 |
|
|
|
90 |
# def preprocess_data(self, X): |
|
|
91 |
# self.scaler.fit(X) |
|
|
92 |
# return self.scaler.transform(X) |
|
|
93 |
|
|
|
94 |
# def train_model(self, X, Y): |
|
|
95 |
# X_train, X_test, Y_train, Y_test = train_test_split( |
|
|
96 |
# X, Y, test_size=0.2, stratify=Y, random_state=2 |
|
|
97 |
# ) |
|
|
98 |
# self.model.fit(X_train, Y_train) |
|
|
99 |
|
|
|
100 |
# # Evaluate the model |
|
|
101 |
# train_accuracy = accuracy_score(Y_train, self.model.predict(X_train)) |
|
|
102 |
# test_accuracy = accuracy_score(Y_test, self.model.predict(X_test)) |
|
|
103 |
|
|
|
104 |
# return train_accuracy, test_accuracy |
|
|
105 |
|
|
|
106 |
# def predict(self, input_data): |
|
|
107 |
# input_array = np.asarray(input_data).reshape(1, -1) |
|
|
108 |
# standardized_data = self.scaler.transform(input_array) |
|
|
109 |
# prediction = self.model.predict(standardized_data) |
|
|
110 |
# return 'Diabetic' if prediction[0] == 1 else 'Not Diabetic' |
|
|
111 |
|
|
|
112 |
# if __name__ == "__main__": |
|
|
113 |
# # Initialize predictor |
|
|
114 |
# predictor = DiabetesPredictor('diabetes.csv') |
|
|
115 |
|
|
|
116 |
# # Load and preprocess data |
|
|
117 |
# X, Y = predictor.load_data() |
|
|
118 |
# X = predictor.preprocess_data(X) |
|
|
119 |
|
|
|
120 |
# # Train the model |
|
|
121 |
# train_accuracy, test_accuracy = predictor.train_model(X, Y) |
|
|
122 |
# print(f"Training Accuracy: {train_accuracy:.2f}") |
|
|
123 |
# print(f"Testing Accuracy: {test_accuracy:.2f}") |
|
|
124 |
|
|
|
125 |
# # Make a prediction |
|
|
126 |
# input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51) |
|
|
127 |
# result = predictor.predict(input_data) |
|
|
128 |
# print(f"Prediction for input data: {result}") |