--- a +++ b/diabatics.py @@ -0,0 +1,128 @@ +import numpy as np +import pandas as pd +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn import svm +from sklearn.metrics import accuracy_score + +dataset=pd.read_csv('diabetes.csv') + +# print(dataset.head()) +print(dataset.shape) +# print(dataset.describe()) +# print(dataset['Outcome'].value_counts()) +# print(dataset.groupby('Outcome').mean()) + +#seperating data and labels +X=dataset.drop(columns='Outcome',axis=1) +Y=dataset["Outcome"] +print(X) +# print(Y) + +# standardisation +scaler=StandardScaler() +scaler.fit(X) +std_data=scaler.transform(X) +print(std_data) +X=std_data + +# print(X) +# print(Y) +#splitting training and training set +x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.1,stratify=Y,random_state=3) +print(x_train.shape,x_test.shape,y_train.shape,y_test.shape) + +#training the model +classifier=svm.SVC(kernel='linear') + +#training the support vector machine classifier +print(classifier.fit(x_train,y_train)) +#model evaluation + +x_train_accuracy=classifier.predict(x_train) +training_data_accuracy=accuracy_score(x_train_accuracy,y_train) +print("accuracy score of training data:",training_data_accuracy) + +x_test_accuracy=classifier.predict(x_test) +test_data_accuracy=accuracy_score(x_test_accuracy,y_test) +print("accuracy score of test data:",test_data_accuracy) + +#making a prediction +input_data=(10,100,88,60,110,46.8,0.942,31) +#changing the input data to numpy array + +numpy_array=np.asarray(input_data) +#reshape the array +reshaped=numpy_array.reshape(1,-1) +std_data=scaler.transform(reshaped) +# print(std_data) +prediction=classifier.predict(std_data) +# print(prediction) + +if(prediction[0]==0): + print("the person is not diabetic") +else: + print("the person is diabetic") + + + + + +# import numpy as np +# import pandas as pd +# from sklearn.preprocessing import StandardScaler +# from sklearn.model_selection import train_test_split +# from sklearn import svm +# from sklearn.metrics import accuracy_score + +# class DiabetesPredictor: +# def __init__(self, dataset_path): +# self.dataset_path = dataset_path +# self.model = svm.SVC(kernel='linear') +# self.scaler = StandardScaler() + +# def load_data(self): +# dataset = pd.read_csv(self.dataset_path) +# X = dataset.drop(columns='Outcome', axis=1) +# Y = dataset['Outcome'] +# return X, Y + +# def preprocess_data(self, X): +# self.scaler.fit(X) +# return self.scaler.transform(X) + +# def train_model(self, X, Y): +# X_train, X_test, Y_train, Y_test = train_test_split( +# X, Y, test_size=0.2, stratify=Y, random_state=2 +# ) +# self.model.fit(X_train, Y_train) + +# # Evaluate the model +# train_accuracy = accuracy_score(Y_train, self.model.predict(X_train)) +# test_accuracy = accuracy_score(Y_test, self.model.predict(X_test)) + +# return train_accuracy, test_accuracy + +# def predict(self, input_data): +# input_array = np.asarray(input_data).reshape(1, -1) +# standardized_data = self.scaler.transform(input_array) +# prediction = self.model.predict(standardized_data) +# return 'Diabetic' if prediction[0] == 1 else 'Not Diabetic' + +# if __name__ == "__main__": +# # Initialize predictor +# predictor = DiabetesPredictor('diabetes.csv') + +# # Load and preprocess data +# X, Y = predictor.load_data() +# X = predictor.preprocess_data(X) + +# # Train the model +# train_accuracy, test_accuracy = predictor.train_model(X, Y) +# print(f"Training Accuracy: {train_accuracy:.2f}") +# print(f"Testing Accuracy: {test_accuracy:.2f}") + +# # Make a prediction +# input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51) +# result = predictor.predict(input_data) +# print(f"Prediction for input data: {result}")