--- /dev/null
+++ b/ClassifierCompare.py
@@ -0,0 +1,126 @@
+# Code source: Gaël Varoquaux
+#              Andreas Müller
+# Modified for documentation by Jaques Grobler
+# License: BSD 3 clause
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.naive_bayes import GaussianNB
+from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
+                                           QuadraticDiscriminantAnalysis)
+
+h = 0.02  # step size in the mesh
+
+names = ["KNN", "Linear SVM", "Naive Bayes", "LDA", "QDA"]
+
+classifiers = [
+    KNeighborsClassifier(),
+    SVC(kernel="linear", C=0.025),
+    GaussianNB(),
+    LinearDiscriminantAnalysis(),
+    QuadraticDiscriminantAnalysis()]
+
+# Pool the training and validation sets; they are re-split per dataset below.
+train_original = pd.read_csv("DataUsed/method23_real2.csv")
+test_original = pd.read_csv("DataUsed/method23_real2_valid.csv")
+df = pd.concat([train_original, test_original], ignore_index=True)
+
+targetIndex = -1  # the class label sits in the last column
+
+df = df.values
+# Column 0 holds the 450 nm band, column -6 the 610 nm band and column -2
+# the 810 nm band; each dataset pairs two of the three wavelengths.
+X1 = df[:, [0, -2]]
+X2 = df[:, [0, -6]]
+X3 = df[:, [-6, -2]]
+y = df[:, targetIndex]
+
+datasetsNames = ["450, 810 nm", "450, 610 nm", "610, 810 nm"]
+datasets = [(X1, y), (X2, y), (X3, y)]
+
+figure = plt.figure(figsize=(27, 9))
+i = 1
+# iterate over datasets
+for ds_cnt, ds in enumerate(datasets):
+    # preprocess dataset, split into training and test part
+    X, y = ds
+    X = StandardScaler().fit_transform(X)
+    # 80% of the pooled data is held out for testing; no random_state is
+    # fixed, so the split (and the reported accuracies) varies between runs.
+    X_train, X_test, y_train, y_test = \
+        train_test_split(X, y, test_size=0.8)
+
+    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
+    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                         np.arange(y_min, y_max, h))
+
+    # just plot the dataset first
+    cm = plt.cm.RdBu
+    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
+    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
+    if ds_cnt == 0:
+        ax.set_title("Input data")
+    # Plot the training points
+    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
+               edgecolors='k')
+    # Plot the testing points
+    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
+               alpha=0.6, edgecolors='k')
+    ax.set_xlim(xx.min(), xx.max())
+    ax.set_ylim(yy.min(), yy.max())
+    ax.set_xticks(())
+    ax.set_yticks(())
+    ax.set_ylabel(datasetsNames[ds_cnt])
+    i += 1
+
+    # iterate over classifiers
+    for name, clf in zip(names, classifiers):
+        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
+        clf.fit(X_train, y_train)
+        score = clf.score(X_test, y_test)
+
+        # Plot the decision boundary. For that, we will assign a color to
+        # each point in the mesh [x_min, x_max]x[y_min, y_max].
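+        # SVC, LDA and QDA expose decision_function (a signed distance to the
+        # boundary); KNN and GaussianNB only provide predict_proba. Either
+        # yields one scalar score per mesh point for the filled contour.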
+        if hasattr(clf, "decision_function"):
+            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+        else:
+            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+
+        # Put the result into a color plot
+        Z = Z.reshape(xx.shape)
+        ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)
+
+        # Plot the training points
+        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
+                   edgecolors='k')
+        # Plot the testing points
+        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
+                   edgecolors='k', alpha=0.6)
+
+        ax.set_xlim(xx.min(), xx.max())
+        ax.set_ylim(yy.min(), yy.max())
+        ax.set_xticks(())
+        ax.set_yticks(())
+        if ds_cnt == 0:
+            ax.set_title(name)
+        # Report accuracy without the leading zero (e.g. "Accuracy: .87").
+        ax.text(xx.max() - 0.3, yy.min() + 0.3,
+                'Accuracy: ' + ('%.2f' % score).lstrip('0'),
+                size=15, horizontalalignment='right')
+        i += 1
+
+plt.tight_layout()
+plt.show()
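+
+# Optional (not in the original script): to also write the grid to disk, call
+# figure.savefig("ClassifierCompare.png", dpi=150, bbox_inches="tight")
+# before plt.show(); the filename here is an arbitrary choice.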