"""Scratch experiments: random forest and Gaussian mixture on synthetic data.

The script builds a synthetic classification dataset, holds out a test
split, fits a random forest on the training split, prints the feature
importances, the out-of-bag score and the held-out error, exports one tree of
the forest to ``tree.dot``, and finally fits a Gaussian mixture model to the
feature matrix.
"""

# NOTE: notes kept from an earlier, disabled Titanic experiment
# (pandas ``read_csv('data/train.csv')`` + ``sklearn.svm``):
#   columns:  'PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age',
#             'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'
#   features: Sex, Age, Pclass, Cabin, SibSp, Parch, Embarked, Name, Ticket
#   label:    Survived
#   models to try: SVM (kernel = 'rbf'), Bayesian logistic regression

import numpy as np
import pydot
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz

# Extract features using sliding window and form the training dataset,
# test dataset.
X, y = make_classification(n_samples=10000, n_features=6,
                           n_informative=3, n_redundant=0,
                           random_state=0, shuffle=True)

print(X.shape)  # 10000x6
print(y.shape)  # 10000

# TODO: Feature extraction using sliding window

train_features, test_features, train_labels, test_labels = train_test_split(
    X, y, test_size=0.25, random_state=42)
# TODO: K-fold cross validation

print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)

# random_state pins the bootstrap samples and feature sub-sampling so the
# printed numbers are reproducible, like the seeded data generation and split.
clf = RandomForestClassifier(n_estimators=100, max_depth=3, oob_score=True,
                             random_state=0)

# Fit on the training split only: fitting on the full (X, y) would let the
# model see the test rows and make the held-out error below meaningless.
clf.fit(train_features, train_labels)

print(clf.feature_importances_)
# print(clf.oob_decision_function_)
print(clf.oob_score_)

# Mean absolute difference between predicted and true labels on the
# held-out split.
predictions = clf.predict(test_features)
errors = np.abs(predictions - test_labels)
print("M A E: ", round(np.mean(errors), 2))


# Visualization
# export_graphviz documents feature_names as strings, so the 1-based column
# numbers are converted to str; the labels in the dot file stay "1".."6".
feature_list = [str(index) for index in range(1, X.shape[1] + 1)]
# Pull out one tree from the forest
tree = clf.estimators_[5]
# Export the image to a dot file
export_graphviz(tree, out_file='tree.dot',
                feature_names=feature_list, rounded=True, precision=1)
# Use dot file to create a graph; the one-element unpacking fails loudly if
# the file does not contain exactly one graph.
(graph, ) = pydot.graph_from_dot_file('tree.dot')
# Optional: write the graph to a png file (left disabled, as in the original)
# graph.write_png('tree_.png')

# TODO: Confusion matrix, Accuracy


# GMM
# The mixture model is unsupervised, so only the feature matrix is passed;
# random_state makes the initialisation reproducible.
gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
gmm.fit(X)