Diff of /scratch.py [000000] .. [5c6b9a]

Switch to side-by-side view

--- a
+++ b/scratch.py
@@ -0,0 +1,83 @@
+"""
+import pandas as pd
+from sklearn import svm
+file = 'data/train.csv'
+
+train_data = pd.read_csv(file)
+
+print(train_data.head())
+
+print(train_data.columns)
+
+#features = Sex, Age, Pclass, Cabin, SibSp, Parch, Embarked, Name, Ticket
+#label = Survived
+
+#'PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp','Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'
+
+#SVM
+#Bayesian logistic regression
+kernel = 'rbf'
+svm.SVC()
+"""
+
+# Extract features using sliding window and form the training dataset, test dataset
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+from sklearn.mixture import GaussianMixture
+
+import numpy as np
+
+X, y = make_classification(n_samples=10000, n_features=6,
+                            n_informative=3, n_redundant=0,
+                            random_state=0, shuffle=True)
+
+print(X.shape)  # 10000x6
+print(y.shape)  # 10000
+
+# TODO: Feature extraction using sliding window
+
+train_features, test_features, train_labels, test_labels = train_test_split(X, y,
+                                                                            test_size=0.25, random_state=42)
+# TODO: K-fold cross validation
+
+print('Training Features Shape:', train_features.shape)
+print('Training Labels Shape:', train_labels.shape)
+print('Testing Features Shape:', test_features.shape)
+print('Testing Labels Shape:', test_labels.shape)
+
+clf = RandomForestClassifier(n_estimators=100, max_depth=3, oob_score=True
+                             )
+
+clf.fit(X, y)
+
+print(clf.feature_importances_)
+#print(clf.oob_decision_function_)
+print(clf.oob_score_)
+
+predictions = clf.predict(test_features)
+errors = abs(predictions - test_labels)
+print("M A E: ", round(np.mean(errors), 2))
+
+
+# Visualization
+feature_list = [1, 2, 3, 4, 5, 6]
+from sklearn.tree import export_graphviz
+import pydot
+# Pull out one tree from the forest
+tree = clf.estimators_[5]
+# Export the image to a dot file
+export_graphviz(tree, out_file='tree.dot', feature_names=feature_list, rounded=True, precision=1)
+# Use dot file to create a graph
+(graph, ) = pydot.graph_from_dot_file('tree.dot')
+# Write graph to a png file
+#graph.write_png('tree_.png')
+
+# TODO: Confusion matrix, Accuracy
+
+
+# GMM
+
+gmm = GaussianMixture(n_components=3, covariance_type='full')
+gmm.fit(X, y)