[5c6b9a]: / scratch.py

Download this file

84 lines (59 with data), 2.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
import pandas as pd
from sklearn import svm
file = 'data/train.csv'
train_data = pd.read_csv(file)
print(train_data.head())
print(train_data.columns)
#features = Sex, Age, Pclass, Cabin, SibSp, Parch, Embarked, Name, Ticket
#label = Survived
#'PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp','Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'
#SVM
#Bayesian logistic regression
kernel = 'rbf'
svm.SVC()
"""
# Extract features using sliding window and form the training dataset, test dataset
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture
import numpy as np
# Build a synthetic classification dataset: 10000 samples with 6 features,
# 3 of which are informative and none redundant. The fixed seed keeps the
# generated data identical between runs.
X, y = make_classification(
    n_samples=10000,
    n_features=6,
    n_informative=3,
    n_redundant=0,
    random_state=0,
    shuffle=True,
)
print(X.shape) # 10000x6
print(y.shape) # 10000
# TODO: Feature extraction using sliding window
# Hold out 25% of the samples for evaluation; the seed fixes the split.
train_features, test_features, train_labels, test_labels = train_test_split(
    X, y, test_size=0.25, random_state=42
)
# TODO: K-fold cross validation
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)
clf = RandomForestClassifier(n_estimators=100, max_depth=3, oob_score=True)
# Fit on the training split only. Fitting on the full (X, y) would let the
# model see the held-out rows, so the error printed below would be measured
# on data the forest was trained on and would be optimistically biased.
clf.fit(train_features, train_labels)
print(clf.feature_importances_)
#print(clf.oob_decision_function_)
# Out-of-bag score, now computed from the training split only.
print(clf.oob_score_)
predictions = clf.predict(test_features)
# Absolute difference between predicted and true labels on the held-out
# rows; its mean is printed under the "M A E" label below.
errors = np.abs(predictions - test_labels)
print("M A E: ", round(np.mean(errors), 2))
# Visualization
# export_graphviz documents feature_names as strings, so label the columns
# "1".."6" as text rather than as integers (same labels in the dot output).
feature_list = [str(index) for index in range(1, X.shape[1] + 1)]
from sklearn.tree import export_graphviz
import pydot
# Pull out one tree from the forest
tree = clf.estimators_[5]
# Export the image to a dot file
export_graphviz(tree, out_file='tree.dot', feature_names=feature_list, rounded=True, precision=1)
# Use dot file to create a graph
# graph_from_dot_file returns a list of graphs; the file holds exactly one.
(graph, ) = pydot.graph_from_dot_file('tree.dot')
# Write graph to a png file
#graph.write_png('tree_.png')
# TODO: Confusion matrix, Accuracy
# GMM
gmm = GaussianMixture(n_components=3, covariance_type='full')
# GaussianMixture is unsupervised: fit() ignores y, so only X is passed.
gmm.fit(X)