stress_affect_detection / Git / [5c6b9a] /scratch.py

Models:
Robert-Orr/
stress_affect_detection
Downloads: 1
[5c6b9a]: / scratch.py
History
Download this file
84 lines (59 with data), 2.4 kB

"""
import pandas as pd
from sklearn import svm
file = 'data/train.csv'

train_data = pd.read_csv(file)

print(train_data.head())

print(train_data.columns)

#features = Sex, Age, Pclass, Cabin, SibSp, Parch, Embarked, Name, Ticket
#label = Survived

#'PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp','Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'

#SVM
#Bayesian logistic regression
kernel = 'rbf'
svm.SVC()
"""

# Extract features using a sliding window and form the training and test datasets

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture

import numpy as np

# Synthetic classification dataset: 10000 samples with 6 features, of which
# 3 are informative and none are redundant.  The generator is seeded, so the
# same data is produced on every run.
X, y = make_classification(
    n_samples=10000,
    n_features=6,
    n_informative=3,
    n_redundant=0,
    random_state=0,
    shuffle=True,
)

# Expected output: (10000, 6) for the features and (10000,) for the labels.
print(X.shape)
print(y.shape)

# TODO: Feature extraction using sliding window

# Hold out 25% of the samples for testing; the split is seeded as well.
(train_features, test_features,
 train_labels, test_labels) = train_test_split(
    X, y, test_size=0.25, random_state=42,
)
# TODO: K-fold cross validation

# Report the shape of each partition produced by the split.
for _caption, _part in (
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_labels),
    ('Testing Features Shape:', test_features),
    ('Testing Labels Shape:', test_labels),
):
    print(_caption, _part.shape)

# Shallow random forest: 100 trees limited to depth 3, with out-of-bag
# scoring enabled so that oob_score_ is available after fitting.
clf = RandomForestClassifier(n_estimators=100, max_depth=3, oob_score=True)

# Fit on the training split only.  Fitting on the full (X, y) would expose
# the model to the held-out samples, so the error printed below would no
# longer be a genuine test error.
clf.fit(train_features, train_labels)

print(clf.feature_importances_)
#print(clf.oob_decision_function_)
print(clf.oob_score_)

# Evaluate on the held-out split.  The figure printed is the mean absolute
# difference between predicted and true labels; for 0/1 class labels this is
# the same as the fraction of misclassified samples.
predictions = clf.predict(test_features)
errors = abs(predictions - test_labels)
print("M A E: ", round(np.mean(errors), 2))


# Visualization: dump a single tree of the fitted forest to a Graphviz DOT
# file and load it back as a pydot graph.

# 1-based integer labels used as display names for the six feature columns.
feature_list = list(range(1, 7))

from sklearn.tree import export_graphviz
import pydot

# Pick one estimator out of the forest (index 5) to visualize.
tree = clf.estimators_[5]

# Write the chosen tree to 'tree.dot'.
export_graphviz(
    tree,
    out_file='tree.dot',
    feature_names=feature_list,
    rounded=True,
    precision=1,
)

# Parse the DOT file; the unpacking fails unless exactly one graph comes back.
[graph] = pydot.graph_from_dot_file('tree.dot')
# Write graph to a png file
#graph.write_png('tree_.png')

# TODO: Confusion matrix, Accuracy


# GMM: three-component Gaussian mixture with 'full' covariance, fitted to
# the complete feature matrix X.

gmm = GaussianMixture(
    covariance_type='full',
    n_components=3,
)
# NOTE: scikit-learn documents the `y` argument of GaussianMixture.fit as
# ignored (present for API consistency), so the labels passed here do not
# influence the fitted mixture.
gmm.fit(X, y)