--- /dev/null
+++ b/code/modelTuning.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# ### In conclusion, KNN with the bag-of-words dataset is the winner among all classifiers, with the highest test accuracy, precision, recall, and F1 scores. The Random Forest scores best on training accuracy.
+
+# In[ ]:
+
+
+### Find the best classifier among all classifiers
+model_list = [rf_w2v, lr_bow, svc_tf, knn_bow, gnb_tf]
+name_list = ["Random Forest with w2v", "Logistic Regression with bow", "SVC with tf", "KNN with bow", "Gaussian Naive Bayes with tf"]
+y_data = [[y_w2v_train, y_w2v_test], [y_bow_train, y_bow_test], [y_tf_train, y_tf_test], [y_bow_train, y_bow_test], [y_tf_train, y_tf_test]]
+X_data = [[X_w2v_train, X_w2v_test], [X_bow_train, X_bow_test], [X_tf_train, X_tf_test], [X_bow_train, X_bow_test], [X_tf_train, X_tf_test]]
+matric_table(model_list, name_list, y_data, X_data)
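+
+
+# In[ ]:
+
+
+# `matric_table` is defined earlier in the notebook. Below is a minimal sketch of
+# what such a helper might look like, assuming it reports train/test accuracy plus
+# weighted precision, recall, and F1 per model; the signature and metric choices
+# are assumptions, not the notebook's actual implementation.
+import pandas as pd
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+
+def matric_table_sketch(model_list, name_list, y_data, X_data):
+    # Build one row of train/test metrics per fitted model
+    rows = []
+    for model, name, (y_tr, y_te), (X_tr, X_te) in zip(model_list, name_list, y_data, X_data):
+        pred_te = model.predict(X_te)
+        rows.append({'Model': name,
+                     'Train Accuracy': accuracy_score(y_tr, model.predict(X_tr)),
+                     'Test Accuracy': accuracy_score(y_te, pred_te),
+                     'Precision': precision_score(y_te, pred_te, average='weighted'),
+                     'Recall': recall_score(y_te, pred_te, average='weighted'),
+                     'F1': f1_score(y_te, pred_te, average='weighted')})
+    return pd.DataFrame(rows).set_index('Model')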
+
+
+# In[ ]:
+
+
+## Further tuning of the KNN classifier with bow
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import accuracy_score
+
+best_models = []
+n_neighbors = [3, 5, 7, 9]
+weights = ['uniform', 'distance']
+ps = [1, 2]
+
+def KNN_clf(n_neighbors, weight, p):
+    # Fit a KNN model with the given hyperparameters and record its test accuracy
+    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weight, p=p)
+    knn.fit(X_bow_train, y_bow_train)
+    y_pred = knn.predict(X_bow_test)
+    acc = accuracy_score(y_bow_test, y_pred)
+    best_models.append((n_neighbors, weight, p, acc))
+
+for c in n_neighbors:
+    for w in weights:
+        for p in ps:
+            KNN_clf(c, w, p)
+
+# Report the hyperparameter combination with the highest test accuracy
+print(max(best_models, key=lambda item: item[3]))
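+
+
+# In[ ]:
+
+
+# The manual sweep above selects hyperparameters by test-set accuracy. As an
+# alternative sketch, sklearn's GridSearchCV runs the same grid but cross-validates
+# on the training data, so the test set stays untouched during tuning (cv=5 and
+# scoring='accuracy' here are assumptions, not choices made in the notebook).
+from sklearn.model_selection import GridSearchCV
+
+param_grid = {'n_neighbors': [3, 5, 7, 9], 'weights': ['uniform', 'distance'], 'p': [1, 2]}
+grid = GridSearchCV(KNeighborsClassifier(), param_grid, scoring='accuracy', cv=5)
+grid.fit(X_bow_train, y_bow_train)
+print(grid.best_params_, grid.best_score_)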
+
+
+# In[ ]:
+
+
+## Further tuning of the random forest classifier with w2v
+from sklearn.ensemble import RandomForestClassifier
+
+best_models = []
+crit = ['gini', 'entropy']
+max_d = range(1, 20, 4)
+min_s_leaf = range(1, 20, 4)
+n_est = [50, 100, 200]
+
+def RF_clf(crit, max_d, min_s_leaf, n_est):
+    # Fit a random forest with the given hyperparameters and record its test accuracy
+    forest = RandomForestClassifier(criterion=crit, max_depth=max_d, min_samples_leaf=min_s_leaf, n_estimators=n_est, random_state=1)
+    forest.fit(X_w2v_train, y_w2v_train)
+    y_pred = forest.predict(X_w2v_test)
+    acc = accuracy_score(y_w2v_test, y_pred)
+    best_models.append((crit, max_d, min_s_leaf, n_est, acc))
+
+
+for c in crit:
+    for md in max_d:
+        for msl in min_s_leaf:
+            for n_e in n_est:
+                RF_clf(c, md, msl, n_e)
+
+# Report the hyperparameter combination with the highest test accuracy
+print(max(best_models, key=lambda item: item[4]))
+
+
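+# In[ ]:
+
+
+# The random forest grid above takes 2 * 5 * 5 * 3 = 150 fits. A cheaper alternative
+# sketch: RandomizedSearchCV samples a subset of the same grid (n_iter=30, cv=5, and
+# scoring='accuracy' here are assumptions, not choices made in the notebook).
+from sklearn.model_selection import RandomizedSearchCV
+
+param_dist = {'criterion': ['gini', 'entropy'],
+              'max_depth': list(range(1, 20, 4)),
+              'min_samples_leaf': list(range(1, 20, 4)),
+              'n_estimators': [50, 100, 200]}
+search = RandomizedSearchCV(RandomForestClassifier(random_state=1), param_dist,
+                            n_iter=30, scoring='accuracy', cv=5, random_state=1)
+search.fit(X_w2v_train, y_w2v_train)
+print(search.best_params_, search.best_score_)
+
+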
+# In[ ]:
+
+
+Knn_best = KNeighborsClassifier(n_neighbors=3, weights='uniform', p=1)
+Rf_best = RandomForestClassifier(criterion='gini', max_depth=13, min_samples_leaf=1, n_estimators=50, random_state=1)
+Knn_best.fit(X_bow_train, y_bow_train)
+Rf_best.fit(X_w2v_train, y_w2v_train)
+
+model_list = [Knn_best, Rf_best]
+name_list = ["Tuned KNN", "Tuned Random Forest"]
+y_data = [[y_bow_train, y_bow_test], [y_w2v_train, y_w2v_test]]
+X_data = [[X_bow_train, X_bow_test], [X_w2v_train, X_w2v_test]]
+matric_table(model_list, name_list, y_data, X_data)
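+
+
+# In[ ]:
+
+
+# Because the scores look nearly perfect, a quick sanity-check sketch: 5-fold
+# cross-validation of both tuned models on the training data (cv=5 is an assumption).
+# A large gap versus the test-set scores above would hint that the tuning loop
+# overfit the test set.
+from sklearn.model_selection import cross_val_score
+
+print(cross_val_score(Knn_best, X_bow_train, y_bow_train, cv=5).mean())
+print(cross_val_score(Rf_best, X_w2v_train, y_w2v_train, cv=5).mean())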
+
+
+# In[ ]:
+
+
+# The scores look almost too perfect; the metric table above may be enough on its
+# own, but plot the curves anyway for reference.
+import scikitplot as skplt
+import matplotlib.pyplot as plt
+
+
+skplt.metrics.plot_roc(y_w2v_test, Rf_best.predict_proba(X_w2v_test),
+                       text_fontsize='small',
+                       title='ROC for best model',
+                       figsize=(12, 8))
+plt.show()
+
+
+# In[ ]:
+
+
+skplt.metrics.plot_precision_recall_curve(y_w2v_test, Rf_best.predict_proba(X_w2v_test),
+                                          text_fontsize='small',
+                                          title='PR Curve for best model',
+                                          figsize=(12, 8))
+plt.show()
+