|
a |
|
b/demo deployment/Deployment/demo.py |
|
|
1 |
import pandas as pd |
|
|
2 |
import numpy as np |
|
|
3 |
from flask import Flask, request, render_template |
|
|
4 |
import pickle |
|
|
5 |
import nltk |
|
|
6 |
from nltk.corpus import wordnet as wn |
|
|
7 |
from nltk.stem import WordNetLemmatizer |
|
|
8 |
from nltk.tokenize import word_tokenize |
|
|
9 |
from nltk.corpus import stopwords |
|
|
10 |
import string |
|
|
11 |
import re |
|
|
12 |
|
|
|
13 |
app = Flask(__name__)


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted artefacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artefacts are loaded once at import time and shared by every request.
model = _load_pickle('bestmodel.pkl')
# NOTE(review): `pca_model` is read from 'word2vec.pkl' -- the file name does
# not obviously match the variable name; confirm this is the intended artefact.
pca_model = _load_pickle('word2vec.pkl')
w2v_model = _load_pickle('w2v_model.pkl')
stopwords_list = set(stopwords.words("english"))
|
|
18 |
|
|
|
19 |
def input_process(data):
    """Convert one raw text phrase into the feature row fed to the classifier.

    The phrase is cleaned by ``phrase_cleanse``, split on single spaces,
    averaged into one word-vector row by ``word_avg_vect`` (with 100 as the
    fallback vector width) and then passed through ``pca_model.transform``.

    Args:
        data: Raw text to process.

    Returns:
        The result of ``pca_model.transform`` for the single averaged row.
    """
    # Reuse the models loaded at import time. Re-reading 'w2v_model.pkl' and
    # 'word2vec.pkl' on every call was slow and left file handles unclosed.
    tokenized = [phrase_cleanse(data).split(" ")]
    averaged = word_avg_vect(tokenized, w2v_model, 100)
    return pca_model.transform(averaged)
|
|
27 |
|
|
|
28 |
def phrase_cleanse(phrase):
    """Normalise a phrase into a space-separated string of lemmatised words.

    Each token produced by ``word_tokenize`` is lower-cased and stripped of
    every character that is neither a word character nor whitespace. A token
    is kept only if what remains is purely alphabetic, is not in
    ``stopwords_list`` and is longer than two characters; kept tokens are
    lemmatised with WordNet.

    Args:
        phrase: Raw text to clean.

    Returns:
        The surviving lemmas joined by single spaces ("" if none survive).
    """
    lemmatizer = nltk.stem.WordNetLemmatizer()
    kept = []
    for token in word_tokenize(phrase):
        # Lower-case, then drop punctuation.
        word = re.sub(r'[^\w\s]', '', token.lower())
        # Discard anything containing digits/underscores or left empty.
        if not word.isalpha():
            continue
        # stopwords_list is a set: test membership directly instead of
        # rebuilding a list for every token.
        if word in stopwords_list:
            continue
        # Only words with more than two characters are lemmatised and kept.
        if len(word) > 2:
            kept.append(lemmatizer.lemmatize(word))
    return " ".join(kept)
|
|
64 |
|
|
|
65 |
def word_avg_vect(data, model, num_features):
    """Average the word vectors of each tokenised sentence.

    Args:
        data: Iterable of sentences, each an iterable of word strings.
        model: Object exposing ``model.wv.index_to_key`` (the known words)
            and ``model.wv[word]`` (the vector for a known word).
        num_features: Length of the zero vector used for a sentence that
            contains no known word.

    Returns:
        A ``pandas.DataFrame`` with one row per sentence, holding the
        element-wise mean of that sentence's known-word vectors, or zeros
        when the sentence has none.
    """
    vocabulary = set(model.wv.index_to_key)
    averaged_rows = []
    # Average sentence by sentence. Stacking all per-sentence matrices into a
    # single ndarray first fails on recent NumPy when sentences contain
    # different numbers of known words (ragged input).
    for sentence in data:
        vectors = [model.wv[word] for word in sentence if word in vocabulary]
        if vectors:
            averaged_rows.append(np.mean(vectors, axis=0))
        else:
            averaged_rows.append(np.zeros(num_features, dtype=float))

    return pd.DataFrame(averaged_rows)
|
|
77 |
|
|
|
78 |
@app.route('/')
def home():
    """Serve the landing page by rendering the 'index.html' template."""
    page = render_template('index.html')
    return page
|
|
81 |
|
|
|
82 |
@app.route('/predict', methods=['POST'])
def predict():
    """Predict from the submitted form text and render the result page.

    Reads the 'medical diagonisis' form field, converts it to features with
    ``input_process``, calls ``model.predict`` and renders 'index.html' with
    the first prediction embedded in ``prediction_text``.
    """
    # The form key is a runtime string and is kept verbatim, spelling
    # included -- presumably it matches the field name in index.html.
    # The local is no longer called `input`, which shadowed the builtin.
    submitted_text = request.form['medical diagonisis']
    final_features = input_process(submitted_text)
    prediction = model.predict(final_features)

    # Only one phrase is submitted, so only the first prediction is shown.
    output = prediction[0]

    return render_template('index.html', prediction_text='Our predicted diagnosis is {}'.format(output))
|
|
94 |
|
|
|
95 |
|
|
|
96 |
if __name__ == "__main__":
    # Start the Flask server only when this file is executed as a script.
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this is
    # intended wherever this demo is actually deployed.
    app.run(debug=True)