demo deployment/Deployment/demo.py
import pickle
import re

import numpy as np
import pandas as pd
import nltk
from flask import Flask, request, render_template
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
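
# NOTE: word_tokenize, stopwords, and WordNetLemmatizer rely on NLTK data
# packages; if they are not already installed, a one-time download is needed:
#   nltk.download('punkt')
#   nltk.download('stopwords')
#   nltk.download('wordnet')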

app = Flask(__name__)

# Pre-trained artifacts loaded once at import time: the classifier, the
# fitted PCA transformer (pickled as 'word2vec.pkl'), and the Word2Vec model.
model = pickle.load(open('bestmodel.pkl', 'rb'))
pca_model = pickle.load(open('word2vec.pkl', 'rb'))
w2v_model = pickle.load(open('w2v_model.pkl', 'rb'))
stopwords_list = set(stopwords.words("english"))

def input_process(data):
    # Clean the raw text, average its Word2Vec vectors, then project the
    # result with the fitted PCA model so it matches the classifier's inputs.
    # The models are already loaded at module level; no need to re-unpickle
    # them on every request.
    input_clean = phrase_cleanse(data)
    input_tokens = [input_clean.split(" ")]
    processed_input = word_avg_vect(input_tokens, w2v_model, 100)
    return pca_model.transform(processed_input)
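
# Example (illustrative): input_process("severe chest pain") cleans the text,
# averages its word vectors into a single 100-dimensional row, and returns the
# PCA-projected features that model.predict expects.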

def phrase_cleanse(phrase):
    # Tokenize the phrase into individual words
    token_words = word_tokenize(phrase)

    # Convert all tokens to lowercase
    words_step1 = [word.lower() for word in token_words]

    # Strip all punctuation characters
    words_step2 = [re.sub(r'[^\w\s]', '', word) for word in words_step1]

    # Keep purely alphabetic tokens and drop stop words
    words_step3 = [word for word in words_step2
                   if word.isalpha() and word not in stopwords_list]

    # Lemmatization - collapse inflected forms of words longer than 2
    # characters into a single base form
    lem = WordNetLemmatizer()
    lem_list = [lem.lemmatize(word) for word in words_step3 if len(word) > 2]

    return " ".join(lem_list)
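
# Example (illustrative, assuming the NLTK data packages are installed):
#   phrase_cleanse("Patients with severe headaches!")
#   -> "patient severe headache"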

def word_avg_vect(data, model, num_features):
    # Average the Word2Vec vectors of every in-vocabulary word in each
    # sentence; a sentence with no known words becomes a zero vector.
    # A plain list is used here (not np.array) so sentences of different
    # lengths do not trigger numpy's ragged-array error.
    words = set(model.wv.index_to_key)
    X_vect = [np.array([model.wv[i] for i in s if i in words]) for s in data]
    X_vect_avg = []
    for v in X_vect:
        if v.size:
            X_vect_avg.append(v.mean(axis=0))
        else:
            X_vect_avg.append(np.zeros(num_features, dtype=float))

    df_vect_avg = pd.DataFrame(X_vect_avg)
    return df_vect_avg
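
# Example (illustrative): for a single cleaned sentence, word_avg_vect returns
# a 1 x 100 DataFrame whose one row is the mean of the sentence's word vectors.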

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    '''
    For rendering results on the HTML GUI
    '''
    # The form field name must match the input name used in index.html
    # (kept verbatim as 'medical diagonisis'); renamed the local variable
    # so it no longer shadows the built-in input().
    user_input = request.form['medical diagonisis']
    final_features = input_process(user_input)
    prediction = model.predict(final_features)

    output = prediction[0]

    return render_template('index.html', prediction_text='Our predicted diagnosis is {}'.format(output))


if __name__ == "__main__":
    app.run(debug=True)
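
# To try the app locally (assuming the pickled models and templates/index.html
# sit alongside this file):
#   $ python demo.py
#   then open http://127.0.0.1:5000/ and submit a phrase to classify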