Diff of /app.py [000000] .. [ec103b]

Switch to unified view

a b/app.py
1
from flask import Flask, request, render_template, url_for
2
from werkzeug.utils import secure_filename, redirect
3
import pandas as pd
4
import pickle
5
import csv
6
import os
7
import rule_based_model as rule_based_model
8
9
# Create the Flask application object that the route decorators attach to.
app = Flask(__name__)

# Directory part of this module's file path; preds.csv is read from and
# written to this directory.
path = os.path.split(__file__)[0]
13
14
15
# Column labels applied, in order, to the prediction matrix returned by the
# "model2" classifier. The spellings (including the unbalanced parentheses)
# are reproduced exactly because they become the CSV headers.
_PRIMARY_SITE_LABELS = (
    'AMPULLA', 'ANAL CANAL', 'ANORECTAL JUNCTION', 'ANTERIOR SEGMENT', 'ANUS', 'APPENDIX', 'ASCENDING)',
    'AXILLA (LEFT)', 'BLADDER', 'BREAST', 'BREAST (LEFT)', 'BREAST (RIGHT)', 'CAECUM',
    'CAECUM/ILEOCAECAL JUNCTION', 'CERVIX', 'CHEST WALL', 'CHEST WALL (LEFT)', 'COLON',
    'COLON (ANASTOMOTIC SITE)', 'COLON (ASCENDING)', 'COLON (CAECUM', 'COLON (CAECUM)', 'COLON (DESCENDING)',
    'COLON (DISTAL TRANSVERSE)', 'COLON (HEPATIC FLEXURE)', 'COLON (PROXIMAL SIGMOID)', 'COLON (RECTOSIGMOID)',
    'COLON (RECTUM)', 'COLON (RIGHT)', 'COLON (SIGMOID)', 'COLON (SPLENIC FLEXURE)', 'COLON (TRANSVERSE)',
    'COLON (UPPER RECTUM)', 'COLORECTAL', 'COLORECTAL (PRIMARY)', 'DUODENUM', 'ENDOMETRIUM', 'ESOPHAGUS',
    'FALLOPIAN TUBE (LEFT)', 'FALLOPIAN TUBE (RIGHT)', 'FOOT', 'GASTRIC', 'HEPATIC FLEXURE', 'KIDNEY',
    'KIDNEY (LEFT)', 'LARGE BOWEL', 'LIVER', 'LIVER (LEFT LOBE)', 'LIVER (SEGMENT II)',
    'LIVER SEGMENT 7/8 NODULE', 'LIVER SEGMENT 8 NODULE', 'LIVER SEGMENT II', 'LUNG', 'LUNG (LEFT LOWER LOBE)',
    'LUNG (RIGHT LOWER LOBE)', 'LUNG (RIGHT UPPER LOBE)', 'LUNG (RIGHT)', 'MANDIBLE', 'NASOPHARYNX', 'OVARY',
    'PARASTERNAL (LEFT)', 'PAROTID (LEFT)', 'PERITONEUM', 'PROSTATE', 'RECTAL', 'RECTOSIGMOID', 'RECTUM',
    'RENAL (LEFT)', 'RENAL (RIGHT)', 'RIGHT LOWER LOBE', 'RIGHT UPPER LOBE', 'SALIVARY GLAND', 'SIGMOID',
    'SPLENIC FLEXURE', 'THIGH', 'THYMUS', 'TONGUE', 'TONSIL', 'UNKNOWN', 'UTERUS',
)

# Long header of the grade column in the "model3" upload (after upper-casing).
_GRADE_HEADER = "GRADE(1, 2, 3, MILDLY OR WELL = 1, MODERATELY = 2, POORLY = 3)"


def _uploaded_field():
    """Return the name of the last submitted file field that carries a file.

    Fields submitted without a filename (an empty file input) are ignored.
    Returns "" when no file was uploaded at all.
    """
    chosen = ""
    for field_name, upload in request.files.items():
        if upload.filename:
            chosen = field_name
    return chosen


def _with_saved_upload(upload, parse):
    """Save *upload* to a scratch directory and return ``parse(saved_path)``.

    The file is stored under its ``secure_filename`` and parsed from that same
    path, so the name written and the name read can never disagree. Using a
    scratch directory (removed afterwards) keeps an upload from overwriting
    files next to the application such as ``model.pkl``.

    Returns None when the upload has no usable filename.
    """
    import tempfile

    safe_name = secure_filename(upload.filename or "")
    if not safe_name:
        return None
    with tempfile.TemporaryDirectory() as scratch_dir:
        saved_path = os.path.join(scratch_dir, safe_name)
        upload.save(saved_path)
        return parse(saved_path)


def _load_pickled_model(model_file):
    """Unpickle and return the classifier stored in *model_file*."""
    # NOTE: unpickling executes arbitrary code; the model files must only ever
    # come from a trusted source.
    with open(model_file, 'rb') as handle:
        return pickle.load(handle)


def _combined_diagnosis_frame(csv_path):
    """Read the report CSV and return a one-column 'Combined Diagnosis' frame.

    The column is the plain concatenation of the 'Diagnosis',
    'Gross Description' and 'Microscopic Description' fields of each row.
    Raises KeyError if any of those columns is missing.
    """
    with open(csv_path) as handle:
        rows = [dict(row) for row in csv.DictReader(handle)]
    frame = pd.DataFrame(rows)
    frame['Combined Diagnosis'] = (
        frame['Diagnosis'] + frame['Gross Description'] + frame['Microscopic Description']
    )
    return frame[['Combined Diagnosis']]


def _grade_input_frame(csv_path):
    """Read the grading CSV and return the four columns the rule-based model uses.

    Headers are upper-cased, the long grade header is renamed to 'grades', and
    missing grades are replaced by 0.
    """
    frame = pd.read_csv(csv_path)
    frame.columns = [x.upper() for x in frame.columns]
    # rename() returns a new frame, so the later column assignment does not
    # operate on a slice of the original.
    frame = frame[["SCM GUIDE", "DIAGNOSIS", "MICROSCOPIC DESCRIPTION", _GRADE_HEADER]].rename(
        columns={_GRADE_HEADER: 'grades'})
    frame['grades'] = frame['grades'].fillna(0)
    return frame


@app.route('/', methods=['GET', 'POST'])
def index():
    """Upload page: run the model matching the uploaded file field.

    GET renders ``index.html``. POST expects a CSV uploaded in a file field
    named ``model1``, ``model2`` or ``model3``; the matching model is run, the
    result (plus empty "accepted-rejected" and "comments" columns) is written
    to ``preds.csv`` next to this module, and the client is redirected to the
    corresponding review page. A POST without a usable upload re-renders the
    index page with status 400 instead of returning no response.
    """
    if request.method != "POST":
        return render_template("index.html")

    req = _uploaded_field()
    if req not in ("model1", "model2", "model3"):
        return render_template("index.html"), 400

    output_csv = os.path.join(path, r"preds.csv")
    upload = request.files[req]

    if req == "model3":
        df_initial = _with_saved_upload(upload, _grade_input_frame)
        if df_initial is None:
            return render_template("index.html"), 400

        # Step 1: Convert DF to show ID, TEXT and GRADES
        df = rule_based_model.convert_df(df_initial)
        # Step 2: Find text matches to 'grade' and 'differentiated' (+ MATCHES)
        df = rule_based_model.find_matches(df)
        # Step 3: Determine the list of grades from the matches (+ DETERMINED)
        df = rule_based_model.determine_grade(df)
        # Step 4: Evaluate the determined grade (+ RESULT); the overall
        # accuracy score is not used by this view.
        df, _ = rule_based_model.evaluate_accuracy(df)

        df[["accepted-rejected", "comments"]] = ""
        df.to_csv(output_csv)
        return redirect(url_for('rulebasedmodelwebpage'))

    input_df = _with_saved_upload(upload, _combined_diagnosis_frame)
    if input_df is None:
        return render_template("index.html"), 400
    report_texts = input_df['Combined Diagnosis'].values.tolist()

    if req == "model1":
        clf = _load_pickled_model('model.pkl')
        predictions, _ = clf.predict(report_texts)
        prediction = pd.DataFrame(predictions, columns=['Cancerous?'])
        prediction = pd.concat([input_df, prediction], axis=1)
        prediction[["accepted-rejected", "comments"]] = ""
        prediction.to_csv(output_csv)
        return redirect(url_for('cancerprediction'))

    # req == "model2"
    clf = _load_pickled_model('model2.pkl')
    predictions, _ = clf.predict(report_texts)
    prediction = pd.DataFrame(predictions, columns=list(_PRIMARY_SITE_LABELS))
    prediction = pd.concat([input_df, prediction], axis=1)

    site_summaries = []
    for _, row in prediction.iterrows():
        # NOTE(review): every site is prefixed with ", ", so a non-empty
        # summary starts with a separator. Kept as-is because the template
        # that displays it is not visible here - confirm before changing.
        sites = "".join(", " + str(organ) for organ in _PRIMARY_SITE_LABELS if row[organ] == 1)
        site_summaries.append(sites if sites else "None Predicted")
    # Appended by name; it lands right after the label columns, which is the
    # position the original addressed with a hard-coded index.
    prediction["Predicted Primary Site(s)"] = site_summaries

    prediction[["accepted-rejected", "comments"]] = ""
    prediction.to_csv(output_csv)
    return redirect(url_for('primarysitedetection'))
133
134
135
# Location of the predictions CSV written by the upload view and edited by
# the review pages below.
dataset_location = os.path.join(path, r"preds.csv")


def _read_review_rows():
    """Return every row of the predictions CSV as a list of dicts."""
    with open(dataset_location) as handle:
        return [dict(row) for row in csv.DictReader(handle)]


def _form_int(field):
    """Return the submitted value of *field* as an int.

    Returns None when the field is absent, empty, or not an integer, so a
    malformed request cannot crash the view.
    """
    raw = request.values.get(field)
    if not raw:
        return None
    try:
        return int(raw)
    except ValueError:
        return None


def _store_review_value(row_number, column_index, value):
    """Write *value* into one cell of the predictions CSV.

    Returns True when the cell was written, False when the row or column
    position does not exist in the file. Negative positions are rejected too;
    they would otherwise address cells counted from the end.
    """
    df = pd.read_csv(dataset_location)
    if not (0 <= row_number < len(df) and 0 <= column_index < len(df.columns)):
        return False
    # A column that is still completely empty is read back as float (NaN).
    # Cast it to object first so storing text does not depend on pandas
    # silently changing the column dtype, which newer versions deprecate.
    column = df.columns[column_index]
    df[column] = df[column].astype(object)
    df.iat[row_number, column_index] = value
    df.to_csv(dataset_location, index=False)
    return True


def _review_page(template_name, decision_column, comment_column):
    """Shared implementation of the three report review pages.

    Reads the predictions CSV, applies at most one edit taken from the
    submitted form, and renders *template_name*. The form fields are:

    * ``report-number-input``: row to display.
    * ``accept-button`` / ``reject-button``: row number whose cell in
      *decision_column* is set to "Accepted" / "Rejected".
    * ``comments-given-input`` with ``comment-submit-button``: comment text
      and the row number whose cell in *comment_column* receives it.

    The displayed row falls back to 0 when it is out of range. If the CSV
    does not exist yet, the client is redirected to the upload page.
    """
    try:
        data = _read_review_rows()
    except FileNotFoundError:
        return redirect(url_for('index'))

    row_number = 0

    if request.method == "POST":
        requested_row = _form_int("report-number-input")
        if requested_row is not None:
            row_number = requested_row

        # The buttons carry the row number in their value; the first one
        # present wins, in the order accept, reject, comment.
        target_row = None
        edit = None
        if request.values.get("accept-button"):
            target_row = _form_int("accept-button")
            edit = (decision_column, "Accepted")
        elif request.values.get("reject-button"):
            target_row = _form_int("reject-button")
            edit = (decision_column, "Rejected")
        elif request.values.get("comments-given-input"):
            target_row = _form_int("comment-submit-button")
            edit = (comment_column, request.values.get("comments-given-input"))

        if edit is not None and target_row is not None:
            row_number = target_row
            if _store_review_value(target_row, edit[0], edit[1]):
                # Reload so the page shows what is now on disk.
                data = _read_review_rows()

    number_of_reports = len(data)  # total number of reports shown in the html
    if row_number >= number_of_reports or row_number < 0:
        row_number = 0

    # render HTML page dynamically
    return render_template(template_name, data=data, list=list, len=len, str=str, row_number=row_number,
                           number_of_reports=number_of_reports)


@app.route('/cancerprediction', methods=['GET', 'POST'])
def cancerprediction():
    """Review page for the cancer prediction results (CSV columns 3 and 4)."""
    return _review_page("cancerpredictionmodel.html", 3, 4)


@app.route('/primarysitedetection', methods=['GET', 'POST'])
def primarysitedetection():
    """Review page for the primary site results (CSV columns 83 and 84)."""
    return _review_page("primarysitepredictionmodel.html", 83, 84)


@app.route('/rulebasedmodelwebpage', methods=["POST", "GET"])
def rulebasedmodelwebpage():
    """Review page for the rule-based grade results (CSV columns 7 and 8)."""
    return _review_page("cancergradepredictionmodel.html", 7, 8)
531
532
533
# Running the app
if __name__ == '__main__':
    # Debug mode exposes the interactive debugger, which lets anyone who can
    # reach the server execute Python code. It is therefore opt-in: set the
    # FLASK_DEBUG environment variable to 1/true/yes to enable it.
    app.run(debug=os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes"))