[9063a2]: / run.py

Download this file

126 lines (95 with data), 5.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit
from ocr import extract_images_from_pdf, ocr_core
from evaluation import higlight
import os
import pdfplumber
import base64
def process_file(file, choices=None):
    ''' Store an uploaded file on disk and run the OCR / highlighting pipeline on it.

    The file is written to
    ./NER-Medical-Document/processed_files/<stem>/<file.name>, where <stem> is
    file.name without its last four characters (i.e. a dot followed by a
    three-letter extension such as .pdf, .png or .jpg).

    Args:
        file: uploaded file object exposing `name` and `getbuffer()`.
        choices: list of five booleans forwarded unchanged to `higlight`.
            When omitted, only the first flag is enabled.
    '''
    if choices is None:
        # Build the default per call: a list literal in the signature is a
        # single object shared (and mutable) across every call.
        choices = [True, False, False, False, False]

    name = file.name
    # NOTE(review): the [:-4] slice assumes a three-letter extension; it is kept
    # because the pipeline helpers presumably derive the same folder name --
    # confirm before switching to os.path.splitext.
    # NOTE(review): `name` is used as a path component as-is; confirm it cannot
    # contain directory separators.
    stem = name[:-4]
    target_dir = "./NER-Medical-Document/processed_files/" + stem

    # Creates missing parent directories and tolerates an existing folder,
    # without the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, name), "wb") as f:
        f.write(file.getbuffer())

    extract_images_from_pdf(name)

    # The OCR and highlighting steps are always addressed with a ".pdf" name,
    # whatever the type of the uploaded file.
    pdf_name = name if name.endswith('.pdf') else stem + ".pdf"
    ocr_core(pdf_name)
    higlight(pdf_name, choices)
def display_pdf(file: str):
    ''' Render the pdf stored at path `file` inside the Streamlit page.

    The file's bytes are base64-encoded, placed in an HTML <embed> tag through
    a data URI, and emitted with streamlit.markdown with raw HTML enabled.
    '''
    # Read the raw bytes of the document
    with open(file, "rb") as pdf_handle:
        raw_bytes = pdf_handle.read()

    # Encode them so they can be inlined in the page
    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    # Build the HTML snippet and hand it to Streamlit
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="650" height="1000" type="application/pdf">'
    streamlit.markdown(pdf_display, unsafe_allow_html=True)
def _find_uploaded_file(uploaded_files, name):
    ''' Return the first uploaded file whose `name` equals `name`, or None if there is none '''
    return next((candidate for candidate in uploaded_files if candidate.name == name), None)


def run():
    ''' Main function to run the app

    Builds the page: file uploader, file selector, entity checkboxes and the
    four action buttons ('Process file', 'View results', 'View file',
    'View transcript').
    '''
    # set the title
    streamlit.title('Prototype')
    # add a file explorer to load multiple files
    uploaded_files = streamlit.file_uploader(
        'Upload your file here',
        type=['pdf', 'png', 'jpg'],
        accept_multiple_files=True)

    # Flag telling whether the user wants the full file displayed. It is only
    # initialised when missing: assigning False unconditionally would reset it
    # on every rerun, so the 'View file' button could never untoggle the view.
    if "view_file" not in streamlit.session_state:
        streamlit.session_state["view_file"] = False
    # Flag set to True during the run in which a file gets processed.
    # NOTE(review): nothing in this function reads it; kept as in the original.
    streamlit.session_state["view_results"] = False

    # Display colour associated with each entity type, in the same order as `choices`
    codes = {'Chemicals': '#FFFF00', 'Diseases': '#00FF00', 'Dates': '#00B3FF', 'Adverse effects': '#FF0000', 'Doses': '#FF00FF'}

    # add a selectbox to select the file to process
    option = streamlit.selectbox(
        'Select the file you want to process',
        tuple(uploaded_file.name for uploaded_file in uploaded_files))

    # add checkboxes to select the type of entities to extract
    with streamlit.expander(label='Select the entities you want to higlight', expanded=False):
        chemicals = streamlit.checkbox('Chemicals', value=True)
        diseases = streamlit.checkbox('Diseases', value=False)
        dates = streamlit.checkbox('Dates', value=False)
        adverse = streamlit.checkbox('Adverse effects', value=False)
        doses = streamlit.checkbox('Doses', value=False)
    choices = [chemicals, diseases, dates, adverse, doses]

    # Resolve the selection once; None when no file is selected.
    selected = _find_uploaded_file(uploaded_files, option)

    # add a button to process the selected file
    # (each button is evaluated first so that it is always rendered)
    if streamlit.button('Process file') and selected is not None:
        process_file(selected, choices)
        streamlit.session_state["view_results"] = True

    # add a button to show the highlighted pdf next to the colour legend
    if streamlit.button('View results') and selected is not None:
        col1, col2 = streamlit.columns([10000, 1], gap='large')
        with col1:
            stem = selected.name[:-4]
            highlighted_pdf = "./NER-Medical-Document/processed_files/" + stem + "/" + stem + "_highlighted.pdf"
            if os.path.exists(highlighted_pdf):
                display_pdf(highlighted_pdf)
            else:
                streamlit.write("The file has not been processed yet")
        with col2:
            # one colour picker per entity type that is currently ticked
            for (label, colour), enabled in zip(codes.items(), choices):
                if enabled:
                    streamlit.color_picker(label, colour, disabled=False)

    # add a button to toggle or untoggle the full file
    if streamlit.button('View file') and selected is not None:
        streamlit.session_state["view_file"] = not streamlit.session_state["view_file"]
    # The flag now survives reruns, so also make sure a file is still selected.
    if streamlit.session_state["view_file"] and selected is not None:
        if selected.name.endswith('.pdf'):
            # getvalue() returns the whole buffer whatever the current read
            # position is, unlike read().
            base64_pdf = base64.b64encode(selected.getvalue()).decode('utf-8')
            pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="600" height="1300" type="application/pdf">'
            streamlit.markdown(pdf_display, unsafe_allow_html=True)
        elif selected.name.endswith(('.png', '.jpg')):
            streamlit.image(selected, use_column_width=True)

    # add a button to print the text extracted from each page of the file
    if streamlit.button('View transcript') and selected is not None:
        try:
            with pdfplumber.open(selected) as pdf:
                for page in pdf.pages:
                    streamlit.write(page.extract_text())
        except Exception:
            # Best effort: anything that cannot be opened as a pdf shows "None".
            # Exception (not a bare except) lets BaseException-derived signals
            # such as KeyboardInterrupt propagate.
            streamlit.write("None")
# Start the app only when this module is executed as a script.
if __name__ == "__main__":
    run()