|
a |
|
b/app.py |
|
|
1 |
""" |
|
|
2 |
Streamlit app for demo. |
|
|
3 |
""" |
|
|
4 |
|
|
|
5 |
import streamlit as st |
|
|
6 |
from aitrika.engine.online_aitrika import OnlineAItrika |
|
|
7 |
from aitrika.utils.text_parser import generate_documents |
|
|
8 |
from aitrika.llm.openai import OpenAILLM |
|
|
9 |
from dotenv import load_dotenv |
|
|
10 |
import os |
|
|
11 |
import time |
|
|
12 |
|
|
|
13 |
|
|
|
14 |
# Read variables from a local .env file into the process environment before
# anything below looks them up.
load_dotenv()

# Browser-tab title and icon, followed by the heading shown on the page.
st.set_page_config(page_title="AItrika", page_icon="images/logo.png")
st.title("AItrika 🧪")
|
|
21 |
|
|
|
22 |
|
|
|
23 |
def response_generator(query: str):
    """Yield the LLM's answer to ``query`` one word at a time.

    The module-level ``llm`` is queried once, the raw answer is printed, and
    each whitespace-separated word is then yielded with a trailing space,
    pausing 0.05 s after every word.

    Args:
        query: The question forwarded to ``llm.query``.

    Yields:
        Each word of the answer followed by a single space.
    """
    answer = llm.query(query=query)
    print(answer)
    tokens = answer.split()
    for token in tokens:
        yield token + " "
        # Short pause between words so the consumer sees a typing effect.
        time.sleep(0.05)
|
|
29 |
|
|
|
30 |
|
|
|
31 |
def reset_conversation():
    """Set the ``conversation`` and ``chat_history`` session entries to ``None``."""
    for state_key in ("conversation", "chat_history"):
        setattr(st.session_state, state_key, None)
|
|
34 |
|
|
|
35 |
|
|
|
36 |
def format_list_to_markdown(items):
    """Render a list of text chunks as a Markdown bullet list.

    If the first chunk, ignoring surrounding whitespace, starts and ends with
    ``**``, it is emitted as a ``##`` heading and removed from the bullets.
    Every remaining chunk is split on blank lines (``"\\n\\n"``): when that
    yields two or more parts, the first part is dropped and each later part
    becomes its own bullet; otherwise the whole chunk becomes one bullet.
    Double-quote characters are removed from the final text.

    Args:
        items: Sequence of strings, e.g. the result of ``text.split("---")``.

    Returns:
        The Markdown text, or an empty string when ``items`` is empty.
    """
    # Guard: the heading check below indexes items[0].
    if not items:
        return ""

    pieces = []

    # Detect and extract the heading from the same whitespace-stripped text,
    # so a chunk such as "**Title**\n" loses its asterisks as intended.
    heading = items[0].strip()
    if heading.startswith("**") and heading.endswith("**"):
        pieces.append(f"## {heading.strip('* ')}\n\n")
        items = items[1:]

    for item in items:
        parts = item.split("\n\n")
        if len(parts) >= 2:
            pieces.extend(f"- {subitem.strip()}\n" for subitem in parts[1:])
        else:
            pieces.append(f"- {item.strip()}\n")
        # NOTE(review): the source's indentation was lost; the blank-line
        # separator is assumed to follow every item rather than only the
        # last one -- confirm against the original file.
        pieces.append("\n")

    # Strip double quotes once over the assembled text instead of re-scanning
    # the growing string.
    return "".join(pieces).replace('"', "")
|
|
51 |
|
|
|
52 |
|
|
|
53 |
pubmed_id = st.text_input("Enter the PubMed ID", placeholder="23747889")

# Option label -> name of the engine method that produces it, grouped by how
# the result is rendered. The heading shown for an option is always
# "## <label>".

# Called with no arguments; result shown with st.write.
_PLAIN_EXTRACTORS = {
    "PubMed ID": "extract_pubmed_id",
    "Title": "extract_title",
    "Abstract": "extract_abstract",
    "Authors": "extract_authors",
    "Full text": "extract_full_text",
    "Journal": "extract_journal",
}

# Shown twice: as a dataframe (dataframe=True) and as JSON (no arguments).
# NOTE(review): the original used engine.genes() / engine.diseases() for the
# JSON view of Genes and Diseases, while every other entity used its
# extract_* method for both views. Those two calls are assumed to be typos --
# confirm against OnlineAItrika.
_ENTITY_EXTRACTORS = {
    "Genes": "extract_genes",
    "Diseases": "extract_diseases",
    "Associations between genes and diseases": "extract_associations",
    "Species": "extract_species",
    "Mutations": "extract_mutations",
    "Chemicals": "extract_chemicals",
}

# Called with llm=llm; result shown with st.write.
_LLM_TEXT_EXTRACTORS = {
    "Results": "extract_results",
    "Bibliography": "extract_bibliography",
    "Acknowledgements": "extract_acknowledgements",
    "Introduction": "extract_introduction",
    "Number of participants": "extract_number_of_participants",
}

# Called with llm=llm; result split on "---" and rendered as a Markdown list.
_LLM_LIST_EXTRACTORS = {
    "Methods": "extract_methods",
    "Paper results": "extract_paper_results",
    "Characteristics of participants": "extract_characteristics_of_participants",
}

if pubmed_id:
    engine = OnlineAItrika(pubmed_id=pubmed_id)
    documents = generate_documents(content=engine.extract_abstract())
    # NOTE(review): an OpenAILLM is given the GROQ_API_KEY variable -- confirm
    # this is the intended key for this backend.
    llm = OpenAILLM(documents=documents, api_key=os.getenv("GROQ_API_KEY"))

    with st.expander("Select the information that you want to extract: "):
        option = st.selectbox(
            " ",
            (
                "PubMed ID",
                "Title",
                "Abstract",
                "Authors",
                "Full text",
                "Journal",
                "Genes",
                "Diseases",
                "Species",
                "Chemicals",
                "Mutations",
                "Associations between genes and diseases",
                "Results",
                "Bibliography",
                "Methods",
                "Acknowledgements",
                "Introduction",
                "Paper results",
                "Number of participants",
                "Characteristics of participants",
            ),
        )

    # NOTE(review): the source's indentation was lost; results are assumed to
    # render below the expander rather than inside it -- confirm.
    st.markdown(f"## {option}")
    if option in _PLAIN_EXTRACTORS:
        st.write(getattr(engine, _PLAIN_EXTRACTORS[option])())
    elif option in _ENTITY_EXTRACTORS:
        extract = getattr(engine, _ENTITY_EXTRACTORS[option])
        st.dataframe(extract(dataframe=True))
        st.json(extract())
    elif option in _LLM_TEXT_EXTRACTORS:
        st.write(getattr(engine, _LLM_TEXT_EXTRACTORS[option])(llm=llm))
    elif option in _LLM_LIST_EXTRACTORS:
        sections = getattr(engine, _LLM_LIST_EXTRACTORS[option])(llm=llm).split("---")
        st.markdown(format_list_to_markdown(sections))