[3af7d7]: / aiagents4pharma / talk2biomodels / tools / query_article.py

Download this file

64 lines (56 with data), 2.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python3
"""
Tool for asking questions to the article.
"""
import logging
from typing import Type, Annotated
from pydantic import BaseModel, Field
from langchain_core.tools import BaseTool
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import PyPDFLoader
from langgraph.prebuilt import InjectedState
# Initialize logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class QueryArticleInput(BaseModel):
    """
    Input schema for the query_article tool.

    Attributes are validated by pydantic before the tool runs.
    """
    # Free-text question the user wants answered from the loaded article.
    question: Annotated[str, Field(description="User question to search articles.")]
    # Graph state injected by langgraph at call time (not supplied by the LLM);
    # downstream code reads 'pdf_file_name' and 'text_embedding_model' from it.
    state: Annotated[dict, InjectedState]
# Note: It's important that every field has type hints. BaseTool is a
# Pydantic class and not having type hints can lead to unexpected behavior.
class QueryArticle(BaseTool):
    """
    Tool to ask questions to the article.

    Loads the PDF referenced by the injected graph state, embeds its pages
    into an in-memory vector store, and returns the page content most
    similar to the user's question.
    """
    name: str = "query_article"
    description: str = "Ask questions to the article."
    args_schema: Type[BaseModel] = QueryArticleInput

    def _run(self,
             question: str,
             state: Annotated[dict, InjectedState]) -> str:
        """
        Run the tool.

        Args:
            question (str): User question to search the article with.
            state (dict): Injected graph state; must contain
                'pdf_file_name' (path/URL of the PDF to load) and
                'text_embedding_model' (embedding model for the vector store).

        Returns:
            str: Page contents of the most similar pages, joined by newlines.
        """
        # Use logger.info(...) rather than logger.log(logging.INFO, ...)
        logger.info("loading the article from %s", state['pdf_file_name'])
        logger.info("searching the article with the question: %s", question)
        # Load all pages of the article (lazy_load yields pages one at a time)
        loader = PyPDFLoader(state['pdf_file_name'])
        pages = list(loader.lazy_load())
        # Embedding model is supplied through the injected state
        text_embedding_model = state['text_embedding_model']
        logger.info("Loaded text embedding model %s", text_embedding_model)
        # Build an in-memory vector store over the article pages
        vector_store = InMemoryVectorStore.from_documents(
            pages,
            text_embedding_model)
        # Retrieve the pages most similar to the question
        docs = vector_store.similarity_search(question)
        # Return the concatenated content of the retrieved pages
        return "\n".join(doc.page_content for doc in docs)