#!/usr/bin/env python3
"""
Agent for interacting with PDF documents via question and answer.

This module initializes and compiles a LangGraph application that enables users to query PDF
documents using a question_and_answer tool. It integrates a language model and follows
the ReAct pattern to process and answer queries related to PDF content.

Usage:
    >>> app = get_app("unique_thread_id", llm_model)
    >>> response = app.invoke(initial_state)
"""

import logging

import hydra
from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, create_react_agent

from ..state.state_talk2scholars import Talk2Scholars
from ..tools.pdf.question_and_answer import question_and_answer_tool
from ..tools.s2.query_results import query_results

# Initialize logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_app(uniq_id: str, llm_model: BaseChatModel):
    """
    Initializes and returns the LangGraph application for the PDF agent.

    This function sets up the PDF agent by loading configuration settings via Hydra,
    creating a ReAct agent around the supplied language model, and wiring that agent
    into a single-node workflow graph so it can interactively query and process
    PDF documents.

    Args:
        uniq_id (str): A unique identifier for the current conversation session or thread.
        llm_model (BaseChatModel): The language model instance to be used. Required;
            callers choose the concrete chat model (any BaseChatModel implementation).

    Returns:
        StateGraph: A compiled LangGraph application capable of handling PDF interactions.

    Example:
        >>> app = get_app("thread_123", llm_model)
        >>> result = app.invoke(initial_state)
    """
    # Load the pdf_agent section of the project configuration using Hydra.
    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(
            config_name="config",
            overrides=["agents/talk2scholars/pdf_agent=default"],
        )
        cfg = cfg.agents.talk2scholars.pdf_agent
        logger.info("Loaded pdf_agent configuration.")

    # Tools exposed to the agent: PDF QnA plus Semantic Scholar query-results lookup.
    tools = ToolNode([question_and_answer_tool, query_results])

    # llm_model may be any BaseChatModel implementation, not necessarily OpenAI.
    logger.info("Using model %s", llm_model)

    # Create the ReAct agent *before* defining the node function so the closure
    # below reads an already-bound name (the original bound `model` after the
    # closure and relied on late binding).
    # NOTE(review): this inner MemorySaver is separate from the one used to
    # compile the outer graph below; kept as-is to preserve behavior — confirm
    # whether a single shared checkpointer is intended.
    model = create_react_agent(
        llm_model,
        tools=tools,
        state_schema=Talk2Scholars,
        prompt=cfg.pdf_agent,
        checkpointer=MemorySaver(),
    )

    def agent_pdf_node(state: Talk2Scholars):
        """
        Processes the current state by invoking the language model for PDF question and answer.

        Args:
            state (Talk2Scholars): The current conversation state containing query details
                and context.

        Returns:
            Any: The response generated by the language model after processing the state.
        """
        logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
        # Route the per-thread id through the configurable so the agent's
        # checkpointer keys its memory by conversation thread.
        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
        return response

    # Define a single-node workflow graph with the shared state schema.
    workflow = StateGraph(Talk2Scholars)
    workflow.add_node("agent_pdf", agent_pdf_node)
    workflow.add_edge(START, "agent_pdf")

    # Initialize memory to persist state between runs of the outer graph.
    checkpointer = MemorySaver()

    # Compile the graph into a runnable app.
    app = workflow.compile(checkpointer=checkpointer, name="agent_pdf")
    logger.info("Compiled the PDF agent graph.")

    return app