AIAgents4Pharma / Git / [3af7d7] /aiagents4pharma/talk2scholars/tests/test_paper_download

Models:
Amanda-D/
AIAgents4Pharma
Downloads: 1
[3af7d7]: / aiagents4pharma / talk2scholars / tests / test_paper_download_tools.py
History
Download this file
155 lines (130 with data), 5.8 kB

"""
Unit tests for arXiv paper downloading functionality, including:
- AbstractPaperDownloader (base class)
- ArxivPaperDownloader (arXiv-specific implementation)
- download_arxiv_paper tool function.
"""

from unittest.mock import patch, MagicMock
import pytest
import requests
from requests.exceptions import HTTPError
from langgraph.types import Command
from langchain_core.messages import ToolMessage

# Import the classes and function under test
from aiagents4pharma.talk2scholars.tools.paper_download.abstract_downloader import (
    AbstractPaperDownloader,
)
from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
    ArxivPaperDownloader,
)
from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
    download_arxiv_paper,
)

@pytest.mark.parametrize("class_obj", [AbstractPaperDownloader])

def test_abstract_downloader_cannot_be_instantiated(class_obj):
    """
    Validates that AbstractPaperDownloader is indeed abstract and raises TypeError
    if anyone attempts to instantiate it directly.
    """
    with pytest.raises(TypeError):
        class_obj()


@pytest.fixture(name="arxiv_downloader_fixture")
@pytest.mark.usefixtures("mock_hydra_config_setup")
def fixture_arxiv_downloader():
    """
    Provides an ArxivPaperDownloader instance with a mocked Hydra config.
    """
    return ArxivPaperDownloader()


def test_fetch_metadata_success(arxiv_downloader_fixture,):
    """
    Ensures fetch_metadata retrieves XML data correctly, given a successful HTTP response.
    """
    mock_response = MagicMock()
    mock_response.text = "<xml>Mock ArXiv Metadata</xml>"
    mock_response.raise_for_status = MagicMock()

    with patch.object(requests, "get", return_value=mock_response) as mock_get:
        paper_id = "1234.5678"
        result = arxiv_downloader_fixture.fetch_metadata(paper_id)
        mock_get.assert_called_once_with(
            "http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
            timeout=10,
        )
        assert result["xml"] == "<xml>Mock ArXiv Metadata</xml>"


def test_fetch_metadata_http_error(arxiv_downloader_fixture):
    """
    Validates that fetch_metadata raises HTTPError when the response indicates a failure.
    """
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = HTTPError("Mocked HTTP failure")

    with patch.object(requests, "get", return_value=mock_response):
        with pytest.raises(HTTPError):
            arxiv_downloader_fixture.fetch_metadata("invalid_id")


def test_download_pdf_success(arxiv_downloader_fixture):
    """
    Tests that download_pdf fetches the PDF link from metadata and successfully
    retrieves the binary content.
    """
    mock_metadata = {
        "xml": """
        <feed xmlns="http://www.w3.org/2005/Atom">
            <entry>
                <link title="pdf" href="http://test.arxiv.org/pdf/1234.5678v1.pdf"/>
            </entry>
        </feed>
        """
    }

    mock_pdf_response = MagicMock()
    mock_pdf_response.raise_for_status = MagicMock()
    mock_pdf_response.iter_content = lambda chunk_size: [b"FAKE_PDF_CONTENT"]

    with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
        with patch.object(requests, "get", return_value=mock_pdf_response) as mock_get:
            result = arxiv_downloader_fixture.download_pdf("1234.5678")
            assert result["pdf_object"] == b"FAKE_PDF_CONTENT"
            assert result["pdf_url"] == "http://test.arxiv.org/pdf/1234.5678v1.pdf"
            assert result["arxiv_id"] == "1234.5678"
            mock_get.assert_called_once_with(
                "http://test.arxiv.org/pdf/1234.5678v1.pdf",
                stream=True,
                timeout=10,
            )


def test_download_pdf_no_pdf_link(arxiv_downloader_fixture):
    """
    Ensures a RuntimeError is raised if no <link> with title="pdf" is found in the XML.
    """
    mock_metadata = {"xml": "<feed></feed>"}

    with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
        with pytest.raises(RuntimeError, match="Failed to download PDF"):
            arxiv_downloader_fixture.download_pdf("1234.5678")


def test_download_arxiv_paper_tool_success(arxiv_downloader_fixture):
    """
    Validates download_arxiv_paper orchestrates the ArxivPaperDownloader correctly,
    returning a Command with PDF data and success messages.
    """
    mock_metadata = {"xml": "<mockxml></mockxml>"}
    mock_pdf_response = {
        "pdf_object": b"FAKE_PDF_CONTENT",
        "pdf_url": "http://test.arxiv.org/mock.pdf",
        "arxiv_id": "9999.8888",
    }

    with patch(
        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input."
        "ArxivPaperDownloader",
        return_value=arxiv_downloader_fixture,
    ):
        with patch.object(arxiv_downloader_fixture, "fetch_metadata", return_value=mock_metadata):
            with patch.object(
                arxiv_downloader_fixture,
                "download_pdf",
                return_value=mock_pdf_response,
            ):
                command_result = download_arxiv_paper.invoke(
                    {"arxiv_id": "9999.8888", "tool_call_id": "test_tool_call"}
                )

                assert isinstance(command_result, Command)
                assert "pdf_data" in command_result.update
                assert command_result.update["pdf_data"] == mock_pdf_response

                messages = command_result.update.get("messages", [])
                assert len(messages) == 1
                assert isinstance(messages[0], ToolMessage)
                assert "Successfully downloaded PDF" in messages[0].content
                assert "9999.8888" in messages[0].content