[3af7d7]: / aiagents4pharma / talk2scholars / tests / test_paper_download_tools.py

Download this file

155 lines (130 with data), 5.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Unit tests for arXiv paper downloading functionality, including:
- AbstractPaperDownloader (base class)
- ArxivPaperDownloader (arXiv-specific implementation)
- download_arxiv_paper tool function.
"""
from unittest.mock import patch, MagicMock
import pytest
import requests
from requests.exceptions import HTTPError
from langgraph.types import Command
from langchain_core.messages import ToolMessage
# Import the classes and function under test
from aiagents4pharma.talk2scholars.tools.paper_download.abstract_downloader import (
AbstractPaperDownloader,
)
from aiagents4pharma.talk2scholars.tools.paper_download.arxiv_downloader import (
ArxivPaperDownloader,
)
from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
download_arxiv_paper,
)
@pytest.mark.parametrize("class_obj", [AbstractPaperDownloader])
def test_abstract_downloader_cannot_be_instantiated(class_obj):
    """
    Check that calling the parametrized class with no arguments raises TypeError.

    The only parametrized case is AbstractPaperDownloader, which this test
    expects to refuse direct instantiation.
    """
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        class_obj()
@pytest.fixture(name="arxiv_downloader_fixture")
def fixture_arxiv_downloader():
    """
    Provide a fresh ArxivPaperDownloader built with no constructor arguments.

    The fixture is registered with pytest as ``arxiv_downloader_fixture``.

    NOTE(review): pytest ignores marks applied to fixture functions (and newer
    releases warn or error on them), so a ``usefixtures`` mark on this fixture
    would never activate ``mock_hydra_config_setup``. If a mocked Hydra config
    is actually required here, request that fixture as a parameter of this
    function instead -- confirm first that such a fixture is defined.
    """
    return ArxivPaperDownloader()
def test_fetch_metadata_success(arxiv_downloader_fixture):
    """
    Check fetch_metadata on the happy path.

    ``requests.get`` is replaced by a mock returning a canned response; the test
    verifies the exact query URL and timeout used, and that the response text is
    exposed under the ``"xml"`` key of the result.
    """
    expected_xml = "<xml>Mock ArXiv Metadata</xml>"
    fake_response = MagicMock(text=expected_xml)
    fake_response.raise_for_status = MagicMock()
    http_get_patch = patch.object(requests, "get", return_value=fake_response)

    with http_get_patch as mock_get:
        metadata = arxiv_downloader_fixture.fetch_metadata("1234.5678")

        mock_get.assert_called_once_with(
            "http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
            timeout=10,
        )
        assert metadata["xml"] == expected_xml
def test_fetch_metadata_http_error(arxiv_downloader_fixture):
    """
    Check that an HTTPError raised by the response's raise_for_status()
    propagates out of fetch_metadata.
    """
    failing_response = MagicMock()
    failing_response.raise_for_status.side_effect = HTTPError("Mocked HTTP failure")
    http_get_patch = patch.object(requests, "get", return_value=failing_response)

    # Both context managers are entered in order: patch first, then the raises check.
    with http_get_patch, pytest.raises(HTTPError):
        arxiv_downloader_fixture.fetch_metadata("invalid_id")
def test_download_pdf_success(arxiv_downloader_fixture):
    """
    Check download_pdf on the happy path.

    fetch_metadata is patched to return an Atom feed containing a ``pdf`` link,
    and ``requests.get`` is patched to return a streamed response yielding one
    chunk. The test verifies the returned PDF bytes, URL and arXiv id, and that
    the PDF URL was requested exactly once with ``stream=True`` and ``timeout=10``.
    """
    pdf_url = "http://test.arxiv.org/pdf/1234.5678v1.pdf"
    pdf_bytes = b"FAKE_PDF_CONTENT"

    # Feed text assembled line by line; the resulting string is the canned metadata.
    feed_xml = (
        "\n"
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "<entry>\n"
        '<link title="pdf" href="http://test.arxiv.org/pdf/1234.5678v1.pdf"/>\n'
        "</entry>\n"
        "</feed>\n"
    )
    canned_metadata = {"xml": feed_xml}

    def _single_chunk(chunk_size):
        """Stand-in for Response.iter_content that ignores the chunk size."""
        return [pdf_bytes]

    streamed_response = MagicMock()
    streamed_response.raise_for_status = MagicMock()
    streamed_response.iter_content = _single_chunk

    metadata_patch = patch.object(
        arxiv_downloader_fixture, "fetch_metadata", return_value=canned_metadata
    )
    http_get_patch = patch.object(requests, "get", return_value=streamed_response)

    with metadata_patch, http_get_patch as mock_get:
        download = arxiv_downloader_fixture.download_pdf("1234.5678")

        assert download["pdf_object"] == pdf_bytes
        assert download["pdf_url"] == pdf_url
        assert download["arxiv_id"] == "1234.5678"
        mock_get.assert_called_once_with(
            pdf_url,
            stream=True,
            timeout=10,
        )
def test_download_pdf_no_pdf_link(arxiv_downloader_fixture):
    """
    Check that download_pdf raises RuntimeError (message matching
    "Failed to download PDF") when the metadata XML is an empty feed with no
    pdf link.
    """
    feed_without_link = {"xml": "<feed></feed>"}
    metadata_patch = patch.object(
        arxiv_downloader_fixture, "fetch_metadata", return_value=feed_without_link
    )
    expected_failure = pytest.raises(RuntimeError, match="Failed to download PDF")

    with metadata_patch, expected_failure:
        arxiv_downloader_fixture.download_pdf("1234.5678")
def test_download_arxiv_paper_tool_success(arxiv_downloader_fixture):
    """
    Check the download_arxiv_paper tool end to end with a stubbed downloader.

    The ArxivPaperDownloader name inside the tool module is patched to return
    the fixture instance, whose fetch_metadata and download_pdf are stubbed.
    The tool must return a Command whose update carries the PDF payload under
    ``pdf_data`` and exactly one ToolMessage reporting success for the given id.
    """
    arxiv_id = "9999.8888"
    canned_metadata = {"xml": "<mockxml></mockxml>"}
    canned_pdf = {
        "pdf_object": b"FAKE_PDF_CONTENT",
        "pdf_url": "http://test.arxiv.org/mock.pdf",
        "arxiv_id": "9999.8888",
    }

    downloader_cls_patch = patch(
        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input."
        "ArxivPaperDownloader",
        return_value=arxiv_downloader_fixture,
    )
    metadata_patch = patch.object(
        arxiv_downloader_fixture, "fetch_metadata", return_value=canned_metadata
    )
    pdf_patch = patch.object(
        arxiv_downloader_fixture, "download_pdf", return_value=canned_pdf
    )

    with downloader_cls_patch, metadata_patch, pdf_patch:
        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": "test_tool_call"}
        command_result = download_arxiv_paper.invoke(tool_input)

    assert isinstance(command_result, Command)
    state_update = command_result.update
    assert "pdf_data" in state_update
    assert state_update["pdf_data"] == canned_pdf

    messages = state_update.get("messages", [])
    assert len(messages) == 1
    only_message = messages[0]
    assert isinstance(only_message, ToolMessage)
    assert "Successfully downloaded PDF" in only_message.content
    assert arxiv_id in only_message.content