[9d3784]: / aiagents4pharma / talk2scholars / tools / zotero / utils / read_helper.py

Download this file

168 lines (141 with data), 6.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
"""
Utility for zotero read tool.
"""
import logging
from typing import Any, Dict, List
import hydra
from pyzotero import zotero
from .zotero_path import get_item_collections
# Configure logging
# NOTE(review): basicConfig() runs as an import-time side effect; per the
# stdlib docs it only installs a handler when the root logger has none yet,
# so an application that configured logging earlier is not overridden.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by ZoteroSearchData.
logger = logging.getLogger(__name__)
class ZoteroSearchData:
    """Helper class to organize Zotero search-related data.

    Constructing an instance loads the hydra configuration, creates the
    pyzotero client and builds the item-to-collection mapping via
    ``get_item_collections``. Call :meth:`process_search` to run the query
    and :meth:`get_search_results` to read the outcome.
    """

    def __init__(
        self,
        query: str,
        only_articles: bool,
        limit: int,
        tool_call_id: str,
    ):
        """Store the search parameters and set up the Zotero client.

        Args:
            query: Free-text search string; blank means "fetch everything".
            only_articles: Selects the configured ``filter_item_types`` list.
            limit: Maximum number of items requested for a non-empty query.
            tool_call_id: Identifier of the tool call; stored, not used here.
        """
        self.query = query
        self.only_articles = only_articles
        self.limit = limit
        self.tool_call_id = tool_call_id
        # Order matters: the client needs the config, and the collection
        # lookup needs the client.
        self.cfg = self._load_config()
        self.zot = self._init_zotero_client()
        self.item_to_collections = get_item_collections(self.zot)
        # Filled by process_search(): item key -> formatted paper metadata.
        self.filtered_papers: Dict[str, Any] = {}
        self.content = ""

    def process_search(self) -> None:
        """Process the search request and prepare results.

        Raises:
            RuntimeError: If fetching fails, or no items / no usable papers
                are returned.
        """
        items = self._fetch_items()
        self._filter_and_format_papers(items)
        self._create_content()

    def get_search_results(self) -> Dict[str, Any]:
        """Get the search results and content.

        Returns:
            A dict with ``"filtered_papers"`` (key -> metadata) and
            ``"content"`` (the human-readable summary).
        """
        return {
            "filtered_papers": self.filtered_papers,
            "content": self.content,
        }

    def _load_config(self) -> Any:
        """Load hydra configuration and return its ``tools.zotero_read`` node."""
        with hydra.initialize(version_base=None, config_path="../../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/zotero_read=default"]
            )
            logger.info("Loaded configuration for Zotero search tool")
            return cfg.tools.zotero_read

    def _init_zotero_client(self) -> "zotero.Zotero":
        """Initialize Zotero client from the loaded configuration."""
        logger.info(
            "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
            self.query,
            self.only_articles,
            self.limit,
        )
        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)

    def _fetch_items(self) -> List[Dict[str, Any]]:
        """Fetch items from Zotero.

        A blank query fetches up to ``cfg.zotero.max_limit`` items; otherwise
        the request is capped at ``min(limit, max_limit)``.

        Raises:
            RuntimeError: If the request fails or returns no items.
        """
        max_limit = self.cfg.zotero.max_limit
        try:
            if not self.query.strip():
                logger.info(
                    "Empty query provided, fetching all items up to max_limit: %d",
                    max_limit,
                )
                items = self.zot.items(limit=max_limit)
            else:
                items = self.zot.items(
                    q=self.query, limit=min(self.limit, max_limit)
                )
        except Exception as e:
            # Boundary with the remote API: convert any failure into a
            # single error type carrying a retry hint for the caller.
            logger.error("Failed to fetch items from Zotero: %s", e)
            raise RuntimeError(
                "Failed to fetch items from Zotero. Please retry the same query."
            ) from e

        logger.info("Received %d items from Zotero", len(items))
        if not items:
            logger.error("No items returned from Zotero for query: '%s'", self.query)
            raise RuntimeError(
                "No items returned from Zotero. Please retry the same query."
            )
        return items

    @staticmethod
    def _format_authors(creators: Any) -> List[str]:
        """Return display names of all creators whose type is ``author``.

        Two-field creators are rendered as "firstName lastName". Single-field
        creators (e.g. institutions, stored under ``name``) fall back to that
        field instead of producing an empty string. Non-dict entries and
        creators without any usable name are skipped.
        """
        authors: List[str] = []
        for creator in creators or []:
            if not isinstance(creator, dict):
                continue
            if creator.get("creatorType") != "author":
                continue
            # ``or ''`` guards against explicit nulls rendering as "None".
            first = creator.get("firstName") or ""
            last = creator.get("lastName") or ""
            full_name = f"{first} {last}".strip()
            if not full_name:
                full_name = str(creator.get("name") or "").strip()
            if full_name:
                authors.append(full_name)
        return authors

    def _filter_and_format_papers(self, items: List[Dict[str, Any]]) -> None:
        """Filter and format papers from items into ``self.filtered_papers``.

        Items that are not dicts, lack a dict ``data`` payload, or lack a
        ``key`` are skipped.

        Raises:
            RuntimeError: If no item survives the checks above.
        """
        filter_item_types = (
            self.cfg.zotero.filter_item_types if self.only_articles else []
        )
        # NOTE(review): filter_item_types is only logged here and never
        # applied, so only_articles currently has no effect on the result.
        # Confirm whether the list is meant to include or exclude types
        # before wiring it in.
        logger.debug("Filtering item types: %s", filter_item_types)

        for item in items:
            if not isinstance(item, dict):
                continue
            data = item.get("data")
            if not isinstance(data, dict):
                continue

            item_type = data.get("itemType", "N/A")
            logger.debug("Item type: %s", item_type)

            key = data.get("key")
            if not key:
                continue

            collection_paths = self.item_to_collections.get(key, ["/Unknown"])
            self.filtered_papers[key] = {
                "Title": data.get("title", "N/A"),
                "Abstract": data.get("abstractNote", "N/A"),
                "Publication Date": data.get("date", "N/A"),
                "URL": data.get("url", "N/A"),
                "Type": item_type if isinstance(item_type, str) else "N/A",
                "Collections": collection_paths,
                "Citation Count": data.get("citationCount", "N/A"),
                "Venue": data.get("venue", "N/A"),
                "Publication Venue": data.get("publicationTitle", "N/A"),
                "Journal Name": data.get("journalAbbreviation", "N/A"),
                "Authors": self._format_authors(data.get("creators")),
            }

        if not self.filtered_papers:
            logger.error(
                "No matching papers returned from Zotero for query: '%s'", self.query
            )
            raise RuntimeError(
                "No matching papers returned from Zotero. Please retry the same query."
            )
        logger.info("Filtered %d items", len(self.filtered_papers))

    def _create_content(self) -> None:
        """Create the content message for the response.

        The summary lists the total paper count, the query, and the first two
        papers (in insertion order) as "N. Title (Type)".
        """
        top_papers = list(self.filtered_papers.values())[:2]
        top_papers_info = "\n".join(
            f"{i + 1}. {paper['Title']} ({paper['Type']})"
            for i, paper in enumerate(top_papers)
        )

        self.content = "Retrieval was successful. Papers are attached as an artifact."
        self.content += " And here is a summary of the retrieval results:\n"
        self.content += f"Number of papers found: {len(self.filtered_papers)}\n"
        self.content += f"Query: {self.query}\n"
        self.content += "Here are a few of these papers:\n" + top_papers_info