1use crate::common::PubMedId;
4use crate::error::{ParseError, PubMedError, Result};
5use crate::pubmed::models::ArticleSummary;
6use crate::pubmed::query::SortOrder;
7use crate::pubmed::responses::{ESummaryDocSum, ESummaryResponse};
8use tracing::{debug, info, instrument, warn};
9
10use super::PubMedClient;
11
12impl PubMedClient {
13 #[instrument(skip(self), fields(pmids_count = pmids.len()))]
43 pub async fn fetch_summaries(&self, pmids: &[&str]) -> Result<Vec<ArticleSummary>> {
44 if pmids.is_empty() {
45 return Ok(Vec::new());
46 }
47
48 let validated: Vec<u32> = pmids
50 .iter()
51 .map(|pmid| {
52 PubMedId::parse(pmid)
53 .map(|p| p.as_u32())
54 .map_err(PubMedError::from)
55 })
56 .collect::<Result<Vec<_>>>()?;
57
58 const BATCH_SIZE: usize = 200;
59
60 let mut all_summaries = Vec::with_capacity(pmids.len());
61
62 for chunk in validated.chunks(BATCH_SIZE) {
63 let id_list: String = chunk
64 .iter()
65 .map(|id| id.to_string())
66 .collect::<Vec<_>>()
67 .join(",");
68
69 let url = format!(
70 "{}/esummary.fcgi?db=pubmed&id={}&retmode=json",
71 self.base_url, id_list
72 );
73
74 debug!(
75 batch_size = chunk.len(),
76 "Making batch ESummary API request"
77 );
78 let response = self.make_request(&url).await?;
79 let json_text = response.text().await?;
80
81 if json_text.trim().is_empty() {
82 continue;
83 }
84
85 let summaries = Self::parse_esummary_response(&json_text)?;
86 info!(
87 requested = chunk.len(),
88 parsed = summaries.len(),
89 "ESummary batch completed"
90 );
91 all_summaries.extend(summaries);
92 }
93
94 Ok(all_summaries)
95 }
96
97 #[instrument(skip(self), fields(pmid = %pmid))]
121 pub async fn fetch_summary(&self, pmid: &str) -> Result<ArticleSummary> {
122 let mut summaries = self.fetch_summaries(&[pmid]).await?;
123
124 if summaries.len() == 1 {
125 Ok(summaries.remove(0))
126 } else {
127 let idx = summaries.iter().position(|s| s.pmid == pmid);
128 match idx {
129 Some(i) => Ok(summaries.remove(i)),
130 None => Err(ParseError::ArticleNotFound {
131 pmid: pmid.to_string(),
132 }
133 .into()),
134 }
135 }
136 }
137
138 pub async fn search_and_fetch_summaries(
169 &self,
170 query: &str,
171 limit: usize,
172 sort: Option<&SortOrder>,
173 ) -> Result<Vec<ArticleSummary>> {
174 let pmids = self.search_articles(query, limit, sort).await?;
175
176 let pmid_refs: Vec<&str> = pmids.iter().map(|s| s.as_str()).collect();
177 self.fetch_summaries(&pmid_refs).await
178 }
179
180 pub(crate) fn parse_esummary_response(json_text: &str) -> Result<Vec<ArticleSummary>> {
182 let response: ESummaryResponse =
183 serde_json::from_str(json_text).map_err(|e| PubMedError::from(ParseError::from(e)))?;
184
185 let result = &response.result;
186
187 let uids = result
189 .get("uids")
190 .and_then(|v| v.as_array())
191 .map(|arr| {
192 arr.iter()
193 .filter_map(|v| v.as_str().map(|s| s.to_string()))
194 .collect::<Vec<_>>()
195 })
196 .unwrap_or_default();
197
198 let mut summaries = Vec::with_capacity(uids.len());
199
200 for uid in &uids {
201 let Some(doc_value) = result.get(uid) else {
202 warn!(uid = %uid, "UID not found in ESummary response");
203 continue;
204 };
205
206 if doc_value.get("error").is_some() {
208 warn!(uid = %uid, "ESummary returned error for UID");
209 continue;
210 }
211
212 let doc: ESummaryDocSum = match serde_json::from_value(doc_value.clone()) {
213 Ok(d) => d,
214 Err(e) => {
215 warn!(uid = %uid, error = %e, "Failed to parse ESummary document");
216 continue;
217 }
218 };
219
220 let mut doi = None;
222 let mut pmc_id = None;
223 for aid in &doc.articleids {
224 match aid.idtype.as_str() {
225 "doi" => {
226 if !aid.value.is_empty() {
227 doi = Some(aid.value.clone());
228 }
229 }
230 "pmc" => {
231 if !aid.value.is_empty() {
232 pmc_id = Some(aid.value.clone());
233 }
234 }
235 _ => {}
236 }
237 }
238
239 let author_names: Vec<String> = doc.authors.iter().map(|a| a.name.clone()).collect();
240
241 summaries.push(ArticleSummary {
242 pmid: doc.uid,
243 title: doc.title,
244 authors: author_names,
245 journal: doc.source,
246 full_journal_name: doc.fulljournalname,
247 pub_date: doc.pubdate,
248 epub_date: doc.epubdate,
249 doi,
250 pmc_id,
251 volume: doc.volume,
252 issue: doc.issue,
253 pages: doc.pages,
254 languages: doc.lang,
255 pub_types: doc.pubtype,
256 issn: doc.issn,
257 essn: doc.essn,
258 sort_pub_date: doc.sortpubdate,
259 pmc_ref_count: doc.pmcrefcount,
260 record_status: doc.recordstatus,
261 });
262 }
263
264 Ok(summaries)
265 }
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated document maps onto every `ArticleSummary` field.
    #[test]
    fn test_parse_esummary_response_basic() {
        let payload = r#"{"result":{"uids":["31978945"],"31978945":{"uid":"31978945","pubdate":"2020 Feb","epubdate":"2020 Jan 24","source":"N Engl J Med","authors":[{"name":"Zhu N","authtype":"Author","clusterid":""},{"name":"Zhang D","authtype":"Author","clusterid":""}],"title":"A Novel Coronavirus from Patients with Pneumonia in China, 2019.","sorttitle":"novel coronavirus","volume":"382","issue":"8","pages":"727-733","lang":["eng"],"issn":"0028-4793","essn":"1533-4406","pubtype":["Journal Article"],"articleids":[{"idtype":"pubmed","idtypen":1,"value":"31978945"},{"idtype":"doi","idtypen":3,"value":"10.1056/NEJMoa2001017"},{"idtype":"pmc","idtypen":8,"value":"PMC7092803"}],"fulljournalname":"The New England journal of medicine","sortpubdate":"2020/02/20 00:00","pmcrefcount":14123,"recordstatus":"PubMed - indexed for MEDLINE"}}}"#;

        let parsed = PubMedClient::parse_esummary_response(payload).unwrap();
        assert_eq!(parsed.len(), 1);

        let article = &parsed[0];

        // Identity and bibliographic headline.
        assert_eq!(article.pmid, "31978945");
        assert_eq!(
            article.title,
            "A Novel Coronavirus from Patients with Pneumonia in China, 2019."
        );
        assert_eq!(article.authors, vec!["Zhu N", "Zhang D"]);

        // Journal and dates.
        assert_eq!(article.journal, "N Engl J Med");
        assert_eq!(
            article.full_journal_name,
            "The New England journal of medicine"
        );
        assert_eq!(article.pub_date, "2020 Feb");
        assert_eq!(article.epub_date, "2020 Jan 24");

        // Identifiers extracted from `articleids`.
        assert_eq!(article.doi.as_deref(), Some("10.1056/NEJMoa2001017"));
        assert_eq!(article.pmc_id.as_deref(), Some("PMC7092803"));

        // Citation details.
        assert_eq!(article.volume, "382");
        assert_eq!(article.issue, "8");
        assert_eq!(article.pages, "727-733");
        assert_eq!(article.languages, vec!["eng"]);
        assert_eq!(article.pub_types, vec!["Journal Article"]);
        assert_eq!(article.issn, "0028-4793");
        assert_eq!(article.essn, "1533-4406");

        // Remaining metadata.
        assert_eq!(article.sort_pub_date, "2020/02/20 00:00");
        assert_eq!(article.pmc_ref_count, 14123);
        assert_eq!(article.record_status, "PubMed - indexed for MEDLINE");
    }

    /// Documents come back in the order given by the `uids` array.
    #[test]
    fn test_parse_esummary_response_multiple_uids() {
        let payload = r#"{"result":{"uids":["31978945","33515491"],"31978945":{"uid":"31978945","pubdate":"2020 Feb","epubdate":"","source":"N Engl J Med","authors":[{"name":"Zhu N","authtype":"Author","clusterid":""}],"title":"Article One","volume":"382","issue":"8","pages":"727-733","lang":["eng"],"issn":"","essn":"","pubtype":[],"articleids":[],"fulljournalname":"N Engl J Med","sortpubdate":"","pmcrefcount":0,"recordstatus":""},"33515491":{"uid":"33515491","pubdate":"2021 Jan","epubdate":"","source":"Science","authors":[{"name":"Smith J","authtype":"Author","clusterid":""}],"title":"Article Two","volume":"371","issue":"6526","pages":"120-125","lang":["eng"],"issn":"","essn":"","pubtype":[],"articleids":[{"idtype":"doi","idtypen":3,"value":"10.1126/science.abc123"}],"fulljournalname":"Science","sortpubdate":"","pmcrefcount":100,"recordstatus":""}}}"#;

        let parsed = PubMedClient::parse_esummary_response(payload).unwrap();
        assert_eq!(parsed.len(), 2);

        let (first, second) = (&parsed[0], &parsed[1]);
        assert_eq!(first.pmid, "31978945");
        assert_eq!(first.title, "Article One");
        assert_eq!(second.pmid, "33515491");
        assert_eq!(second.title, "Article Two");
        assert_eq!(second.doi.as_deref(), Some("10.1126/science.abc123"));
    }

    /// An empty `uids` array produces no summaries.
    #[test]
    fn test_parse_esummary_response_empty() {
        let payload = r#"{"result": {"uids": []}}"#;
        let parsed = PubMedClient::parse_esummary_response(payload).unwrap();
        assert!(parsed.is_empty());
    }

    /// A document carrying an `error` key is skipped instead of failing the parse.
    #[test]
    fn test_parse_esummary_response_with_error_uid() {
        let payload = r#"{"result":{"uids":["99999999999"],"99999999999":{"uid":"99999999999","error":"cannot get document summary"}}}"#;

        let parsed = PubMedClient::parse_esummary_response(payload).unwrap();
        assert!(parsed.is_empty());
    }

    /// Without `doi`/`pmc` article IDs the optional identifiers stay `None`.
    #[test]
    fn test_parse_esummary_response_no_doi_no_pmc() {
        let payload = r#"{"result":{"uids":["12345678"],"12345678":{"uid":"12345678","pubdate":"2020","epubdate":"","source":"Some Journal","authors":[],"title":"Test Article","volume":"","issue":"","pages":"","lang":[],"issn":"","essn":"","pubtype":[],"articleids":[{"idtype":"pubmed","idtypen":1,"value":"12345678"}],"fulljournalname":"Some Journal","sortpubdate":"","pmcrefcount":0,"recordstatus":""}}}"#;

        let parsed = PubMedClient::parse_esummary_response(payload).unwrap();
        assert_eq!(parsed.len(), 1);

        let article = &parsed[0];
        assert!(article.doi.is_none());
        assert!(article.pmc_id.is_none());
        assert!(article.authors.is_empty());
    }

    /// An empty PMID slice returns an empty result.
    #[tokio::test]
    async fn test_fetch_summaries_empty_input() {
        let client = PubMedClient::new();
        let outcome = client.fetch_summaries(&[]).await;
        assert!(outcome.is_ok());

        let summaries = outcome.expect("empty input should return empty summaries");
        assert!(summaries.is_empty());
    }

    /// A PMID that is not numeric yields an error.
    #[tokio::test]
    async fn test_fetch_summaries_invalid_pmid() {
        let client = PubMedClient::new();
        let outcome = client.fetch_summaries(&["not_a_number"]).await;
        assert!(outcome.is_err());
    }
}