pubmed_client/pubmed/client/
summary.rs

1//! ESummary API operations for fetching lightweight article metadata
2
3use crate::common::PubMedId;
4use crate::error::{ParseError, PubMedError, Result};
5use crate::pubmed::models::ArticleSummary;
6use crate::pubmed::query::SortOrder;
7use crate::pubmed::responses::{ESummaryDocSum, ESummaryResponse};
8use tracing::{debug, info, instrument, warn};
9
10use super::PubMedClient;
11
12impl PubMedClient {
13    /// Fetch lightweight article summaries by PMIDs using the ESummary API
14    ///
15    /// Returns basic metadata (title, authors, journal, dates, DOI) without
16    /// abstracts, MeSH terms, or chemical lists. Faster than `fetch_articles()`
17    /// when you only need bibliographic overview data.
18    ///
19    /// # Arguments
20    ///
21    /// * `pmids` - Slice of PubMed IDs as strings
22    ///
23    /// # Returns
24    ///
25    /// Returns a `Result<Vec<ArticleSummary>>` containing lightweight article metadata
26    ///
27    /// # Example
28    ///
29    /// ```no_run
30    /// use pubmed_client::PubMedClient;
31    ///
32    /// #[tokio::main]
33    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
34    ///     let client = PubMedClient::new();
35    ///     let summaries = client.fetch_summaries(&["31978945", "33515491"]).await?;
36    ///     for summary in &summaries {
37    ///         println!("{}: {} ({})", summary.pmid, summary.title, summary.pub_date);
38    ///     }
39    ///     Ok(())
40    /// }
41    /// ```
42    #[instrument(skip(self), fields(pmids_count = pmids.len()))]
43    pub async fn fetch_summaries(&self, pmids: &[&str]) -> Result<Vec<ArticleSummary>> {
44        if pmids.is_empty() {
45            return Ok(Vec::new());
46        }
47
48        // Validate all PMIDs upfront
49        let validated: Vec<u32> = pmids
50            .iter()
51            .map(|pmid| {
52                PubMedId::parse(pmid)
53                    .map(|p| p.as_u32())
54                    .map_err(PubMedError::from)
55            })
56            .collect::<Result<Vec<_>>>()?;
57
58        const BATCH_SIZE: usize = 200;
59
60        let mut all_summaries = Vec::with_capacity(pmids.len());
61
62        for chunk in validated.chunks(BATCH_SIZE) {
63            let id_list: String = chunk
64                .iter()
65                .map(|id| id.to_string())
66                .collect::<Vec<_>>()
67                .join(",");
68
69            let url = format!(
70                "{}/esummary.fcgi?db=pubmed&id={}&retmode=json",
71                self.base_url, id_list
72            );
73
74            debug!(
75                batch_size = chunk.len(),
76                "Making batch ESummary API request"
77            );
78            let response = self.make_request(&url).await?;
79            let json_text = response.text().await?;
80
81            if json_text.trim().is_empty() {
82                continue;
83            }
84
85            let summaries = Self::parse_esummary_response(&json_text)?;
86            info!(
87                requested = chunk.len(),
88                parsed = summaries.len(),
89                "ESummary batch completed"
90            );
91            all_summaries.extend(summaries);
92        }
93
94        Ok(all_summaries)
95    }
96
97    /// Fetch a single article summary by PMID using the ESummary API
98    ///
99    /// # Arguments
100    ///
101    /// * `pmid` - PubMed ID as a string
102    ///
103    /// # Returns
104    ///
105    /// Returns a `Result<ArticleSummary>` containing lightweight article metadata
106    ///
107    /// # Example
108    ///
109    /// ```no_run
110    /// use pubmed_client::PubMedClient;
111    ///
112    /// #[tokio::main]
113    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
114    ///     let client = PubMedClient::new();
115    ///     let summary = client.fetch_summary("31978945").await?;
116    ///     println!("{}: {}", summary.pmid, summary.title);
117    ///     Ok(())
118    /// }
119    /// ```
120    #[instrument(skip(self), fields(pmid = %pmid))]
121    pub async fn fetch_summary(&self, pmid: &str) -> Result<ArticleSummary> {
122        let mut summaries = self.fetch_summaries(&[pmid]).await?;
123
124        if summaries.len() == 1 {
125            Ok(summaries.remove(0))
126        } else {
127            let idx = summaries.iter().position(|s| s.pmid == pmid);
128            match idx {
129                Some(i) => Ok(summaries.remove(i)),
130                None => Err(ParseError::ArticleNotFound {
131                    pmid: pmid.to_string(),
132                }
133                .into()),
134            }
135        }
136    }
137
138    /// Search and fetch lightweight summaries in a single operation
139    ///
140    /// Combines `search_articles()` and `fetch_summaries()`. Use this when you
141    /// only need basic metadata (title, authors, journal, dates) and want faster
142    /// retrieval than `search_and_fetch()`.
143    ///
144    /// # Arguments
145    ///
146    /// * `query` - Search query string
147    /// * `limit` - Maximum number of articles to fetch
148    ///
149    /// # Returns
150    ///
151    /// Returns a `Result<Vec<ArticleSummary>>` containing lightweight article metadata
152    ///
153    /// # Example
154    ///
155    /// ```no_run
156    /// use pubmed_client::PubMedClient;
157    ///
158    /// #[tokio::main]
159    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
160    ///     let client = PubMedClient::new();
161    ///     let summaries = client.search_and_fetch_summaries("covid-19 treatment", 20, None).await?;
162    ///     for summary in &summaries {
163    ///         println!("{}: {}", summary.pmid, summary.title);
164    ///     }
165    ///     Ok(())
166    /// }
167    /// ```
168    pub async fn search_and_fetch_summaries(
169        &self,
170        query: &str,
171        limit: usize,
172        sort: Option<&SortOrder>,
173    ) -> Result<Vec<ArticleSummary>> {
174        let pmids = self.search_articles(query, limit, sort).await?;
175
176        let pmid_refs: Vec<&str> = pmids.iter().map(|s| s.as_str()).collect();
177        self.fetch_summaries(&pmid_refs).await
178    }
179
180    /// Parse ESummary JSON response into ArticleSummary objects
181    pub(crate) fn parse_esummary_response(json_text: &str) -> Result<Vec<ArticleSummary>> {
182        let response: ESummaryResponse =
183            serde_json::from_str(json_text).map_err(|e| PubMedError::from(ParseError::from(e)))?;
184
185        let result = &response.result;
186
187        // Get the list of UIDs
188        let uids = result
189            .get("uids")
190            .and_then(|v| v.as_array())
191            .map(|arr| {
192                arr.iter()
193                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
194                    .collect::<Vec<_>>()
195            })
196            .unwrap_or_default();
197
198        let mut summaries = Vec::with_capacity(uids.len());
199
200        for uid in &uids {
201            let Some(doc_value) = result.get(uid) else {
202                warn!(uid = %uid, "UID not found in ESummary response");
203                continue;
204            };
205
206            // Check for error in individual document
207            if doc_value.get("error").is_some() {
208                warn!(uid = %uid, "ESummary returned error for UID");
209                continue;
210            }
211
212            let doc: ESummaryDocSum = match serde_json::from_value(doc_value.clone()) {
213                Ok(d) => d,
214                Err(e) => {
215                    warn!(uid = %uid, error = %e, "Failed to parse ESummary document");
216                    continue;
217                }
218            };
219
220            // Extract DOI and PMC ID from articleids
221            let mut doi = None;
222            let mut pmc_id = None;
223            for aid in &doc.articleids {
224                match aid.idtype.as_str() {
225                    "doi" => {
226                        if !aid.value.is_empty() {
227                            doi = Some(aid.value.clone());
228                        }
229                    }
230                    "pmc" => {
231                        if !aid.value.is_empty() {
232                            pmc_id = Some(aid.value.clone());
233                        }
234                    }
235                    _ => {}
236                }
237            }
238
239            let author_names: Vec<String> = doc.authors.iter().map(|a| a.name.clone()).collect();
240
241            summaries.push(ArticleSummary {
242                pmid: doc.uid,
243                title: doc.title,
244                authors: author_names,
245                journal: doc.source,
246                full_journal_name: doc.fulljournalname,
247                pub_date: doc.pubdate,
248                epub_date: doc.epubdate,
249                doi,
250                pmc_id,
251                volume: doc.volume,
252                issue: doc.issue,
253                pages: doc.pages,
254                languages: doc.lang,
255                pub_types: doc.pubtype,
256                issn: doc.issn,
257                essn: doc.essn,
258                sort_pub_date: doc.sortpubdate,
259                pmc_ref_count: doc.pmcrefcount,
260                record_status: doc.recordstatus,
261            });
262        }
263
264        Ok(summaries)
265    }
266}
267
// Unit tests for ESummary parsing and for the input handling of
// `fetch_summaries` that runs before any request is issued.
#[cfg(test)]
mod tests {
    use super::*;

    // A fully populated single-document response maps onto every
    // `ArticleSummary` field, including DOI and PMC ID taken from `articleids`.
    #[test]
    fn test_parse_esummary_response_basic() {
        let json = r#"{"result":{"uids":["31978945"],"31978945":{"uid":"31978945","pubdate":"2020 Feb","epubdate":"2020 Jan 24","source":"N Engl J Med","authors":[{"name":"Zhu N","authtype":"Author","clusterid":""},{"name":"Zhang D","authtype":"Author","clusterid":""}],"title":"A Novel Coronavirus from Patients with Pneumonia in China, 2019.","sorttitle":"novel coronavirus","volume":"382","issue":"8","pages":"727-733","lang":["eng"],"issn":"0028-4793","essn":"1533-4406","pubtype":["Journal Article"],"articleids":[{"idtype":"pubmed","idtypen":1,"value":"31978945"},{"idtype":"doi","idtypen":3,"value":"10.1056/NEJMoa2001017"},{"idtype":"pmc","idtypen":8,"value":"PMC7092803"}],"fulljournalname":"The New England journal of medicine","sortpubdate":"2020/02/20 00:00","pmcrefcount":14123,"recordstatus":"PubMed - indexed for MEDLINE"}}}"#;

        let summaries = PubMedClient::parse_esummary_response(json).unwrap();
        assert_eq!(summaries.len(), 1);

        let s = &summaries[0];
        assert_eq!(s.pmid, "31978945");
        assert_eq!(
            s.title,
            "A Novel Coronavirus from Patients with Pneumonia in China, 2019."
        );
        assert_eq!(s.authors, vec!["Zhu N", "Zhang D"]);
        assert_eq!(s.journal, "N Engl J Med");
        assert_eq!(s.full_journal_name, "The New England journal of medicine");
        assert_eq!(s.pub_date, "2020 Feb");
        assert_eq!(s.epub_date, "2020 Jan 24");
        assert_eq!(s.doi.as_deref(), Some("10.1056/NEJMoa2001017"));
        assert_eq!(s.pmc_id.as_deref(), Some("PMC7092803"));
        assert_eq!(s.volume, "382");
        assert_eq!(s.issue, "8");
        assert_eq!(s.pages, "727-733");
        assert_eq!(s.languages, vec!["eng"]);
        assert_eq!(s.pub_types, vec!["Journal Article"]);
        assert_eq!(s.issn, "0028-4793");
        assert_eq!(s.essn, "1533-4406");
        assert_eq!(s.sort_pub_date, "2020/02/20 00:00");
        assert_eq!(s.pmc_ref_count, 14123);
        assert_eq!(s.record_status, "PubMed - indexed for MEDLINE");
    }

    // Two documents are returned in the order given by the `uids` array, and
    // a DOI is picked up only for the document that lists one.
    #[test]
    fn test_parse_esummary_response_multiple_uids() {
        let json = r#"{"result":{"uids":["31978945","33515491"],"31978945":{"uid":"31978945","pubdate":"2020 Feb","epubdate":"","source":"N Engl J Med","authors":[{"name":"Zhu N","authtype":"Author","clusterid":""}],"title":"Article One","volume":"382","issue":"8","pages":"727-733","lang":["eng"],"issn":"","essn":"","pubtype":[],"articleids":[],"fulljournalname":"N Engl J Med","sortpubdate":"","pmcrefcount":0,"recordstatus":""},"33515491":{"uid":"33515491","pubdate":"2021 Jan","epubdate":"","source":"Science","authors":[{"name":"Smith J","authtype":"Author","clusterid":""}],"title":"Article Two","volume":"371","issue":"6526","pages":"120-125","lang":["eng"],"issn":"","essn":"","pubtype":[],"articleids":[{"idtype":"doi","idtypen":3,"value":"10.1126/science.abc123"}],"fulljournalname":"Science","sortpubdate":"","pmcrefcount":100,"recordstatus":""}}}"#;

        let summaries = PubMedClient::parse_esummary_response(json).unwrap();
        assert_eq!(summaries.len(), 2);
        assert_eq!(summaries[0].pmid, "31978945");
        assert_eq!(summaries[0].title, "Article One");
        assert_eq!(summaries[1].pmid, "33515491");
        assert_eq!(summaries[1].title, "Article Two");
        assert_eq!(summaries[1].doi.as_deref(), Some("10.1126/science.abc123"));
    }

    // An empty `uids` array parses successfully into an empty result.
    #[test]
    fn test_parse_esummary_response_empty() {
        let json = r#"{"result": {"uids": []}}"#;
        let summaries = PubMedClient::parse_esummary_response(json).unwrap();
        assert!(summaries.is_empty());
    }

    // A document carrying an `error` key is skipped rather than failing the
    // whole parse.
    #[test]
    fn test_parse_esummary_response_with_error_uid() {
        let json = r#"{"result":{"uids":["99999999999"],"99999999999":{"uid":"99999999999","error":"cannot get document summary"}}}"#;

        let summaries = PubMedClient::parse_esummary_response(json).unwrap();
        assert!(summaries.is_empty());
    }

    // With only a "pubmed" entry in `articleids` and no authors, the DOI and
    // PMC ID stay `None` and the author list is empty.
    #[test]
    fn test_parse_esummary_response_no_doi_no_pmc() {
        let json = r#"{"result":{"uids":["12345678"],"12345678":{"uid":"12345678","pubdate":"2020","epubdate":"","source":"Some Journal","authors":[],"title":"Test Article","volume":"","issue":"","pages":"","lang":[],"issn":"","essn":"","pubtype":[],"articleids":[{"idtype":"pubmed","idtypen":1,"value":"12345678"}],"fulljournalname":"Some Journal","sortpubdate":"","pmcrefcount":0,"recordstatus":""}}}"#;

        let summaries = PubMedClient::parse_esummary_response(json).unwrap();
        assert_eq!(summaries.len(), 1);
        assert!(summaries[0].doi.is_none());
        assert!(summaries[0].pmc_id.is_none());
        assert!(summaries[0].authors.is_empty());
    }

    // An empty PMID slice takes the early return in `fetch_summaries`, so this
    // test completes without issuing a request.
    #[tokio::test]
    async fn test_fetch_summaries_empty_input() {
        let client = PubMedClient::new();
        let result = client.fetch_summaries(&[]).await;
        assert!(result.is_ok());
        assert!(
            result
                .expect("empty input should return empty summaries")
                .is_empty()
        );
    }

    // PMIDs are validated before any request is built, so a non-numeric ID is
    // expected to surface as an error from validation alone.
    #[tokio::test]
    async fn test_fetch_summaries_invalid_pmid() {
        let client = PubMedClient::new();
        let result = client.fetch_summaries(&["not_a_number"]).await;
        assert!(result.is_err());
    }
}