pubmed_client/pubmed/client/
mod.rs

1mod citmatch;
2mod egquery;
3mod einfo;
4mod elink;
5mod espell;
6mod history;
7mod summary;
8
9use std::time::Duration;
10
11use crate::common::PubMedId;
12use crate::config::ClientConfig;
13use crate::error::{ParseError, PubMedError, Result};
14use crate::pubmed::models::PubMedArticle;
15use crate::pubmed::parser::parse_articles_from_xml;
16use crate::pubmed::query::SortOrder;
17use crate::pubmed::responses::ESearchResult;
18use crate::rate_limit::RateLimiter;
19use crate::retry::with_retry;
20use reqwest::{Client, Response};
21use tracing::{debug, info, instrument, warn};
22
/// Client for interacting with the PubMed API.
///
/// Bundles the HTTP client, the base URL requests are built from, the rate
/// limiter acquired before each request, and the configuration these were
/// derived from.
///
/// The type derives `Clone`.
/// NOTE(review): whether clones share rate-limiter state depends on how
/// `RateLimiter` implements `Clone` — confirm before relying on either behavior.
#[derive(Clone)]
pub struct PubMedClient {
    /// HTTP client used to send every request.
    client: Client,
    /// Base URL that endpoint paths such as `/esearch.fcgi` are appended to.
    pub(crate) base_url: String,
    /// Limiter acquired before each HTTP attempt.
    rate_limiter: RateLimiter,
    /// Configuration supplying the API parameters and the retry policy.
    config: ClientConfig,
}
31
32impl PubMedClient {
33    /// Create a search query builder for this client
34    ///
35    /// # Example
36    ///
37    /// ```no_run
38    /// use pubmed_client::PubMedClient;
39    ///
40    /// #[tokio::main]
41    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
42    ///     let client = PubMedClient::new();
43    ///     let articles = client
44    ///         .search()
45    ///         .query("covid-19 treatment")
46    ///         .free_full_text_only()
47    ///         .published_after(2020)
48    ///         .limit(10)
49    ///         .search_and_fetch(&client)
50    ///         .await?;
51    ///
52    ///     println!("Found {} articles", articles.len());
53    ///     Ok(())
54    /// }
55    /// ```
56    pub fn search(&self) -> super::query::SearchQuery {
57        super::query::SearchQuery::new()
58    }
59
60    /// Create a new PubMed client with default configuration
61    ///
62    /// Uses default NCBI rate limiting (3 requests/second) and no API key.
63    /// For production use, consider using `with_config()` to set an API key.
64    ///
65    /// # Example
66    ///
67    /// ```
68    /// use pubmed_client::PubMedClient;
69    ///
70    /// let client = PubMedClient::new();
71    /// ```
72    pub fn new() -> Self {
73        let config = ClientConfig::new();
74        Self::with_config(config)
75    }
76
77    /// Create a new PubMed client with custom configuration
78    ///
79    /// # Arguments
80    ///
81    /// * `config` - Client configuration including rate limits, API key, etc.
82    ///
83    /// # Example
84    ///
85    /// ```
86    /// use pubmed_client::{PubMedClient, ClientConfig};
87    ///
88    /// let config = ClientConfig::new()
89    ///     .with_api_key("your_api_key_here")
90    ///     .with_email("researcher@university.edu");
91    ///
92    /// let client = PubMedClient::with_config(config);
93    /// ```
94    pub fn with_config(config: ClientConfig) -> Self {
95        let rate_limiter = config.create_rate_limiter();
96        let base_url = config.effective_base_url().to_string();
97
98        let client = {
99            #[cfg(not(target_arch = "wasm32"))]
100            {
101                Client::builder()
102                    .user_agent(config.effective_user_agent())
103                    .timeout(Duration::from_secs(config.timeout.as_secs()))
104                    .build()
105                    .expect("Failed to create HTTP client")
106            }
107
108            #[cfg(target_arch = "wasm32")]
109            {
110                Client::builder()
111                    .user_agent(config.effective_user_agent())
112                    .build()
113                    .expect("Failed to create HTTP client")
114            }
115        };
116
117        Self {
118            client,
119            base_url,
120            rate_limiter,
121            config,
122        }
123    }
124
125    /// Create a new PubMed client with custom HTTP client and default configuration
126    ///
127    /// # Arguments
128    ///
129    /// * `client` - Custom reqwest client with specific configuration
130    ///
131    /// # Example
132    ///
133    /// ```
134    /// use pubmed_client::PubMedClient;
135    /// use reqwest::Client;
136    /// use std::time::Duration;
137    ///
138    /// let http_client = Client::builder()
139    ///     .timeout(Duration::from_secs(30))
140    ///     .build()
141    ///     .unwrap();
142    ///
143    /// let client = PubMedClient::with_client(http_client);
144    /// ```
145    pub fn with_client(client: Client) -> Self {
146        let config = ClientConfig::new();
147        let rate_limiter = config.create_rate_limiter();
148        let base_url = config.effective_base_url().to_string();
149
150        Self {
151            client,
152            base_url,
153            rate_limiter,
154            config,
155        }
156    }
157
    /// Borrow the configuration this client was built with.
    ///
    /// Crate-internal accessor; the returned reference lives as long as `self`.
    pub(crate) fn config(&self) -> &ClientConfig {
        &self.config
    }
162
    /// Borrow the rate limiter that `make_request` acquires before each attempt.
    ///
    /// Crate-internal accessor; the returned reference lives as long as `self`.
    pub(crate) fn rate_limiter(&self) -> &RateLimiter {
        &self.rate_limiter
    }
167
    /// Borrow the underlying reqwest HTTP client.
    ///
    /// Crate-internal accessor; the returned reference lives as long as `self`.
    pub(crate) fn http_client(&self) -> &Client {
        &self.client
    }
172
173    /// Fetch article metadata by PMID with full details including abstract
174    ///
175    /// # Arguments
176    ///
177    /// * `pmid` - PubMed ID as a string
178    ///
179    /// # Returns
180    ///
181    /// Returns a `Result<PubMedArticle>` containing the article metadata with abstract
182    ///
183    /// # Errors
184    ///
185    /// * `ParseError::ArticleNotFound` - If the article is not found
186    /// * `PubMedError::RequestError` - If the HTTP request fails
187    /// * `ParseError::JsonError` - If JSON parsing fails
188    ///
189    /// # Example
190    ///
191    /// ```no_run
192    /// use pubmed_client::PubMedClient;
193    ///
194    /// #[tokio::main]
195    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
196    ///     let client = PubMedClient::new();
197    ///     let article = client.fetch_article("31978945").await?;
198    ///     println!("Title: {}", article.title);
199    ///     if let Some(abstract_text) = &article.abstract_text {
200    ///         println!("Abstract: {}", abstract_text);
201    ///     }
202    ///     Ok(())
203    /// }
204    /// ```
205    #[instrument(skip(self), fields(pmid = %pmid))]
206    pub async fn fetch_article(&self, pmid: &str) -> Result<PubMedArticle> {
207        let mut articles = self.fetch_articles(&[pmid]).await?;
208
209        if articles.len() == 1 {
210            Ok(articles.remove(0))
211        } else {
212            // Try to find by PMID in case batch returned extra/different articles
213            let idx = articles.iter().position(|a| a.pmid == pmid);
214            match idx {
215                Some(i) => Ok(articles.remove(i)),
216                None => Err(ParseError::ArticleNotFound {
217                    pmid: pmid.to_string(),
218                }
219                .into()),
220            }
221        }
222    }
223
224    /// Search for articles using a query string
225    ///
226    /// # Arguments
227    ///
228    /// * `query` - Search query string
229    /// * `limit` - Maximum number of results to return
230    /// * `sort` - Optional sort order for results
231    ///
232    /// # Returns
233    ///
234    /// Returns a `Result<Vec<String>>` containing PMIDs of matching articles
235    ///
236    /// # Errors
237    ///
238    /// * `PubMedError::RequestError` - If the HTTP request fails
239    /// * `ParseError::JsonError` - If JSON parsing fails
240    ///
241    /// # Example
242    ///
243    /// ```no_run
244    /// use pubmed_client::PubMedClient;
245    ///
246    /// #[tokio::main]
247    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
248    ///     let client = PubMedClient::new();
249    ///     let pmids = client.search_articles("covid-19 treatment", 10, None).await?;
250    ///     println!("Found {} articles", pmids.len());
251    ///     Ok(())
252    /// }
253    /// ```
254    #[instrument(skip(self, sort), fields(query = %query, limit = limit))]
255    pub async fn search_articles(
256        &self,
257        query: &str,
258        limit: usize,
259        sort: Option<&SortOrder>,
260    ) -> Result<Vec<String>> {
261        // PubMed limits: retstart cannot exceed 9998, and retmax is capped at 9999
262        // This means we can only retrieve the first 9,999 results (indices 0-9998)
263        const MAX_RETRIEVABLE: usize = 9999;
264
265        if limit > MAX_RETRIEVABLE {
266            return Err(PubMedError::SearchLimitExceeded {
267                requested: limit,
268                maximum: MAX_RETRIEVABLE,
269            });
270        }
271
272        if query.trim().is_empty() {
273            debug!("Empty query provided, returning empty results");
274            return Ok(Vec::new());
275        }
276
277        let mut url = format!(
278            "{}/esearch.fcgi?db=pubmed&term={}&retmax={}&retstart={}&retmode=json",
279            self.base_url,
280            urlencoding::encode(query),
281            limit,
282            0
283        );
284
285        if let Some(sort_order) = sort {
286            url.push_str(&format!("&sort={}", sort_order.as_api_param()));
287        }
288
289        debug!("Making initial ESearch API request");
290        let response = self.make_request(&url).await?;
291
292        let search_result: ESearchResult = response.json().await?;
293
294        // Check for API error response (NCBI sometimes returns 200 OK with ERROR field)
295        if let Some(error_msg) = &search_result.esearchresult.error {
296            return Err(PubMedError::ApiError {
297                status: 200,
298                message: format!("NCBI ESearch API error: {}", error_msg),
299            });
300        }
301
302        let total_count: usize = search_result
303            .esearchresult
304            .count
305            .as_ref()
306            .and_then(|c| c.parse().ok())
307            .unwrap_or(0);
308
309        if total_count >= limit {
310            warn!(
311                "Total results ({}) exceed or equal requested limit ({}). Only the first {} results can be retrieved.",
312                total_count, limit, MAX_RETRIEVABLE
313            );
314        }
315
316        Ok(search_result.esearchresult.idlist)
317    }
318
319    /// Fetch multiple articles by PMIDs in a single batch request
320    ///
321    /// This method sends a single EFetch request with multiple PMIDs (comma-separated),
322    /// which is significantly more efficient than fetching articles one by one.
323    /// For large numbers of PMIDs, the request is automatically split into batches.
324    ///
325    /// # Arguments
326    ///
327    /// * `pmids` - Slice of PubMed IDs as strings
328    ///
329    /// # Returns
330    ///
331    /// Returns a `Result<Vec<PubMedArticle>>` containing articles with metadata.
332    /// Articles that fail to parse are skipped (logged via tracing).
333    ///
334    /// # Example
335    ///
336    /// ```no_run
337    /// use pubmed_client::PubMedClient;
338    ///
339    /// #[tokio::main]
340    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
341    ///     let client = PubMedClient::new();
342    ///     let articles = client.fetch_articles(&["31978945", "33515491", "25760099"]).await?;
343    ///     for article in &articles {
344    ///         println!("{}: {}", article.pmid, article.title);
345    ///     }
346    ///     Ok(())
347    /// }
348    /// ```
349    #[instrument(skip(self), fields(pmids_count = pmids.len()))]
350    pub async fn fetch_articles(&self, pmids: &[&str]) -> Result<Vec<PubMedArticle>> {
351        if pmids.is_empty() {
352            return Ok(Vec::new());
353        }
354
355        // Validate all PMIDs upfront
356        let validated: Vec<u32> = pmids
357            .iter()
358            .map(|pmid| {
359                PubMedId::parse(pmid)
360                    .map(|p| p.as_u32())
361                    .map_err(PubMedError::from)
362            })
363            .collect::<Result<Vec<_>>>()?;
364
365        // NCBI recommends batches of up to 200 IDs per request
366        const BATCH_SIZE: usize = 200;
367
368        let mut all_articles = Vec::with_capacity(pmids.len());
369
370        for chunk in validated.chunks(BATCH_SIZE) {
371            let id_list: String = chunk
372                .iter()
373                .map(|id| id.to_string())
374                .collect::<Vec<_>>()
375                .join(",");
376
377            let url = format!(
378                "{}/efetch.fcgi?db=pubmed&id={}&retmode=xml&rettype=abstract",
379                self.base_url, id_list
380            );
381
382            debug!(batch_size = chunk.len(), "Making batch EFetch API request");
383            let response = self.make_request(&url).await?;
384            let xml_text = response.text().await?;
385
386            if xml_text.trim().is_empty() {
387                continue;
388            }
389
390            let articles = parse_articles_from_xml(&xml_text)?;
391            info!(
392                requested = chunk.len(),
393                parsed = articles.len(),
394                "Batch fetch completed"
395            );
396            all_articles.extend(articles);
397        }
398
399        Ok(all_articles)
400    }
401
402    /// Search and fetch multiple articles with metadata
403    ///
404    /// Uses batch fetching internally for efficient retrieval.
405    ///
406    /// # Arguments
407    ///
408    /// * `query` - Search query string
409    /// * `limit` - Maximum number of articles to fetch
410    ///
411    /// # Returns
412    ///
413    /// Returns a `Result<Vec<PubMedArticle>>` containing articles with metadata
414    ///
415    /// # Example
416    ///
417    /// ```no_run
418    /// use pubmed_client::PubMedClient;
419    ///
420    /// #[tokio::main]
421    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
422    ///     let client = PubMedClient::new();
423    ///     let articles = client.search_and_fetch("covid-19", 5, None).await?;
424    ///     for article in articles {
425    ///         println!("{}: {}", article.pmid, article.title);
426    ///     }
427    ///     Ok(())
428    /// }
429    /// ```
430    pub async fn search_and_fetch(
431        &self,
432        query: &str,
433        limit: usize,
434        sort: Option<&SortOrder>,
435    ) -> Result<Vec<PubMedArticle>> {
436        let pmids = self.search_articles(query, limit, sort).await?;
437
438        let pmid_refs: Vec<&str> = pmids.iter().map(|s| s.as_str()).collect();
439        self.fetch_articles(&pmid_refs).await
440    }
441
442    /// Internal helper method for making HTTP requests with retry logic.
443    /// Automatically appends API parameters (api_key, email, tool) to the URL.
444    pub(crate) async fn make_request(&self, url: &str) -> Result<Response> {
445        // Build final URL with API parameters
446        let mut final_url = url.to_string();
447        let api_params = self.config.build_api_params();
448
449        if !api_params.is_empty() {
450            // Check if URL already has query parameters
451            let separator = if url.contains('?') { '&' } else { '?' };
452            final_url.push(separator);
453
454            // Append API parameters
455            let param_strings: Vec<String> = api_params
456                .into_iter()
457                .map(|(key, value)| format!("{}={}", key, urlencoding::encode(&value)))
458                .collect();
459            final_url.push_str(&param_strings.join("&"));
460        }
461
462        let response = with_retry(
463            || async {
464                self.rate_limiter.acquire().await?;
465                debug!("Making API request to: {}", final_url);
466                let response = self
467                    .client
468                    .get(&final_url)
469                    .send()
470                    .await
471                    .map_err(PubMedError::from)?;
472
473                // Check if response has server error status and convert to retryable error
474                if response.status().is_server_error() || response.status().as_u16() == 429 {
475                    return Err(PubMedError::ApiError {
476                        status: response.status().as_u16(),
477                        message: response
478                            .status()
479                            .canonical_reason()
480                            .unwrap_or("Unknown error")
481                            .to_string(),
482                    });
483                }
484
485                Ok(response)
486            },
487            &self.config.retry_config,
488            "NCBI API request",
489        )
490        .await?;
491
492        // Check for any non-success status (client errors, etc.)
493        if !response.status().is_success() {
494            warn!("API request failed with status: {}", response.status());
495            return Err(PubMedError::ApiError {
496                status: response.status().as_u16(),
497                message: response
498                    .status()
499                    .canonical_reason()
500                    .unwrap_or("Unknown error")
501                    .to_string(),
502            });
503        }
504
505        Ok(response)
506    }
507}
508
509impl Default for PubMedClient {
510    fn default() -> Self {
511        Self::new()
512    }
513}
514
#[cfg(test)]
mod tests {
    use std::{
        mem,
        time::{Duration, Instant},
    };

    use super::*;

    /// Time budget for calls that must fail during validation, i.e. before the
    /// rate limiter or the network is touched.
    const VALIDATION_BUDGET: Duration = Duration::from_millis(100);

    /// The effective rate limit follows: explicit override, else the API-key
    /// default, else the anonymous default.
    #[test]
    fn test_client_config_rate_limiting() {
        // Default configuration (no API key)
        assert_eq!(ClientConfig::new().effective_rate_limit(), 3.0);

        // With API key
        assert_eq!(
            ClientConfig::new()
                .with_api_key("test_key")
                .effective_rate_limit(),
            10.0
        );

        // Custom rate limit
        assert_eq!(
            ClientConfig::new()
                .with_rate_limit(5.0)
                .effective_rate_limit(),
            5.0
        );

        // Custom rate limit overrides the API key default
        assert_eq!(
            ClientConfig::new()
                .with_api_key("test_key")
                .with_rate_limit(7.0)
                .effective_rate_limit(),
            7.0
        );
    }

    /// API key, e-mail and tool name all show up as request parameters.
    #[test]
    fn test_client_api_params() {
        let params = ClientConfig::new()
            .with_api_key("test_key_123")
            .with_email("test@example.com")
            .with_tool("TestTool")
            .build_api_params();

        let expected = [
            ("api_key", "test_key_123"),
            ("email", "test@example.com"),
            ("tool", "TestTool"),
        ];

        // Exactly the three configured parameters, nothing else
        assert_eq!(params.len(), 3);
        for (key, value) in expected {
            assert!(params.contains(&(key.to_string(), value.to_string())));
        }
    }

    /// Defaults are used for values that were not overridden.
    #[test]
    fn test_config_effective_values() {
        let cfg = ClientConfig::new()
            .with_email("test@example.com")
            .with_tool("TestApp");

        let base_url = cfg.effective_base_url();
        assert_eq!(base_url, "https://eutils.ncbi.nlm.nih.gov/entrez/eutils");

        let user_agent = cfg.effective_user_agent();
        assert!(user_agent.starts_with("pubmed-client/"));

        assert_eq!(cfg.effective_tool(), "TestApp");
    }

    /// A rate limiter can be created from a configuration.
    #[test]
    fn test_rate_limiter_creation_from_config() {
        let limiter = ClientConfig::new()
            .with_api_key("test_key")
            .with_rate_limit(8.0)
            .create_rate_limiter();

        // The exact rate is hard to observe without an async context, so this
        // only verifies that a limiter value was produced.
        assert!(mem::size_of_val(&limiter) > 0);
    }

    /// An invalid PMID is rejected by validation before the rate limiter is used.
    #[tokio::test]
    async fn test_invalid_pmid_rate_limiting() {
        let client = PubMedClient::with_config(ClientConfig::new().with_rate_limit(5.0));

        let started = Instant::now();
        let outcome = client.fetch_article("invalid_pmid").await;
        assert!(outcome.is_err());

        // Failing fast shows that no rate-limit token was waited for
        assert!(started.elapsed() < VALIDATION_BUDGET);
    }

    /// An empty PMID slice succeeds with an empty result.
    #[tokio::test]
    async fn test_fetch_articles_empty_input() {
        let client = PubMedClient::new();

        let fetched = client.fetch_articles(&[]).await;
        assert!(fetched.is_ok());
        assert!(fetched.unwrap().is_empty());
    }

    /// A non-numeric PMID is rejected.
    #[tokio::test]
    async fn test_fetch_articles_invalid_pmid() {
        let client = PubMedClient::new();

        let outcome = client.fetch_articles(&["not_a_number"]).await;
        assert!(outcome.is_err());
    }

    /// One invalid PMID fails the whole batch before any request is made.
    #[tokio::test]
    async fn test_fetch_articles_validates_all_pmids_before_request() {
        let client = PubMedClient::new();
        let mixed = ["31978945", "invalid", "33515491"];

        let started = Instant::now();
        let outcome = client.fetch_articles(&mixed).await;
        assert!(outcome.is_err());

        // Validation only, no network: must return quickly
        assert!(started.elapsed() < VALIDATION_BUDGET);
    }
}