pubmed_client/pubmed/client/mod.rs
1mod citmatch;
2mod egquery;
3mod einfo;
4mod elink;
5mod espell;
6mod history;
7mod summary;
8
9use std::time::Duration;
10
11use crate::common::PubMedId;
12use crate::config::ClientConfig;
13use crate::error::{ParseError, PubMedError, Result};
14use crate::pubmed::models::PubMedArticle;
15use crate::pubmed::parser::parse_articles_from_xml;
16use crate::pubmed::query::SortOrder;
17use crate::pubmed::responses::ESearchResult;
18use crate::rate_limit::RateLimiter;
19use crate::retry::with_retry;
20use reqwest::{Client, Response};
21use tracing::{debug, info, instrument, warn};
22
23/// Client for interacting with PubMed API
24#[derive(Clone)]
25pub struct PubMedClient {
26 client: Client,
27 pub(crate) base_url: String,
28 rate_limiter: RateLimiter,
29 config: ClientConfig,
30}
31
32impl PubMedClient {
33 /// Create a search query builder for this client
34 ///
35 /// # Example
36 ///
37 /// ```no_run
38 /// use pubmed_client::PubMedClient;
39 ///
40 /// #[tokio::main]
41 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
42 /// let client = PubMedClient::new();
43 /// let articles = client
44 /// .search()
45 /// .query("covid-19 treatment")
46 /// .free_full_text_only()
47 /// .published_after(2020)
48 /// .limit(10)
49 /// .search_and_fetch(&client)
50 /// .await?;
51 ///
52 /// println!("Found {} articles", articles.len());
53 /// Ok(())
54 /// }
55 /// ```
56 pub fn search(&self) -> super::query::SearchQuery {
57 super::query::SearchQuery::new()
58 }
59
60 /// Create a new PubMed client with default configuration
61 ///
62 /// Uses default NCBI rate limiting (3 requests/second) and no API key.
63 /// For production use, consider using `with_config()` to set an API key.
64 ///
65 /// # Example
66 ///
67 /// ```
68 /// use pubmed_client::PubMedClient;
69 ///
70 /// let client = PubMedClient::new();
71 /// ```
72 pub fn new() -> Self {
73 let config = ClientConfig::new();
74 Self::with_config(config)
75 }
76
77 /// Create a new PubMed client with custom configuration
78 ///
79 /// # Arguments
80 ///
81 /// * `config` - Client configuration including rate limits, API key, etc.
82 ///
83 /// # Example
84 ///
85 /// ```
86 /// use pubmed_client::{PubMedClient, ClientConfig};
87 ///
88 /// let config = ClientConfig::new()
89 /// .with_api_key("your_api_key_here")
90 /// .with_email("researcher@university.edu");
91 ///
92 /// let client = PubMedClient::with_config(config);
93 /// ```
94 pub fn with_config(config: ClientConfig) -> Self {
95 let rate_limiter = config.create_rate_limiter();
96 let base_url = config.effective_base_url().to_string();
97
98 let client = {
99 #[cfg(not(target_arch = "wasm32"))]
100 {
101 Client::builder()
102 .user_agent(config.effective_user_agent())
103 .timeout(Duration::from_secs(config.timeout.as_secs()))
104 .build()
105 .expect("Failed to create HTTP client")
106 }
107
108 #[cfg(target_arch = "wasm32")]
109 {
110 Client::builder()
111 .user_agent(config.effective_user_agent())
112 .build()
113 .expect("Failed to create HTTP client")
114 }
115 };
116
117 Self {
118 client,
119 base_url,
120 rate_limiter,
121 config,
122 }
123 }
124
125 /// Create a new PubMed client with custom HTTP client and default configuration
126 ///
127 /// # Arguments
128 ///
129 /// * `client` - Custom reqwest client with specific configuration
130 ///
131 /// # Example
132 ///
133 /// ```
134 /// use pubmed_client::PubMedClient;
135 /// use reqwest::Client;
136 /// use std::time::Duration;
137 ///
138 /// let http_client = Client::builder()
139 /// .timeout(Duration::from_secs(30))
140 /// .build()
141 /// .unwrap();
142 ///
143 /// let client = PubMedClient::with_client(http_client);
144 /// ```
145 pub fn with_client(client: Client) -> Self {
146 let config = ClientConfig::new();
147 let rate_limiter = config.create_rate_limiter();
148 let base_url = config.effective_base_url().to_string();
149
150 Self {
151 client,
152 base_url,
153 rate_limiter,
154 config,
155 }
156 }
157
158 /// Get a reference to the client configuration
159 pub(crate) fn config(&self) -> &ClientConfig {
160 &self.config
161 }
162
163 /// Get a reference to the rate limiter
164 pub(crate) fn rate_limiter(&self) -> &RateLimiter {
165 &self.rate_limiter
166 }
167
168 /// Get a reference to the HTTP client
169 pub(crate) fn http_client(&self) -> &Client {
170 &self.client
171 }
172
173 /// Fetch article metadata by PMID with full details including abstract
174 ///
175 /// # Arguments
176 ///
177 /// * `pmid` - PubMed ID as a string
178 ///
179 /// # Returns
180 ///
181 /// Returns a `Result<PubMedArticle>` containing the article metadata with abstract
182 ///
183 /// # Errors
184 ///
185 /// * `ParseError::ArticleNotFound` - If the article is not found
186 /// * `PubMedError::RequestError` - If the HTTP request fails
187 /// * `ParseError::JsonError` - If JSON parsing fails
188 ///
189 /// # Example
190 ///
191 /// ```no_run
192 /// use pubmed_client::PubMedClient;
193 ///
194 /// #[tokio::main]
195 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
196 /// let client = PubMedClient::new();
197 /// let article = client.fetch_article("31978945").await?;
198 /// println!("Title: {}", article.title);
199 /// if let Some(abstract_text) = &article.abstract_text {
200 /// println!("Abstract: {}", abstract_text);
201 /// }
202 /// Ok(())
203 /// }
204 /// ```
205 #[instrument(skip(self), fields(pmid = %pmid))]
206 pub async fn fetch_article(&self, pmid: &str) -> Result<PubMedArticle> {
207 let mut articles = self.fetch_articles(&[pmid]).await?;
208
209 if articles.len() == 1 {
210 Ok(articles.remove(0))
211 } else {
212 // Try to find by PMID in case batch returned extra/different articles
213 let idx = articles.iter().position(|a| a.pmid == pmid);
214 match idx {
215 Some(i) => Ok(articles.remove(i)),
216 None => Err(ParseError::ArticleNotFound {
217 pmid: pmid.to_string(),
218 }
219 .into()),
220 }
221 }
222 }
223
224 /// Search for articles using a query string
225 ///
226 /// # Arguments
227 ///
228 /// * `query` - Search query string
229 /// * `limit` - Maximum number of results to return
230 /// * `sort` - Optional sort order for results
231 ///
232 /// # Returns
233 ///
234 /// Returns a `Result<Vec<String>>` containing PMIDs of matching articles
235 ///
236 /// # Errors
237 ///
238 /// * `PubMedError::RequestError` - If the HTTP request fails
239 /// * `ParseError::JsonError` - If JSON parsing fails
240 ///
241 /// # Example
242 ///
243 /// ```no_run
244 /// use pubmed_client::PubMedClient;
245 ///
246 /// #[tokio::main]
247 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
248 /// let client = PubMedClient::new();
249 /// let pmids = client.search_articles("covid-19 treatment", 10, None).await?;
250 /// println!("Found {} articles", pmids.len());
251 /// Ok(())
252 /// }
253 /// ```
254 #[instrument(skip(self, sort), fields(query = %query, limit = limit))]
255 pub async fn search_articles(
256 &self,
257 query: &str,
258 limit: usize,
259 sort: Option<&SortOrder>,
260 ) -> Result<Vec<String>> {
261 // PubMed limits: retstart cannot exceed 9998, and retmax is capped at 9999
262 // This means we can only retrieve the first 9,999 results (indices 0-9998)
263 const MAX_RETRIEVABLE: usize = 9999;
264
265 if limit > MAX_RETRIEVABLE {
266 return Err(PubMedError::SearchLimitExceeded {
267 requested: limit,
268 maximum: MAX_RETRIEVABLE,
269 });
270 }
271
272 if query.trim().is_empty() {
273 debug!("Empty query provided, returning empty results");
274 return Ok(Vec::new());
275 }
276
277 let mut url = format!(
278 "{}/esearch.fcgi?db=pubmed&term={}&retmax={}&retstart={}&retmode=json",
279 self.base_url,
280 urlencoding::encode(query),
281 limit,
282 0
283 );
284
285 if let Some(sort_order) = sort {
286 url.push_str(&format!("&sort={}", sort_order.as_api_param()));
287 }
288
289 debug!("Making initial ESearch API request");
290 let response = self.make_request(&url).await?;
291
292 let search_result: ESearchResult = response.json().await?;
293
294 // Check for API error response (NCBI sometimes returns 200 OK with ERROR field)
295 if let Some(error_msg) = &search_result.esearchresult.error {
296 return Err(PubMedError::ApiError {
297 status: 200,
298 message: format!("NCBI ESearch API error: {}", error_msg),
299 });
300 }
301
302 let total_count: usize = search_result
303 .esearchresult
304 .count
305 .as_ref()
306 .and_then(|c| c.parse().ok())
307 .unwrap_or(0);
308
309 if total_count >= limit {
310 warn!(
311 "Total results ({}) exceed or equal requested limit ({}). Only the first {} results can be retrieved.",
312 total_count, limit, MAX_RETRIEVABLE
313 );
314 }
315
316 Ok(search_result.esearchresult.idlist)
317 }
318
319 /// Fetch multiple articles by PMIDs in a single batch request
320 ///
321 /// This method sends a single EFetch request with multiple PMIDs (comma-separated),
322 /// which is significantly more efficient than fetching articles one by one.
323 /// For large numbers of PMIDs, the request is automatically split into batches.
324 ///
325 /// # Arguments
326 ///
327 /// * `pmids` - Slice of PubMed IDs as strings
328 ///
329 /// # Returns
330 ///
331 /// Returns a `Result<Vec<PubMedArticle>>` containing articles with metadata.
332 /// Articles that fail to parse are skipped (logged via tracing).
333 ///
334 /// # Example
335 ///
336 /// ```no_run
337 /// use pubmed_client::PubMedClient;
338 ///
339 /// #[tokio::main]
340 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
341 /// let client = PubMedClient::new();
342 /// let articles = client.fetch_articles(&["31978945", "33515491", "25760099"]).await?;
343 /// for article in &articles {
344 /// println!("{}: {}", article.pmid, article.title);
345 /// }
346 /// Ok(())
347 /// }
348 /// ```
349 #[instrument(skip(self), fields(pmids_count = pmids.len()))]
350 pub async fn fetch_articles(&self, pmids: &[&str]) -> Result<Vec<PubMedArticle>> {
351 if pmids.is_empty() {
352 return Ok(Vec::new());
353 }
354
355 // Validate all PMIDs upfront
356 let validated: Vec<u32> = pmids
357 .iter()
358 .map(|pmid| {
359 PubMedId::parse(pmid)
360 .map(|p| p.as_u32())
361 .map_err(PubMedError::from)
362 })
363 .collect::<Result<Vec<_>>>()?;
364
365 // NCBI recommends batches of up to 200 IDs per request
366 const BATCH_SIZE: usize = 200;
367
368 let mut all_articles = Vec::with_capacity(pmids.len());
369
370 for chunk in validated.chunks(BATCH_SIZE) {
371 let id_list: String = chunk
372 .iter()
373 .map(|id| id.to_string())
374 .collect::<Vec<_>>()
375 .join(",");
376
377 let url = format!(
378 "{}/efetch.fcgi?db=pubmed&id={}&retmode=xml&rettype=abstract",
379 self.base_url, id_list
380 );
381
382 debug!(batch_size = chunk.len(), "Making batch EFetch API request");
383 let response = self.make_request(&url).await?;
384 let xml_text = response.text().await?;
385
386 if xml_text.trim().is_empty() {
387 continue;
388 }
389
390 let articles = parse_articles_from_xml(&xml_text)?;
391 info!(
392 requested = chunk.len(),
393 parsed = articles.len(),
394 "Batch fetch completed"
395 );
396 all_articles.extend(articles);
397 }
398
399 Ok(all_articles)
400 }
401
402 /// Search and fetch multiple articles with metadata
403 ///
404 /// Uses batch fetching internally for efficient retrieval.
405 ///
406 /// # Arguments
407 ///
408 /// * `query` - Search query string
409 /// * `limit` - Maximum number of articles to fetch
410 ///
411 /// # Returns
412 ///
413 /// Returns a `Result<Vec<PubMedArticle>>` containing articles with metadata
414 ///
415 /// # Example
416 ///
417 /// ```no_run
418 /// use pubmed_client::PubMedClient;
419 ///
420 /// #[tokio::main]
421 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
422 /// let client = PubMedClient::new();
423 /// let articles = client.search_and_fetch("covid-19", 5, None).await?;
424 /// for article in articles {
425 /// println!("{}: {}", article.pmid, article.title);
426 /// }
427 /// Ok(())
428 /// }
429 /// ```
430 pub async fn search_and_fetch(
431 &self,
432 query: &str,
433 limit: usize,
434 sort: Option<&SortOrder>,
435 ) -> Result<Vec<PubMedArticle>> {
436 let pmids = self.search_articles(query, limit, sort).await?;
437
438 let pmid_refs: Vec<&str> = pmids.iter().map(|s| s.as_str()).collect();
439 self.fetch_articles(&pmid_refs).await
440 }
441
442 /// Internal helper method for making HTTP requests with retry logic.
443 /// Automatically appends API parameters (api_key, email, tool) to the URL.
444 pub(crate) async fn make_request(&self, url: &str) -> Result<Response> {
445 // Build final URL with API parameters
446 let mut final_url = url.to_string();
447 let api_params = self.config.build_api_params();
448
449 if !api_params.is_empty() {
450 // Check if URL already has query parameters
451 let separator = if url.contains('?') { '&' } else { '?' };
452 final_url.push(separator);
453
454 // Append API parameters
455 let param_strings: Vec<String> = api_params
456 .into_iter()
457 .map(|(key, value)| format!("{}={}", key, urlencoding::encode(&value)))
458 .collect();
459 final_url.push_str(¶m_strings.join("&"));
460 }
461
462 let response = with_retry(
463 || async {
464 self.rate_limiter.acquire().await?;
465 debug!("Making API request to: {}", final_url);
466 let response = self
467 .client
468 .get(&final_url)
469 .send()
470 .await
471 .map_err(PubMedError::from)?;
472
473 // Check if response has server error status and convert to retryable error
474 if response.status().is_server_error() || response.status().as_u16() == 429 {
475 return Err(PubMedError::ApiError {
476 status: response.status().as_u16(),
477 message: response
478 .status()
479 .canonical_reason()
480 .unwrap_or("Unknown error")
481 .to_string(),
482 });
483 }
484
485 Ok(response)
486 },
487 &self.config.retry_config,
488 "NCBI API request",
489 )
490 .await?;
491
492 // Check for any non-success status (client errors, etc.)
493 if !response.status().is_success() {
494 warn!("API request failed with status: {}", response.status());
495 return Err(PubMedError::ApiError {
496 status: response.status().as_u16(),
497 message: response
498 .status()
499 .canonical_reason()
500 .unwrap_or("Unknown error")
501 .to_string(),
502 });
503 }
504
505 Ok(response)
506 }
507}
508
509impl Default for PubMedClient {
510 fn default() -> Self {
511 Self::new()
512 }
513}
514
#[cfg(test)]
mod tests {
    use std::{
        mem,
        time::{Duration, Instant},
    };

    use super::*;

    /// Time budget for calls that must fail on validation alone, i.e. without
    /// waiting on the rate limiter or the network.
    const VALIDATION_ONLY_BUDGET: Duration = Duration::from_millis(100);

    #[test]
    fn test_client_config_rate_limiting() {
        // No API key: default rate.
        let default_config = ClientConfig::new();
        assert_eq!(default_config.effective_rate_limit(), 3.0);

        // An API key raises the default rate.
        let keyed = ClientConfig::new().with_api_key("test_key");
        assert_eq!(keyed.effective_rate_limit(), 10.0);

        // An explicit rate limit is used as given.
        let custom = ClientConfig::new().with_rate_limit(5.0);
        assert_eq!(custom.effective_rate_limit(), 5.0);

        // An explicit rate limit wins over the API-key default.
        let keyed_and_custom = ClientConfig::new()
            .with_api_key("test_key")
            .with_rate_limit(7.0);
        assert_eq!(keyed_and_custom.effective_rate_limit(), 7.0);
    }

    #[test]
    fn test_client_api_params() {
        let params = ClientConfig::new()
            .with_api_key("test_key_123")
            .with_email("test@example.com")
            .with_tool("TestTool")
            .build_api_params();

        // Exactly the three configured parameters, each with its value.
        assert_eq!(params.len(), 3);
        for (key, value) in [
            ("api_key", "test_key_123"),
            ("email", "test@example.com"),
            ("tool", "TestTool"),
        ] {
            assert!(params.contains(&(key.to_string(), value.to_string())));
        }
    }

    #[test]
    fn test_config_effective_values() {
        let config = ClientConfig::new()
            .with_email("test@example.com")
            .with_tool("TestApp");

        assert_eq!(
            config.effective_base_url(),
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
        );
        assert!(config.effective_user_agent().starts_with("pubmed-client/"));
        assert_eq!(config.effective_tool(), "TestApp");
    }

    #[test]
    fn test_rate_limiter_creation_from_config() {
        let rate_limiter = ClientConfig::new()
            .with_api_key("test_key")
            .with_rate_limit(8.0)
            .create_rate_limiter();

        // The exact rate is hard to observe without an async context, so this
        // only checks that a limiter value was produced.
        assert!(mem::size_of_val(&rate_limiter) > 0);
    }

    #[tokio::test]
    async fn test_invalid_pmid_rate_limiting() {
        let client = PubMedClient::with_config(ClientConfig::new().with_rate_limit(5.0));

        // Validation runs before the rate limiter, so an invalid PMID must be
        // rejected quickly and without consuming a rate-limit token.
        let started = Instant::now();
        let outcome = client.fetch_article("invalid_pmid").await;
        let elapsed = started.elapsed();

        assert!(outcome.is_err());
        assert!(elapsed < VALIDATION_ONLY_BUDGET);
    }

    #[tokio::test]
    async fn test_fetch_articles_empty_input() {
        let client = PubMedClient::new();

        let outcome = client.fetch_articles(&[]).await;
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().is_empty());
    }

    #[tokio::test]
    async fn test_fetch_articles_invalid_pmid() {
        let client = PubMedClient::new();

        let outcome = client.fetch_articles(&["not_a_number"]).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_fetch_articles_validates_all_pmids_before_request() {
        let client = PubMedClient::new();

        // One invalid ID among valid ones must fail validation before any
        // network request is attempted.
        let started = Instant::now();
        let outcome = client
            .fetch_articles(&["31978945", "invalid", "33515491"])
            .await;
        let elapsed = started.elapsed();

        assert!(outcome.is_err());
        // Validation only, no network.
        assert!(elapsed < VALIDATION_ONLY_BUDGET);
    }
}