pubmed_client/pubmed/client/
egquery.rs

1//! EGQuery API operations for querying across all NCBI databases
2
3use crate::error::{PubMedError, Result};
4use crate::pubmed::models::{DatabaseCount, GlobalQueryResults};
5use tracing::{debug, info, instrument};
6
7use super::PubMedClient;
8
9impl PubMedClient {
10    /// Query all NCBI databases for record counts using the EGQuery API
11    ///
12    /// Returns the number of records matching the query in each Entrez database.
13    /// Useful for exploratory searches and understanding data distribution across databases.
14    ///
15    /// # Arguments
16    ///
17    /// * `term` - Search query string
18    ///
19    /// # Returns
20    ///
21    /// Returns a `Result<GlobalQueryResults>` containing counts per database
22    ///
23    /// # Example
24    ///
25    /// ```no_run
26    /// use pubmed_client::PubMedClient;
27    ///
28    /// #[tokio::main]
29    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
30    ///     let client = PubMedClient::new();
31    ///     let results = client.global_query("asthma").await?;
32    ///     println!("Query: {}", results.term);
33    ///     for db in results.non_zero() {
34    ///         println!("  {}: {} records", db.menu_name, db.count);
35    ///     }
36    ///     Ok(())
37    /// }
38    /// ```
39    #[instrument(skip(self))]
40    pub async fn global_query(&self, term: &str) -> Result<GlobalQueryResults> {
41        let term = term.trim();
42        if term.is_empty() {
43            return Err(PubMedError::InvalidQuery(
44                "Search term cannot be empty".to_string(),
45            ));
46        }
47
48        let url = format!(
49            "{}/egquery.fcgi?term={}",
50            self.base_url,
51            urlencoding::encode(term)
52        );
53
54        debug!(term = %term, "Making EGQuery API request");
55        let response = self.make_request(&url).await?;
56        let xml_text = response.text().await?;
57
58        // Parse XML response using string-based extraction (consistent with existing xml_utils)
59        let results = Self::parse_egquery_response(&xml_text, term)?;
60
61        info!(
62            term = %term,
63            database_count = results.results.len(),
64            non_zero_count = results.non_zero().len(),
65            "EGQuery completed"
66        );
67
68        Ok(results)
69    }
70
71    /// Parse EGQuery XML response into GlobalQueryResults
72    pub(crate) fn parse_egquery_response(
73        xml: &str,
74        query_term: &str,
75    ) -> Result<GlobalQueryResults> {
76        use crate::common::xml_utils::{extract_all_text_between, extract_text_between};
77
78        // Extract the term from response, fallback to the query term
79        let term = extract_text_between(xml, "<Term>", "</Term>")
80            .unwrap_or_else(|| query_term.to_string());
81
82        // Extract all ResultItem blocks
83        let result_items = extract_all_text_between(xml, "<ResultItem>", "</ResultItem>");
84
85        let mut results = Vec::new();
86        for item in &result_items {
87            let db_name = extract_text_between(item, "<DbName>", "</DbName>").unwrap_or_default();
88            let menu_name =
89                extract_text_between(item, "<MenuName>", "</MenuName>").unwrap_or_default();
90            let count_str = extract_text_between(item, "<Count>", "</Count>").unwrap_or_default();
91            let status = extract_text_between(item, "<Status>", "</Status>").unwrap_or_default();
92
93            let count = count_str.parse::<u64>().unwrap_or(0);
94
95            results.push(DatabaseCount {
96                db_name,
97                menu_name,
98                count,
99                status,
100            });
101        }
102
103        Ok(GlobalQueryResults { term, results })
104    }
105}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `xml` as an EGQuery response for `term`, panicking if parsing fails.
    fn parse(xml: &str, term: &str) -> GlobalQueryResults {
        PubMedClient::parse_egquery_response(xml, term).unwrap()
    }

    /// Drives `global_query` to completion on a fresh client and reports whether it failed.
    fn global_query_fails(term: &str) -> bool {
        let client = PubMedClient::new();
        tokio_test::block_on(client.global_query(term)).is_err()
    }

    #[test]
    fn test_parse_egquery_response() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Result>
  <Term>asthma</Term>
  <eGQueryResult>
    <ResultItem>
      <DbName>pubmed</DbName>
      <MenuName>PubMed</MenuName>
      <Count>234567</Count>
      <Status>Ok</Status>
    </ResultItem>
    <ResultItem>
      <DbName>pmc</DbName>
      <MenuName>PMC</MenuName>
      <Count>89012</Count>
      <Status>Ok</Status>
    </ResultItem>
    <ResultItem>
      <DbName>mesh</DbName>
      <MenuName>MeSH</MenuName>
      <Count>0</Count>
      <Status>Ok</Status>
    </ResultItem>
  </eGQueryResult>
</Result>"#;

        let parsed = parse(xml, "asthma");
        assert_eq!(parsed.term, "asthma");
        assert_eq!(parsed.results.len(), 3);

        // Every field of the first entry is checked.
        let pubmed = &parsed.results[0];
        assert_eq!(pubmed.db_name, "pubmed");
        assert_eq!(pubmed.menu_name, "PubMed");
        assert_eq!(pubmed.count, 234567);
        assert_eq!(pubmed.status, "Ok");

        let pmc = &parsed.results[1];
        assert_eq!(pmc.db_name, "pmc");
        assert_eq!(pmc.count, 89012);

        // Helper methods: mesh has a zero count, so only pubmed and pmc remain.
        assert_eq!(parsed.non_zero().len(), 2);

        assert_eq!(parsed.count_for("pubmed"), Some(234567));
        assert_eq!(parsed.count_for("pmc"), Some(89012));
        assert_eq!(parsed.count_for("mesh"), Some(0));
        assert_eq!(parsed.count_for("nonexistent"), None);
    }

    #[test]
    fn test_parse_egquery_response_empty() {
        let parsed = parse(
            r#"<Result><Term>test</Term><eGQueryResult></eGQueryResult></Result>"#,
            "test",
        );
        assert_eq!(parsed.term, "test");
        assert!(parsed.results.is_empty());
    }

    #[test]
    fn test_parse_egquery_response_error_status() {
        let xml = r#"<Result>
  <Term>test</Term>
  <eGQueryResult>
    <ResultItem>
      <DbName>pubmed</DbName>
      <MenuName>PubMed</MenuName>
      <Count>100</Count>
      <Status>Ok</Status>
    </ResultItem>
    <ResultItem>
      <DbName>snp</DbName>
      <MenuName>SNP</MenuName>
      <Count>0</Count>
      <Status>Term or Database is not found</Status>
    </ResultItem>
  </eGQueryResult>
</Result>"#;
        let parsed = parse(xml, "test");
        assert_eq!(parsed.results.len(), 2);

        // The non-Ok status text is carried through verbatim.
        let snp = &parsed.results[1];
        assert_eq!(snp.status, "Term or Database is not found");
    }

    #[test]
    fn test_global_query_empty_term() {
        assert!(global_query_fails(""));
    }

    #[test]
    fn test_global_query_whitespace_term() {
        assert!(global_query_fails("   "));
    }
}