pubmed_client/pubmed/client/
citmatch.rs

1//! ECitMatch API operations for matching citations to PMIDs
2
3use crate::error::Result;
4use crate::pubmed::models::{CitationMatch, CitationMatchStatus, CitationMatches, CitationQuery};
5use tracing::{debug, info, instrument};
6
7use super::PubMedClient;
8
9impl PubMedClient {
10    /// Match citations to PMIDs using the ECitMatch API
11    ///
12    /// This method takes citation information (journal, year, volume, page, author)
13    /// and returns the corresponding PMIDs. Useful for identifying PMIDs from
14    /// reference lists.
15    ///
16    /// # Arguments
17    ///
18    /// * `citations` - List of citation queries to match
19    ///
20    /// # Returns
21    ///
22    /// Returns a `Result<CitationMatches>` containing match results for each citation
23    ///
24    /// # Example
25    ///
26    /// ```no_run
27    /// use pubmed_client::{PubMedClient, pubmed::CitationQuery};
28    ///
29    /// #[tokio::main]
30    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
31    ///     let client = PubMedClient::new();
32    ///     let citations = vec![
33    ///         CitationQuery::new(
34    ///             "proc natl acad sci u s a", "1991", "88", "3248", "mann bj", "Art1",
35    ///         ),
36    ///         CitationQuery::new(
37    ///             "science", "1987", "235", "182", "palmenberg ac", "Art2",
38    ///         ),
39    ///     ];
40    ///     let results = client.match_citations(&citations).await?;
41    ///     for m in &results.matches {
42    ///         println!("{}: {:?} ({:?})", m.key, m.pmid, m.status);
43    ///     }
44    ///     Ok(())
45    /// }
46    /// ```
47    #[instrument(skip(self), fields(citations_count = citations.len()))]
48    pub async fn match_citations(&self, citations: &[CitationQuery]) -> Result<CitationMatches> {
49        if citations.is_empty() {
50            return Ok(CitationMatches {
51                matches: Vec::new(),
52            });
53        }
54
55        // Build bdata parameter: citations separated by %0D (carriage return)
56        let bdata: String = citations
57            .iter()
58            .map(|c| c.to_bdata())
59            .collect::<Vec<_>>()
60            .join("%0D");
61
62        let url = format!(
63            "{}/ecitmatch.cgi?db=pubmed&retmode=xml&bdata={}",
64            self.base_url, bdata
65        );
66
67        debug!(
68            citations_count = citations.len(),
69            "Making ECitMatch API request"
70        );
71        let response = self.make_request(&url).await?;
72        let text = response.text().await?;
73
74        // Parse pipe-delimited response
75        let matches = Self::parse_ecitmatch_response(&text);
76
77        info!(
78            citations_count = citations.len(),
79            matched_count = matches
80                .iter()
81                .filter(|m| m.status == CitationMatchStatus::Found)
82                .count(),
83            "ECitMatch completed"
84        );
85
86        Ok(CitationMatches { matches })
87    }
88
89    /// Parse ECitMatch pipe-delimited response into CitationMatch results
90    pub(crate) fn parse_ecitmatch_response(text: &str) -> Vec<CitationMatch> {
91        let mut matches = Vec::new();
92
93        for line in text.lines() {
94            let line = line.trim();
95            if line.is_empty() {
96                continue;
97            }
98
99            let parts: Vec<&str> = line.split('|').collect();
100            if parts.len() >= 7 {
101                let pmid_str = parts[6].trim();
102                let (pmid, status) = if pmid_str.is_empty() {
103                    (None, CitationMatchStatus::NotFound)
104                } else if pmid_str.eq_ignore_ascii_case("AMBIGUOUS") {
105                    (None, CitationMatchStatus::Ambiguous)
106                } else {
107                    (Some(pmid_str.to_string()), CitationMatchStatus::Found)
108                };
109
110                matches.push(CitationMatch {
111                    journal: parts[0].replace('+', " "),
112                    year: parts[1].to_string(),
113                    volume: parts[2].to_string(),
114                    first_page: parts[3].to_string(),
115                    author_name: parts[4].replace('+', " "),
116                    key: parts[5].to_string(),
117                    pmid,
118                    status,
119                });
120            }
121        }
122
123        matches
124    }
125}
126
#[cfg(test)]
mod tests {
    //! Unit tests for ECitMatch response parsing and the empty-input fast path.

    use super::*;

    /// A numeric seventh field is reported as a found PMID, and every other
    /// field is carried over verbatim.
    #[test]
    fn test_parse_ecitmatch_response_found() {
        let response = "proc natl acad sci u s a|1991|88|3248|mann bj|Art1|2014248\n";
        let matches = PubMedClient::parse_ecitmatch_response(response);

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].journal, "proc natl acad sci u s a");
        assert_eq!(matches[0].year, "1991");
        assert_eq!(matches[0].volume, "88");
        assert_eq!(matches[0].first_page, "3248");
        assert_eq!(matches[0].author_name, "mann bj");
        assert_eq!(matches[0].key, "Art1");
        assert_eq!(matches[0].pmid, Some("2014248".to_string()));
        assert_eq!(matches[0].status, CitationMatchStatus::Found);
    }

    /// An empty seventh field means no PMID was found.
    #[test]
    fn test_parse_ecitmatch_response_not_found() {
        let response = "fake journal|2000|1|1|nobody|ref1|\n";
        let matches = PubMedClient::parse_ecitmatch_response(response);

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].pmid, None);
        assert_eq!(matches[0].status, CitationMatchStatus::NotFound);
    }

    /// The `AMBIGUOUS` marker yields the ambiguous status without a PMID.
    #[test]
    fn test_parse_ecitmatch_response_ambiguous() {
        let response = "some journal|2000|1|1|smith|ref1|AMBIGUOUS\n";
        let matches = PubMedClient::parse_ecitmatch_response(response);

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].pmid, None);
        assert_eq!(matches[0].status, CitationMatchStatus::Ambiguous);
    }

    /// Several lines produce several results, in input order.
    #[test]
    fn test_parse_ecitmatch_response_multiple() {
        let response = concat!(
            "proc natl acad sci u s a|1991|88|3248|mann bj|Art1|2014248\n",
            "science|1987|235|182|palmenberg ac|Art2|3026048\n",
        );
        let matches = PubMedClient::parse_ecitmatch_response(response);

        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].pmid, Some("2014248".to_string()));
        assert_eq!(matches[1].pmid, Some("3026048".to_string()));
    }

    /// Empty input yields no results.
    #[test]
    fn test_parse_ecitmatch_response_empty() {
        let matches = PubMedClient::parse_ecitmatch_response("");
        assert!(matches.is_empty());
    }

    /// Lines with fewer than seven fields and blank lines are skipped.
    #[test]
    fn test_parse_ecitmatch_response_skips_malformed_lines() {
        let response = "not|enough|fields\n\n   \n";
        let matches = PubMedClient::parse_ecitmatch_response(response);
        assert!(matches.is_empty());
    }

    /// Windows-style line endings do not leak into the parsed PMID.
    #[test]
    fn test_parse_ecitmatch_response_crlf() {
        let response = "science|1987|235|182|palmenberg ac|Art2|3026048\r\n";
        let matches = PubMedClient::parse_ecitmatch_response(response);

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].pmid, Some("3026048".to_string()));
        assert_eq!(matches[0].status, CitationMatchStatus::Found);
    }

    /// `+` in the journal and author fields is decoded back to a space.
    #[test]
    fn test_parse_ecitmatch_response_plus_to_space() {
        let response = "proc+natl+acad+sci|1991|88|3248|mann+bj|Art1|2014248\n";
        let matches = PubMedClient::parse_ecitmatch_response(response);

        // Guard the indexing below so a parse failure reports clearly.
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].journal, "proc natl acad sci");
        assert_eq!(matches[0].author_name, "mann bj");
    }

    /// `to_bdata` encodes spaces as `+` and ends the record with a trailing `|`.
    #[test]
    fn test_citation_query_to_bdata() {
        let query = CitationQuery::new(
            "proc natl acad sci u s a",
            "1991",
            "88",
            "3248",
            "mann bj",
            "Art1",
        );
        let bdata = query.to_bdata();
        assert_eq!(bdata, "proc+natl+acad+sci+u+s+a|1991|88|3248|mann+bj|Art1|");
    }

    /// Matching an empty slice succeeds with an empty result.
    #[test]
    fn test_empty_citations_match() {
        let client = PubMedClient::new();
        let result = tokio_test::block_on(client.match_citations(&[]));
        assert!(result.is_ok());
        assert!(result.unwrap().matches.is_empty());
    }
}