pubmed_client/pubmed/client/
citmatch.rs1use crate::error::Result;
4use crate::pubmed::models::{CitationMatch, CitationMatchStatus, CitationMatches, CitationQuery};
5use tracing::{debug, info, instrument};
6
7use super::PubMedClient;
8
9impl PubMedClient {
10 #[instrument(skip(self), fields(citations_count = citations.len()))]
48 pub async fn match_citations(&self, citations: &[CitationQuery]) -> Result<CitationMatches> {
49 if citations.is_empty() {
50 return Ok(CitationMatches {
51 matches: Vec::new(),
52 });
53 }
54
55 let bdata: String = citations
57 .iter()
58 .map(|c| c.to_bdata())
59 .collect::<Vec<_>>()
60 .join("%0D");
61
62 let url = format!(
63 "{}/ecitmatch.cgi?db=pubmed&retmode=xml&bdata={}",
64 self.base_url, bdata
65 );
66
67 debug!(
68 citations_count = citations.len(),
69 "Making ECitMatch API request"
70 );
71 let response = self.make_request(&url).await?;
72 let text = response.text().await?;
73
74 let matches = Self::parse_ecitmatch_response(&text);
76
77 info!(
78 citations_count = citations.len(),
79 matched_count = matches
80 .iter()
81 .filter(|m| m.status == CitationMatchStatus::Found)
82 .count(),
83 "ECitMatch completed"
84 );
85
86 Ok(CitationMatches { matches })
87 }
88
89 pub(crate) fn parse_ecitmatch_response(text: &str) -> Vec<CitationMatch> {
91 let mut matches = Vec::new();
92
93 for line in text.lines() {
94 let line = line.trim();
95 if line.is_empty() {
96 continue;
97 }
98
99 let parts: Vec<&str> = line.split('|').collect();
100 if parts.len() >= 7 {
101 let pmid_str = parts[6].trim();
102 let (pmid, status) = if pmid_str.is_empty() {
103 (None, CitationMatchStatus::NotFound)
104 } else if pmid_str.eq_ignore_ascii_case("AMBIGUOUS") {
105 (None, CitationMatchStatus::Ambiguous)
106 } else {
107 (Some(pmid_str.to_string()), CitationMatchStatus::Found)
108 };
109
110 matches.push(CitationMatch {
111 journal: parts[0].replace('+', " "),
112 year: parts[1].to_string(),
113 volume: parts[2].to_string(),
114 first_page: parts[3].to_string(),
115 author_name: parts[4].replace('+', " "),
116 key: parts[5].to_string(),
117 pmid,
118 status,
119 });
120 }
121 }
122
123 matches
124 }
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for running the ECitMatch response parser on `raw`.
    fn parse(raw: &str) -> Vec<CitationMatch> {
        PubMedClient::parse_ecitmatch_response(raw)
    }

    #[test]
    fn test_parse_ecitmatch_response_found() {
        let parsed = parse("proc natl acad sci u s a|1991|88|3248|mann bj|Art1|2014248\n");

        assert_eq!(parsed.len(), 1);
        let hit = &parsed[0];
        assert_eq!(hit.journal, "proc natl acad sci u s a");
        assert_eq!(hit.year, "1991");
        assert_eq!(hit.volume, "88");
        assert_eq!(hit.first_page, "3248");
        assert_eq!(hit.author_name, "mann bj");
        assert_eq!(hit.key, "Art1");
        assert_eq!(hit.pmid, Some(String::from("2014248")));
        assert_eq!(hit.status, CitationMatchStatus::Found);
    }

    #[test]
    fn test_parse_ecitmatch_response_not_found() {
        // An empty trailing field means no PMID was returned.
        let parsed = parse("fake journal|2000|1|1|nobody|ref1|\n");

        assert_eq!(parsed.len(), 1);
        let miss = &parsed[0];
        assert_eq!(miss.pmid, None);
        assert_eq!(miss.status, CitationMatchStatus::NotFound);
    }

    #[test]
    fn test_parse_ecitmatch_response_ambiguous() {
        let parsed = parse("some journal|2000|1|1|smith|ref1|AMBIGUOUS\n");

        assert_eq!(parsed.len(), 1);
        let entry = &parsed[0];
        assert_eq!(entry.pmid, None);
        assert_eq!(entry.status, CitationMatchStatus::Ambiguous);
    }

    #[test]
    fn test_parse_ecitmatch_response_multiple() {
        let raw = concat!(
            "proc natl acad sci u s a|1991|88|3248|mann bj|Art1|2014248\n",
            "science|1987|235|182|palmenberg ac|Art2|3026048\n",
        );
        let parsed = parse(raw);

        assert_eq!(parsed.len(), 2);
        let expected = ["2014248", "3026048"];
        for (entry, pmid) in parsed.iter().zip(expected.iter()) {
            assert_eq!(entry.pmid, Some(pmid.to_string()));
        }
    }

    #[test]
    fn test_parse_ecitmatch_response_empty() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn test_parse_ecitmatch_response_plus_to_space() {
        // `+` separators in the journal and author columns come back as spaces.
        let parsed = parse("proc+natl+acad+sci|1991|88|3248|mann+bj|Art1|2014248\n");

        let first = &parsed[0];
        assert_eq!(first.journal, "proc natl acad sci");
        assert_eq!(first.author_name, "mann bj");
    }

    #[test]
    fn test_citation_query_to_bdata() {
        let query = CitationQuery::new(
            "proc natl acad sci u s a",
            "1991",
            "88",
            "3248",
            "mann bj",
            "Art1",
        );

        assert_eq!(
            query.to_bdata(),
            "proc+natl+acad+sci+u+s+a|1991|88|3248|mann+bj|Art1|"
        );
    }

    #[test]
    fn test_empty_citations_match() {
        use tokio_test::block_on;

        // An empty batch must succeed and yield no matches.
        let client = PubMedClient::new();
        let outcome = block_on(client.match_citations(&[]));

        assert!(outcome.is_ok());
        let returned = outcome.unwrap();
        assert!(returned.matches.is_empty());
    }
}