pubmed_client/pubmed/client/
espell.rs1use crate::error::{PubMedError, Result};
4use crate::pubmed::models::{SpellCheckResult, SpelledQuerySegment};
5use tracing::{debug, info, instrument};
6
7use super::PubMedClient;
8
9impl PubMedClient {
10 #[instrument(skip(self), fields(term = %term))]
46 pub async fn spell_check(&self, term: &str) -> Result<SpellCheckResult> {
47 self.spell_check_db(term, "pubmed").await
48 }
49
50 #[instrument(skip(self), fields(term = %term, db = %db))]
77 pub async fn spell_check_db(&self, term: &str, db: &str) -> Result<SpellCheckResult> {
78 let term = term.trim();
79 if term.is_empty() {
80 return Err(PubMedError::InvalidQuery(
81 "Search term cannot be empty".to_string(),
82 ));
83 }
84
85 let db = db.trim();
86 if db.is_empty() {
87 return Err(PubMedError::ApiError {
88 status: 400,
89 message: "Database name cannot be empty".to_string(),
90 });
91 }
92
93 let url = format!(
94 "{}/espell.fcgi?db={}&term={}",
95 self.base_url,
96 urlencoding::encode(db),
97 urlencoding::encode(term)
98 );
99
100 debug!(term = %term, db = %db, "Making ESpell API request");
101 let response = self.make_request(&url).await?;
102 let xml_text = response.text().await?;
103
104 let result = Self::parse_espell_response(&xml_text, term, db)?;
105
106 info!(
107 term = %term,
108 corrected = %result.corrected_query,
109 has_corrections = result.has_corrections(),
110 "ESpell completed"
111 );
112
113 Ok(result)
114 }
115
116 pub(crate) fn parse_espell_response(
118 xml: &str,
119 query_term: &str,
120 db: &str,
121 ) -> Result<SpellCheckResult> {
122 use crate::common::xml_utils::extract_text_between;
123
124 let error = extract_text_between(xml, "<ERROR>", "</ERROR>");
126 if let Some(error_msg) = error
127 && !error_msg.is_empty()
128 {
129 return Err(PubMedError::ApiError {
130 status: 200,
131 message: format!("NCBI ESpell API error: {}", error_msg),
132 });
133 }
134
135 let database = extract_text_between(xml, "<Database>", "</Database>")
136 .unwrap_or_else(|| db.to_string());
137
138 let query = extract_text_between(xml, "<Query>", "</Query>")
139 .unwrap_or_else(|| query_term.to_string());
140
141 let corrected_query =
142 extract_text_between(xml, "<CorrectedQuery>", "</CorrectedQuery>").unwrap_or_default();
143
144 let spelled_query = if let Some(spelled_content) =
146 extract_text_between(xml, "<SpelledQuery>", "</SpelledQuery>")
147 {
148 Self::parse_spelled_query_segments(&spelled_content)
149 } else {
150 Vec::new()
151 };
152
153 Ok(SpellCheckResult {
154 database,
155 query,
156 corrected_query,
157 spelled_query,
158 })
159 }
160
161 fn parse_spelled_query_segments(content: &str) -> Vec<SpelledQuerySegment> {
163 let mut segments = Vec::new();
164 let mut pos = 0;
165
166 while pos < content.len() {
167 let orig_pos = content[pos..].find("<Original>");
168 let repl_pos = content[pos..].find("<Replaced>");
169
170 match (orig_pos, repl_pos) {
171 (Some(o), Some(r)) if o <= r => {
172 let abs_start = pos + o;
174 if let Some(end_offset) = content[abs_start..].find("</Original>") {
175 let text_start = abs_start + "<Original>".len();
176 let text_end = abs_start + end_offset;
177 segments.push(SpelledQuerySegment::Original(
178 content[text_start..text_end].to_string(),
179 ));
180 pos = text_end + "</Original>".len();
181 } else {
182 break;
183 }
184 }
185 (Some(_), Some(r)) => {
186 let abs_start = pos + r;
188 if let Some(end_offset) = content[abs_start..].find("</Replaced>") {
189 let text_start = abs_start + "<Replaced>".len();
190 let text_end = abs_start + end_offset;
191 segments.push(SpelledQuerySegment::Replaced(
192 content[text_start..text_end].to_string(),
193 ));
194 pos = text_end + "</Replaced>".len();
195 } else {
196 break;
197 }
198 }
199 (Some(o), None) => {
200 let abs_start = pos + o;
202 if let Some(end_offset) = content[abs_start..].find("</Original>") {
203 let text_start = abs_start + "<Original>".len();
204 let text_end = abs_start + end_offset;
205 segments.push(SpelledQuerySegment::Original(
206 content[text_start..text_end].to_string(),
207 ));
208 pos = text_end + "</Original>".len();
209 } else {
210 break;
211 }
212 }
213 (None, Some(r)) => {
214 let abs_start = pos + r;
216 if let Some(end_offset) = content[abs_start..].find("</Replaced>") {
217 let text_start = abs_start + "<Replaced>".len();
218 let text_end = abs_start + end_offset;
219 segments.push(SpelledQuerySegment::Replaced(
220 content[text_start..text_end].to_string(),
221 ));
222 pos = text_end + "</Replaced>".len();
223 } else {
224 break;
225 }
226 }
227 (None, None) => break,
228 }
229 }
230
231 segments
232 }
233}
234
235#[cfg(test)]
236mod tests {
237 use super::*;
238
/// A response with two corrected words yields the corrected query, both
/// replacements, and all four segments in document order.
#[test]
fn test_parse_espell_response_with_corrections() {
    let response_body = r#"<?xml version="1.0" encoding="UTF-8"?>
<eSpellResult>
    <Database>pubmed</Database>
    <Query>asthmaa OR alergies</Query>
    <CorrectedQuery>asthma or allergies</CorrectedQuery>
    <SpelledQuery>
        <Original></Original>
        <Replaced>asthma</Replaced>
        <Original> OR </Original>
        <Replaced>allergies</Replaced>
    </SpelledQuery>
    <ERROR/>
</eSpellResult>"#;

    let parsed =
        PubMedClient::parse_espell_response(response_body, "asthmaa OR alergies", "pubmed")
            .unwrap();

    // Top-level fields.
    assert_eq!(parsed.database, "pubmed");
    assert_eq!(parsed.query, "asthmaa OR alergies");
    assert_eq!(parsed.corrected_query, "asthma or allergies");
    assert!(parsed.has_corrections());

    // Only the replaced words are reported as replacements.
    let expected_replacements = ["asthma", "allergies"];
    let replacements = parsed.replacements();
    assert_eq!(replacements.len(), expected_replacements.len());
    for (idx, want) in expected_replacements.iter().enumerate() {
        assert_eq!(replacements[idx], *want);
    }

    // Segments alternate Original / Replaced, including the empty leading one.
    let expected_segments = [
        SpelledQuerySegment::Original("".to_string()),
        SpelledQuerySegment::Replaced("asthma".to_string()),
        SpelledQuerySegment::Original(" OR ".to_string()),
        SpelledQuerySegment::Replaced("allergies".to_string()),
    ];
    assert_eq!(parsed.spelled_query.len(), expected_segments.len());
    for (idx, want) in expected_segments.iter().enumerate() {
        assert_eq!(parsed.spelled_query[idx], *want);
    }
}
285
/// A correctly spelled query comes back unchanged and reports no
/// corrections or replacements.
#[test]
fn test_parse_espell_response_no_corrections() {
    let response_body = r#"<?xml version="1.0" encoding="UTF-8"?>
<eSpellResult>
    <Database>pubmed</Database>
    <Query>asthma</Query>
    <CorrectedQuery>asthma</CorrectedQuery>
    <SpelledQuery>
        <Original>asthma</Original>
    </SpelledQuery>
    <ERROR/>
</eSpellResult>"#;

    let parsed =
        PubMedClient::parse_espell_response(response_body, "asthma", "pubmed").unwrap();

    assert_eq!(parsed.query, "asthma");
    assert_eq!(parsed.corrected_query, "asthma");

    let flagged = parsed.has_corrections();
    assert!(!flagged);

    let replacements = parsed.replacements();
    assert!(replacements.is_empty());
}
305
/// An empty `<CorrectedQuery>` together with a self-closing `<SpelledQuery/>`
/// parses successfully and leaves the corrected query empty.
#[test]
fn test_parse_espell_response_empty_corrected() {
    let response_body = r#"<eSpellResult>
    <Database>pubmed</Database>
    <Query>xyznonexistent</Query>
    <CorrectedQuery></CorrectedQuery>
    <SpelledQuery/>
    <ERROR/>
</eSpellResult>"#;

    let parsed =
        PubMedClient::parse_espell_response(response_body, "xyznonexistent", "pubmed").unwrap();

    assert_eq!(parsed.query, "xyznonexistent");
    assert_eq!(parsed.corrected_query, "");
}
320
/// The database name is taken from the response when present, and a lone
/// `<Replaced>` segment counts as a correction.
#[test]
fn test_parse_espell_response_pmc_database() {
    let response_body = r#"<eSpellResult>
    <Database>pmc</Database>
    <Query>fiberblast</Query>
    <CorrectedQuery>fibroblast</CorrectedQuery>
    <SpelledQuery>
        <Replaced>fibroblast</Replaced>
    </SpelledQuery>
    <ERROR/>
</eSpellResult>"#;

    let parsed =
        PubMedClient::parse_espell_response(response_body, "fiberblast", "pmc").unwrap();

    assert_eq!(parsed.database, "pmc");
    assert_eq!(parsed.corrected_query, "fibroblast");
    assert!(parsed.has_corrections());
}
338
/// An empty search term is rejected with an error.
#[test]
fn test_spell_check_empty_term() {
    let pubmed = PubMedClient::new();

    let outcome = tokio_test::block_on(pubmed.spell_check(""));

    assert!(outcome.is_err());
}
346
/// A search term made only of whitespace is rejected with an error.
#[test]
fn test_spell_check_whitespace_term() {
    let pubmed = PubMedClient::new();

    let outcome = tokio_test::block_on(pubmed.spell_check(" "));

    assert!(outcome.is_err());
}
354
/// An empty database name is rejected with an error even when the term is valid.
#[test]
fn test_spell_check_db_empty_db() {
    let pubmed = PubMedClient::new();

    let outcome = tokio_test::block_on(pubmed.spell_check_db("asthma", ""));

    assert!(outcome.is_err());
}
362}