pubmed_client/pubmed/query/
validation.rs1use super::SearchQuery;
4use crate::error::{PubMedError, Result};
5
6impl SearchQuery {
7 pub fn validate(&self) -> Result<()> {
22 if self.terms.is_empty() && self.filters.is_empty() {
24 return Err(PubMedError::InvalidQuery(
25 "Query cannot be empty".to_string(),
26 ));
27 }
28
29 if let Some(limit) = self.limit {
31 if limit == 0 {
32 return Err(PubMedError::InvalidQuery(
33 "Limit must be greater than 0".to_string(),
34 ));
35 }
36 if limit > 10000 {
37 return Err(PubMedError::InvalidQuery(
38 "Limit should not exceed 10,000 for performance reasons".to_string(),
39 ));
40 }
41 }
42
43 let query_string = self.build();
45 if query_string.len() > 4000 {
46 return Err(PubMedError::InvalidQuery(
47 "Query string is too long (>4000 characters)".to_string(),
48 ));
49 }
50
51 let open_parens = query_string.matches('(').count();
53 let close_parens = query_string.matches(')').count();
54 if open_parens != close_parens {
55 return Err(PubMedError::InvalidQuery(
56 "Unbalanced parentheses in query".to_string(),
57 ));
58 }
59
60 Ok(())
61 }
62
63 pub fn optimize(mut self) -> Self {
80 self.filters.sort();
82 self.filters.dedup();
83
84 self.terms.sort();
86 self.terms.dedup();
87
88 self.terms.retain(|term| !term.trim().is_empty());
90 self.filters.retain(|filter| !filter.trim().is_empty());
91
92 self
93 }
94
95 pub fn get_stats(&self) -> (usize, usize, usize) {
114 let term_count = self.terms.len();
115 let filter_count = self.filters.len();
116
117 let query_string = self.build();
119 let complexity = query_string.matches(" AND ").count()
120 + query_string.matches(" OR ").count() * 2
121 + query_string.matches(" NOT ").count() * 2
122 + query_string.matches('(').count()
123 + 1; (term_count, filter_count, complexity)
126 }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned `Vec<String>` stored in
    /// `terms` / `filters`.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Asserts that `validate()` fails and that the error text contains
    /// `fragment`.
    fn assert_invalid(query: &SearchQuery, fragment: &str) {
        match query.validate() {
            Ok(()) => panic!("expected validation to fail"),
            Err(err) => assert!(err.to_string().contains(fragment)),
        }
    }

    // ---- validate -------------------------------------------------------

    #[test]
    fn test_validate_empty_query() {
        assert_invalid(&SearchQuery::new(), "Query cannot be empty");
    }

    #[test]
    fn test_validate_valid_query() {
        let outcome = SearchQuery::new().query("covid-19").validate();
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_zero_limit() {
        let zero_limit = SearchQuery::new().query("test").limit(0);
        assert_invalid(&zero_limit, "Limit must be greater than 0");
    }

    #[test]
    fn test_validate_excessive_limit() {
        let too_many = SearchQuery::new().query("test").limit(20000);
        assert_invalid(&too_many, "Limit should not exceed 10,000");
    }

    #[test]
    fn test_validate_reasonable_limit() {
        let outcome = SearchQuery::new().query("test").limit(100).validate();
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_max_reasonable_limit() {
        // 10000 is the largest limit that must still be accepted.
        let outcome = SearchQuery::new().query("test").limit(10000).validate();
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_very_long_query() {
        // One character past the 4000 cap.
        let oversized = SearchQuery::new().query("a".repeat(4001));
        assert_invalid(&oversized, "Query string is too long");
    }

    #[test]
    fn test_validate_unbalanced_parentheses() {
        // Start from a combined query, then overwrite its terms with text
        // that opens two groups and never closes them.
        let mut broken_query = SearchQuery::new()
            .query("covid")
            .and(SearchQuery::new().query("vaccine"));
        broken_query.terms = owned(&["((test"]);

        assert_invalid(&broken_query, "Unbalanced parentheses");
    }

    #[test]
    fn test_validate_balanced_parentheses() {
        let grouped = SearchQuery::new()
            .query("covid")
            .and(SearchQuery::new().query("vaccine"))
            .group();
        assert!(grouped.validate().is_ok());
    }

    #[test]
    fn test_validate_with_filters_only() {
        let filters_only = SearchQuery::new().mesh_term("Neoplasms");
        assert!(filters_only.validate().is_ok());
    }

    #[test]
    fn test_validate_with_terms_only() {
        let terms_only = SearchQuery::new().query("covid");
        assert!(terms_only.validate().is_ok());
    }

    // ---- optimize -------------------------------------------------------

    #[test]
    fn test_optimize_removes_duplicates() {
        let mut query = SearchQuery::new();
        query.terms = owned(&["covid", "vaccine", "covid"]);
        query.filters = owned(&["test[filter]", "other[filter]", "test[filter]"]);

        let optimized = query.optimize();

        assert_eq!(optimized.terms.len(), 2);
        assert_eq!(optimized.filters.len(), 2);
        for expected in ["covid", "vaccine"] {
            assert!(optimized.terms.iter().any(|term| term.as_str() == expected));
        }
    }

    #[test]
    fn test_optimize_removes_empty_strings() {
        let mut query = SearchQuery::new();
        query.terms = owned(&["covid", " ", "vaccine", ""]);
        query.filters = owned(&["test[filter]", " ", "other[filter]"]);

        let optimized = query.optimize();

        assert_eq!(optimized.terms.len(), 2);
        assert_eq!(optimized.filters.len(), 2);
        for expected in ["covid", "vaccine"] {
            assert!(optimized.terms.iter().any(|term| term.as_str() == expected));
        }
    }

    #[test]
    fn test_optimize_sorts_terms_and_filters() {
        let mut query = SearchQuery::new();
        query.terms = owned(&["zebra", "apple", "banana"]);
        query.filters = owned(&["z[filter]", "a[filter]", "b[filter]"]);

        let optimized = query.optimize();

        assert_eq!(optimized.terms, owned(&["apple", "banana", "zebra"]));
        assert_eq!(
            optimized.filters,
            owned(&["a[filter]", "b[filter]", "z[filter]"])
        );
    }

    #[test]
    fn test_optimize_preserves_limit() {
        let optimized = SearchQuery::new().query("test").limit(100).optimize();
        assert_eq!(optimized.get_limit(), 100);
    }

    // ---- get_stats ------------------------------------------------------

    #[test]
    fn test_get_stats_basic() {
        let stats = SearchQuery::new()
            .query("covid")
            .query("vaccine")
            .mesh_term("Neoplasms")
            .author("Smith")
            .get_stats();

        // Two `query` calls and two filter-style calls are expected to show
        // up as two terms and two filters.
        let (term_count, filter_count, complexity) = stats;
        assert_eq!(term_count, 2);
        assert_eq!(filter_count, 2);
        assert!(complexity > 0);
    }

    #[test]
    fn test_get_stats_empty_query() {
        let (term_count, filter_count, complexity) = SearchQuery::new().get_stats();

        assert_eq!(term_count, 0);
        assert_eq!(filter_count, 0);
        // An empty query still carries the base score of 1.
        assert_eq!(complexity, 1);
    }

    #[test]
    fn test_get_stats_complex_query() {
        let covid = SearchQuery::new().query("covid");
        let vaccine = SearchQuery::new().query("vaccine");
        let treatment = SearchQuery::new().query("treatment");
        let complex_query = covid.and(vaccine).or(treatment);

        let (_, _, complexity) = complex_query.get_stats();
        assert!(complexity > 3);
    }

    #[test]
    fn test_complexity_calculation() {
        let (_, _, and_complexity) = SearchQuery::new()
            .query("a")
            .and(SearchQuery::new().query("b"))
            .get_stats();

        let (_, _, or_complexity) = SearchQuery::new()
            .query("a")
            .or(SearchQuery::new().query("b"))
            .get_stats();

        // OR must never score lower than the equivalent AND query.
        assert!(or_complexity >= and_complexity);
    }

    #[test]
    fn test_stats_with_nested_queries() {
        let nested = SearchQuery::new()
            .query("a")
            .and(SearchQuery::new().query("b"))
            .group();

        let (_, _, complexity) = nested.get_stats();
        assert!(complexity > 2);
    }
}