pubmed_parser/common/ids.rs
1//! ID validation and cleaning utilities for PubMed and PMC identifiers
2//!
3//! This module provides strongly-typed, validated ID types for PubMed IDs (PMIDs)
4//! and PubMed Central IDs (PMC IDs).
5
6use crate::error::{ParseError, Result};
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use std::str::FromStr;
10
11/// A validated PubMed ID (PMID)
12///
13/// PMIDs are numeric identifiers for articles in the PubMed database.
14/// This type ensures that the ID is valid and provides methods for
15/// parsing, cleaning, and converting between different representations.
16///
17/// # Examples
18///
19/// ```
20/// use pubmed_parser::common::PubMedId;
21///
22/// // Parse from string
23/// let pmid = PubMedId::parse("31978945").unwrap();
24/// assert_eq!(pmid.as_u32(), 31978945);
25/// assert_eq!(pmid.as_str(), "31978945");
26///
27/// // Parse with whitespace (automatically cleaned)
28/// let pmid = PubMedId::parse(" 31978945 ").unwrap();
29/// assert_eq!(pmid.as_u32(), 31978945);
30///
31/// // From u32
32/// let pmid = PubMedId::from_u32(31978945);
33/// assert_eq!(pmid.to_string(), "31978945");
34/// ```
35#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
36pub struct PubMedId {
37 value: u32,
38}
39
40impl PubMedId {
41 /// Parse a PMID from a string
42 ///
43 /// The input is automatically trimmed of whitespace.
44 ///
45 /// # Errors
46 ///
47 /// Returns `ParseError::InvalidPmid` if:
48 /// - The string is empty after trimming
49 /// - The string contains non-numeric characters
50 /// - The number is zero
51 /// - The number is too large to fit in a u32
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// use pubmed_parser::common::PubMedId;
57 ///
58 /// let pmid = PubMedId::parse("31978945").unwrap();
59 /// assert_eq!(pmid.as_u32(), 31978945);
60 ///
61 /// // With whitespace
62 /// let pmid = PubMedId::parse(" 31978945 ").unwrap();
63 /// assert_eq!(pmid.as_u32(), 31978945);
64 ///
65 /// // Invalid cases
66 /// assert!(PubMedId::parse("").is_err());
67 /// assert!(PubMedId::parse("abc").is_err());
68 /// assert!(PubMedId::parse("0").is_err());
69 /// assert!(PubMedId::parse("-123").is_err());
70 /// ```
71 pub fn parse(s: &str) -> Result<Self> {
72 let trimmed = s.trim();
73
74 if trimmed.is_empty() {
75 return Err(ParseError::InvalidPmid {
76 pmid: s.to_string(),
77 });
78 }
79
80 // Parse as u32
81 let value = trimmed
82 .parse::<u32>()
83 .map_err(|_| ParseError::InvalidPmid {
84 pmid: s.to_string(),
85 })?;
86
87 // PMIDs should be positive (non-zero)
88 if value == 0 {
89 return Err(ParseError::InvalidPmid {
90 pmid: s.to_string(),
91 });
92 }
93
94 Ok(Self { value })
95 }
96
97 /// Create a PubMedId from a u32 value
98 ///
99 /// # Panics
100 ///
101 /// Panics if the value is zero.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// use pubmed_parser::common::PubMedId;
107 ///
108 /// let pmid = PubMedId::from_u32(31978945);
109 /// assert_eq!(pmid.as_u32(), 31978945);
110 /// ```
111 pub fn from_u32(value: u32) -> Self {
112 assert!(value > 0, "PMID must be greater than zero");
113 Self { value }
114 }
115
116 /// Try to create a PubMedId from a u32 value
117 ///
118 /// # Errors
119 ///
120 /// Returns `ParseError::InvalidPmid` if the value is zero.
121 ///
122 /// # Examples
123 ///
124 /// ```
125 /// use pubmed_parser::common::PubMedId;
126 ///
127 /// let pmid = PubMedId::try_from_u32(31978945).unwrap();
128 /// assert_eq!(pmid.as_u32(), 31978945);
129 ///
130 /// assert!(PubMedId::try_from_u32(0).is_err());
131 /// ```
132 pub fn try_from_u32(value: u32) -> Result<Self> {
133 if value == 0 {
134 return Err(ParseError::InvalidPmid {
135 pmid: value.to_string(),
136 });
137 }
138 Ok(Self { value })
139 }
140
141 /// Get the PMID as a u32
142 ///
143 /// # Examples
144 ///
145 /// ```
146 /// use pubmed_parser::common::PubMedId;
147 ///
148 /// let pmid = PubMedId::parse("31978945").unwrap();
149 /// assert_eq!(pmid.as_u32(), 31978945);
150 /// ```
151 pub fn as_u32(&self) -> u32 {
152 self.value
153 }
154
155 /// Get the PMID as a string slice
156 ///
157 /// Note: This creates a temporary String and returns it.
158 /// For owned String, use `to_string()`.
159 ///
160 /// # Examples
161 ///
162 /// ```
163 /// use pubmed_parser::common::PubMedId;
164 ///
165 /// let pmid = PubMedId::from_u32(31978945);
166 /// assert_eq!(pmid.as_str(), "31978945");
167 /// ```
168 pub fn as_str(&self) -> String {
169 self.value.to_string()
170 }
171}
172
173impl fmt::Display for PubMedId {
174 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
175 write!(f, "{}", self.value)
176 }
177}
178
179impl FromStr for PubMedId {
180 type Err = ParseError;
181
182 fn from_str(s: &str) -> Result<Self> {
183 Self::parse(s)
184 }
185}
186
187impl From<u32> for PubMedId {
188 fn from(value: u32) -> Self {
189 Self::from_u32(value)
190 }
191}
192
193impl From<PubMedId> for u32 {
194 fn from(pmid: PubMedId) -> Self {
195 pmid.value
196 }
197}
198
199/// A validated PubMed Central ID (PMC ID)
200///
201/// PMC IDs are identifiers for full-text articles in the PMC database.
202/// They consist of the prefix "PMC" followed by numeric digits.
203/// This type ensures that the ID is valid and provides methods for
204/// parsing, cleaning, and normalizing the ID format.
205///
206/// # Examples
207///
208/// ```
209/// use pubmed_parser::common::PmcId;
210///
211/// // Parse with PMC prefix
212/// let pmcid = PmcId::parse("PMC7906746").unwrap();
213/// assert_eq!(pmcid.as_str(), "PMC7906746");
214/// assert_eq!(pmcid.numeric_part(), 7906746);
215///
216/// // Parse without PMC prefix (automatically added)
217/// let pmcid = PmcId::parse("7906746").unwrap();
218/// assert_eq!(pmcid.as_str(), "PMC7906746");
219///
220/// // With whitespace (automatically cleaned)
221/// let pmcid = PmcId::parse(" PMC7906746 ").unwrap();
222/// assert_eq!(pmcid.as_str(), "PMC7906746");
223/// ```
224#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
225pub struct PmcId {
226 value: u32,
227}
228
229impl PmcId {
230 /// Parse a PMC ID from a string
231 ///
232 /// The input is automatically trimmed of whitespace and the "PMC" prefix
233 /// is added if not present. Case-insensitive parsing is supported.
234 ///
235 /// # Errors
236 ///
237 /// Returns `ParseError::InvalidPmcid` if:
238 /// - The string is empty after trimming
239 /// - The numeric part contains non-numeric characters
240 /// - The numeric part is zero
241 /// - The number is too large to fit in a u32
242 ///
243 /// # Examples
244 ///
245 /// ```
246 /// use pubmed_parser::common::PmcId;
247 ///
248 /// // With PMC prefix
249 /// let pmcid = PmcId::parse("PMC7906746").unwrap();
250 /// assert_eq!(pmcid.as_str(), "PMC7906746");
251 ///
252 /// // Without PMC prefix
253 /// let pmcid = PmcId::parse("7906746").unwrap();
254 /// assert_eq!(pmcid.as_str(), "PMC7906746");
255 ///
256 /// // Case insensitive
257 /// let pmcid = PmcId::parse("pmc7906746").unwrap();
258 /// assert_eq!(pmcid.as_str(), "PMC7906746");
259 ///
260 /// // With whitespace
261 /// let pmcid = PmcId::parse(" PMC7906746 ").unwrap();
262 /// assert_eq!(pmcid.as_str(), "PMC7906746");
263 ///
264 /// // Invalid cases
265 /// assert!(PmcId::parse("").is_err());
266 /// assert!(PmcId::parse("PMC").is_err());
267 /// assert!(PmcId::parse("PMC0").is_err());
268 /// assert!(PmcId::parse("PMCabc").is_err());
269 /// ```
270 pub fn parse(s: &str) -> Result<Self> {
271 let trimmed = s.trim();
272
273 if trimmed.is_empty() {
274 return Err(ParseError::InvalidPmcid {
275 pmcid: s.to_string(),
276 });
277 }
278
279 // Remove PMC prefix if present (case-insensitive)
280 let numeric_part = if trimmed.len() >= 3 && trimmed[0..3].eq_ignore_ascii_case("PMC") {
281 &trimmed[3..]
282 } else {
283 trimmed
284 };
285
286 // Check if numeric part is empty
287 if numeric_part.is_empty() {
288 return Err(ParseError::InvalidPmcid {
289 pmcid: s.to_string(),
290 });
291 }
292
293 // Parse numeric part as u32
294 let value = numeric_part
295 .parse::<u32>()
296 .map_err(|_| ParseError::InvalidPmcid {
297 pmcid: s.to_string(),
298 })?;
299
300 // PMC IDs should be positive (non-zero)
301 if value == 0 {
302 return Err(ParseError::InvalidPmcid {
303 pmcid: s.to_string(),
304 });
305 }
306
307 Ok(Self { value })
308 }
309
310 /// Create a PmcId from a u32 value
311 ///
312 /// # Panics
313 ///
314 /// Panics if the value is zero.
315 ///
316 /// # Examples
317 ///
318 /// ```
319 /// use pubmed_parser::common::PmcId;
320 ///
321 /// let pmcid = PmcId::from_u32(7906746);
322 /// assert_eq!(pmcid.as_str(), "PMC7906746");
323 /// assert_eq!(pmcid.numeric_part(), 7906746);
324 /// ```
325 pub fn from_u32(value: u32) -> Self {
326 assert!(value > 0, "PMC ID numeric part must be greater than zero");
327 Self { value }
328 }
329
330 /// Try to create a PmcId from a u32 value
331 ///
332 /// # Errors
333 ///
334 /// Returns `ParseError::InvalidPmcid` if the value is zero.
335 ///
336 /// # Examples
337 ///
338 /// ```
339 /// use pubmed_parser::common::PmcId;
340 ///
341 /// let pmcid = PmcId::try_from_u32(7906746).unwrap();
342 /// assert_eq!(pmcid.numeric_part(), 7906746);
343 ///
344 /// assert!(PmcId::try_from_u32(0).is_err());
345 /// ```
346 pub fn try_from_u32(value: u32) -> Result<Self> {
347 if value == 0 {
348 return Err(ParseError::InvalidPmcid {
349 pmcid: value.to_string(),
350 });
351 }
352 Ok(Self { value })
353 }
354
355 /// Get the full PMC ID as a string (with "PMC" prefix)
356 ///
357 /// # Examples
358 ///
359 /// ```
360 /// use pubmed_parser::common::PmcId;
361 ///
362 /// let pmcid = PmcId::from_u32(7906746);
363 /// assert_eq!(pmcid.as_str(), "PMC7906746");
364 /// ```
365 pub fn as_str(&self) -> String {
366 format!("PMC{}", self.value)
367 }
368
369 /// Get the numeric part of the PMC ID (without "PMC" prefix)
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// use pubmed_parser::common::PmcId;
375 ///
376 /// let pmcid = PmcId::parse("PMC7906746").unwrap();
377 /// assert_eq!(pmcid.numeric_part(), 7906746);
378 /// ```
379 pub fn numeric_part(&self) -> u32 {
380 self.value
381 }
382
383 /// Get the numeric part as a string (without "PMC" prefix)
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use pubmed_parser::common::PmcId;
389 ///
390 /// let pmcid = PmcId::parse("PMC7906746").unwrap();
391 /// assert_eq!(pmcid.numeric_part_str(), "7906746");
392 /// ```
393 pub fn numeric_part_str(&self) -> String {
394 self.value.to_string()
395 }
396}
397
398impl fmt::Display for PmcId {
399 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400 write!(f, "PMC{}", self.value)
401 }
402}
403
404impl FromStr for PmcId {
405 type Err = ParseError;
406
407 fn from_str(s: &str) -> Result<Self> {
408 Self::parse(s)
409 }
410}
411
412impl From<u32> for PmcId {
413 fn from(value: u32) -> Self {
414 Self::from_u32(value)
415 }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------
    // PubMedId
    // ---------------------------------------------------------------

    /// A digits-only string parses and is visible through both accessors.
    #[test]
    fn test_pubmedid_parse_valid() {
        let parsed = PubMedId::parse("31978945").expect("valid PMID should parse");
        assert_eq!(31978945, parsed.as_u32());
        assert_eq!("31978945", parsed.as_str());
    }

    /// Surrounding whitespace is stripped before parsing.
    #[test]
    fn test_pubmedid_parse_with_whitespace() {
        let parsed = PubMedId::parse(" 31978945 ").expect("padded PMID should parse");
        assert_eq!(31978945, parsed.as_u32());
    }

    /// Empty and whitespace-only inputs are rejected.
    #[test]
    fn test_pubmedid_parse_empty() {
        for blank in &["", " "] {
            assert!(PubMedId::parse(blank).is_err(), "{:?} should be rejected", blank);
        }
    }

    /// Anything that is not a plain integer is rejected.
    #[test]
    fn test_pubmedid_parse_non_numeric() {
        for garbage in &["abc", "123abc", "12.34"] {
            assert!(PubMedId::parse(garbage).is_err(), "{:?} should be rejected", garbage);
        }
    }

    /// Zero is not a valid PMID.
    #[test]
    fn test_pubmedid_parse_zero() {
        let outcome = PubMedId::parse("0");
        assert!(outcome.is_err());
    }

    /// Negative numbers are rejected.
    #[test]
    fn test_pubmedid_parse_negative() {
        let outcome = PubMedId::parse("-123");
        assert!(outcome.is_err());
    }

    /// The panicking constructor keeps the given value.
    #[test]
    fn test_pubmedid_from_u32() {
        assert_eq!(PubMedId::from_u32(31978945).as_u32(), 31978945);
    }

    /// The panicking constructor refuses zero with a descriptive message.
    #[test]
    #[should_panic(expected = "PMID must be greater than zero")]
    fn test_pubmedid_from_u32_zero_panics() {
        let _ = PubMedId::from_u32(0);
    }

    /// The fallible constructor accepts non-zero values and rejects zero.
    #[test]
    fn test_pubmedid_try_from_u32() {
        let accepted = PubMedId::try_from_u32(31978945).expect("non-zero PMID should be accepted");
        assert_eq!(31978945, accepted.as_u32());

        let rejected = PubMedId::try_from_u32(0);
        assert!(rejected.is_err());
    }

    /// `Display` prints the bare number.
    #[test]
    fn test_pubmedid_display() {
        let rendered = format!("{}", PubMedId::from_u32(31978945));
        assert_eq!(rendered, "31978945");
    }

    /// `str::parse` works through the `FromStr` impl.
    #[test]
    fn test_pubmedid_from_str_trait() {
        let parsed = "31978945".parse::<PubMedId>().expect("FromStr should accept a valid PMID");
        assert_eq!(31978945, parsed.as_u32());
    }

    /// A `PubMedId` converts back into its raw `u32`.
    #[test]
    fn test_pubmedid_conversions() {
        let raw = u32::from(PubMedId::from_u32(31978945));
        assert_eq!(raw, 31978945);
    }

    // ---------------------------------------------------------------
    // PmcId
    // ---------------------------------------------------------------

    /// An ID written with the "PMC" prefix parses to its numeric part.
    #[test]
    fn test_pmcid_parse_with_prefix() {
        let parsed = PmcId::parse("PMC7906746").expect("prefixed PMC ID should parse");
        assert_eq!("PMC7906746", parsed.as_str());
        assert_eq!(7906746, parsed.numeric_part());
    }

    /// A bare number is accepted and rendered with the prefix.
    #[test]
    fn test_pmcid_parse_without_prefix() {
        let parsed = PmcId::parse("7906746").expect("bare PMC ID should parse");
        assert_eq!("PMC7906746", parsed.as_str());
        assert_eq!(7906746, parsed.numeric_part());
    }

    /// The prefix is recognised regardless of letter case.
    #[test]
    fn test_pmcid_parse_case_insensitive() {
        let parsed: Vec<PmcId> = ["pmc7906746", "Pmc7906746", "PMC7906746"]
            .iter()
            .map(|raw| PmcId::parse(raw).unwrap())
            .collect();

        assert_eq!(parsed[0], parsed[1]);
        assert_eq!(parsed[1], parsed[2]);
        assert_eq!(parsed[0].as_str(), "PMC7906746");
    }

    /// Surrounding whitespace is stripped for both prefixed and bare input.
    #[test]
    fn test_pmcid_parse_with_whitespace() {
        for padded in &[" PMC7906746 ", " 7906746 "] {
            let parsed = PmcId::parse(padded).unwrap();
            assert_eq!(parsed.as_str(), "PMC7906746");
        }
    }

    /// Empty input, blank input and a lone prefix are all rejected.
    #[test]
    fn test_pmcid_parse_empty() {
        for blank in &["", " ", "PMC"] {
            assert!(PmcId::parse(blank).is_err(), "{:?} should be rejected", blank);
        }
    }

    /// A non-numeric remainder is rejected with or without the prefix.
    #[test]
    fn test_pmcid_parse_non_numeric() {
        for garbage in &["PMCabc", "PMC123abc", "abc"] {
            assert!(PmcId::parse(garbage).is_err(), "{:?} should be rejected", garbage);
        }
    }

    /// Zero is not a valid numeric part.
    #[test]
    fn test_pmcid_parse_zero() {
        for zero in &["PMC0", "0"] {
            assert!(PmcId::parse(zero).is_err(), "{:?} should be rejected", zero);
        }
    }

    /// The panicking constructor keeps the value and renders the prefix.
    #[test]
    fn test_pmcid_from_u32() {
        let built = PmcId::from_u32(7906746);
        assert_eq!("PMC7906746", built.as_str());
        assert_eq!(7906746, built.numeric_part());
    }

    /// The panicking constructor refuses zero with a descriptive message.
    #[test]
    #[should_panic(expected = "PMC ID numeric part must be greater than zero")]
    fn test_pmcid_from_u32_zero_panics() {
        let _ = PmcId::from_u32(0);
    }

    /// The fallible constructor accepts non-zero values and rejects zero.
    #[test]
    fn test_pmcid_try_from_u32() {
        let accepted = PmcId::try_from_u32(7906746).expect("non-zero PMC ID should be accepted");
        assert_eq!(7906746, accepted.numeric_part());

        let rejected = PmcId::try_from_u32(0);
        assert!(rejected.is_err());
    }

    /// The numeric part can be rendered without the prefix.
    #[test]
    fn test_pmcid_numeric_part_str() {
        let parsed = PmcId::parse("PMC7906746").unwrap();
        assert_eq!("7906746", parsed.numeric_part_str());
    }

    /// `Display` prints the prefixed form.
    #[test]
    fn test_pmcid_display() {
        let rendered = format!("{}", PmcId::from_u32(7906746));
        assert_eq!(rendered, "PMC7906746");
    }

    /// `str::parse` works through the `FromStr` impl for both spellings.
    #[test]
    fn test_pmcid_from_str_trait() {
        let prefixed = "PMC7906746".parse::<PmcId>().unwrap();
        assert_eq!(7906746, prefixed.numeric_part());

        let bare = "7906746".parse::<PmcId>().unwrap();
        assert_eq!("PMC7906746", bare.as_str());
    }

    /// IDs compare equal regardless of how they were constructed.
    #[test]
    fn test_pmcid_equality() {
        let from_prefixed = PmcId::parse("PMC7906746").unwrap();
        let from_bare = PmcId::parse("7906746").unwrap();
        let from_number = PmcId::from_u32(7906746);

        assert_eq!(from_prefixed, from_bare);
        assert_eq!(from_bare, from_number);
    }

    /// Equal IDs hash identically, so a set keeps only one of them.
    #[test]
    fn test_pmcid_hash() {
        use std::collections::HashSet;

        let unique: HashSet<PmcId> = ["PMC7906746", "7906746"]
            .iter()
            .map(|raw| PmcId::parse(raw).unwrap())
            .collect();

        // Should only contain one item since they're equal
        assert_eq!(unique.len(), 1);
    }

    // Real-world examples from the codebase

    /// Known PMIDs round-trip through parse and `as_str` unchanged.
    #[test]
    fn test_real_world_pmids() {
        let samples = [
            "31978945", // COVID-19 research
            "25760099", // CRISPR-Cas9
            "33515491", // Cancer treatment
            "12345678",
        ];

        for raw in samples.iter() {
            let parsed = PubMedId::parse(raw).unwrap();
            assert_eq!(parsed.as_str(), *raw);
        }
    }

    /// Known PMC IDs normalise to the prefixed form.
    #[test]
    fn test_real_world_pmcids() {
        let samples = [
            ("PMC7906746", "PMC7906746"),
            ("PMC10618641", "PMC10618641"),
            ("PMC10000000", "PMC10000000"),
            ("7906746", "PMC7906746"),   // Without prefix
            ("10618641", "PMC10618641"), // Without prefix
        ];

        for (raw, canonical) in samples.iter() {
            let parsed = PmcId::parse(raw).unwrap();
            assert_eq!(parsed.as_str(), *canonical);
        }
    }

    /// Both ID types survive a JSON round trip.
    #[test]
    fn test_serialization() {
        let original_pmid = PubMedId::from_u32(31978945);
        let pmid_json = serde_json::to_string(&original_pmid).unwrap();
        let restored_pmid: PubMedId = serde_json::from_str(&pmid_json).unwrap();
        assert_eq!(original_pmid, restored_pmid);

        let original_pmcid = PmcId::from_u32(7906746);
        let pmcid_json = serde_json::to_string(&original_pmcid).unwrap();
        let restored_pmcid: PmcId = serde_json::from_str(&pmcid_json).unwrap();
        assert_eq!(original_pmcid, restored_pmcid);
    }
}