pubmed_parser/common/
models.rs

//! Common data models shared across PubMed and PMC modules
//!
//! This module provides unified data structures for authors, affiliations,
//! and publication/history dates that are used consistently across both
//! PubMed metadata and PMC full-text content.
5
6use serde::{Deserialize, Serialize};
7use std::fmt::{Display, Formatter, Result as FmtResult};
8use std::str::Chars;
9
10/// Represents an author's institutional affiliation
11///
12/// This structure is used across both PubMed and PMC to represent
13/// institutional affiliations in a consistent way.
14#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
15pub struct Affiliation {
16    /// Affiliation ID (optional, commonly used in PMC XML)
17    pub id: Option<String>,
18    /// Institution name (e.g., "Harvard Medical School")
19    pub institution: Option<String>,
20    /// Department or division (e.g., "Department of Medicine")
21    pub department: Option<String>,
22    /// Full address including street, city, state/province
23    pub address: Option<String>,
24    /// Country
25    pub country: Option<String>,
26}
27
28/// Represents a detailed author with enhanced metadata
29///
30/// This structure provides a unified representation of author information
31/// across PubMed and PMC, consolidating various name formats and metadata.
32#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
33pub struct Author {
34    /// Author's surname (last name)
35    pub surname: Option<String>,
36    /// Author's given names (first name, middle names)
37    pub given_names: Option<String>,
38    /// Author's initials (useful when given_names not available)
39    pub initials: Option<String>,
40    /// Name suffix (e.g., "Jr", "Sr", "III")
41    pub suffix: Option<String>,
42    /// Full formatted name
43    pub full_name: String,
44    /// List of institutional affiliations
45    pub affiliations: Vec<Affiliation>,
46    /// ORCID identifier (e.g., "0000-0000-0000-0000")
47    pub orcid: Option<String>,
48    /// Author's email address
49    pub email: Option<String>,
50    /// Whether this author is a corresponding author
51    pub is_corresponding: bool,
52    /// Author's roles/contributions (e.g., ["Conceptualization", "Writing - original draft"])
53    pub roles: Vec<String>,
54}
55
56impl Display for Author {
57    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
58        write!(f, "{}", self.full_name)
59    }
60}
61
62impl Author {
63    /// Create a new Author with basic information
64    pub fn new(surname: Option<String>, given_names: Option<String>) -> Self {
65        let full_name = format_author_name(&surname, &given_names, &None);
66        Author {
67            surname,
68            given_names,
69            initials: None,
70            suffix: None,
71            full_name,
72            affiliations: Vec::new(),
73            orcid: None,
74            email: None,
75            is_corresponding: false,
76            roles: Vec::new(),
77        }
78    }
79
80    /// Create an author from a full name string
81    ///
82    /// This is a convenience method for when you have a complete name
83    /// but don't need to separate it into surname and given names.
84    pub fn from_full_name(full_name: String) -> Self {
85        Author {
86            surname: None,
87            given_names: None,
88            initials: None,
89            suffix: None,
90            full_name,
91            affiliations: Vec::new(),
92            orcid: None,
93            email: None,
94            is_corresponding: false,
95            roles: Vec::new(),
96        }
97    }
98
99    /// Check if the author is affiliated with a specific institution
100    ///
101    /// # Arguments
102    ///
103    /// * `institution` - Institution name to check (case-insensitive)
104    ///
105    /// # Returns
106    ///
107    /// `true` if the author has an affiliation matching the institution
108    pub fn is_affiliated_with(&self, institution: &str) -> bool {
109        let institution_lower = institution.to_lowercase();
110        self.affiliations.iter().any(|affil| {
111            affil
112                .institution
113                .as_ref()
114                .is_some_and(|inst| inst.to_lowercase().contains(&institution_lower))
115        })
116    }
117
118    /// Get the author's primary affiliation (first in the list)
119    ///
120    /// # Returns
121    ///
122    /// A reference to the primary affiliation, if any
123    pub fn primary_affiliation(&self) -> Option<&Affiliation> {
124        self.affiliations.first()
125    }
126
127    /// Check if the author has an ORCID identifier
128    ///
129    /// # Returns
130    ///
131    /// `true` if the author has an ORCID ID
132    pub fn has_orcid(&self) -> bool {
133        self.orcid.is_some()
134    }
135
136    /// Check if the author name is empty
137    ///
138    /// # Returns
139    ///
140    /// `true` if the full name is empty or just whitespace
141    pub fn is_empty(&self) -> bool {
142        self.full_name.trim().is_empty()
143    }
144
145    /// Get the length of the author's full name
146    ///
147    /// # Returns
148    ///
149    /// Length of the full name string
150    pub fn len(&self) -> usize {
151        self.full_name.len()
152    }
153
154    /// Get an iterator over the characters in the author's full name
155    ///
156    /// # Returns
157    ///
158    /// Iterator over characters
159    pub fn chars(&self) -> Chars<'_> {
160        self.full_name.chars()
161    }
162}
163
164impl Affiliation {
165    /// Create a new Affiliation instance
166    pub fn new(institution: Option<String>) -> Self {
167        Self {
168            id: None,
169            institution,
170            department: None,
171            address: None,
172            country: None,
173        }
174    }
175}
176
/// Assemble a display name from the individual name parts.
///
/// # Arguments
///
/// * `surname` - Surname (last name), if known
/// * `given_names` - Given names (first and middle), if known
/// * `initials` - Initials; only consulted when `given_names` is `None`
///   and a surname is present
///
/// # Returns
///
/// The formatted name, chosen as follows:
/// 1. Surname and given names present: "GivenNames Surname"
/// 2. Surname without given names: "Initials Surname" when initials are
///    present, otherwise just "Surname"
/// 3. Given names without surname: "GivenNames"
/// 4. Neither surname nor given names: "Unknown Author"
///
/// The parts are used verbatim; no trimming or other normalisation happens.
pub fn format_author_name(
    surname: &Option<String>,
    given_names: &Option<String>,
    initials: &Option<String>,
) -> String {
    // Without a surname the initials play no role at all.
    let Some(family) = surname.as_deref() else {
        return given_names
            .clone()
            .unwrap_or_else(|| "Unknown Author".to_string());
    };

    // Given names take precedence over initials as the leading part.
    match given_names.as_deref().or(initials.as_deref()) {
        Some(lead) => format!("{lead} {family}"),
        None => family.to_owned(),
    }
}
210
211/// Structured publication date.
212///
213/// Used by both PubMed and PMC articles. A single article can have multiple
214/// publication dates distinguished by `pub_type` (e.g., electronic vs. print).
215///
216/// PubMed: Maps to `<PubDate>` inside `<JournalIssue>`.
217/// PMC/JATS: Maps to `<pub-date pub-type="...">`.
218#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
219pub struct PublicationDate {
220    /// Publication type. Common values: `"epub"`, `"ppub"`, `"collection"`, `"medline"`.
221    pub pub_type: Option<String>,
222    /// Year. From `<Year>`.
223    pub year: Option<u16>,
224    /// Month (1-12). From `<Month>`.
225    pub month: Option<u8>,
226    /// Day (1-31). From `<Day>`.
227    pub day: Option<u8>,
228}
229
230/// Publication history date.
231///
232/// Used by both PubMed and PMC articles for dates like received, accepted, revised.
233///
234/// PubMed: Maps to `<History>/<PubMedPubDate>`.
235/// PMC/JATS: Maps to `<history>/<date date-type="...">`.
236#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
237pub struct HistoryDate {
238    /// Date type. Common values: `"received"`, `"accepted"`, `"rev-recd"`,
239    /// `"pubmed"`, `"medline"`, `"entrez"`.
240    pub date_type: String,
241    /// Year. From `<Year>`.
242    pub year: Option<u16>,
243    /// Month (1-12). From `<Month>`.
244    pub month: Option<u8>,
245    /// Day (1-31). From `<Day>`.
246    pub day: Option<u8>,
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an owned optional string.
    fn some(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    /// `Author::new` keeps both name parts, derives the full name and leaves
    /// the ORCID and corresponding-author flag unset.
    #[test]
    fn test_author_creation() {
        let author = Author::new(some("Smith"), some("Jane"));

        assert_eq!(author.surname, some("Smith"));
        assert_eq!(author.given_names, some("Jane"));
        assert_eq!(author.full_name, "Jane Smith");
        assert!(!author.has_orcid());
        assert!(!author.is_corresponding);
    }

    /// Institution matching works on a substring and the first affiliation
    /// is reported as the primary one.
    #[test]
    fn test_author_affiliations() {
        let harvard = Affiliation {
            id: None,
            institution: some("Harvard Medical School"),
            department: some("Department of Medicine"),
            address: some("Boston, MA"),
            country: some("USA"),
        };
        let mut author = Author::new(some("Doe"), some("John"));
        author.affiliations.push(harvard);

        assert!(author.is_affiliated_with("Harvard"));
        assert!(!author.is_affiliated_with("Stanford"));

        let primary = author.primary_affiliation().unwrap();
        assert_eq!(primary.institution, some("Harvard Medical School"));
    }

    /// One case per formatting rule of `format_author_name`.
    #[test]
    fn test_format_author_name() {
        // (surname, given_names, initials, expected)
        let cases = [
            (some("Smith"), some("John"), None, "John Smith"),
            (some("Doe"), None, some("J"), "J Doe"),
            (some("Johnson"), None, None, "Johnson"),
            (None, some("Jane"), None, "Jane"),
            (None, None, None, "Unknown Author"),
        ];

        for (surname, given_names, initials, expected) in &cases {
            assert_eq!(
                format_author_name(surname, given_names, initials),
                *expected
            );
        }
    }

    /// `Affiliation::new` fills in only the institution.
    #[test]
    fn test_affiliation_creation() {
        let affil = Affiliation::new(some("MIT"));

        assert_eq!(affil.institution, some("MIT"));
        assert_eq!(affil.id, None);
        assert_eq!(affil.department, None);
    }
}