pubmed_parser/common/models.rs
//! Common data models shared across PubMed and PMC modules
//!
//! This module provides unified data structures for authors, affiliations,
//! and publication/history dates that are used consistently across both
//! PubMed metadata and PMC full-text content.
5
6use serde::{Deserialize, Serialize};
7use std::fmt::{Display, Formatter, Result as FmtResult};
8use std::str::Chars;
9
10/// Represents an author's institutional affiliation
11///
12/// This structure is used across both PubMed and PMC to represent
13/// institutional affiliations in a consistent way.
14#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
15pub struct Affiliation {
16 /// Affiliation ID (optional, commonly used in PMC XML)
17 pub id: Option<String>,
18 /// Institution name (e.g., "Harvard Medical School")
19 pub institution: Option<String>,
20 /// Department or division (e.g., "Department of Medicine")
21 pub department: Option<String>,
22 /// Full address including street, city, state/province
23 pub address: Option<String>,
24 /// Country
25 pub country: Option<String>,
26}
27
28/// Represents a detailed author with enhanced metadata
29///
30/// This structure provides a unified representation of author information
31/// across PubMed and PMC, consolidating various name formats and metadata.
32#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
33pub struct Author {
34 /// Author's surname (last name)
35 pub surname: Option<String>,
36 /// Author's given names (first name, middle names)
37 pub given_names: Option<String>,
38 /// Author's initials (useful when given_names not available)
39 pub initials: Option<String>,
40 /// Name suffix (e.g., "Jr", "Sr", "III")
41 pub suffix: Option<String>,
42 /// Full formatted name
43 pub full_name: String,
44 /// List of institutional affiliations
45 pub affiliations: Vec<Affiliation>,
46 /// ORCID identifier (e.g., "0000-0000-0000-0000")
47 pub orcid: Option<String>,
48 /// Author's email address
49 pub email: Option<String>,
50 /// Whether this author is a corresponding author
51 pub is_corresponding: bool,
52 /// Author's roles/contributions (e.g., ["Conceptualization", "Writing - original draft"])
53 pub roles: Vec<String>,
54}
55
56impl Display for Author {
57 fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
58 write!(f, "{}", self.full_name)
59 }
60}
61
62impl Author {
63 /// Create a new Author with basic information
64 pub fn new(surname: Option<String>, given_names: Option<String>) -> Self {
65 let full_name = format_author_name(&surname, &given_names, &None);
66 Author {
67 surname,
68 given_names,
69 initials: None,
70 suffix: None,
71 full_name,
72 affiliations: Vec::new(),
73 orcid: None,
74 email: None,
75 is_corresponding: false,
76 roles: Vec::new(),
77 }
78 }
79
80 /// Create an author from a full name string
81 ///
82 /// This is a convenience method for when you have a complete name
83 /// but don't need to separate it into surname and given names.
84 pub fn from_full_name(full_name: String) -> Self {
85 Author {
86 surname: None,
87 given_names: None,
88 initials: None,
89 suffix: None,
90 full_name,
91 affiliations: Vec::new(),
92 orcid: None,
93 email: None,
94 is_corresponding: false,
95 roles: Vec::new(),
96 }
97 }
98
99 /// Check if the author is affiliated with a specific institution
100 ///
101 /// # Arguments
102 ///
103 /// * `institution` - Institution name to check (case-insensitive)
104 ///
105 /// # Returns
106 ///
107 /// `true` if the author has an affiliation matching the institution
108 pub fn is_affiliated_with(&self, institution: &str) -> bool {
109 let institution_lower = institution.to_lowercase();
110 self.affiliations.iter().any(|affil| {
111 affil
112 .institution
113 .as_ref()
114 .is_some_and(|inst| inst.to_lowercase().contains(&institution_lower))
115 })
116 }
117
118 /// Get the author's primary affiliation (first in the list)
119 ///
120 /// # Returns
121 ///
122 /// A reference to the primary affiliation, if any
123 pub fn primary_affiliation(&self) -> Option<&Affiliation> {
124 self.affiliations.first()
125 }
126
127 /// Check if the author has an ORCID identifier
128 ///
129 /// # Returns
130 ///
131 /// `true` if the author has an ORCID ID
132 pub fn has_orcid(&self) -> bool {
133 self.orcid.is_some()
134 }
135
136 /// Check if the author name is empty
137 ///
138 /// # Returns
139 ///
140 /// `true` if the full name is empty or just whitespace
141 pub fn is_empty(&self) -> bool {
142 self.full_name.trim().is_empty()
143 }
144
145 /// Get the length of the author's full name
146 ///
147 /// # Returns
148 ///
149 /// Length of the full name string
150 pub fn len(&self) -> usize {
151 self.full_name.len()
152 }
153
154 /// Get an iterator over the characters in the author's full name
155 ///
156 /// # Returns
157 ///
158 /// Iterator over characters
159 pub fn chars(&self) -> Chars<'_> {
160 self.full_name.chars()
161 }
162}
163
164impl Affiliation {
165 /// Create a new Affiliation instance
166 pub fn new(institution: Option<String>) -> Self {
167 Self {
168 id: None,
169 institution,
170 department: None,
171 address: None,
172 country: None,
173 }
174 }
175}
176
/// Format an author name from components
///
/// Each component is trimmed of surrounding whitespace, and a component that
/// is `None`, empty, or whitespace-only is treated as missing. This keeps
/// blank fields from producing names such as `" Smith"` or `""`.
///
/// # Arguments
///
/// * `surname` - Author's surname (last name)
/// * `given_names` - Author's given names (first, middle)
/// * `initials` - Author's initials (used only if given_names is missing and
///   a surname is present)
///
/// # Returns
///
/// Formatted full name following these rules:
/// 1. If both given_names and surname exist: "GivenNames Surname"
/// 2. If only surname exists: "Initials Surname" (if initials available) or "Surname"
/// 3. If only given_names exists: "GivenNames"
/// 4. If neither exists: "Unknown Author"
pub fn format_author_name(
    surname: &Option<String>,
    given_names: &Option<String>,
    initials: &Option<String>,
) -> String {
    /// Borrow a component's trimmed text, or `None` when it is absent or blank.
    fn present(part: &Option<String>) -> Option<&str> {
        part.as_deref().map(str::trim).filter(|text| !text.is_empty())
    }

    match (present(given_names), present(surname)) {
        (Some(given), Some(sur)) => format!("{given} {sur}"),
        // Initials only stand in for given names when a surname is available.
        (None, Some(sur)) => match present(initials) {
            Some(init) => format!("{init} {sur}"),
            None => sur.to_owned(),
        },
        (Some(given), None) => given.to_owned(),
        (None, None) => "Unknown Author".to_string(),
    }
}
210
211/// Structured publication date.
212///
213/// Used by both PubMed and PMC articles. A single article can have multiple
214/// publication dates distinguished by `pub_type` (e.g., electronic vs. print).
215///
216/// PubMed: Maps to `<PubDate>` inside `<JournalIssue>`.
217/// PMC/JATS: Maps to `<pub-date pub-type="...">`.
218#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
219pub struct PublicationDate {
220 /// Publication type. Common values: `"epub"`, `"ppub"`, `"collection"`, `"medline"`.
221 pub pub_type: Option<String>,
222 /// Year. From `<Year>`.
223 pub year: Option<u16>,
224 /// Month (1-12). From `<Month>`.
225 pub month: Option<u8>,
226 /// Day (1-31). From `<Day>`.
227 pub day: Option<u8>,
228}
229
230/// Publication history date.
231///
232/// Used by both PubMed and PMC articles for dates like received, accepted, revised.
233///
234/// PubMed: Maps to `<History>/<PubMedPubDate>`.
235/// PMC/JATS: Maps to `<history>/<date date-type="...">`.
236#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
237pub struct HistoryDate {
238 /// Date type. Common values: `"received"`, `"accepted"`, `"rev-recd"`,
239 /// `"pubmed"`, `"medline"`, `"entrez"`.
240 pub date_type: String,
241 /// Year. From `<Year>`.
242 pub year: Option<u16>,
243 /// Month (1-12). From `<Month>`.
244 pub month: Option<u8>,
245 /// Day (1-31). From `<Day>`.
246 pub day: Option<u8>,
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a present, owned string component.
    fn some(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    /// `Author::new` stores both name parts and derives the full name.
    #[test]
    fn test_author_creation() {
        let jane = Author::new(some("Smith"), some("Jane"));

        assert_eq!(jane.surname, some("Smith"));
        assert_eq!(jane.given_names, some("Jane"));
        assert_eq!(jane.full_name, "Jane Smith");
        assert!(!jane.has_orcid());
        assert!(!jane.is_corresponding);
    }

    /// Affiliation lookup matches on a substring and the first entry is primary.
    #[test]
    fn test_author_affiliations() {
        let harvard = Affiliation {
            id: None,
            institution: some("Harvard Medical School"),
            department: some("Department of Medicine"),
            address: some("Boston, MA"),
            country: some("USA"),
        };
        let mut john = Author::new(some("Doe"), some("John"));
        john.affiliations.push(harvard);

        assert!(john.is_affiliated_with("Harvard"));
        assert!(!john.is_affiliated_with("Stanford"));

        let primary = john.primary_affiliation().unwrap();
        assert_eq!(primary.institution, some("Harvard Medical School"));
    }

    /// Each documented formatting rule, as (surname, given names, initials, expected).
    #[test]
    fn test_format_author_name() {
        let cases = [
            (some("Smith"), some("John"), None, "John Smith"),
            (some("Doe"), None, some("J"), "J Doe"),
            (some("Johnson"), None, None, "Johnson"),
            (None, some("Jane"), None, "Jane"),
            (None, None, None, "Unknown Author"),
        ];

        for (surname, given_names, initials, expected) in cases {
            assert_eq!(
                format_author_name(&surname, &given_names, &initials),
                expected
            );
        }
    }

    /// `Affiliation::new` sets only the institution.
    #[test]
    fn test_affiliation_creation() {
        let mit = Affiliation::new(some("MIT"));

        assert_eq!(mit.institution, some("MIT"));
        assert_eq!(mit.id, None);
        assert_eq!(mit.department, None);
    }
}