pubmed_parser/pubmed/models.rs
1use serde::{Deserialize, Serialize};
2
3// Re-export common types
4pub use crate::common::{Affiliation, Author};
5
6/// A labeled section within a structured abstract
7///
8/// PubMed articles may have structured abstracts with labeled sections such as
9/// "BACKGROUND", "METHODS", "RESULTS", and "CONCLUSIONS". This type represents
10/// a single section of such a structured abstract.
11///
12/// # Example
13///
14/// ```
15/// use pubmed_parser::pubmed::AbstractSection;
16///
17/// let section = AbstractSection {
18/// label: "BACKGROUND".to_string(),
19/// text: "This study investigates...".to_string(),
20/// };
21/// ```
22#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
23pub struct AbstractSection {
24 /// Section label (e.g., "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS")
25 pub label: String,
26 /// Text content of the section
27 pub text: String,
28}
29
30/// Represents a PubMed article with metadata
31#[derive(Debug, Serialize, Deserialize, Clone)]
32pub struct PubMedArticle {
33 /// PubMed ID
34 pub pmid: String,
35 /// Article title
36 pub title: String,
37 /// List of authors with detailed metadata
38 pub authors: Vec<Author>,
39 /// Number of authors (computed from authors list)
40 pub author_count: u32,
41 /// Journal name
42 pub journal: String,
43 /// Publication date
44 pub pub_date: String,
45 /// DOI (Digital Object Identifier)
46 pub doi: Option<String>,
47 /// PMC ID if available (with PMC prefix, e.g., "PMC7092803")
48 pub pmc_id: Option<String>,
49 /// Abstract text (if available)
50 pub abstract_text: Option<String>,
51 /// Structured abstract sections with labels (if available)
52 ///
53 /// Some PubMed articles have structured abstracts with labeled sections like
54 /// "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS". When available, this
55 /// field contains each section separately. The `abstract_text` field still
56 /// contains the full concatenated text.
57 pub structured_abstract: Option<Vec<AbstractSection>>,
58 /// Article types (e.g., "Clinical Trial", "Review", etc.)
59 pub article_types: Vec<String>,
60 /// MeSH headings associated with the article
61 pub mesh_headings: Option<Vec<MeshHeading>>,
62 /// Author-provided keywords
63 pub keywords: Option<Vec<String>>,
64 /// Chemical substances mentioned in the article
65 pub chemical_list: Option<Vec<ChemicalConcept>>,
66 /// Journal volume (e.g., "88")
67 pub volume: Option<String>,
68 /// Journal issue number (e.g., "3")
69 pub issue: Option<String>,
70 /// Page range (e.g., "123-130")
71 pub pages: Option<String>,
72 /// Article language (e.g., "eng", "jpn")
73 pub language: Option<String>,
74 /// ISO journal abbreviation (e.g., "J Biol Chem")
75 pub journal_abbreviation: Option<String>,
76 /// ISSN (International Standard Serial Number)
77 pub issn: Option<String>,
78}
79
80/// Database information from EInfo API
81#[derive(Debug, Serialize, Deserialize, Clone)]
82pub struct DatabaseInfo {
83 /// Database name (e.g., "pubmed", "pmc")
84 pub name: String,
85 /// Human-readable menu name
86 pub menu_name: String,
87 /// Database description
88 pub description: String,
89 /// Database build version
90 pub build: Option<String>,
91 /// Number of records in database
92 pub count: Option<u64>,
93 /// Last update timestamp
94 pub last_update: Option<String>,
95 /// Available search fields
96 pub fields: Vec<FieldInfo>,
97 /// Available links to other databases
98 pub links: Vec<LinkInfo>,
99}
100
101/// Information about a database search field
102#[derive(Debug, Serialize, Deserialize, Clone)]
103pub struct FieldInfo {
104 /// Short field name (e.g., "titl", "auth")
105 pub name: String,
106 /// Full field name (e.g., "Title", "Author")
107 pub full_name: String,
108 /// Field description
109 pub description: String,
110 /// Number of indexed terms
111 pub term_count: Option<u64>,
112 /// Whether field contains dates
113 pub is_date: bool,
114 /// Whether field contains numerical values
115 pub is_numerical: bool,
116 /// Whether field uses single token indexing
117 pub single_token: bool,
118 /// Whether field uses hierarchical indexing
119 pub hierarchy: bool,
120 /// Whether field is hidden from users
121 pub is_hidden: bool,
122}
123
124/// Information about database links
125#[derive(Debug, Serialize, Deserialize, Clone)]
126pub struct LinkInfo {
127 /// Link name
128 pub name: String,
129 /// Menu display name
130 pub menu: String,
131 /// Link description
132 pub description: String,
133 /// Target database
134 pub target_db: String,
135}
136
137/// Results from ELink API for related article discovery
138#[derive(Debug, Serialize, Deserialize, Clone)]
139pub struct RelatedArticles {
140 /// Source PMIDs that were queried
141 pub source_pmids: Vec<u32>,
142 /// Related article PMIDs found
143 pub related_pmids: Vec<u32>,
144 /// Link type (e.g., "pubmed_pubmed", "pubmed_pubmed_reviews")
145 pub link_type: String,
146}
147
148/// PMC links discovered through ELink API
149#[derive(Debug, Serialize, Deserialize, Clone)]
150pub struct PmcLinks {
151 /// Source PMIDs that were queried
152 pub source_pmids: Vec<u32>,
153 /// PMC IDs that have full text available
154 pub pmc_ids: Vec<String>,
155}
156
157/// Citation information from ELink API
158#[derive(Debug, Serialize, Deserialize, Clone)]
159pub struct Citations {
160 /// Source PMIDs that were queried
161 pub source_pmids: Vec<u32>,
162 /// PMIDs of articles that cite the source articles
163 pub citing_pmids: Vec<u32>,
164 /// Link type (e.g., "pubmed_pubmed_citedin")
165 pub link_type: String,
166}
167
/// Outcome of a search, including the WebEnv session data needed for
/// history server pagination
#[derive(Clone, Debug)]
pub struct SearchResult {
    /// PMIDs that match the search query
    pub pmids: Vec<String>,
    /// Total count of results that match the query
    pub total_count: usize,
    /// Identifier of the WebEnv session on the history server
    pub webenv: Option<String>,
    /// Query key on the history server
    pub query_key: Option<String>,
    /// The way PubMed interpreted and translated the search query
    ///
    /// A search for "asthma", for instance, might be translated to:
    /// `"asthma"[MeSH Terms] OR "asthma"[All Fields]`
    ///
    /// Helpful when debugging search queries and when trying to understand
    /// PubMed's automatic term mapping.
    pub query_translation: Option<String>,
}
188
189impl SearchResult {
190 /// Get the history session if WebEnv and query_key are available
191 ///
192 /// Returns `Some(HistorySession)` if both webenv and query_key are present,
193 /// `None` otherwise.
194 pub fn history_session(&self) -> Option<HistorySession> {
195 match (&self.webenv, &self.query_key) {
196 (Some(webenv), Some(query_key)) => Some(HistorySession {
197 webenv: webenv.clone(),
198 query_key: query_key.clone(),
199 }),
200 _ => None,
201 }
202 }
203
204 /// Check if this result has history session information
205 pub fn has_history(&self) -> bool {
206 self.webenv.is_some() && self.query_key.is_some()
207 }
208}
209
/// Session data of the history server, used for paginated fetching
///
/// Identifies a session on NCBI's history server. With it, large result sets
/// can be fetched efficiently in batches, without running the search query
/// again.
///
/// # Note
///
/// WebEnv sessions typically expire after 1 hour of inactivity.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HistorySession {
    /// Identifier of the WebEnv session
    pub webenv: String,
    /// Key of the query inside the session
    pub query_key: String,
}
226
/// Outcome of uploading PMIDs to the NCBI History server through the EPost API
///
/// EPost places a list of UIDs (PMIDs) on the History server and hands back
/// WebEnv/query_key identifiers. Those identifiers can be passed to
/// `fetch_from_history()` to retrieve article metadata, or be combined with
/// other E-utility calls.
///
/// # Example
///
/// ```ignore
/// use pubmed_client::PubMedClient;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = PubMedClient::new();
///
///     // Upload a list of PMIDs to the history server
///     let result = client.epost(&["31978945", "33515491", "25760099"]).await?;
///
///     // Use the session to fetch articles
///     let session = result.history_session();
///     let articles = client.fetch_from_history(&session, 0, 100).await?;
///     println!("Fetched {} articles", articles.len());
///
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct EPostResult {
    /// Identifier of the WebEnv session holding the uploaded IDs
    pub webenv: String,
    /// Key, within that session, of the uploaded IDs
    pub query_key: String,
}
260
261impl EPostResult {
262 /// Convert to a HistorySession for use with `fetch_from_history()`
263 ///
264 /// This is a convenience method that creates a `HistorySession` from the
265 /// EPost result, which can then be passed to `fetch_from_history()`.
266 pub fn history_session(&self) -> HistorySession {
267 HistorySession {
268 webenv: self.webenv.clone(),
269 query_key: self.query_key.clone(),
270 }
271 }
272}
273
274/// Medical Subject Heading (MeSH) qualifier/subheading
275#[derive(Debug, Serialize, Deserialize, Clone)]
276pub struct MeshQualifier {
277 /// Qualifier name (e.g., "drug therapy", "genetics")
278 pub qualifier_name: String,
279 /// Unique identifier for the qualifier
280 pub qualifier_ui: String,
281 /// Whether this qualifier is a major topic
282 pub major_topic: bool,
283}
284
285/// Medical Subject Heading (MeSH) descriptor term
286#[derive(Debug, Serialize, Deserialize, Clone)]
287pub struct MeshTerm {
288 /// Descriptor name (e.g., "Diabetes Mellitus, Type 2")
289 pub descriptor_name: String,
290 /// Unique identifier for the descriptor
291 pub descriptor_ui: String,
292 /// Whether this term is a major topic of the article
293 pub major_topic: bool,
294 /// Associated qualifiers/subheadings
295 pub qualifiers: Vec<MeshQualifier>,
296}
297
298/// Supplemental MeSH concept (for substances, diseases, etc.)
299#[derive(Debug, Serialize, Deserialize, Clone)]
300pub struct SupplementalConcept {
301 /// Concept name
302 pub name: String,
303 /// Unique identifier
304 pub ui: String,
305 /// Concept type (e.g., "Disease", "Drug")
306 pub concept_type: Option<String>,
307}
308
309/// Chemical substance mentioned in the article
310#[derive(Debug, Serialize, Deserialize, Clone)]
311pub struct ChemicalConcept {
312 /// Chemical name
313 pub name: String,
314 /// Registry number (e.g., CAS number)
315 pub registry_number: Option<String>,
316 /// Chemical UI
317 pub ui: Option<String>,
318}
319
320/// Complete MeSH heading information for an article
321#[derive(Debug, Serialize, Deserialize, Clone)]
322pub struct MeshHeading {
323 /// MeSH descriptor terms
324 pub mesh_terms: Vec<MeshTerm>,
325 /// Supplemental concepts
326 pub supplemental_concepts: Vec<SupplementalConcept>,
327}
328
329impl PubMedArticle {
330 /// Get all major MeSH terms from the article
331 ///
332 /// # Returns
333 ///
334 /// A vector of major MeSH term names
335 ///
336 /// # Example
337 ///
338 /// ```
339 /// # use pubmed_parser::pubmed::PubMedArticle;
340 /// # let article = PubMedArticle {
341 /// # pmid: "123".to_string(),
342 /// # title: "Test".to_string(),
343 /// # authors: vec![],
344 /// # author_count: 0,
345 /// # journal: "Test Journal".to_string(),
346 /// # pub_date: "2023".to_string(),
347 /// # doi: None,
348 /// # pmc_id: None,
349 /// # abstract_text: None,
350 /// # structured_abstract: None,
351 /// # article_types: vec![],
352 /// # mesh_headings: None,
353 /// # keywords: None,
354 /// # chemical_list: None,
355 /// # volume: None, issue: None, pages: None,
356 /// # language: None, journal_abbreviation: None, issn: None,
357 /// # };
358 /// let major_terms = article.get_major_mesh_terms();
359 /// ```
360 pub fn get_major_mesh_terms(&self) -> Vec<String> {
361 let mut major_terms = Vec::new();
362
363 if let Some(mesh_headings) = &self.mesh_headings {
364 for heading in mesh_headings {
365 for term in &heading.mesh_terms {
366 if term.major_topic {
367 major_terms.push(term.descriptor_name.clone());
368 }
369 }
370 }
371 }
372
373 major_terms
374 }
375
376 /// Check if the article has a specific MeSH term
377 ///
378 /// # Arguments
379 ///
380 /// * `term` - The MeSH term to check for
381 ///
382 /// # Returns
383 ///
384 /// `true` if the article has the specified MeSH term, `false` otherwise
385 ///
386 /// # Example
387 ///
388 /// ```
389 /// # use pubmed_parser::pubmed::PubMedArticle;
390 /// # let article = PubMedArticle {
391 /// # pmid: "123".to_string(),
392 /// # title: "Test".to_string(),
393 /// # authors: vec![],
394 /// # author_count: 0,
395 /// # journal: "Test Journal".to_string(),
396 /// # pub_date: "2023".to_string(),
397 /// # doi: None,
398 /// # pmc_id: None,
399 /// # abstract_text: None,
400 /// # structured_abstract: None,
401 /// # article_types: vec![],
402 /// # mesh_headings: None,
403 /// # keywords: None,
404 /// # chemical_list: None,
405 /// # volume: None, issue: None, pages: None,
406 /// # language: None, journal_abbreviation: None, issn: None,
407 /// # };
408 /// let has_diabetes = article.has_mesh_term("Diabetes Mellitus");
409 /// ```
410 pub fn has_mesh_term(&self, term: &str) -> bool {
411 if let Some(mesh_headings) = &self.mesh_headings {
412 for heading in mesh_headings {
413 for mesh_term in &heading.mesh_terms {
414 if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
415 return true;
416 }
417 }
418 }
419 }
420 false
421 }
422
423 /// Get all MeSH terms from the article
424 ///
425 /// # Returns
426 ///
427 /// A vector of all MeSH term names
428 pub fn get_all_mesh_terms(&self) -> Vec<String> {
429 let mut terms = Vec::new();
430
431 if let Some(mesh_headings) = &self.mesh_headings {
432 for heading in mesh_headings {
433 for term in &heading.mesh_terms {
434 terms.push(term.descriptor_name.clone());
435 }
436 }
437 }
438
439 terms
440 }
441
442 /// Get corresponding authors from the article
443 ///
444 /// # Returns
445 ///
446 /// A vector of references to authors marked as corresponding
447 pub fn get_corresponding_authors(&self) -> Vec<&Author> {
448 self.authors
449 .iter()
450 .filter(|author| author.is_corresponding)
451 .collect()
452 }
453
454 /// Get authors affiliated with a specific institution
455 ///
456 /// # Arguments
457 ///
458 /// * `institution` - Institution name to search for (case-insensitive substring match)
459 ///
460 /// # Returns
461 ///
462 /// A vector of references to authors with matching affiliations
463 pub fn get_authors_by_institution(&self, institution: &str) -> Vec<&Author> {
464 let institution_lower = institution.to_lowercase();
465 self.authors
466 .iter()
467 .filter(|author| {
468 author.affiliations.iter().any(|affil| {
469 affil
470 .institution
471 .as_ref()
472 .is_some_and(|inst| inst.to_lowercase().contains(&institution_lower))
473 })
474 })
475 .collect()
476 }
477
478 /// Get all unique countries from author affiliations
479 ///
480 /// # Returns
481 ///
482 /// A vector of unique country names
483 pub fn get_author_countries(&self) -> Vec<String> {
484 use std::collections::HashSet;
485 let mut countries: HashSet<String> = HashSet::new();
486
487 for author in &self.authors {
488 for affiliation in &author.affiliations {
489 if let Some(country) = &affiliation.country {
490 countries.insert(country.clone());
491 }
492 }
493 }
494
495 countries.into_iter().collect()
496 }
497
498 /// Get authors with ORCID identifiers
499 ///
500 /// # Returns
501 ///
502 /// A vector of references to authors who have ORCID IDs
503 pub fn get_authors_with_orcid(&self) -> Vec<&Author> {
504 self.authors
505 .iter()
506 .filter(|author| author.orcid.is_some())
507 .collect()
508 }
509
510 /// Check if the article has international collaboration
511 ///
512 /// # Returns
513 ///
514 /// `true` if authors are from multiple countries
515 pub fn has_international_collaboration(&self) -> bool {
516 self.get_author_countries().len() > 1
517 }
518
519 /// Calculate MeSH term similarity between two articles
520 ///
521 /// # Arguments
522 ///
523 /// * `other` - The other article to compare with
524 ///
525 /// # Returns
526 ///
527 /// A similarity score between 0.0 and 1.0 based on Jaccard similarity
528 ///
529 /// # Example
530 ///
531 /// ```
532 /// # use pubmed_parser::pubmed::PubMedArticle;
533 /// # let article1 = PubMedArticle {
534 /// # pmid: "123".to_string(),
535 /// # title: "Test".to_string(),
536 /// # authors: vec![],
537 /// # author_count: 0,
538 /// # journal: "Test Journal".to_string(),
539 /// # pub_date: "2023".to_string(),
540 /// # doi: None,
541 /// # pmc_id: None,
542 /// # abstract_text: None,
543 /// # structured_abstract: None,
544 /// # article_types: vec![],
545 /// # mesh_headings: None,
546 /// # keywords: None,
547 /// # chemical_list: None,
548 /// # volume: None, issue: None, pages: None,
549 /// # language: None, journal_abbreviation: None, issn: None,
550 /// # };
551 /// # let article2 = article1.clone();
552 /// let similarity = article1.mesh_term_similarity(&article2);
553 /// ```
554 pub fn mesh_term_similarity(&self, other: &PubMedArticle) -> f64 {
555 use std::collections::HashSet;
556
557 let terms1: HashSet<String> = self
558 .get_all_mesh_terms()
559 .into_iter()
560 .map(|t| t.to_lowercase())
561 .collect();
562
563 let terms2: HashSet<String> = other
564 .get_all_mesh_terms()
565 .into_iter()
566 .map(|t| t.to_lowercase())
567 .collect();
568
569 if terms1.is_empty() && terms2.is_empty() {
570 return 0.0;
571 }
572
573 let intersection = terms1.intersection(&terms2).count();
574 let union = terms1.union(&terms2).count();
575
576 if union == 0 {
577 0.0
578 } else {
579 intersection as f64 / union as f64
580 }
581 }
582
583 /// Get MeSH qualifiers for a specific term
584 ///
585 /// # Arguments
586 ///
587 /// * `term` - The MeSH term to get qualifiers for
588 ///
589 /// # Returns
590 ///
591 /// A vector of qualifier names for the specified term
592 pub fn get_mesh_qualifiers(&self, term: &str) -> Vec<String> {
593 let mut qualifiers = Vec::new();
594
595 if let Some(mesh_headings) = &self.mesh_headings {
596 for heading in mesh_headings {
597 for mesh_term in &heading.mesh_terms {
598 if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
599 for qualifier in &mesh_term.qualifiers {
600 qualifiers.push(qualifier.qualifier_name.clone());
601 }
602 }
603 }
604 }
605 }
606
607 qualifiers
608 }
609
610 /// Check if the article has any MeSH terms
611 ///
612 /// # Returns
613 ///
614 /// `true` if the article has MeSH terms, `false` otherwise
615 pub fn has_mesh_terms(&self) -> bool {
616 self.mesh_headings
617 .as_ref()
618 .map(|h| !h.is_empty())
619 .unwrap_or(false)
620 }
621
622 /// Get chemicals mentioned in the article
623 ///
624 /// # Returns
625 ///
626 /// A vector of chemical names
627 pub fn get_chemical_names(&self) -> Vec<String> {
628 self.chemical_list
629 .as_ref()
630 .map(|chemicals| chemicals.iter().map(|c| c.name.clone()).collect())
631 .unwrap_or_default()
632 }
633}
634
635// ================================================================================================
636// ESpell API types
637// ================================================================================================
638
639/// Represents a segment of the spelled query from the ESpell API
640///
641/// Each segment is either an original (unchanged) part of the query or a
642/// replacement (corrected spelling suggestion).
643#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
644pub enum SpelledQuerySegment {
645 /// A term or separator that was not changed
646 Original(String),
647 /// A corrected/suggested replacement term
648 Replaced(String),
649}
650
651/// Result from the ESpell API providing spelling suggestions
652///
653/// ESpell provides spelling suggestions for terms within a single text query
654/// in a given database. It acts as a preprocessing/spell-check tool to improve
655/// search accuracy before executing actual searches.
656///
657/// # Example
658///
659/// ```ignore
660/// use pubmed_client::PubMedClient;
661///
662/// #[tokio::main]
663/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
664/// let client = PubMedClient::new();
665/// let result = client.spell_check("asthmaa OR alergies").await?;
666/// println!("Original: {}", result.query);
667/// println!("Corrected: {}", result.corrected_query);
668/// Ok(())
669/// }
670/// ```
671#[derive(Debug, Serialize, Deserialize, Clone)]
672pub struct SpellCheckResult {
673 /// The database that was queried
674 pub database: String,
675 /// The original query string as submitted
676 pub query: String,
677 /// The full corrected/suggested query as a plain string
678 pub corrected_query: String,
679 /// Detailed segments showing which terms were replaced vs. kept
680 pub spelled_query: Vec<SpelledQuerySegment>,
681}
682
683impl SpellCheckResult {
684 /// Check if the query had any spelling corrections.
685 ///
686 /// Returns `true` only when NCBI provided a non-empty corrected query that differs from
687 /// the original. The NCBI ESpell API returns an empty `<CorrectedQuery/>` element when
688 /// no corrections are available.
689 pub fn has_corrections(&self) -> bool {
690 !self.corrected_query.is_empty() && self.query != self.corrected_query
691 }
692
693 /// Get only the replaced (corrected) terms
694 pub fn replacements(&self) -> Vec<&str> {
695 self.spelled_query
696 .iter()
697 .filter_map(|segment| match segment {
698 SpelledQuerySegment::Replaced(s) => Some(s.as_str()),
699 SpelledQuerySegment::Original(_) => None,
700 })
701 .collect()
702 }
703}
704
705// ================================================================================================
706// ECitMatch API types
707// ================================================================================================
708
/// One citation to be matched, as input to the ECitMatch API
///
/// The ECitMatch API looks up PMIDs from citation information. Every field
/// here maps to one part of the citation string that is sent to the API.
///
/// # Example
///
/// ```
/// use pubmed_parser::pubmed::CitationQuery;
///
/// let query = CitationQuery::new(
///     "proc natl acad sci u s a",
///     "1991",
///     "88",
///     "3248",
///     "mann bj",
///     "Art1",
/// );
/// ```
#[derive(Clone, Debug)]
pub struct CitationQuery {
    /// Abbreviated journal title (e.g., "proc natl acad sci u s a")
    pub journal: String,
    /// Year of publication (e.g., "1991")
    pub year: String,
    /// Number of the volume (e.g., "88")
    pub volume: String,
    /// Number of the first page (e.g., "3248")
    pub first_page: String,
    /// Name of the author (e.g., "mann bj")
    pub author_name: String,
    /// Key chosen by the user to identify results (e.g., "Art1")
    pub key: String,
}
743
744impl CitationQuery {
745 /// Create a new citation query
746 pub fn new(
747 journal: &str,
748 year: &str,
749 volume: &str,
750 first_page: &str,
751 author_name: &str,
752 key: &str,
753 ) -> Self {
754 Self {
755 journal: journal.to_string(),
756 year: year.to_string(),
757 volume: volume.to_string(),
758 first_page: first_page.to_string(),
759 author_name: author_name.to_string(),
760 key: key.to_string(),
761 }
762 }
763
764 /// Format this citation as a bdata string for the ECitMatch API
765 pub fn to_bdata(&self) -> String {
766 format!(
767 "{}|{}|{}|{}|{}|{}|",
768 self.journal.replace(' ', "+"),
769 self.year,
770 self.volume,
771 self.first_page,
772 self.author_name.replace(' ', "+"),
773 self.key,
774 )
775 }
776}
777
778/// Status of a citation match result
779#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
780pub enum CitationMatchStatus {
781 /// A unique PMID was found for the citation
782 Found,
783 /// No PMID could be found for the citation
784 NotFound,
785 /// Multiple PMIDs matched the citation (ambiguous)
786 Ambiguous,
787}
788
789/// Result of a single citation match from the ECitMatch API
790#[derive(Debug, Serialize, Deserialize, Clone)]
791pub struct CitationMatch {
792 /// Journal title from the query
793 pub journal: String,
794 /// Year from the query
795 pub year: String,
796 /// Volume from the query
797 pub volume: String,
798 /// First page from the query
799 pub first_page: String,
800 /// Author name from the query
801 pub author_name: String,
802 /// User-defined key from the query
803 pub key: String,
804 /// Matched PMID (if found)
805 pub pmid: Option<String>,
806 /// Match status
807 pub status: CitationMatchStatus,
808}
809
810/// Results from ECitMatch API for batch citation matching
811///
812/// Contains the results of matching multiple citations to PMIDs.
813#[derive(Debug, Serialize, Deserialize, Clone)]
814pub struct CitationMatches {
815 /// List of citation match results
816 pub matches: Vec<CitationMatch>,
817}
818
819impl CitationMatches {
820 /// Get only the successfully matched citations
821 pub fn found(&self) -> Vec<&CitationMatch> {
822 self.matches
823 .iter()
824 .filter(|m| m.status == CitationMatchStatus::Found)
825 .collect()
826 }
827
828 /// Get the number of successful matches
829 pub fn found_count(&self) -> usize {
830 self.matches
831 .iter()
832 .filter(|m| m.status == CitationMatchStatus::Found)
833 .count()
834 }
835}
836
837// ================================================================================================
838// EGQuery API types
839// ================================================================================================
840
841/// Record count for a single NCBI database from the EGQuery API
842#[derive(Debug, Serialize, Deserialize, Clone)]
843pub struct DatabaseCount {
844 /// Internal database name (e.g., "pubmed", "pmc", "nuccore")
845 pub db_name: String,
846 /// Human-readable database name (e.g., "PubMed", "PMC", "Nucleotide")
847 pub menu_name: String,
848 /// Number of records matching the query in this database
849 pub count: u64,
850 /// Status of the query for this database (e.g., "Ok")
851 pub status: String,
852}
853
854/// Results from EGQuery API for global database search
855///
856/// Contains the number of records matching a query across all NCBI Entrez databases.
857///
858/// # Example
859///
860/// ```ignore
861/// use pubmed_client::PubMedClient;
862///
863/// #[tokio::main]
864/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
865/// let client = PubMedClient::new();
866/// let results = client.global_query("asthma").await?;
867/// for db in &results.results {
868/// if db.count > 0 {
869/// println!("{}: {} records", db.menu_name, db.count);
870/// }
871/// }
872/// Ok(())
873/// }
874/// ```
875#[derive(Debug, Serialize, Deserialize, Clone)]
876pub struct GlobalQueryResults {
877 /// The query term that was searched
878 pub term: String,
879 /// Results for each NCBI database
880 pub results: Vec<DatabaseCount>,
881}
882
883impl GlobalQueryResults {
884 /// Get results for databases with matching records (count > 0)
885 pub fn non_zero(&self) -> Vec<&DatabaseCount> {
886 self.results.iter().filter(|r| r.count > 0).collect()
887 }
888
889 /// Get the count for a specific database
890 pub fn count_for(&self, db_name: &str) -> Option<u64> {
891 self.results
892 .iter()
893 .find(|r| r.db_name == db_name)
894 .map(|r| r.count)
895 }
896}
897
898// ================================================================================================
899// ESummary API types
900// ================================================================================================
901
902/// Lightweight article summary from the ESummary API
903///
904/// Contains basic metadata (title, authors, journal, dates) without the full
905/// abstract, MeSH terms, or chemical lists that EFetch provides. Use this when
906/// you only need basic bibliographic information for a large number of articles.
907///
908/// # Example
909///
910/// ```ignore
911/// use pubmed_client::PubMedClient;
912///
913/// #[tokio::main]
914/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
915/// let client = PubMedClient::new();
916/// let summaries = client.fetch_summaries(&["31978945", "33515491"]).await?;
917/// for summary in &summaries {
918/// println!("{}: {} ({})", summary.pmid, summary.title, summary.pub_date);
919/// }
920/// Ok(())
921/// }
922/// ```
923#[derive(Debug, Serialize, Deserialize, Clone)]
924pub struct ArticleSummary {
925 /// PubMed ID
926 pub pmid: String,
927 /// Article title
928 pub title: String,
929 /// Author names (e.g., ["Zhu N", "Zhang D", "Wang W"])
930 pub authors: Vec<String>,
931 /// Journal name (source field)
932 pub journal: String,
933 /// Full journal name (e.g., "The New England journal of medicine")
934 pub full_journal_name: String,
935 /// Publication date (e.g., "2020 Feb")
936 pub pub_date: String,
937 /// Electronic publication date (e.g., "2020 Jan 24")
938 pub epub_date: String,
939 /// DOI (Digital Object Identifier)
940 pub doi: Option<String>,
941 /// PMC ID if available (e.g., "PMC7092803")
942 pub pmc_id: Option<String>,
943 /// Journal volume (e.g., "382")
944 pub volume: String,
945 /// Journal issue (e.g., "8")
946 pub issue: String,
947 /// Page range (e.g., "727-733")
948 pub pages: String,
949 /// Languages (e.g., ["eng"])
950 pub languages: Vec<String>,
951 /// Publication types (e.g., ["Journal Article", "Review"])
952 pub pub_types: Vec<String>,
953 /// ISSN
954 pub issn: String,
955 /// Electronic ISSN
956 pub essn: String,
957 /// Sorted publication date (e.g., "2020/02/20 00:00")
958 pub sort_pub_date: String,
959 /// PMC reference count (number of citing articles in PMC)
960 pub pmc_ref_count: u64,
961 /// Record status (e.g., "PubMed - indexed for MEDLINE")
962 pub record_status: String,
963}
964
965#[cfg(test)]
966mod tests {
967 use super::*;
968 use crate::common::format_author_name;
969
970 fn create_test_author() -> Author {
971 Author {
972 surname: Some("Doe".to_string()),
973 given_names: Some("John A".to_string()),
974 initials: Some("JA".to_string()),
975 suffix: None,
976 full_name: "John A Doe".to_string(),
977 affiliations: vec![
978 Affiliation {
979 id: None,
980 institution: Some("Harvard Medical School".to_string()),
981 department: Some("Department of Medicine".to_string()),
982 address: Some("Boston, MA".to_string()),
983 country: Some("USA".to_string()),
984 },
985 Affiliation {
986 id: None,
987 institution: Some("Massachusetts General Hospital".to_string()),
988 department: None,
989 address: Some("Boston, MA".to_string()),
990 country: Some("USA".to_string()),
991 },
992 ],
993 orcid: Some("0000-0001-2345-6789".to_string()),
994 email: Some("john.doe@hms.harvard.edu".to_string()),
995 is_corresponding: true,
996 roles: vec![
997 "Conceptualization".to_string(),
998 "Writing - original draft".to_string(),
999 ],
1000 }
1001 }
1002
1003 fn create_test_article_with_mesh() -> PubMedArticle {
1004 PubMedArticle {
1005 pmid: "12345".to_string(),
1006 title: "Test Article".to_string(),
1007 authors: vec![create_test_author()],
1008 author_count: 1,
1009 journal: "Test Journal".to_string(),
1010 pub_date: "2023".to_string(),
1011 doi: None,
1012 pmc_id: None,
1013 abstract_text: None,
1014 structured_abstract: None,
1015 article_types: vec![],
1016 mesh_headings: Some(vec![
1017 MeshHeading {
1018 mesh_terms: vec![MeshTerm {
1019 descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
1020 descriptor_ui: "D003924".to_string(),
1021 major_topic: true,
1022 qualifiers: vec![
1023 MeshQualifier {
1024 qualifier_name: "drug therapy".to_string(),
1025 qualifier_ui: "Q000188".to_string(),
1026 major_topic: false,
1027 },
1028 MeshQualifier {
1029 qualifier_name: "genetics".to_string(),
1030 qualifier_ui: "Q000235".to_string(),
1031 major_topic: true,
1032 },
1033 ],
1034 }],
1035 supplemental_concepts: vec![],
1036 },
1037 MeshHeading {
1038 mesh_terms: vec![MeshTerm {
1039 descriptor_name: "Hypertension".to_string(),
1040 descriptor_ui: "D006973".to_string(),
1041 major_topic: false,
1042 qualifiers: vec![],
1043 }],
1044 supplemental_concepts: vec![],
1045 },
1046 ]),
1047 keywords: Some(vec!["diabetes".to_string(), "treatment".to_string()]),
1048 chemical_list: Some(vec![ChemicalConcept {
1049 name: "Metformin".to_string(),
1050 registry_number: Some("657-24-9".to_string()),
1051 ui: Some("D008687".to_string()),
1052 }]),
1053 volume: Some("45".to_string()),
1054 issue: Some("3".to_string()),
1055 pages: Some("123-130".to_string()),
1056 language: Some("eng".to_string()),
1057 journal_abbreviation: Some("Test J".to_string()),
1058 issn: Some("1234-5678".to_string()),
1059 }
1060 }
1061
1062 #[test]
1063 fn test_get_major_mesh_terms() {
1064 let article = create_test_article_with_mesh();
1065 let major_terms = article.get_major_mesh_terms();
1066
1067 assert_eq!(major_terms.len(), 1);
1068 assert_eq!(major_terms[0], "Diabetes Mellitus, Type 2");
1069 }
1070
1071 #[test]
1072 fn test_has_mesh_term() {
1073 let article = create_test_article_with_mesh();
1074
1075 assert!(article.has_mesh_term("Diabetes Mellitus, Type 2"));
1076 assert!(article.has_mesh_term("DIABETES MELLITUS, TYPE 2")); // Case insensitive
1077 assert!(article.has_mesh_term("Hypertension"));
1078 assert!(!article.has_mesh_term("Cancer"));
1079 }
1080
1081 #[test]
1082 fn test_get_all_mesh_terms() {
1083 let article = create_test_article_with_mesh();
1084 let all_terms = article.get_all_mesh_terms();
1085
1086 assert_eq!(all_terms.len(), 2);
1087 assert!(all_terms.contains(&"Diabetes Mellitus, Type 2".to_string()));
1088 assert!(all_terms.contains(&"Hypertension".to_string()));
1089 }
1090
1091 #[test]
1092 fn test_mesh_term_similarity() {
1093 let article1 = create_test_article_with_mesh();
1094 let mut article2 = create_test_article_with_mesh();
1095
1096 // Same article should have similarity of 1.0
1097 let similarity = article1.mesh_term_similarity(&article2);
1098 assert_eq!(similarity, 1.0);
1099
1100 // Different MeSH terms
1101 article2.mesh_headings = Some(vec![MeshHeading {
1102 mesh_terms: vec![
1103 MeshTerm {
1104 descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
1105 descriptor_ui: "D003924".to_string(),
1106 major_topic: true,
1107 qualifiers: vec![],
1108 },
1109 MeshTerm {
1110 descriptor_name: "Obesity".to_string(),
1111 descriptor_ui: "D009765".to_string(),
1112 major_topic: false,
1113 qualifiers: vec![],
1114 },
1115 ],
1116 supplemental_concepts: vec![],
1117 }]);
1118
1119 let similarity = article1.mesh_term_similarity(&article2);
1120 // Should have partial similarity (1 common term out of 3 unique terms)
1121 assert!(similarity > 0.0 && similarity < 1.0);
1122 assert_eq!(similarity, 1.0 / 3.0); // Jaccard similarity
1123
1124 // No MeSH terms
1125 let article3 = PubMedArticle {
1126 pmid: "54321".to_string(),
1127 title: "Test".to_string(),
1128 authors: vec![],
1129 author_count: 0,
1130 journal: "Test".to_string(),
1131 pub_date: "2023".to_string(),
1132 doi: None,
1133 pmc_id: None,
1134 abstract_text: None,
1135 structured_abstract: None,
1136 article_types: vec![],
1137 mesh_headings: None,
1138 keywords: None,
1139 chemical_list: None,
1140 volume: None,
1141 issue: None,
1142 pages: None,
1143 language: None,
1144 journal_abbreviation: None,
1145 issn: None,
1146 };
1147
1148 assert_eq!(article1.mesh_term_similarity(&article3), 0.0);
1149 }
1150
1151 #[test]
1152 fn test_get_mesh_qualifiers() {
1153 let article = create_test_article_with_mesh();
1154
1155 let qualifiers = article.get_mesh_qualifiers("Diabetes Mellitus, Type 2");
1156 assert_eq!(qualifiers.len(), 2);
1157 assert!(qualifiers.contains(&"drug therapy".to_string()));
1158 assert!(qualifiers.contains(&"genetics".to_string()));
1159
1160 let qualifiers = article.get_mesh_qualifiers("Hypertension");
1161 assert_eq!(qualifiers.len(), 0);
1162
1163 let qualifiers = article.get_mesh_qualifiers("Nonexistent Term");
1164 assert_eq!(qualifiers.len(), 0);
1165 }
1166
1167 #[test]
1168 fn test_has_mesh_terms() {
1169 let article = create_test_article_with_mesh();
1170 assert!(article.has_mesh_terms());
1171
1172 let mut article_no_mesh = article.clone();
1173 article_no_mesh.mesh_headings = None;
1174 assert!(!article_no_mesh.has_mesh_terms());
1175
1176 let mut article_empty_mesh = article.clone();
1177 article_empty_mesh.mesh_headings = Some(vec![]);
1178 assert!(!article_empty_mesh.has_mesh_terms());
1179 }
1180
1181 #[test]
1182 fn test_get_chemical_names() {
1183 let article = create_test_article_with_mesh();
1184 let chemicals = article.get_chemical_names();
1185
1186 assert_eq!(chemicals.len(), 1);
1187 assert_eq!(chemicals[0], "Metformin");
1188
1189 let mut article_no_chemicals = article.clone();
1190 article_no_chemicals.chemical_list = None;
1191 let chemicals = article_no_chemicals.get_chemical_names();
1192 assert_eq!(chemicals.len(), 0);
1193 }
1194
1195 #[test]
1196 fn test_author_creation() {
1197 let author = Author::new(Some("Smith".to_string()), Some("Jane".to_string()));
1198 assert_eq!(author.surname, Some("Smith".to_string()));
1199 assert_eq!(author.given_names, Some("Jane".to_string()));
1200 assert_eq!(author.full_name, "Jane Smith");
1201 assert!(!author.has_orcid());
1202 assert!(!author.is_corresponding);
1203 }
1204
1205 #[test]
1206 fn test_author_affiliations() {
1207 let author = create_test_author();
1208
1209 assert!(author.is_affiliated_with("Harvard"));
1210 assert!(author.is_affiliated_with("Massachusetts General"));
1211 assert!(!author.is_affiliated_with("Stanford"));
1212
1213 let primary = author.primary_affiliation().unwrap();
1214 assert_eq!(
1215 primary.institution,
1216 Some("Harvard Medical School".to_string())
1217 );
1218
1219 assert!(author.has_orcid());
1220 assert!(author.is_corresponding);
1221 }
1222
1223 #[test]
1224 fn test_get_corresponding_authors() {
1225 let article = create_test_article_with_mesh();
1226 let corresponding = article.get_corresponding_authors();
1227
1228 assert_eq!(corresponding.len(), 1);
1229 assert_eq!(corresponding[0].full_name, "John A Doe");
1230 }
1231
1232 #[test]
1233 fn test_get_authors_by_institution() {
1234 let article = create_test_article_with_mesh();
1235
1236 let harvard_authors = article.get_authors_by_institution("Harvard");
1237 assert_eq!(harvard_authors.len(), 1);
1238
1239 let stanford_authors = article.get_authors_by_institution("Stanford");
1240 assert_eq!(stanford_authors.len(), 0);
1241 }
1242
1243 #[test]
1244 fn test_get_author_countries() {
1245 let article = create_test_article_with_mesh();
1246 let countries = article.get_author_countries();
1247
1248 assert_eq!(countries.len(), 1);
1249 assert!(countries.contains(&"USA".to_string()));
1250 }
1251
1252 #[test]
1253 fn test_international_collaboration() {
1254 let article = create_test_article_with_mesh();
1255 assert!(!article.has_international_collaboration());
1256
1257 // Create article with international authors
1258 let mut international_article = article.clone();
1259 let mut uk_author = create_test_author();
1260 uk_author.affiliations[0].country = Some("UK".to_string());
1261 international_article.authors.push(uk_author);
1262 international_article.author_count = 2;
1263
1264 assert!(international_article.has_international_collaboration());
1265 }
1266
1267 #[test]
1268 fn test_get_authors_with_orcid() {
1269 let article = create_test_article_with_mesh();
1270 let authors_with_orcid = article.get_authors_with_orcid();
1271
1272 assert_eq!(authors_with_orcid.len(), 1);
1273 assert_eq!(
1274 authors_with_orcid[0].orcid,
1275 Some("0000-0001-2345-6789".to_string())
1276 );
1277 }
1278
1279 #[test]
1280 fn test_format_author_name() {
1281 assert_eq!(
1282 format_author_name(&Some("Smith".to_string()), &Some("John".to_string()), &None),
1283 "John Smith"
1284 );
1285
1286 assert_eq!(
1287 format_author_name(&Some("Doe".to_string()), &None, &Some("J".to_string())),
1288 "J Doe"
1289 );
1290
1291 assert_eq!(
1292 format_author_name(&Some("Johnson".to_string()), &None, &None),
1293 "Johnson"
1294 );
1295
1296 assert_eq!(
1297 format_author_name(&None, &Some("Jane".to_string()), &None),
1298 "Jane"
1299 );
1300
1301 assert_eq!(format_author_name(&None, &None, &None), "Unknown Author");
1302 }
1303
1304 #[test]
1305 fn test_spell_check_result_has_corrections() {
1306 let result = SpellCheckResult {
1307 database: "pubmed".to_string(),
1308 query: "asthmaa".to_string(),
1309 corrected_query: "asthma".to_string(),
1310 spelled_query: vec![SpelledQuerySegment::Replaced("asthma".to_string())],
1311 };
1312 assert!(result.has_corrections());
1313
1314 let no_correction = SpellCheckResult {
1315 database: "pubmed".to_string(),
1316 query: "asthma".to_string(),
1317 corrected_query: "asthma".to_string(),
1318 spelled_query: vec![SpelledQuerySegment::Original("asthma".to_string())],
1319 };
1320 assert!(!no_correction.has_corrections());
1321 }
1322
1323 #[test]
1324 fn test_spell_check_result_replacements() {
1325 let result = SpellCheckResult {
1326 database: "pubmed".to_string(),
1327 query: "asthmaa OR alergies".to_string(),
1328 corrected_query: "asthma or allergies".to_string(),
1329 spelled_query: vec![
1330 SpelledQuerySegment::Original("".to_string()),
1331 SpelledQuerySegment::Replaced("asthma".to_string()),
1332 SpelledQuerySegment::Original(" OR ".to_string()),
1333 SpelledQuerySegment::Replaced("allergies".to_string()),
1334 ],
1335 };
1336 let replacements = result.replacements();
1337 assert_eq!(replacements.len(), 2);
1338 assert_eq!(replacements[0], "asthma");
1339 assert_eq!(replacements[1], "allergies");
1340 }
1341
1342 #[test]
1343 fn test_bibliographic_fields_on_article() {
1344 let article = create_test_article_with_mesh();
1345
1346 assert_eq!(article.volume, Some("45".to_string()));
1347 assert_eq!(article.issue, Some("3".to_string()));
1348 assert_eq!(article.pages, Some("123-130".to_string()));
1349 assert_eq!(article.language, Some("eng".to_string()));
1350 assert_eq!(article.journal_abbreviation, Some("Test J".to_string()));
1351 assert_eq!(article.issn, Some("1234-5678".to_string()));
1352 }
1353}