pubmed_parser/pubmed/
models.rs

1use serde::{Deserialize, Serialize};
2
3// Re-export common types
4pub use crate::common::{Affiliation, Author};
5
/// A labeled section within a structured abstract
///
/// PubMed articles may have structured abstracts with labeled sections such as
/// "BACKGROUND", "METHODS", "RESULTS", and "CONCLUSIONS". This type represents
/// a single section of such a structured abstract.
///
/// Two sections compare equal when both `label` and `text` are equal
/// (derived `PartialEq`/`Eq`).
///
/// # Example
///
/// ```
/// use pubmed_parser::pubmed::AbstractSection;
///
/// let section = AbstractSection {
///     label: "BACKGROUND".to_string(),
///     text: "This study investigates...".to_string(),
/// };
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct AbstractSection {
    /// Section label (e.g., "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS")
    pub label: String,
    /// Text content of the section (the body that belongs to `label`)
    pub text: String,
}
29
/// Represents a PubMed article with metadata
///
/// Identification, title, authors, journal, publication date and article types are
/// always present; the remaining fields are `Option`s. Helper accessors over the MeSH,
/// author and chemical data are provided in the `impl PubMedArticle` block of this module.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PubMedArticle {
    /// PubMed ID (stored as a string)
    pub pmid: String,
    /// Article title
    pub title: String,
    /// List of authors with detailed metadata
    pub authors: Vec<Author>,
    /// Number of authors (computed from authors list)
    ///
    /// NOTE(review): this is stored separately from `authors`; nothing in this type keeps
    /// the two in sync, so it is presumably filled in by the parser — confirm.
    pub author_count: u32,
    /// Journal name
    pub journal: String,
    /// Publication date (free-form string, e.g. "2023")
    pub pub_date: String,
    /// DOI (Digital Object Identifier)
    pub doi: Option<String>,
    /// PMC ID if available (with PMC prefix, e.g., "PMC7092803")
    pub pmc_id: Option<String>,
    /// Abstract text (if available)
    pub abstract_text: Option<String>,
    /// Structured abstract sections with labels (if available)
    ///
    /// Some PubMed articles have structured abstracts with labeled sections like
    /// "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS". When available, this
    /// field contains each section separately. The `abstract_text` field still
    /// contains the full concatenated text.
    pub structured_abstract: Option<Vec<AbstractSection>>,
    /// Article types (e.g., "Clinical Trial", "Review", etc.)
    pub article_types: Vec<String>,
    /// MeSH headings associated with the article
    ///
    /// Each heading groups descriptor terms and supplemental concepts; see `MeshHeading`.
    pub mesh_headings: Option<Vec<MeshHeading>>,
    /// Author-provided keywords
    pub keywords: Option<Vec<String>>,
    /// Chemical substances mentioned in the article
    pub chemical_list: Option<Vec<ChemicalConcept>>,
    /// Journal volume (e.g., "88")
    pub volume: Option<String>,
    /// Journal issue number (e.g., "3")
    pub issue: Option<String>,
    /// Page range (e.g., "123-130")
    pub pages: Option<String>,
    /// Article language (e.g., "eng", "jpn")
    pub language: Option<String>,
    /// ISO journal abbreviation (e.g., "J Biol Chem")
    pub journal_abbreviation: Option<String>,
    /// ISSN (International Standard Serial Number)
    pub issn: Option<String>,
}
79
/// Database information from EInfo API
///
/// Describes a single database together with the search fields (`FieldInfo`) and
/// cross-database links (`LinkInfo`) it exposes.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DatabaseInfo {
    /// Database name (e.g., "pubmed", "pmc")
    pub name: String,
    /// Human-readable menu name
    pub menu_name: String,
    /// Database description
    pub description: String,
    /// Database build version (optional)
    pub build: Option<String>,
    /// Number of records in database (optional)
    pub count: Option<u64>,
    /// Last update timestamp, kept as the raw string (optional)
    pub last_update: Option<String>,
    /// Available search fields
    pub fields: Vec<FieldInfo>,
    /// Available links to other databases
    pub links: Vec<LinkInfo>,
}
100
/// Information about a database search field
///
/// Used as the element type of `DatabaseInfo::fields`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FieldInfo {
    /// Short field name (e.g., "titl", "auth")
    pub name: String,
    /// Full field name (e.g., "Title", "Author")
    pub full_name: String,
    /// Field description
    pub description: String,
    /// Number of indexed terms (optional)
    pub term_count: Option<u64>,
    /// Whether field contains dates
    pub is_date: bool,
    /// Whether field contains numerical values
    pub is_numerical: bool,
    /// Whether field uses single token indexing
    pub single_token: bool,
    /// Whether field uses hierarchical indexing
    pub hierarchy: bool,
    /// Whether field is hidden from users
    pub is_hidden: bool,
}
123
/// Information about database links
///
/// Used as the element type of `DatabaseInfo::links`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LinkInfo {
    /// Link name
    pub name: String,
    /// Menu display name
    pub menu: String,
    /// Link description
    pub description: String,
    /// Target database the link points to
    pub target_db: String,
}
136
/// Results from ELink API for related article discovery
///
/// NOTE(review): PMIDs are numeric (`u32`) here, while `PubMedArticle::pmid` and
/// `SearchResult::pmids` in this module use `String` — callers have to convert between
/// the two representations.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RelatedArticles {
    /// Source PMIDs that were queried
    pub source_pmids: Vec<u32>,
    /// Related article PMIDs found
    pub related_pmids: Vec<u32>,
    /// Link type (e.g., "pubmed_pubmed", "pubmed_pubmed_reviews")
    pub link_type: String,
}
147
/// PMC links discovered through ELink API
///
/// Source PMIDs are numeric (`u32`); the PMC IDs are kept as strings.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PmcLinks {
    /// Source PMIDs that were queried
    pub source_pmids: Vec<u32>,
    /// PMC IDs that have full text available
    pub pmc_ids: Vec<String>,
}
156
/// Citation information from ELink API
///
/// Has the same shape as `RelatedArticles`, but the result list holds citing articles.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Citations {
    /// Source PMIDs that were queried
    pub source_pmids: Vec<u32>,
    /// PMIDs of articles that cite the source articles
    pub citing_pmids: Vec<u32>,
    /// Link type (e.g., "pubmed_pubmed_citedin")
    pub link_type: String,
}
167
168/// Search result with WebEnv session information for history server pagination
169#[derive(Debug, Clone)]
170pub struct SearchResult {
171    /// List of PMIDs matching the search query
172    pub pmids: Vec<String>,
173    /// Total number of results matching the query
174    pub total_count: usize,
175    /// WebEnv session identifier for history server
176    pub webenv: Option<String>,
177    /// Query key for history server
178    pub query_key: Option<String>,
179    /// How PubMed interpreted and translated the search query
180    ///
181    /// For example, searching "asthma" might be translated to:
182    /// `"asthma"[MeSH Terms] OR "asthma"[All Fields]`
183    ///
184    /// This is useful for debugging search queries and understanding
185    /// how PubMed's automatic term mapping works.
186    pub query_translation: Option<String>,
187}
188
189impl SearchResult {
190    /// Get the history session if WebEnv and query_key are available
191    ///
192    /// Returns `Some(HistorySession)` if both webenv and query_key are present,
193    /// `None` otherwise.
194    pub fn history_session(&self) -> Option<HistorySession> {
195        match (&self.webenv, &self.query_key) {
196            (Some(webenv), Some(query_key)) => Some(HistorySession {
197                webenv: webenv.clone(),
198                query_key: query_key.clone(),
199            }),
200            _ => None,
201        }
202    }
203
204    /// Check if this result has history session information
205    pub fn has_history(&self) -> bool {
206        self.webenv.is_some() && self.query_key.is_some()
207    }
208}
209
/// History server session information for paginated fetching
///
/// This represents a session on NCBI's history server that can be used
/// to efficiently fetch large result sets in batches without re-running
/// the search query.
///
/// Values of this type are produced by `SearchResult::history_session` and
/// `EPostResult::history_session` in this module. Two sessions compare equal when
/// both `webenv` and `query_key` are equal (derived `PartialEq`/`Eq`).
///
/// # Note
///
/// WebEnv sessions typically expire after 1 hour of inactivity.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HistorySession {
    /// WebEnv session identifier
    pub webenv: String,
    /// Query key within the session
    pub query_key: String,
}
226
227/// Result from EPost API for uploading PMIDs to the NCBI History server
228///
229/// EPost stores a list of UIDs (PMIDs) on the History server and returns
230/// WebEnv/query_key identifiers. These can then be used with `fetch_from_history()`
231/// to retrieve article metadata, or combined with other E-utility calls.
232///
233/// # Example
234///
235/// ```ignore
236/// use pubmed_client::PubMedClient;
237///
238/// #[tokio::main]
239/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
240///     let client = PubMedClient::new();
241///
242///     // Upload a list of PMIDs to the history server
243///     let result = client.epost(&["31978945", "33515491", "25760099"]).await?;
244///
245///     // Use the session to fetch articles
246///     let session = result.history_session();
247///     let articles = client.fetch_from_history(&session, 0, 100).await?;
248///     println!("Fetched {} articles", articles.len());
249///
250///     Ok(())
251/// }
252/// ```
253#[derive(Debug, Clone)]
254pub struct EPostResult {
255    /// WebEnv session identifier for the uploaded IDs
256    pub webenv: String,
257    /// Query key for the uploaded IDs within the session
258    pub query_key: String,
259}
260
261impl EPostResult {
262    /// Convert to a HistorySession for use with `fetch_from_history()`
263    ///
264    /// This is a convenience method that creates a `HistorySession` from the
265    /// EPost result, which can then be passed to `fetch_from_history()`.
266    pub fn history_session(&self) -> HistorySession {
267        HistorySession {
268            webenv: self.webenv.clone(),
269            query_key: self.query_key.clone(),
270        }
271    }
272}
273
/// Medical Subject Heading (MeSH) qualifier/subheading
///
/// Qualifiers are attached to a descriptor through `MeshTerm::qualifiers`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshQualifier {
    /// Qualifier name (e.g., "drug therapy", "genetics")
    pub qualifier_name: String,
    /// Unique identifier for the qualifier
    pub qualifier_ui: String,
    /// Whether this qualifier is a major topic
    pub major_topic: bool,
}
284
/// Medical Subject Heading (MeSH) descriptor term
///
/// `descriptor_name` is the value matched by the `PubMedArticle` MeSH helpers
/// (`has_mesh_term`, `get_mesh_qualifiers`, ...) in this module.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshTerm {
    /// Descriptor name (e.g., "Diabetes Mellitus, Type 2")
    pub descriptor_name: String,
    /// Unique identifier for the descriptor
    pub descriptor_ui: String,
    /// Whether this term is a major topic of the article
    pub major_topic: bool,
    /// Associated qualifiers/subheadings (may be empty)
    pub qualifiers: Vec<MeshQualifier>,
}
297
/// Supplemental MeSH concept (for substances, diseases, etc.)
///
/// Stored alongside descriptor terms in `MeshHeading::supplemental_concepts`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SupplementalConcept {
    /// Concept name
    pub name: String,
    /// Unique identifier
    pub ui: String,
    /// Concept type (e.g., "Disease", "Drug"), if known
    pub concept_type: Option<String>,
}
308
/// Chemical substance mentioned in the article
///
/// Element type of `PubMedArticle::chemical_list`; only `name` is mandatory.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChemicalConcept {
    /// Chemical name
    pub name: String,
    /// Registry number (e.g., CAS number), if available
    pub registry_number: Option<String>,
    /// Chemical UI, if available
    pub ui: Option<String>,
}
319
/// Complete MeSH heading information for an article
///
/// Groups descriptor terms with supplemental concepts. Either list may be empty;
/// the `PubMedArticle` MeSH helpers in this module only read `mesh_terms`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshHeading {
    /// MeSH descriptor terms
    pub mesh_terms: Vec<MeshTerm>,
    /// Supplemental concepts
    pub supplemental_concepts: Vec<SupplementalConcept>,
}
328
329impl PubMedArticle {
330    /// Get all major MeSH terms from the article
331    ///
332    /// # Returns
333    ///
334    /// A vector of major MeSH term names
335    ///
336    /// # Example
337    ///
338    /// ```
339    /// # use pubmed_parser::pubmed::PubMedArticle;
340    /// # let article = PubMedArticle {
341    /// #     pmid: "123".to_string(),
342    /// #     title: "Test".to_string(),
343    /// #     authors: vec![],
344    /// #     author_count: 0,
345    /// #     journal: "Test Journal".to_string(),
346    /// #     pub_date: "2023".to_string(),
347    /// #     doi: None,
348    /// #     pmc_id: None,
349    /// #     abstract_text: None,
350    /// #     structured_abstract: None,
351    /// #     article_types: vec![],
352    /// #     mesh_headings: None,
353    /// #     keywords: None,
354    /// #     chemical_list: None,
355    /// #     volume: None, issue: None, pages: None,
356    /// #     language: None, journal_abbreviation: None, issn: None,
357    /// # };
358    /// let major_terms = article.get_major_mesh_terms();
359    /// ```
360    pub fn get_major_mesh_terms(&self) -> Vec<String> {
361        let mut major_terms = Vec::new();
362
363        if let Some(mesh_headings) = &self.mesh_headings {
364            for heading in mesh_headings {
365                for term in &heading.mesh_terms {
366                    if term.major_topic {
367                        major_terms.push(term.descriptor_name.clone());
368                    }
369                }
370            }
371        }
372
373        major_terms
374    }
375
376    /// Check if the article has a specific MeSH term
377    ///
378    /// # Arguments
379    ///
380    /// * `term` - The MeSH term to check for
381    ///
382    /// # Returns
383    ///
384    /// `true` if the article has the specified MeSH term, `false` otherwise
385    ///
386    /// # Example
387    ///
388    /// ```
389    /// # use pubmed_parser::pubmed::PubMedArticle;
390    /// # let article = PubMedArticle {
391    /// #     pmid: "123".to_string(),
392    /// #     title: "Test".to_string(),
393    /// #     authors: vec![],
394    /// #     author_count: 0,
395    /// #     journal: "Test Journal".to_string(),
396    /// #     pub_date: "2023".to_string(),
397    /// #     doi: None,
398    /// #     pmc_id: None,
399    /// #     abstract_text: None,
400    /// #     structured_abstract: None,
401    /// #     article_types: vec![],
402    /// #     mesh_headings: None,
403    /// #     keywords: None,
404    /// #     chemical_list: None,
405    /// #     volume: None, issue: None, pages: None,
406    /// #     language: None, journal_abbreviation: None, issn: None,
407    /// # };
408    /// let has_diabetes = article.has_mesh_term("Diabetes Mellitus");
409    /// ```
410    pub fn has_mesh_term(&self, term: &str) -> bool {
411        if let Some(mesh_headings) = &self.mesh_headings {
412            for heading in mesh_headings {
413                for mesh_term in &heading.mesh_terms {
414                    if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
415                        return true;
416                    }
417                }
418            }
419        }
420        false
421    }
422
423    /// Get all MeSH terms from the article
424    ///
425    /// # Returns
426    ///
427    /// A vector of all MeSH term names
428    pub fn get_all_mesh_terms(&self) -> Vec<String> {
429        let mut terms = Vec::new();
430
431        if let Some(mesh_headings) = &self.mesh_headings {
432            for heading in mesh_headings {
433                for term in &heading.mesh_terms {
434                    terms.push(term.descriptor_name.clone());
435                }
436            }
437        }
438
439        terms
440    }
441
442    /// Get corresponding authors from the article
443    ///
444    /// # Returns
445    ///
446    /// A vector of references to authors marked as corresponding
447    pub fn get_corresponding_authors(&self) -> Vec<&Author> {
448        self.authors
449            .iter()
450            .filter(|author| author.is_corresponding)
451            .collect()
452    }
453
454    /// Get authors affiliated with a specific institution
455    ///
456    /// # Arguments
457    ///
458    /// * `institution` - Institution name to search for (case-insensitive substring match)
459    ///
460    /// # Returns
461    ///
462    /// A vector of references to authors with matching affiliations
463    pub fn get_authors_by_institution(&self, institution: &str) -> Vec<&Author> {
464        let institution_lower = institution.to_lowercase();
465        self.authors
466            .iter()
467            .filter(|author| {
468                author.affiliations.iter().any(|affil| {
469                    affil
470                        .institution
471                        .as_ref()
472                        .is_some_and(|inst| inst.to_lowercase().contains(&institution_lower))
473                })
474            })
475            .collect()
476    }
477
478    /// Get all unique countries from author affiliations
479    ///
480    /// # Returns
481    ///
482    /// A vector of unique country names
483    pub fn get_author_countries(&self) -> Vec<String> {
484        use std::collections::HashSet;
485        let mut countries: HashSet<String> = HashSet::new();
486
487        for author in &self.authors {
488            for affiliation in &author.affiliations {
489                if let Some(country) = &affiliation.country {
490                    countries.insert(country.clone());
491                }
492            }
493        }
494
495        countries.into_iter().collect()
496    }
497
498    /// Get authors with ORCID identifiers
499    ///
500    /// # Returns
501    ///
502    /// A vector of references to authors who have ORCID IDs
503    pub fn get_authors_with_orcid(&self) -> Vec<&Author> {
504        self.authors
505            .iter()
506            .filter(|author| author.orcid.is_some())
507            .collect()
508    }
509
510    /// Check if the article has international collaboration
511    ///
512    /// # Returns
513    ///
514    /// `true` if authors are from multiple countries
515    pub fn has_international_collaboration(&self) -> bool {
516        self.get_author_countries().len() > 1
517    }
518
519    /// Calculate MeSH term similarity between two articles
520    ///
521    /// # Arguments
522    ///
523    /// * `other` - The other article to compare with
524    ///
525    /// # Returns
526    ///
527    /// A similarity score between 0.0 and 1.0 based on Jaccard similarity
528    ///
529    /// # Example
530    ///
531    /// ```
532    /// # use pubmed_parser::pubmed::PubMedArticle;
533    /// # let article1 = PubMedArticle {
534    /// #     pmid: "123".to_string(),
535    /// #     title: "Test".to_string(),
536    /// #     authors: vec![],
537    /// #     author_count: 0,
538    /// #     journal: "Test Journal".to_string(),
539    /// #     pub_date: "2023".to_string(),
540    /// #     doi: None,
541    /// #     pmc_id: None,
542    /// #     abstract_text: None,
543    /// #     structured_abstract: None,
544    /// #     article_types: vec![],
545    /// #     mesh_headings: None,
546    /// #     keywords: None,
547    /// #     chemical_list: None,
548    /// #     volume: None, issue: None, pages: None,
549    /// #     language: None, journal_abbreviation: None, issn: None,
550    /// # };
551    /// # let article2 = article1.clone();
552    /// let similarity = article1.mesh_term_similarity(&article2);
553    /// ```
554    pub fn mesh_term_similarity(&self, other: &PubMedArticle) -> f64 {
555        use std::collections::HashSet;
556
557        let terms1: HashSet<String> = self
558            .get_all_mesh_terms()
559            .into_iter()
560            .map(|t| t.to_lowercase())
561            .collect();
562
563        let terms2: HashSet<String> = other
564            .get_all_mesh_terms()
565            .into_iter()
566            .map(|t| t.to_lowercase())
567            .collect();
568
569        if terms1.is_empty() && terms2.is_empty() {
570            return 0.0;
571        }
572
573        let intersection = terms1.intersection(&terms2).count();
574        let union = terms1.union(&terms2).count();
575
576        if union == 0 {
577            0.0
578        } else {
579            intersection as f64 / union as f64
580        }
581    }
582
583    /// Get MeSH qualifiers for a specific term
584    ///
585    /// # Arguments
586    ///
587    /// * `term` - The MeSH term to get qualifiers for
588    ///
589    /// # Returns
590    ///
591    /// A vector of qualifier names for the specified term
592    pub fn get_mesh_qualifiers(&self, term: &str) -> Vec<String> {
593        let mut qualifiers = Vec::new();
594
595        if let Some(mesh_headings) = &self.mesh_headings {
596            for heading in mesh_headings {
597                for mesh_term in &heading.mesh_terms {
598                    if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
599                        for qualifier in &mesh_term.qualifiers {
600                            qualifiers.push(qualifier.qualifier_name.clone());
601                        }
602                    }
603                }
604            }
605        }
606
607        qualifiers
608    }
609
610    /// Check if the article has any MeSH terms
611    ///
612    /// # Returns
613    ///
614    /// `true` if the article has MeSH terms, `false` otherwise
615    pub fn has_mesh_terms(&self) -> bool {
616        self.mesh_headings
617            .as_ref()
618            .map(|h| !h.is_empty())
619            .unwrap_or(false)
620    }
621
622    /// Get chemicals mentioned in the article
623    ///
624    /// # Returns
625    ///
626    /// A vector of chemical names
627    pub fn get_chemical_names(&self) -> Vec<String> {
628        self.chemical_list
629            .as_ref()
630            .map(|chemicals| chemicals.iter().map(|c| c.name.clone()).collect())
631            .unwrap_or_default()
632    }
633}
634
635// ================================================================================================
636// ESpell API types
637// ================================================================================================
638
/// Represents a segment of the spelled query from the ESpell API
///
/// Each segment is either an original (unchanged) part of the query or a
/// replacement (corrected spelling suggestion). A full spelled query is an
/// ordered list of these segments (see `SpellCheckResult::spelled_query`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum SpelledQuerySegment {
    /// A term or separator that was not changed
    Original(String),
    /// A corrected/suggested replacement term
    Replaced(String),
}
650
/// Result from the ESpell API providing spelling suggestions
///
/// ESpell provides spelling suggestions for terms within a single text query
/// in a given database. It acts as a preprocessing/spell-check tool to improve
/// search accuracy before executing actual searches.
///
/// Use `has_corrections()` to test whether a suggestion was made and
/// `replacements()` to list only the corrected terms.
///
/// # Example
///
/// ```ignore
/// use pubmed_client::PubMedClient;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = PubMedClient::new();
///     let result = client.spell_check("asthmaa OR alergies").await?;
///     println!("Original: {}", result.query);
///     println!("Corrected: {}", result.corrected_query);
///     Ok(())
/// }
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SpellCheckResult {
    /// The database that was queried
    pub database: String,
    /// The original query string as submitted
    pub query: String,
    /// The full corrected/suggested query as a plain string
    ///
    /// May be empty; `has_corrections()` treats an empty value as "no correction".
    pub corrected_query: String,
    /// Detailed segments showing which terms were replaced vs. kept
    pub spelled_query: Vec<SpelledQuerySegment>,
}
682
683impl SpellCheckResult {
684    /// Check if the query had any spelling corrections.
685    ///
686    /// Returns `true` only when NCBI provided a non-empty corrected query that differs from
687    /// the original. The NCBI ESpell API returns an empty `<CorrectedQuery/>` element when
688    /// no corrections are available.
689    pub fn has_corrections(&self) -> bool {
690        !self.corrected_query.is_empty() && self.query != self.corrected_query
691    }
692
693    /// Get only the replaced (corrected) terms
694    pub fn replacements(&self) -> Vec<&str> {
695        self.spelled_query
696            .iter()
697            .filter_map(|segment| match segment {
698                SpelledQuerySegment::Replaced(s) => Some(s.as_str()),
699                SpelledQuerySegment::Original(_) => None,
700            })
701            .collect()
702    }
703}
704
705// ================================================================================================
706// ECitMatch API types
707// ================================================================================================
708
/// Input for a single citation match query
///
/// Used with the ECitMatch API to find PMIDs from citation information.
/// Each field corresponds to a part of the citation string sent to the API;
/// [`CitationQuery::to_bdata`] assembles them into that string.
///
/// # Example
///
/// ```
/// use pubmed_parser::pubmed::CitationQuery;
///
/// let query = CitationQuery::new(
///     "proc natl acad sci u s a",
///     "1991",
///     "88",
///     "3248",
///     "mann bj",
///     "Art1",
/// );
/// ```
#[derive(Debug, Clone)]
pub struct CitationQuery {
    /// Journal title abbreviation (e.g., "proc natl acad sci u s a")
    pub journal: String,
    /// Publication year (e.g., "1991")
    pub year: String,
    /// Volume number (e.g., "88")
    pub volume: String,
    /// First page number (e.g., "3248")
    pub first_page: String,
    /// Author name (e.g., "mann bj")
    pub author_name: String,
    /// User-defined key for identifying results (e.g., "Art1")
    pub key: String,
}

impl CitationQuery {
    /// Build a citation query, copying each argument into an owned field
    pub fn new(
        journal: &str,
        year: &str,
        volume: &str,
        first_page: &str,
        author_name: &str,
        key: &str,
    ) -> Self {
        let (journal, year, volume) = (journal.to_owned(), year.to_owned(), volume.to_owned());
        let (first_page, author_name, key) =
            (first_page.to_owned(), author_name.to_owned(), key.to_owned());

        Self {
            journal,
            year,
            volume,
            first_page,
            author_name,
            key,
        }
    }

    /// Render this citation as a bdata string for the ECitMatch API
    ///
    /// The six fields are emitted in declaration order, each followed by `|`.
    /// Spaces in `journal` and `author_name` are replaced with `+`; the other
    /// fields are written unchanged.
    pub fn to_bdata(&self) -> String {
        let journal_plus = self.journal.replace(' ', "+");
        let author_plus = self.author_name.replace(' ', "+");

        let parts = [
            journal_plus.as_str(),
            self.year.as_str(),
            self.volume.as_str(),
            self.first_page.as_str(),
            author_plus.as_str(),
            self.key.as_str(),
        ];

        // `join` only puts separators between parts; the format also ends with one.
        let mut bdata = parts.join("|");
        bdata.push('|');
        bdata
    }
}
777
/// Status of a citation match result
///
/// Stored in `CitationMatch::status`; `CitationMatches::found` and
/// `CitationMatches::found_count` select entries whose status is `Found`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum CitationMatchStatus {
    /// A unique PMID was found for the citation
    Found,
    /// No PMID could be found for the citation
    NotFound,
    /// Multiple PMIDs matched the citation (ambiguous)
    Ambiguous,
}
788
/// Result of a single citation match from the ECitMatch API
///
/// The first six fields mirror the fields of `CitationQuery`; `pmid` and `status`
/// describe the outcome of the lookup.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CitationMatch {
    /// Journal title from the query
    pub journal: String,
    /// Year from the query
    pub year: String,
    /// Volume from the query
    pub volume: String,
    /// First page from the query
    pub first_page: String,
    /// Author name from the query
    pub author_name: String,
    /// User-defined key from the query
    pub key: String,
    /// Matched PMID (if found)
    ///
    /// NOTE(review): nothing in this type ties `pmid` to `status`; presumably it is
    /// `Some` only when `status` is `Found` — confirm against the parser.
    pub pmid: Option<String>,
    /// Match status
    pub status: CitationMatchStatus,
}
809
/// Results from ECitMatch API for batch citation matching
///
/// Contains the results of matching multiple citations to PMIDs.
/// Use `found()` / `found_count()` to look only at the successful matches.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CitationMatches {
    /// List of citation match results (successful and unsuccessful alike)
    pub matches: Vec<CitationMatch>,
}
818
819impl CitationMatches {
820    /// Get only the successfully matched citations
821    pub fn found(&self) -> Vec<&CitationMatch> {
822        self.matches
823            .iter()
824            .filter(|m| m.status == CitationMatchStatus::Found)
825            .collect()
826    }
827
828    /// Get the number of successful matches
829    pub fn found_count(&self) -> usize {
830        self.matches
831            .iter()
832            .filter(|m| m.status == CitationMatchStatus::Found)
833            .count()
834    }
835}
836
837// ================================================================================================
838// EGQuery API types
839// ================================================================================================
840
/// Record count for a single NCBI database from the EGQuery API
///
/// Element type of `GlobalQueryResults::results`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DatabaseCount {
    /// Internal database name (e.g., "pubmed", "pmc", "nuccore")
    ///
    /// This is the value `GlobalQueryResults::count_for` compares against (exact match).
    pub db_name: String,
    /// Human-readable database name (e.g., "PubMed", "PMC", "Nucleotide")
    pub menu_name: String,
    /// Number of records matching the query in this database
    pub count: u64,
    /// Status of the query for this database (e.g., "Ok"), kept as the raw string
    pub status: String,
}
853
/// Results from EGQuery API for global database search
///
/// Contains the number of records matching a query across all NCBI Entrez databases.
/// Use `non_zero()` to keep only databases with hits and `count_for()` to look up a
/// single database by its internal name.
///
/// # Example
///
/// ```ignore
/// use pubmed_client::PubMedClient;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = PubMedClient::new();
///     let results = client.global_query("asthma").await?;
///     for db in &results.results {
///         if db.count > 0 {
///             println!("{}: {} records", db.menu_name, db.count);
///         }
///     }
///     Ok(())
/// }
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct GlobalQueryResults {
    /// The query term that was searched
    pub term: String,
    /// Results for each NCBI database (one entry per database)
    pub results: Vec<DatabaseCount>,
}
882
883impl GlobalQueryResults {
884    /// Get results for databases with matching records (count > 0)
885    pub fn non_zero(&self) -> Vec<&DatabaseCount> {
886        self.results.iter().filter(|r| r.count > 0).collect()
887    }
888
889    /// Get the count for a specific database
890    pub fn count_for(&self, db_name: &str) -> Option<u64> {
891        self.results
892            .iter()
893            .find(|r| r.db_name == db_name)
894            .map(|r| r.count)
895    }
896}
897
898// ================================================================================================
899// ESummary API types
900// ================================================================================================
901
/// Lightweight article summary from the ESummary API
///
/// Contains basic metadata (title, authors, journal, dates) without the full
/// abstract, MeSH terms, or chemical lists that EFetch provides. Use this when
/// you only need basic bibliographic information for a large number of articles.
///
/// NOTE(review): unlike `PubMedArticle`, fields such as `volume`, `issue`, `pages`
/// and `issn` are plain `String`s here rather than `Option<String>`; missing values
/// are presumably represented as empty strings — confirm against the parser.
///
/// # Example
///
/// ```ignore
/// use pubmed_client::PubMedClient;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = PubMedClient::new();
///     let summaries = client.fetch_summaries(&["31978945", "33515491"]).await?;
///     for summary in &summaries {
///         println!("{}: {} ({})", summary.pmid, summary.title, summary.pub_date);
///     }
///     Ok(())
/// }
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ArticleSummary {
    /// PubMed ID
    pub pmid: String,
    /// Article title
    pub title: String,
    /// Author names (e.g., ["Zhu N", "Zhang D", "Wang W"])
    ///
    /// Plain name strings, not the detailed `Author` records used by `PubMedArticle`.
    pub authors: Vec<String>,
    /// Journal name (source field)
    pub journal: String,
    /// Full journal name (e.g., "The New England journal of medicine")
    pub full_journal_name: String,
    /// Publication date (e.g., "2020 Feb")
    pub pub_date: String,
    /// Electronic publication date (e.g., "2020 Jan 24")
    pub epub_date: String,
    /// DOI (Digital Object Identifier)
    pub doi: Option<String>,
    /// PMC ID if available (e.g., "PMC7092803")
    pub pmc_id: Option<String>,
    /// Journal volume (e.g., "382")
    pub volume: String,
    /// Journal issue (e.g., "8")
    pub issue: String,
    /// Page range (e.g., "727-733")
    pub pages: String,
    /// Languages (e.g., ["eng"])
    pub languages: Vec<String>,
    /// Publication types (e.g., ["Journal Article", "Review"])
    pub pub_types: Vec<String>,
    /// ISSN
    pub issn: String,
    /// Electronic ISSN
    pub essn: String,
    /// Sorted publication date (e.g., "2020/02/20 00:00")
    pub sort_pub_date: String,
    /// PMC reference count (number of citing articles in PMC)
    pub pmc_ref_count: u64,
    /// Record status (e.g., "PubMed - indexed for MEDLINE")
    pub record_status: String,
}
964
965#[cfg(test)]
966mod tests {
967    use super::*;
968    use crate::common::format_author_name;
969
970    fn create_test_author() -> Author {
971        Author {
972            surname: Some("Doe".to_string()),
973            given_names: Some("John A".to_string()),
974            initials: Some("JA".to_string()),
975            suffix: None,
976            full_name: "John A Doe".to_string(),
977            affiliations: vec![
978                Affiliation {
979                    id: None,
980                    institution: Some("Harvard Medical School".to_string()),
981                    department: Some("Department of Medicine".to_string()),
982                    address: Some("Boston, MA".to_string()),
983                    country: Some("USA".to_string()),
984                },
985                Affiliation {
986                    id: None,
987                    institution: Some("Massachusetts General Hospital".to_string()),
988                    department: None,
989                    address: Some("Boston, MA".to_string()),
990                    country: Some("USA".to_string()),
991                },
992            ],
993            orcid: Some("0000-0001-2345-6789".to_string()),
994            email: Some("john.doe@hms.harvard.edu".to_string()),
995            is_corresponding: true,
996            roles: vec![
997                "Conceptualization".to_string(),
998                "Writing - original draft".to_string(),
999            ],
1000        }
1001    }
1002
1003    fn create_test_article_with_mesh() -> PubMedArticle {
1004        PubMedArticle {
1005            pmid: "12345".to_string(),
1006            title: "Test Article".to_string(),
1007            authors: vec![create_test_author()],
1008            author_count: 1,
1009            journal: "Test Journal".to_string(),
1010            pub_date: "2023".to_string(),
1011            doi: None,
1012            pmc_id: None,
1013            abstract_text: None,
1014            structured_abstract: None,
1015            article_types: vec![],
1016            mesh_headings: Some(vec![
1017                MeshHeading {
1018                    mesh_terms: vec![MeshTerm {
1019                        descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
1020                        descriptor_ui: "D003924".to_string(),
1021                        major_topic: true,
1022                        qualifiers: vec![
1023                            MeshQualifier {
1024                                qualifier_name: "drug therapy".to_string(),
1025                                qualifier_ui: "Q000188".to_string(),
1026                                major_topic: false,
1027                            },
1028                            MeshQualifier {
1029                                qualifier_name: "genetics".to_string(),
1030                                qualifier_ui: "Q000235".to_string(),
1031                                major_topic: true,
1032                            },
1033                        ],
1034                    }],
1035                    supplemental_concepts: vec![],
1036                },
1037                MeshHeading {
1038                    mesh_terms: vec![MeshTerm {
1039                        descriptor_name: "Hypertension".to_string(),
1040                        descriptor_ui: "D006973".to_string(),
1041                        major_topic: false,
1042                        qualifiers: vec![],
1043                    }],
1044                    supplemental_concepts: vec![],
1045                },
1046            ]),
1047            keywords: Some(vec!["diabetes".to_string(), "treatment".to_string()]),
1048            chemical_list: Some(vec![ChemicalConcept {
1049                name: "Metformin".to_string(),
1050                registry_number: Some("657-24-9".to_string()),
1051                ui: Some("D008687".to_string()),
1052            }]),
1053            volume: Some("45".to_string()),
1054            issue: Some("3".to_string()),
1055            pages: Some("123-130".to_string()),
1056            language: Some("eng".to_string()),
1057            journal_abbreviation: Some("Test J".to_string()),
1058            issn: Some("1234-5678".to_string()),
1059        }
1060    }
1061
1062    #[test]
1063    fn test_get_major_mesh_terms() {
1064        let article = create_test_article_with_mesh();
1065        let major_terms = article.get_major_mesh_terms();
1066
1067        assert_eq!(major_terms.len(), 1);
1068        assert_eq!(major_terms[0], "Diabetes Mellitus, Type 2");
1069    }
1070
1071    #[test]
1072    fn test_has_mesh_term() {
1073        let article = create_test_article_with_mesh();
1074
1075        assert!(article.has_mesh_term("Diabetes Mellitus, Type 2"));
1076        assert!(article.has_mesh_term("DIABETES MELLITUS, TYPE 2")); // Case insensitive
1077        assert!(article.has_mesh_term("Hypertension"));
1078        assert!(!article.has_mesh_term("Cancer"));
1079    }
1080
1081    #[test]
1082    fn test_get_all_mesh_terms() {
1083        let article = create_test_article_with_mesh();
1084        let all_terms = article.get_all_mesh_terms();
1085
1086        assert_eq!(all_terms.len(), 2);
1087        assert!(all_terms.contains(&"Diabetes Mellitus, Type 2".to_string()));
1088        assert!(all_terms.contains(&"Hypertension".to_string()));
1089    }
1090
1091    #[test]
1092    fn test_mesh_term_similarity() {
1093        let article1 = create_test_article_with_mesh();
1094        let mut article2 = create_test_article_with_mesh();
1095
1096        // Same article should have similarity of 1.0
1097        let similarity = article1.mesh_term_similarity(&article2);
1098        assert_eq!(similarity, 1.0);
1099
1100        // Different MeSH terms
1101        article2.mesh_headings = Some(vec![MeshHeading {
1102            mesh_terms: vec![
1103                MeshTerm {
1104                    descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
1105                    descriptor_ui: "D003924".to_string(),
1106                    major_topic: true,
1107                    qualifiers: vec![],
1108                },
1109                MeshTerm {
1110                    descriptor_name: "Obesity".to_string(),
1111                    descriptor_ui: "D009765".to_string(),
1112                    major_topic: false,
1113                    qualifiers: vec![],
1114                },
1115            ],
1116            supplemental_concepts: vec![],
1117        }]);
1118
1119        let similarity = article1.mesh_term_similarity(&article2);
1120        // Should have partial similarity (1 common term out of 3 unique terms)
1121        assert!(similarity > 0.0 && similarity < 1.0);
1122        assert_eq!(similarity, 1.0 / 3.0); // Jaccard similarity
1123
1124        // No MeSH terms
1125        let article3 = PubMedArticle {
1126            pmid: "54321".to_string(),
1127            title: "Test".to_string(),
1128            authors: vec![],
1129            author_count: 0,
1130            journal: "Test".to_string(),
1131            pub_date: "2023".to_string(),
1132            doi: None,
1133            pmc_id: None,
1134            abstract_text: None,
1135            structured_abstract: None,
1136            article_types: vec![],
1137            mesh_headings: None,
1138            keywords: None,
1139            chemical_list: None,
1140            volume: None,
1141            issue: None,
1142            pages: None,
1143            language: None,
1144            journal_abbreviation: None,
1145            issn: None,
1146        };
1147
1148        assert_eq!(article1.mesh_term_similarity(&article3), 0.0);
1149    }
1150
1151    #[test]
1152    fn test_get_mesh_qualifiers() {
1153        let article = create_test_article_with_mesh();
1154
1155        let qualifiers = article.get_mesh_qualifiers("Diabetes Mellitus, Type 2");
1156        assert_eq!(qualifiers.len(), 2);
1157        assert!(qualifiers.contains(&"drug therapy".to_string()));
1158        assert!(qualifiers.contains(&"genetics".to_string()));
1159
1160        let qualifiers = article.get_mesh_qualifiers("Hypertension");
1161        assert_eq!(qualifiers.len(), 0);
1162
1163        let qualifiers = article.get_mesh_qualifiers("Nonexistent Term");
1164        assert_eq!(qualifiers.len(), 0);
1165    }
1166
1167    #[test]
1168    fn test_has_mesh_terms() {
1169        let article = create_test_article_with_mesh();
1170        assert!(article.has_mesh_terms());
1171
1172        let mut article_no_mesh = article.clone();
1173        article_no_mesh.mesh_headings = None;
1174        assert!(!article_no_mesh.has_mesh_terms());
1175
1176        let mut article_empty_mesh = article.clone();
1177        article_empty_mesh.mesh_headings = Some(vec![]);
1178        assert!(!article_empty_mesh.has_mesh_terms());
1179    }
1180
1181    #[test]
1182    fn test_get_chemical_names() {
1183        let article = create_test_article_with_mesh();
1184        let chemicals = article.get_chemical_names();
1185
1186        assert_eq!(chemicals.len(), 1);
1187        assert_eq!(chemicals[0], "Metformin");
1188
1189        let mut article_no_chemicals = article.clone();
1190        article_no_chemicals.chemical_list = None;
1191        let chemicals = article_no_chemicals.get_chemical_names();
1192        assert_eq!(chemicals.len(), 0);
1193    }
1194
1195    #[test]
1196    fn test_author_creation() {
1197        let author = Author::new(Some("Smith".to_string()), Some("Jane".to_string()));
1198        assert_eq!(author.surname, Some("Smith".to_string()));
1199        assert_eq!(author.given_names, Some("Jane".to_string()));
1200        assert_eq!(author.full_name, "Jane Smith");
1201        assert!(!author.has_orcid());
1202        assert!(!author.is_corresponding);
1203    }
1204
1205    #[test]
1206    fn test_author_affiliations() {
1207        let author = create_test_author();
1208
1209        assert!(author.is_affiliated_with("Harvard"));
1210        assert!(author.is_affiliated_with("Massachusetts General"));
1211        assert!(!author.is_affiliated_with("Stanford"));
1212
1213        let primary = author.primary_affiliation().unwrap();
1214        assert_eq!(
1215            primary.institution,
1216            Some("Harvard Medical School".to_string())
1217        );
1218
1219        assert!(author.has_orcid());
1220        assert!(author.is_corresponding);
1221    }
1222
1223    #[test]
1224    fn test_get_corresponding_authors() {
1225        let article = create_test_article_with_mesh();
1226        let corresponding = article.get_corresponding_authors();
1227
1228        assert_eq!(corresponding.len(), 1);
1229        assert_eq!(corresponding[0].full_name, "John A Doe");
1230    }
1231
1232    #[test]
1233    fn test_get_authors_by_institution() {
1234        let article = create_test_article_with_mesh();
1235
1236        let harvard_authors = article.get_authors_by_institution("Harvard");
1237        assert_eq!(harvard_authors.len(), 1);
1238
1239        let stanford_authors = article.get_authors_by_institution("Stanford");
1240        assert_eq!(stanford_authors.len(), 0);
1241    }
1242
1243    #[test]
1244    fn test_get_author_countries() {
1245        let article = create_test_article_with_mesh();
1246        let countries = article.get_author_countries();
1247
1248        assert_eq!(countries.len(), 1);
1249        assert!(countries.contains(&"USA".to_string()));
1250    }
1251
1252    #[test]
1253    fn test_international_collaboration() {
1254        let article = create_test_article_with_mesh();
1255        assert!(!article.has_international_collaboration());
1256
1257        // Create article with international authors
1258        let mut international_article = article.clone();
1259        let mut uk_author = create_test_author();
1260        uk_author.affiliations[0].country = Some("UK".to_string());
1261        international_article.authors.push(uk_author);
1262        international_article.author_count = 2;
1263
1264        assert!(international_article.has_international_collaboration());
1265    }
1266
1267    #[test]
1268    fn test_get_authors_with_orcid() {
1269        let article = create_test_article_with_mesh();
1270        let authors_with_orcid = article.get_authors_with_orcid();
1271
1272        assert_eq!(authors_with_orcid.len(), 1);
1273        assert_eq!(
1274            authors_with_orcid[0].orcid,
1275            Some("0000-0001-2345-6789".to_string())
1276        );
1277    }
1278
1279    #[test]
1280    fn test_format_author_name() {
1281        assert_eq!(
1282            format_author_name(&Some("Smith".to_string()), &Some("John".to_string()), &None),
1283            "John Smith"
1284        );
1285
1286        assert_eq!(
1287            format_author_name(&Some("Doe".to_string()), &None, &Some("J".to_string())),
1288            "J Doe"
1289        );
1290
1291        assert_eq!(
1292            format_author_name(&Some("Johnson".to_string()), &None, &None),
1293            "Johnson"
1294        );
1295
1296        assert_eq!(
1297            format_author_name(&None, &Some("Jane".to_string()), &None),
1298            "Jane"
1299        );
1300
1301        assert_eq!(format_author_name(&None, &None, &None), "Unknown Author");
1302    }
1303
1304    #[test]
1305    fn test_spell_check_result_has_corrections() {
1306        let result = SpellCheckResult {
1307            database: "pubmed".to_string(),
1308            query: "asthmaa".to_string(),
1309            corrected_query: "asthma".to_string(),
1310            spelled_query: vec![SpelledQuerySegment::Replaced("asthma".to_string())],
1311        };
1312        assert!(result.has_corrections());
1313
1314        let no_correction = SpellCheckResult {
1315            database: "pubmed".to_string(),
1316            query: "asthma".to_string(),
1317            corrected_query: "asthma".to_string(),
1318            spelled_query: vec![SpelledQuerySegment::Original("asthma".to_string())],
1319        };
1320        assert!(!no_correction.has_corrections());
1321    }
1322
1323    #[test]
1324    fn test_spell_check_result_replacements() {
1325        let result = SpellCheckResult {
1326            database: "pubmed".to_string(),
1327            query: "asthmaa OR alergies".to_string(),
1328            corrected_query: "asthma or allergies".to_string(),
1329            spelled_query: vec![
1330                SpelledQuerySegment::Original("".to_string()),
1331                SpelledQuerySegment::Replaced("asthma".to_string()),
1332                SpelledQuerySegment::Original(" OR ".to_string()),
1333                SpelledQuerySegment::Replaced("allergies".to_string()),
1334            ],
1335        };
1336        let replacements = result.replacements();
1337        assert_eq!(replacements.len(), 2);
1338        assert_eq!(replacements[0], "asthma");
1339        assert_eq!(replacements[1], "allergies");
1340    }
1341
1342    #[test]
1343    fn test_bibliographic_fields_on_article() {
1344        let article = create_test_article_with_mesh();
1345
1346        assert_eq!(article.volume, Some("45".to_string()));
1347        assert_eq!(article.issue, Some("3".to_string()));
1348        assert_eq!(article.pages, Some("123-130".to_string()));
1349        assert_eq!(article.language, Some("eng".to_string()));
1350        assert_eq!(article.journal_abbreviation, Some("Test J".to_string()));
1351        assert_eq!(article.issn, Some("1234-5678".to_string()));
1352    }
1353}