// pubmed_parser/pmc/domain.rs
//! PMC domain models based on JATS Archiving 1.4 DTD
//!
//! **This module is the single model layer for PMC full-text articles.**
//! All parsing produces these types directly; there is no separate
//! intermediate parser model.
//!
//! The type hierarchy mirrors the JATS `<article>` content model:
//!
//! ```text
//! <article>              → PmcArticle
//!   <front>              → Front
//!     <journal-meta>     → JournalMeta
//!     <article-meta>     → ArticleMeta (ids, TitleGroup, contributors,
//!                          pub info, history, Permissions, Abstract,
//!                          keywords, funding)
//!   <body>               → Body (Vec<Section>, recursive)
//!   <back>               → Back (ack, COI, Vec<Reference>, appendices, glossary)
//! ```
//!
//! Design principles:
//! - DTD-faithful: every field maps to a JATS element/attribute, structured
//!   following the DTD hierarchy and declaration order
//! - No extraction concerns: fields like `file_path` or inferred `file_type` are excluded
//! - Type-safe IDs: uses `PmcId` / `PubMedId` instead of raw strings
//! - Reuses shared types: `Author` and `HistoryDate` from `common::models`
//! - Text mining ready: structured abstracts, table content, formulas, definitions
//!
//! Documented deviations from strict DTD structure:
//! - `supplementary_materials` and `data_availability` live on [`PmcArticle`]
//!   because in real PMC XML they appear in `<body>` sections, `<back>`, or
//!   `<floats-group>`; the parser collects them document-wide without
//!   tracking their position
//! - `<contrib-group>` is flattened to `Vec<Author>` (only author contribs
//!   are modeled)
//!
//! Flattened read access is provided through accessor methods on
//! [`PmcArticle`] (e.g. [`PmcArticle::title`], [`PmcArticle::sections`]).

use crate::common::{Author, HistoryDate, PmcId, PubMedId, PublicationDate};
use serde::{Deserialize, Serialize};

42// ============================================================================
43// Top-level article
44// ============================================================================
45
46/// PMC full-text article.
47///
48/// Maps to JATS `<article>`: `front, body?, back?`.
49///
50/// DTD: <https://jats.nlm.nih.gov/archiving/tag-library/1.4/element/article.html>
51#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
52pub struct PmcArticle {
53 /// Article type. From `<article article-type="...">` attribute.
54 pub article_type: Option<String>,
55 /// Front matter. From `<front>`.
56 pub front: Front,
57 /// Article body. From `<body>`. `None` when the article has no body
58 /// (e.g., metadata-only records).
59 pub body: Option<Body>,
60 /// Back matter. From `<back>`. `None` when the article has no back matter.
61 pub back: Option<Back>,
62
63 // --- Document-wide collections (deviation from strict DTD placement) ---
64 /// Supplementary materials. From `<supplementary-material>`, collected
65 /// from the entire document (`<body>` sections, `<back>`, or `<floats-group>`).
66 pub supplementary_materials: Vec<SupplementaryMaterial>,
67 /// Data availability statement. From `<sec sec-type="data-availability">`
68 /// or `<notes notes-type="data-availability">`, wherever it appears.
69 pub data_availability: Option<String>,
70}
71
72// ============================================================================
73// Front matter (<front>)
74// ============================================================================
75
76/// Front matter.
77///
78/// Maps to JATS `<front>`: `journal-meta?, article-meta?`.
79#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
80pub struct Front {
81 /// Journal metadata. From `<journal-meta>`.
82 pub journal_meta: JournalMeta,
83 /// Article metadata. From `<article-meta>`.
84 pub article_meta: ArticleMeta,
85}
86
87/// Journal metadata.
88///
89/// Maps to JATS `<journal-meta>`. Note that `volume` and `issue` are intentionally
90/// excluded here — in the DTD they belong to `<article-meta>`, not `<journal-meta>`,
91/// and are placed on [`ArticleMeta`] accordingly.
92#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
93pub struct JournalMeta {
94 /// Journal title. From `<journal-title-group>/<journal-title>`.
95 pub title: String,
96 /// Abbreviated journal title. From `<journal-id journal-id-type="iso-abbrev">`
97 /// or `<abbrev-journal-title>`.
98 pub abbreviation: Option<String>,
99 /// Print ISSN. From `<issn pub-type="ppub">`.
100 pub issn_print: Option<String>,
101 /// Electronic ISSN. From `<issn pub-type="epub">`.
102 pub issn_electronic: Option<String>,
103 /// Publisher name. From `<publisher>/<publisher-name>`.
104 pub publisher: Option<String>,
105}
106
107/// Article metadata.
108///
109/// Maps to JATS `<article-meta>`. Fields follow the DTD declaration order:
110/// `article-id*, article-categories?, title-group, contrib-group*, pub-date*,
111/// volume?, issue?, fpage?, lpage?, elocation-id?, history?, permissions?,
112/// abstract*, kwd-group*, funding-group*`.
113#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
114pub struct ArticleMeta {
115 // --- Identifiers (<article-id>) ---
116 /// PMC ID (e.g., PMC7906746). From `<article-id pub-id-type="pmc">`.
117 pub pmcid: PmcId,
118 /// PubMed ID. From `<article-id pub-id-type="pmid">`.
119 pub pmid: Option<PubMedId>,
120 /// DOI. From `<article-id pub-id-type="doi">`.
121 pub doi: Option<String>,
122
123 /// Subject categories. From `<article-categories>/<subj-group>/<subject>`.
124 pub categories: Vec<String>,
125
126 /// Title group. From `<title-group>`.
127 pub title_group: TitleGroup,
128
129 /// Authors. From `<contrib-group>/<contrib contrib-type="author">`.
130 pub authors: Vec<Author>,
131
132 /// Publication dates (epub, ppub, collection, etc.). From `<pub-date>`.
133 pub pub_dates: Vec<PublicationDate>,
134 /// Volume number. From `<volume>`.
135 pub volume: Option<String>,
136 /// Issue number. From `<issue>`.
137 pub issue: Option<String>,
138 /// First page. From `<fpage>`.
139 pub fpage: Option<String>,
140 /// Last page. From `<lpage>`.
141 pub lpage: Option<String>,
142 /// Electronic location ID. From `<elocation-id>`.
143 pub elocation_id: Option<String>,
144
145 /// Publication history dates. From `<history>/<date>`.
146 pub history: Vec<HistoryDate>,
147
148 /// Copyright and licensing. From `<permissions>`.
149 pub permissions: Option<Permissions>,
150
151 /// Abstracts. From `<abstract>` (repeatable in the DTD, e.g. a main
152 /// abstract plus a graphical or teaser abstract).
153 pub abstracts: Vec<Abstract>,
154
155 /// Keywords. From `<kwd-group>/<kwd>`.
156 pub keywords: Vec<String>,
157
158 /// Funding information. From `<funding-group>/<award-group>`.
159 pub funding: Vec<FundingInfo>,
160}
161
162/// Title group.
163///
164/// Maps to JATS `<title-group>`.
165#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
166pub struct TitleGroup {
167 /// Article title. From `<article-title>`.
168 pub article_title: String,
169 /// Article subtitle. From `<subtitle>`.
170 pub subtitle: Option<String>,
171}
172
173/// Copyright and licensing information.
174///
175/// Maps to JATS `<permissions>`.
176#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
177pub struct Permissions {
178 /// Copyright statement. From `<copyright-statement>`
179 /// (falls back to `<copyright-year>`).
180 pub copyright_statement: Option<String>,
181 /// License. From `<license>`.
182 pub license: Option<License>,
183}
184
185/// License information.
186///
187/// Maps to JATS `<license>`.
188#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
189pub struct License {
190 /// License URL. From `<license xlink:href="...">` or `<ali:license_ref>`.
191 pub href: Option<String>,
192 /// License text. From `<license-p>` content.
193 pub text: Option<String>,
194}
195
196/// Abstract.
197///
198/// Maps to JATS `<abstract>`. The DTD allows multiple abstracts
199/// distinguished by `@abstract-type`.
200#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
201pub struct Abstract {
202 /// Abstract type (e.g., "graphical", "teaser"). From `<abstract abstract-type="...">`.
203 pub abstract_type: Option<String>,
204 /// Plain abstract text (flattened). Concatenation of all `<p>` texts.
205 pub text: String,
206 /// Structured abstract sections. From `<abstract>/<sec>`.
207 /// Present when the abstract has labeled sections (e.g., Background, Methods, Results).
208 pub sections: Vec<AbstractSection>,
209}
210
211/// Structured abstract section.
212///
213/// Maps to `<abstract>/<sec>`. Many biomedical journals use structured abstracts
214/// with labeled sections (Background, Methods, Results, Conclusions).
215/// This structure preserves the section boundaries for text mining pipelines
216/// that need to process abstract sections independently.
217#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
218pub struct AbstractSection {
219 /// Section label (e.g., "Background", "Methods", "Results", "Conclusions").
220 /// From `<title>` inside `<abstract>/<sec>`.
221 pub label: Option<String>,
222 /// Section text content. From `<p>` inside `<abstract>/<sec>`.
223 pub text: String,
224}
225
226// ============================================================================
227// Body content (<body>)
228// ============================================================================
229
230/// Article body.
231///
232/// Maps to JATS `<body>`: `(p | sec | ...)*`.
233#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
234pub struct Body {
235 /// Article sections. From `<body>/<sec>`.
236 pub sections: Vec<Section>,
237}
238
239/// Article section.
240///
241/// Maps to JATS `<sec>`. Sections form a recursive tree via `subsections`.
242/// Figures, tables, and formulas that appear inline within the section are
243/// collected in their respective fields.
244#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
245pub struct Section {
246 /// Section ID. From `<sec id="...">`.
247 pub id: Option<String>,
248 /// Section type. From `<sec sec-type="...">`.
249 /// Common values: `"intro"`, `"methods"`, `"results"`, `"discussion"`,
250 /// `"conclusions"`, `"supplementary-material"`, `"data-availability"`.
251 pub section_type: Option<String>,
252 /// Section label/number (e.g., "1.", "2.1"). From `<label>`.
253 pub label: Option<String>,
254 /// Section title. From `<title>`.
255 pub title: Option<String>,
256 /// Concatenated paragraph text. From `<p>` elements.
257 pub content: String,
258 /// Nested subsections. From child `<sec>` elements.
259 pub subsections: Vec<Section>,
260 /// Figures within this section. From `<fig>` elements.
261 pub figures: Vec<Figure>,
262 /// Tables within this section. From `<table-wrap>` elements.
263 pub tables: Vec<Table>,
264 /// Display formulas within this section. From `<disp-formula>` elements.
265 pub formulas: Vec<Formula>,
266}
267
268/// Figure.
269///
270/// Maps to JATS `<fig>`. The `graphic_href` field contains the domain-level
271/// reference to the graphic file (from `<graphic xlink:href="...">`).
272/// Actual file extraction paths and sizes belong to the client layer's
273/// `ExtractedFigure` type, not here.
274#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
275pub struct Figure {
276 /// Figure ID. From `<fig id="...">`.
277 pub id: String,
278 /// Figure label (e.g., "Figure 1"). From `<label>`.
279 pub label: Option<String>,
280 /// Figure caption. From `<caption>/<p>`.
281 pub caption: String,
282 /// Alt text. From `<alt-text>`.
283 pub alt_text: Option<String>,
284 /// Figure type (e.g., "figure", "scheme", "chart"). From `<fig fig-type="...">`.
285 pub fig_type: Option<String>,
286 /// Graphic href from the XML. From `<graphic xlink:href="...">`.
287 pub graphic_href: Option<String>,
288}
289
290/// Table wrapper.
291///
292/// Maps to JATS `<table-wrap>`. Table body is parsed into structured rows/cells
293/// for direct programmatic access without requiring downstream HTML parsing.
294#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
295pub struct Table {
296 /// Table ID. From `<table-wrap id="...">`.
297 pub id: String,
298 /// Table label (e.g., "Table 1"). From `<label>`.
299 pub label: Option<String>,
300 /// Table caption. From `<caption>/<p>`.
301 pub caption: String,
302 /// Header rows. From `<thead>/<tr>`.
303 pub head: Vec<TableRow>,
304 /// Body rows. From `<tbody>/<tr>` (or direct `<tr>` if no `<tbody>`).
305 pub body: Vec<TableRow>,
306 /// Table footnotes. From `<table-wrap-foot>/<fn>`.
307 pub footnotes: Vec<String>,
308}
309
310/// A single table row.
311///
312/// Maps to XHTML `<tr>` inside JATS `<table>`.
313#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
314pub struct TableRow {
315 /// Cells in this row. From `<th>` or `<td>` elements.
316 pub cells: Vec<TableCell>,
317}
318
319/// A single table cell.
320///
321/// Maps to XHTML `<th>` or `<td>` inside JATS `<table>`.
322#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
323pub struct TableCell {
324 /// Cell text content (XML tags stripped).
325 pub content: String,
326 /// Whether this is a header cell (`<th>`) vs data cell (`<td>`).
327 pub is_header: bool,
328 /// Column span. From `@colspan` attribute.
329 pub colspan: Option<u32>,
330 /// Row span. From `@rowspan` attribute.
331 pub rowspan: Option<u32>,
332}
333
334/// Display formula.
335///
336/// Maps to JATS `<disp-formula>`. Formulas can be represented as MathML,
337/// TeX/LaTeX, plain text, or as graphic images. The `notation` field indicates
338/// which representation is stored in `content`.
339#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
340pub struct Formula {
341 /// Formula ID. From `<disp-formula id="...">`.
342 pub id: Option<String>,
343 /// Formula label (e.g., "1", "(1)"). From `<label>`.
344 pub label: Option<String>,
345 /// Formula content. From `<tex-math>`, `<mml:math>`, or text content.
346 pub content: Option<String>,
347 /// Notation type indicating the format of `content`.
348 /// `"tex"` for `<tex-math>`, `"mathml"` for `<mml:math>`, `"text"` for plain text.
349 pub notation: Option<String>,
350 /// Graphic href for image-based formulas. From `<graphic xlink:href="...">`.
351 pub graphic_href: Option<String>,
352}
353
354// ============================================================================
355// Back matter (<back>)
356// ============================================================================
357
358/// Back matter.
359///
360/// Maps to JATS `<back>`: `(ack | app-group | bio | fn-group | glossary |
361/// ref-list | notes | sec)*`.
362#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
363pub struct Back {
364 /// Acknowledgments. From `<ack>`.
365 pub acknowledgments: Option<String>,
366 /// Conflict of interest statement. From `<fn-group>/<fn fn-type="COI-statement">`
367 /// or `<sec>` whose title mentions conflicts/competing interests.
368 pub conflict_of_interest: Option<String>,
369 /// Reference list. From `<ref-list>/<ref>`.
370 pub references: Vec<Reference>,
371 /// Appendices. From `<app-group>/<app>`.
372 pub appendices: Vec<Section>,
373 /// Glossary definitions. From `<glossary>/<def-list>`.
374 pub glossary: Vec<Definition>,
375}
376
377/// Reference citation.
378///
379/// Maps to JATS `<ref>` containing `<element-citation>` or `<mixed-citation>`.
380#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
381pub struct Reference {
382 /// Reference ID. From `<ref id="...">`.
383 pub id: String,
384 /// Publication type. From `<element-citation publication-type="...">`.
385 /// Common values: `"journal"`, `"book"`, `"web"`, `"other"`.
386 pub publication_type: Option<String>,
387 /// Article or chapter title. From `<article-title>` or `<chapter-title>`.
388 pub title: Option<String>,
389 /// Authors. From `<person-group>/<name>`.
390 pub authors: Vec<Author>,
391 /// Source (journal name or book title). From `<source>`.
392 pub source: Option<String>,
393 /// Publication year. From `<year>`.
394 pub year: Option<String>,
395 /// Volume. From `<volume>`.
396 pub volume: Option<String>,
397 /// Issue. From `<issue>`.
398 pub issue: Option<String>,
399 /// Page range. From `<fpage>`-`<lpage>`.
400 pub pages: Option<String>,
401 /// PubMed ID. From `<pub-id pub-id-type="pmid">`.
402 pub pmid: Option<String>,
403 /// DOI. From `<pub-id pub-id-type="doi">`.
404 pub doi: Option<String>,
405}
406
407impl Reference {
408 /// Format a human-readable citation string.
409 pub fn format_citation(&self) -> String {
410 let mut parts = Vec::new();
411
412 if !self.authors.is_empty() {
413 let author_names: Vec<String> = self
414 .authors
415 .iter()
416 .map(|a| a.full_name.clone())
417 .filter(|n| !n.trim().is_empty())
418 .collect();
419 if !author_names.is_empty() {
420 parts.push(author_names.join(", "));
421 }
422 }
423
424 if let Some(title) = &self.title
425 && !title.trim().is_empty()
426 {
427 parts.push(title.clone());
428 }
429
430 if let Some(source) = &self.source
431 && !source.trim().is_empty()
432 {
433 let mut source_part = source.clone();
434 if let Some(year) = &self.year
435 && !year.trim().is_empty()
436 && year != "n.d."
437 {
438 source_part.push_str(&format!(" ({year})"));
439 }
440 if let Some(volume) = &self.volume
441 && !volume.trim().is_empty()
442 {
443 source_part.push_str(&format!(" {volume}"));
444 if let Some(issue) = &self.issue
445 && !issue.trim().is_empty()
446 {
447 source_part.push_str(&format!("({issue})"));
448 }
449 }
450 if let Some(pages) = &self.pages
451 && !pages.trim().is_empty()
452 {
453 source_part.push_str(&format!(": {pages}"));
454 }
455 parts.push(source_part);
456 }
457
458 let result = parts.join(". ");
459 if result.trim().is_empty() {
460 format!("Reference {}", self.id)
461 } else {
462 result
463 }
464 }
465}
466
467/// Funding information.
468///
469/// Maps to JATS `<funding-group>/<award-group>`.
470#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
471pub struct FundingInfo {
472 /// Funding source/agency. From `<funding-source>`.
473 pub source: String,
474 /// Grant/award ID. From `<award-id>`.
475 pub award_id: Option<String>,
476 /// Funding statement. From `<funding-statement>`.
477 pub statement: Option<String>,
478}
479
480/// Supplementary material.
481///
482/// Maps to JATS `<supplementary-material>`. Only contains domain-level
483/// data from the XML. Inferred values like file type (derived from URL extension)
484/// and layout attributes like `position` are excluded.
485#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
486pub struct SupplementaryMaterial {
487 /// Material ID. From `<supplementary-material id="...">`.
488 pub id: String,
489 /// Content type. From `<supplementary-material content-type="...">`.
490 pub content_type: Option<String>,
491 /// Title or label. From `<label>` or `<caption>/<title>`.
492 pub title: Option<String>,
493 /// Description. From `<caption>/<p>`.
494 pub description: Option<String>,
495 /// Resource href. From `<supplementary-material xlink:href="...">`.
496 pub href: Option<String>,
497}
498
499impl SupplementaryMaterial {
500 /// Check if this material is a tar archive based on the href extension.
501 pub fn is_tar_file(&self) -> bool {
502 if let Some(url) = &self.href {
503 url.ends_with(".tar")
504 || url.ends_with(".tar.gz")
505 || url.ends_with(".tar.bz2")
506 || url.ends_with(".tgz")
507 } else {
508 false
509 }
510 }
511
512 /// Get file extension from the href.
513 pub fn get_file_extension(&self) -> Option<String> {
514 if let Some(url) = &self.href
515 && let Some(filename) = url.split('/').next_back()
516 && let Some(dot_index) = filename.rfind('.')
517 {
518 return Some(filename[dot_index + 1..].to_lowercase());
519 }
520 None
521 }
522
523 /// Check if this is an archive file (zip, tar, etc.).
524 pub fn is_archive(&self) -> bool {
525 if let Some(ext) = self.get_file_extension() {
526 matches!(
527 ext.as_str(),
528 "zip" | "tar" | "gz" | "bz2" | "tgz" | "rar" | "7z"
529 )
530 } else {
531 false
532 }
533 }
534}
535
536// ============================================================================
537// Text mining support types
538// ============================================================================
539
540/// Term definition.
541///
542/// Maps to JATS `<def-list>/<def-item>`. Used for abbreviation lists and
543/// glossaries commonly found in biomedical articles.
544///
545/// Example XML:
546/// ```xml
547/// <def-item>
548/// <term>HPV</term>
549/// <def><p>Human papillomavirus</p></def>
550/// </def-item>
551/// ```
552#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
553pub struct Definition {
554 /// Term being defined. From `<term>`.
555 pub term: String,
556 /// Definition text. From `<def>/<p>`.
557 pub definition: String,
558}
559
560// ============================================================================
561// Aggregate accessors
562// ============================================================================
563
564impl PmcArticle {
565 /// PMC ID of this article (aggregate identity).
566 pub fn pmcid(&self) -> &PmcId {
567 &self.front.article_meta.pmcid
568 }
569
570 /// PubMed ID, if present.
571 pub fn pmid(&self) -> Option<&PubMedId> {
572 self.front.article_meta.pmid.as_ref()
573 }
574
575 /// DOI, if present.
576 pub fn doi(&self) -> Option<&str> {
577 self.front.article_meta.doi.as_deref()
578 }
579
580 /// Article title.
581 pub fn title(&self) -> &str {
582 &self.front.article_meta.title_group.article_title
583 }
584
585 /// Article subtitle, if present.
586 pub fn subtitle(&self) -> Option<&str> {
587 self.front.article_meta.title_group.subtitle.as_deref()
588 }
589
590 /// Authors.
591 pub fn authors(&self) -> &[Author] {
592 &self.front.article_meta.authors
593 }
594
595 /// Journal metadata.
596 pub fn journal(&self) -> &JournalMeta {
597 &self.front.journal_meta
598 }
599
600 /// Publication dates.
601 pub fn pub_dates(&self) -> &[PublicationDate] {
602 &self.front.article_meta.pub_dates
603 }
604
605 /// Subject categories.
606 pub fn categories(&self) -> &[String] {
607 &self.front.article_meta.categories
608 }
609
610 /// Volume number, if present.
611 pub fn volume(&self) -> Option<&str> {
612 self.front.article_meta.volume.as_deref()
613 }
614
615 /// Issue number, if present.
616 pub fn issue(&self) -> Option<&str> {
617 self.front.article_meta.issue.as_deref()
618 }
619
620 /// Keywords.
621 pub fn keywords(&self) -> &[String] {
622 &self.front.article_meta.keywords
623 }
624
625 /// Funding information.
626 pub fn funding(&self) -> &[FundingInfo] {
627 &self.front.article_meta.funding
628 }
629
630 /// Publication history dates.
631 pub fn history(&self) -> &[HistoryDate] {
632 &self.front.article_meta.history
633 }
634
635 /// Text of the main abstract (first `<abstract>`), if present.
636 pub fn abstract_text(&self) -> Option<&str> {
637 self.front
638 .article_meta
639 .abstracts
640 .first()
641 .map(|a| a.text.as_str())
642 }
643
644 /// Copyright statement, if present.
645 pub fn copyright(&self) -> Option<&str> {
646 self.front
647 .article_meta
648 .permissions
649 .as_ref()
650 .and_then(|p| p.copyright_statement.as_deref())
651 }
652
653 /// License text, if present.
654 pub fn license_text(&self) -> Option<&str> {
655 self.front
656 .article_meta
657 .permissions
658 .as_ref()
659 .and_then(|p| p.license.as_ref())
660 .and_then(|l| l.text.as_deref())
661 }
662
663 /// License URL, if present.
664 pub fn license_url(&self) -> Option<&str> {
665 self.front
666 .article_meta
667 .permissions
668 .as_ref()
669 .and_then(|p| p.license.as_ref())
670 .and_then(|l| l.href.as_deref())
671 }
672
673 /// Body sections (empty slice when the article has no body).
674 pub fn sections(&self) -> &[Section] {
675 self.body.as_ref().map_or(&[], |b| b.sections.as_slice())
676 }
677
678 /// References (empty slice when the article has no back matter).
679 pub fn references(&self) -> &[Reference] {
680 self.back.as_ref().map_or(&[], |b| b.references.as_slice())
681 }
682
683 /// Acknowledgments, if present.
684 pub fn acknowledgments(&self) -> Option<&str> {
685 self.back
686 .as_ref()
687 .and_then(|b| b.acknowledgments.as_deref())
688 }
689
690 /// Conflict of interest statement, if present.
691 pub fn conflict_of_interest(&self) -> Option<&str> {
692 self.back
693 .as_ref()
694 .and_then(|b| b.conflict_of_interest.as_deref())
695 }
696
697 /// Get tar files from supplementary materials.
698 pub fn get_tar_files(&self) -> Vec<&SupplementaryMaterial> {
699 self.supplementary_materials
700 .iter()
701 .filter(|m| m.is_tar_file())
702 .collect()
703 }
704
705 /// Get all archive files from supplementary materials.
706 pub fn get_archive_files(&self) -> Vec<&SupplementaryMaterial> {
707 self.supplementary_materials
708 .iter()
709 .filter(|m| m.is_archive())
710 .collect()
711 }
712
713 /// Check if the article has supplementary materials.
714 pub fn has_supplementary_materials(&self) -> bool {
715 !self.supplementary_materials.is_empty()
716 }
717
718 /// Get supplementary materials by content type.
719 pub fn get_supplementary_materials_by_type(
720 &self,
721 content_type: &str,
722 ) -> Vec<&SupplementaryMaterial> {
723 self.supplementary_materials
724 .iter()
725 .filter(|m| m.content_type.as_ref().is_some_and(|ct| ct == content_type))
726 .collect()
727 }
728}