pubmed_formatter/pubmed/
export.rs

//! Citation export formats for PubMed articles
//!
//! This module provides functionality to export PubMed article metadata to various
//! standard citation formats commonly used in academic research:
//!
//! - **BibTeX** - Used by LaTeX and many reference managers
//! - **RIS** - Used by Zotero, Mendeley, EndNote, and many others
//! - **CSL-JSON** - Citation Style Language JSON format
//! - **NBIB** - MEDLINE/PubMed native format
10
11use pubmed_parser::pubmed::models::PubMedArticle;
12use serde_json::{Value, json};
13
14/// Generate a BibTeX citation key from article metadata
15fn generate_bibtex_key(article: &PubMedArticle) -> String {
16    let first_author = article
17        .authors
18        .first()
19        .map(|a| {
20            a.full_name
21                .split_whitespace()
22                .next()
23                .unwrap_or("Unknown")
24                .to_string()
25        })
26        .unwrap_or_else(|| "Unknown".to_string());
27
28    let year = article
29        .pub_date
30        .split_whitespace()
31        .find(|s| s.len() == 4 && s.chars().all(|c| c.is_ascii_digit()))
32        .unwrap_or("0000");
33
34    format!("{}{}_pmid{}", first_author, year, article.pmid)
35}
36
/// Escape characters that have special meaning to LaTeX/BibTeX.
///
/// `& % _ # { } $` are prefixed with a backslash. A literal backslash, `~`
/// and `^` have no single-character escape in text mode, so they are replaced
/// with `\textbackslash{}`, `\textasciitilde{}` and `\textasciicircum{}`.
/// All other characters are copied through unchanged.
///
/// The input is processed in a single pass, so characters produced by one
/// substitution are never escaped again.
fn escape_bibtex(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' | '%' | '_' | '#' | '{' | '}' | '$' => {
                escaped.push('\\');
                escaped.push(ch);
            }
            '\\' => escaped.push_str(r"\textbackslash{}"),
            '~' => escaped.push_str(r"\textasciitilde{}"),
            '^' => escaped.push_str(r"\textasciicircum{}"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
46
47/// Trait for exporting PubMed articles to various citation formats
48pub trait ExportFormat {
49    /// Export the article metadata as a BibTeX entry
50    fn to_bibtex(&self) -> String;
51
52    /// Export the article metadata in RIS format
53    fn to_ris(&self) -> String;
54
55    /// Export the article metadata as CSL-JSON
56    fn to_csl_json(&self) -> Value;
57
58    /// Export the article metadata in MEDLINE/NBIB format
59    fn to_nbib(&self) -> String;
60}
61
62impl ExportFormat for PubMedArticle {
63    fn to_bibtex(&self) -> String {
64        let key = generate_bibtex_key(self);
65        let mut lines = Vec::new();
66
67        lines.push(format!("@article{{{key},"));
68
69        lines.push(format!("  title = {{{}}},", escape_bibtex(&self.title)));
70
71        if !self.authors.is_empty() {
72            let authors: Vec<String> = self
73                .authors
74                .iter()
75                .map(|a| {
76                    // BibTeX prefers "Surname, GivenNames" format
77                    if let (Some(surname), Some(given)) = (&a.surname, &a.given_names) {
78                        escape_bibtex(&format!("{surname}, {given}"))
79                    } else {
80                        escape_bibtex(&a.full_name)
81                    }
82                })
83                .collect();
84            lines.push(format!("  author = {{{}}},", authors.join(" and ")));
85        }
86
87        lines.push(format!("  journal = {{{}}},", escape_bibtex(&self.journal)));
88        lines.push(format!("  year = {{{}}},", self.pub_date));
89
90        if let Some(ref volume) = self.volume {
91            lines.push(format!("  volume = {{{volume}}},"));
92        }
93        if let Some(ref issue) = self.issue {
94            lines.push(format!("  number = {{{issue}}},"));
95        }
96        if let Some(ref pages) = self.pages {
97            lines.push(format!("  pages = {{{pages}}},"));
98        }
99        if let Some(ref doi) = self.doi {
100            lines.push(format!("  doi = {{{doi}}},"));
101        }
102        lines.push(format!("  pmid = {{{}}},", self.pmid));
103        if let Some(ref pmc_id) = self.pmc_id {
104            lines.push(format!("  pmcid = {{{pmc_id}}},"));
105        }
106        if let Some(ref issn) = self.issn {
107            lines.push(format!("  issn = {{{issn}}},"));
108        }
109        if let Some(ref lang) = self.language {
110            lines.push(format!("  language = {{{lang}}},"));
111        }
112
113        lines.push("}".to_string());
114
115        lines.join("\n")
116    }
117
118    fn to_ris(&self) -> String {
119        let mut lines = Vec::new();
120
121        lines.push("TY  - JOUR".to_string());
122        lines.push(format!("TI  - {}", self.title));
123
124        for author in &self.authors {
125            // RIS prefers "Surname, GivenNames" format
126            if let (Some(surname), Some(given)) = (&author.surname, &author.given_names) {
127                lines.push(format!("AU  - {surname}, {given}"));
128            } else {
129                lines.push(format!("AU  - {}", author.full_name));
130            }
131        }
132
133        lines.push(format!("JO  - {}", self.journal));
134        if let Some(ref abbr) = self.journal_abbreviation {
135            lines.push(format!("JA  - {abbr}"));
136        }
137
138        lines.push(format!("PY  - {}", self.pub_date));
139
140        if let Some(ref volume) = self.volume {
141            lines.push(format!("VL  - {volume}"));
142        }
143        if let Some(ref issue) = self.issue {
144            lines.push(format!("IS  - {issue}"));
145        }
146        if let Some(ref pages) = self.pages {
147            // RIS uses SP (start page) and EP (end page)
148            if let Some((start, end)) = pages.split_once('-') {
149                lines.push(format!("SP  - {}", start.trim()));
150                lines.push(format!("EP  - {}", end.trim()));
151            } else {
152                lines.push(format!("SP  - {pages}"));
153            }
154        }
155        if let Some(ref doi) = self.doi {
156            lines.push(format!("DO  - {doi}"));
157        }
158        lines.push(format!("AN  - PMID:{}", self.pmid));
159        if let Some(ref pmc_id) = self.pmc_id {
160            lines.push(format!("C1  - {pmc_id}"));
161        }
162        if let Some(ref issn) = self.issn {
163            lines.push(format!("SN  - {issn}"));
164        }
165        if let Some(ref lang) = self.language {
166            lines.push(format!("LA  - {lang}"));
167        }
168        if let Some(ref abstract_text) = self.abstract_text {
169            lines.push(format!("AB  - {abstract_text}"));
170        }
171        for kw in self.keywords.as_deref().unwrap_or(&[]) {
172            lines.push(format!("KW  - {kw}"));
173        }
174
175        lines.push("ER  - ".to_string());
176        lines.join("\n")
177    }
178
179    fn to_csl_json(&self) -> Value {
180        let mut csl = json!({
181            "type": "article-journal",
182            "id": format!("pmid:{}", self.pmid),
183            "title": self.title,
184            "container-title": self.journal,
185        });
186
187        // Authors
188        if !self.authors.is_empty() {
189            let authors: Vec<Value> = self
190                .authors
191                .iter()
192                .map(|a| {
193                    // Prefer structured name fields, fall back to splitting full_name
194                    if a.surname.is_some() || a.given_names.is_some() {
195                        let mut name = json!({});
196                        if let Some(ref surname) = a.surname {
197                            name["family"] = json!(surname);
198                        }
199                        if let Some(ref given) = a.given_names {
200                            name["given"] = json!(given);
201                        }
202                        if let Some(ref suffix) = a.suffix {
203                            name["suffix"] = json!(suffix);
204                        }
205                        name
206                    } else {
207                        let parts: Vec<&str> = a.full_name.rsplitn(2, ' ').collect();
208                        if parts.len() == 2 {
209                            json!({
210                                "family": parts[0],
211                                "given": parts[1]
212                            })
213                        } else {
214                            json!({ "literal": a.full_name })
215                        }
216                    }
217                })
218                .collect();
219            csl["author"] = Value::Array(authors);
220        }
221
222        // Date
223        let year = self
224            .pub_date
225            .split_whitespace()
226            .find(|s| s.len() == 4 && s.chars().all(|c| c.is_ascii_digit()));
227        if let Some(year) = year {
228            csl["issued"] = json!({
229                "date-parts": [[year.parse::<i32>().unwrap_or(0)]]
230            });
231        }
232
233        if let Some(ref volume) = self.volume {
234            csl["volume"] = json!(volume);
235        }
236        if let Some(ref issue) = self.issue {
237            csl["issue"] = json!(issue);
238        }
239        if let Some(ref pages) = self.pages {
240            csl["page"] = json!(pages);
241        }
242        if let Some(ref doi) = self.doi {
243            csl["DOI"] = json!(doi);
244        }
245        csl["PMID"] = json!(self.pmid);
246        if let Some(ref pmc_id) = self.pmc_id {
247            csl["PMCID"] = json!(pmc_id);
248        }
249        if let Some(ref issn) = self.issn {
250            csl["ISSN"] = json!(issn);
251        }
252        if let Some(ref lang) = self.language {
253            csl["language"] = json!(lang);
254        }
255        if let Some(ref abstract_text) = self.abstract_text {
256            csl["abstract"] = json!(abstract_text);
257        }
258        if let Some(ref abbr) = self.journal_abbreviation {
259            csl["container-title-short"] = json!(abbr);
260        }
261
262        csl
263    }
264
265    fn to_nbib(&self) -> String {
266        let mut lines = Vec::new();
267
268        lines.push(format!("PMID- {}", self.pmid));
269        lines.push(format!("TI  - {}", self.title));
270
271        for author in &self.authors {
272            // NBIB uses FAU for full name and AU for abbreviated name
273            lines.push(format!("FAU - {}", author.full_name));
274            if let (Some(surname), Some(initials)) = (&author.surname, &author.initials) {
275                lines.push(format!("AU  - {surname} {initials}"));
276            }
277        }
278
279        lines.push(format!("TA  - {}", self.journal));
280        if let Some(ref abbr) = self.journal_abbreviation {
281            lines.push(format!("JT  - {abbr}"));
282        }
283
284        lines.push(format!("DP  - {}", self.pub_date));
285
286        if let Some(ref volume) = self.volume {
287            lines.push(format!("VI  - {volume}"));
288        }
289        if let Some(ref issue) = self.issue {
290            lines.push(format!("IP  - {issue}"));
291        }
292        if let Some(ref pages) = self.pages {
293            lines.push(format!("PG  - {pages}"));
294        }
295        if let Some(ref doi) = self.doi {
296            lines.push(format!("AID - {doi} [doi]"));
297        }
298        if let Some(ref pmc_id) = self.pmc_id {
299            lines.push(format!("PMC - {pmc_id}"));
300        }
301        if let Some(ref issn) = self.issn {
302            lines.push(format!("IS  - {issn}"));
303        }
304        if let Some(ref lang) = self.language {
305            lines.push(format!("LA  - {lang}"));
306        }
307        if let Some(ref abstract_text) = self.abstract_text {
308            lines.push(format!("AB  - {abstract_text}"));
309        }
310        for pt in &self.article_types {
311            lines.push(format!("PT  - {pt}"));
312        }
313
314        lines.join("\n")
315    }
316}
317
318/// Export multiple articles as a single BibTeX string
319pub fn articles_to_bibtex(articles: &[PubMedArticle]) -> String {
320    articles
321        .iter()
322        .map(|a| a.to_bibtex())
323        .collect::<Vec<_>>()
324        .join("\n\n")
325}
326
327/// Export multiple articles as a single RIS string
328pub fn articles_to_ris(articles: &[PubMedArticle]) -> String {
329    articles
330        .iter()
331        .map(|a| a.to_ris())
332        .collect::<Vec<_>>()
333        .join("\n")
334}
335
336/// Export multiple articles as a CSL-JSON array
337pub fn articles_to_csl_json(articles: &[PubMedArticle]) -> Value {
338    Value::Array(articles.iter().map(|a| a.to_csl_json()).collect())
339}
340
#[cfg(test)]
mod tests {
    use super::*;
    use pubmed_parser::common::{Affiliation, Author};

    /// Build an author with structured name parts and no optional metadata.
    fn make_author(
        given: &str,
        surname: &str,
        initials: &str,
        affiliations: Vec<Affiliation>,
    ) -> Author {
        Author {
            surname: Some(surname.to_string()),
            given_names: Some(given.to_string()),
            initials: Some(initials.to_string()),
            suffix: None,
            full_name: format!("{given} {surname}"),
            orcid: None,
            email: None,
            is_corresponding: false,
            roles: vec![],
            affiliations,
        }
    }

    /// Fixture: a two-author Lancet article with every optional field that the
    /// exporters read populated.
    fn sample_article() -> PubMedArticle {
        let harvard = Affiliation {
            id: None,
            institution: Some("Harvard University".to_string()),
            department: None,
            address: None,
            country: None,
        };

        PubMedArticle {
            pmid: "33515491".to_string(),
            title: "Effectiveness of COVID-19 Vaccines".to_string(),
            authors: vec![
                make_author("John", "Smith", "J", vec![harvard]),
                make_author("Jane", "Doe", "J", vec![]),
            ],
            author_count: 2,
            journal: "The Lancet".to_string(),
            pub_date: "2021".to_string(),
            doi: Some("10.1016/S0140-6736(21)00234-8".to_string()),
            pmc_id: Some("PMC7906746".to_string()),
            abstract_text: Some("Background: COVID-19 vaccines have been developed...".to_string()),
            structured_abstract: None,
            article_types: vec!["Journal Article".to_string()],
            mesh_headings: None,
            keywords: Some(vec!["COVID-19".to_string(), "Vaccine".to_string()]),
            chemical_list: None,
            volume: Some("397".to_string()),
            issue: Some("10275".to_string()),
            pages: Some("671-681".to_string()),
            language: Some("eng".to_string()),
            journal_abbreviation: Some("Lancet".to_string()),
            issn: Some("0140-6736".to_string()),
        }
    }

    /// Assert that every expected fragment occurs somewhere in `output`.
    fn assert_contains_all(output: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(output.contains(fragment));
        }
    }

    #[test]
    fn test_bibtex_export() {
        let bibtex = sample_article().to_bibtex();

        assert!(bibtex.starts_with("@article{John2021_pmid33515491,"));
        assert_contains_all(
            &bibtex,
            &[
                "title = {Effectiveness of COVID-19 Vaccines}",
                "author = {Smith, John and Doe, Jane}",
                "journal = {The Lancet}",
                "year = {2021}",
                "volume = {397}",
                "doi = {10.1016/S0140-6736(21)00234-8}",
                "pmid = {33515491}",
                "pmcid = {PMC7906746}",
            ],
        );
        assert!(bibtex.ends_with('}'));
    }

    #[test]
    fn test_ris_export() {
        let ris = sample_article().to_ris();

        assert!(ris.starts_with("TY  - JOUR"));
        assert_contains_all(
            &ris,
            &[
                "TI  - Effectiveness of COVID-19 Vaccines",
                "AU  - Smith, John",
                "AU  - Doe, Jane",
                "JO  - The Lancet",
                "PY  - 2021",
                "VL  - 397",
                "SP  - 671",
                "EP  - 681",
                "DO  - 10.1016/S0140-6736(21)00234-8",
                "KW  - COVID-19",
                "KW  - Vaccine",
            ],
        );
        assert!(ris.ends_with("ER  - "));
    }

    #[test]
    fn test_csl_json_export() {
        let csl = sample_article().to_csl_json();

        let expected_fields = [
            ("type", "article-journal"),
            ("title", "Effectiveness of COVID-19 Vaccines"),
            ("container-title", "The Lancet"),
            ("volume", "397"),
            ("issue", "10275"),
            ("page", "671-681"),
            ("DOI", "10.1016/S0140-6736(21)00234-8"),
            ("PMID", "33515491"),
            ("PMCID", "PMC7906746"),
            ("language", "eng"),
            ("container-title-short", "Lancet"),
        ];
        for (key, expected) in expected_fields {
            assert_eq!(csl[key], expected);
        }

        // Check authors (uses structured surname/given_names fields)
        let authors = csl["author"].as_array().unwrap();
        assert_eq!(authors.len(), 2);
        let expected_names = [("Smith", "John"), ("Doe", "Jane")];
        for (author, (family, given)) in authors.iter().zip(expected_names) {
            assert_eq!(author["family"], family);
            assert_eq!(author["given"], given);
        }
    }

    #[test]
    fn test_nbib_export() {
        let nbib = sample_article().to_nbib();

        assert_contains_all(
            &nbib,
            &[
                "PMID- 33515491",
                "TI  - Effectiveness of COVID-19 Vaccines",
                "FAU - John Smith",
                "AU  - Smith J",
                "FAU - Jane Doe",
                "AU  - Doe J",
                "TA  - The Lancet",
                "DP  - 2021",
                "VI  - 397",
                "AID - 10.1016/S0140-6736(21)00234-8 [doi]",
                "PMC - PMC7906746",
            ],
        );
    }

    #[test]
    fn test_batch_bibtex_export() {
        let article = sample_article();
        let articles = vec![article.clone(), article];
        let bibtex = articles_to_bibtex(&articles);

        // Should contain two entries separated by blank line
        assert_eq!(bibtex.split("\n\n").count(), 2);
    }

    #[test]
    fn test_batch_csl_json_export() {
        let article = sample_article();
        let articles = vec![article.clone(), article];
        let csl = articles_to_csl_json(&articles);

        assert!(csl.is_array());
        assert_eq!(csl.as_array().unwrap().len(), 2);
    }
}