pubmed_parser/pmc/
oa_api.rs1use crate::common::PmcId;
11use crate::error::{ParseError, Result};
12use quick_xml::de::from_str;
13use serde::{Deserialize, Serialize};
14use tracing::debug;
15
16#[derive(Debug, Deserialize)]
22#[serde(rename = "OA")]
23struct OaResponse {
24 #[serde(rename = "error")]
25 error: Option<OaError>,
26 #[serde(rename = "records")]
27 records: Option<OaRecords>,
28}
29
30#[derive(Debug, Deserialize)]
32struct OaError {
33 #[serde(rename = "@code")]
34 code: Option<String>,
35 #[serde(rename = "$text")]
36 message: String,
37}
38
39#[derive(Debug, Deserialize)]
41struct OaRecords {
42 #[serde(rename = "record", default)]
43 record: Vec<OaRecord>,
44}
45
46#[derive(Debug, Deserialize)]
48struct OaRecord {
49 #[serde(rename = "@id")]
50 _id: Option<String>,
51 #[serde(rename = "@citation")]
52 citation: Option<String>,
53 #[serde(rename = "@license")]
54 license: Option<String>,
55 #[serde(rename = "@retracted")]
56 retracted: Option<String>,
57 #[serde(rename = "link")]
58 link: Option<OaLink>,
59}
60
/// A `<link>` element nested inside a `<record>`.
///
/// All three attributes are optional and kept as the raw strings found in the
/// XML; nothing here parses the timestamp or validates the URL.
#[derive(Debug, Deserialize)]
struct OaLink {
    /// Value of the `format` attribute (the test fixtures use `"tgz"`).
    #[serde(rename = "@format")]
    format: Option<String>,
    /// Value of the `updated` attribute.
    #[serde(rename = "@updated")]
    updated: Option<String>,
    /// Value of the `href` attribute, i.e. the download location.
    #[serde(rename = "@href")]
    href: Option<String>,
}
71
/// Endpoint of the OA API; `build_oa_api_url` appends the `?id=` query to it.
const OA_API_BASE_URL: &str = "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi";
78
79pub fn build_oa_api_url(pmcid: &str) -> Result<String> {
81 let pmc_id = PmcId::parse(pmcid)?;
82 Ok(format!("{}?id={}", OA_API_BASE_URL, pmc_id.as_str()))
83}
84
85pub fn parse_oa_response(xml: &str, pmcid: &str) -> Result<OaSubsetInfo> {
96 let oa_response: OaResponse = from_str(xml).map_err(|e| {
97 debug!(pmcid = %pmcid, error = %e, "Failed to parse OA API response");
98 ParseError::XmlError(format!("Failed to parse OA API response: {e}"))
99 })?;
100
101 if let Some(error) = oa_response.error {
103 return Ok(OaSubsetInfo::not_available(
104 pmcid.to_string(),
105 error.code.unwrap_or_else(|| "unknown".to_string()),
106 error.message,
107 ));
108 }
109
110 if let Some(records) = oa_response.records
112 && let Some(record) = records.record.into_iter().next()
113 {
114 let mut info = OaSubsetInfo::available(pmcid.to_string());
115
116 info.citation = record.citation;
117 info.license = record.license;
118 info.retracted = record.retracted.is_some_and(|r| r == "yes");
119
120 if let Some(link) = record.link {
121 info.download_format = link.format;
122 info.updated = link.updated;
123 info.download_link = link.href;
124 }
125
126 return Ok(info);
127 }
128
129 debug!(pmcid = %pmcid, "OA API response has no error and no records");
131 Ok(OaSubsetInfo::not_available(
132 pmcid.to_string(),
133 "parseError".to_string(),
134 "OA API response has no error and no records".to_string(),
135 ))
136}
137
/// Outcome of an OA API lookup for a single PMC ID.
///
/// Built through [`OaSubsetInfo::available`] or
/// [`OaSubsetInfo::not_available`]; `parse_oa_response` then fills the
/// record-derived fields on the available variant.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OaSubsetInfo {
    /// The PMC ID this result refers to, as supplied by the caller.
    pub pmcid: String,
    /// `true` when built via `available`, `false` when built via `not_available`.
    pub is_oa_subset: bool,
    /// The record's `citation` attribute, if present.
    pub citation: Option<String>,
    /// The record's `license` attribute, if present.
    pub license: Option<String>,
    /// `true` only when the record's `retracted` attribute equals `"yes"`.
    pub retracted: bool,
    /// The `href` attribute of the record's link, if present.
    pub download_link: Option<String>,
    /// The `format` attribute of the record's link, if present.
    pub download_format: Option<String>,
    /// The `updated` attribute of the record's link, kept as the raw string.
    pub updated: Option<String>,
    /// Error code; `Some` only on results built via `not_available`.
    pub error_code: Option<String>,
    /// Error description; `Some` only on results built via `not_available`.
    pub error_message: Option<String>,
}
170
171impl OaSubsetInfo {
172 pub fn available(pmcid: String) -> Self {
174 Self {
175 pmcid,
176 is_oa_subset: true,
177 citation: None,
178 license: None,
179 retracted: false,
180 download_link: None,
181 download_format: None,
182 updated: None,
183 error_code: None,
184 error_message: None,
185 }
186 }
187
188 pub fn not_available(pmcid: String, error_code: String, error_message: String) -> Self {
190 Self {
191 pmcid,
192 is_oa_subset: false,
193 citation: None,
194 license: None,
195 retracted: false,
196 download_link: None,
197 download_format: None,
198 updated: None,
199 error_code: Some(error_code),
200 error_message: Some(error_message),
201 }
202 }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Request URL expected for PMC7906746, shared by both URL assertions.
    const EXPECTED_URL: &str =
        "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?id=PMC7906746";

    #[test]
    fn test_build_oa_api_url() {
        // The prefixed and the bare numeric form must yield the same URL.
        for input in ["PMC7906746", "7906746"] {
            let url = build_oa_api_url(input).unwrap();
            assert_eq!(url, EXPECTED_URL);
        }
    }

    #[test]
    fn test_parse_oa_response_not_open_access() {
        let xml = r#"<OA><responseDate>2026-01-02 10:45:24</responseDate><request>https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?id=PMC8550608</request><error code="idIsNotOpenAccess">identifier 'PMC8550608' is not Open Access</error></OA>"#;

        let info = parse_oa_response(xml, "PMC8550608").unwrap();

        assert!(!info.is_oa_subset);
        assert_eq!(info.pmcid, "PMC8550608");
        assert_eq!(info.error_code.as_deref(), Some("idIsNotOpenAccess"));
        assert!(
            info.error_message
                .as_deref()
                .is_some_and(|message| message.contains("is not Open Access"))
        );
        assert!(info.download_link.is_none());
    }

    #[test]
    fn test_parse_oa_response_open_access() {
        let xml = r#"<OA><responseDate>2026-01-02 10:45:39</responseDate><request id="PMC7906746">https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?id=PMC7906746</request><records returned-count="1" total-count="1"><record id="PMC7906746" citation="Lancet. 2021 Jan 27 6-12 February; 397(10273):452-455" license="none" retracted="no"><link format="tgz" updated="2022-12-16 07:10:15" href="ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/f1/69/PMC7906746.tar.gz" /></record></records></OA>"#;

        let info = parse_oa_response(xml, "PMC7906746").unwrap();

        assert!(info.is_oa_subset);
        assert_eq!(info.pmcid, "PMC7906746");
        assert_eq!(
            info.citation.as_deref(),
            Some("Lancet. 2021 Jan 27 6-12 February; 397(10273):452-455")
        );
        assert_eq!(info.license.as_deref(), Some("none"));
        assert!(!info.retracted);
        assert_eq!(info.download_format.as_deref(), Some("tgz"));
        assert_eq!(info.updated.as_deref(), Some("2022-12-16 07:10:15"));
        assert_eq!(
            info.download_link.as_deref(),
            Some("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/f1/69/PMC7906746.tar.gz")
        );
        assert!(info.error_code.is_none());
    }

    #[test]
    fn test_parse_oa_response_retracted() {
        let xml = r#"<OA><records><record id="PMC1234567" citation="Test" license="cc-by" retracted="yes"><link format="tgz" href="ftp://test.com/file.tar.gz" /></record></records></OA>"#;

        let info = parse_oa_response(xml, "PMC1234567").unwrap();

        assert!(info.is_oa_subset);
        assert!(info.retracted);
        assert_eq!(info.license.as_deref(), Some("cc-by"));
    }
}