Expand description
§PubMed Client
A Rust client library for accessing PubMed and PMC (PubMed Central) APIs. This crate provides easy-to-use interfaces for searching, fetching, and parsing biomedical research articles.
§Features
- PubMed API Integration: Search and fetch article metadata
- PMC Full Text: Retrieve and parse structured full-text articles
- Markdown Export: Convert PMC articles to well-formatted Markdown
- Response Caching: Reduce API quota usage with intelligent caching
- Async Support: Built on tokio for async/await support
- Error Handling: Comprehensive error types for robust error handling
- Type Safety: Strongly typed data structures for all API responses
§Quick Start
§Searching for Articles
use pubmed_client::PubMedClient;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PubMedClient::new();
// Search for articles with query builder
let articles = client
.search()
.query("covid-19 treatment")
.free_full_text_only()
.published_after(2020)
.limit(10)
.search_and_fetch(&client)
.await?;
for article in articles {
println!("Title: {}", article.title);
let author_names: Vec<&str> = article.authors.iter().map(|a| a.full_name.as_str()).collect();
println!("Authors: {}", author_names.join(", "));
}
Ok(())
}
§Fetching Full Text from PMC
use pubmed_client::PmcClient;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
// Check if PMC full text is available
if let Some(pmcid) = client.check_pmc_availability("33515491").await? {
// Fetch structured full text
let full_text = client.fetch_full_text(&pmcid).await?;
println!("Title: {}", full_text.title);
println!("Sections: {}", full_text.sections.len());
println!("References: {}", full_text.references.len());
}
Ok(())
}
§Converting PMC Articles to Markdown
use pubmed_client::{PmcClient, PmcMarkdownConverter, HeadingStyle, ReferenceStyle};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
// Fetch and parse a PMC article
if let Ok(full_text) = client.fetch_full_text("PMC1234567").await {
// Create a markdown converter with custom configuration
let converter = PmcMarkdownConverter::new()
.with_include_metadata(true)
.with_include_toc(true)
.with_heading_style(HeadingStyle::ATX)
.with_reference_style(ReferenceStyle::Numbered);
// Convert to markdown
let markdown = converter.convert(&full_text);
println!("{}", markdown);
// Or save to file
std::fs::write("article.md", markdown)?;
}
Ok(())
}
§Downloading and Extracting PMC Articles as TAR files
use pubmed_client::PmcClient;
use std::path::Path;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
let output_dir = Path::new("./extracted_articles");
// Download and extract a PMC article as tar.gz from the OA API
let files = client.download_and_extract_tar("PMC7906746", output_dir).await?;
println!("Extracted {} files:", files.len());
for file in files {
println!(" - {}", file);
}
Ok(())
}
§Extracting Figures with Captions
use pubmed_client::PmcClient;
use std::path::Path;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
let output_dir = Path::new("./extracted_articles");
// Extract figures and match them with captions from XML
let figures = client.extract_figures_with_captions("PMC7906746", output_dir).await?;
for figure in figures {
println!("Figure {}: {}", figure.figure.id, figure.figure.caption);
println!("File: {}", figure.extracted_file_path);
if let Some(dimensions) = figure.dimensions {
println!("Dimensions: {}x{}", dimensions.0, dimensions.1);
}
}
Ok(())
}
§Response Caching
The library supports intelligent caching to reduce API quota usage and improve performance.
§Basic Caching
use pubmed_client::{PmcClient, ClientConfig};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Enable default memory caching
let config = ClientConfig::new().with_cache();
let client = PmcClient::with_config(config);
// First fetch - hits the API
let article1 = client.fetch_full_text("PMC7906746").await?;
// Second fetch - served from cache
let article2 = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
§Advanced Caching Options
use pubmed_client::{PmcClient, ClientConfig};
use pubmed_client::cache::CacheConfig;
use std::time::Duration;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Memory cache with custom settings
let cache_config = CacheConfig {
max_capacity: 5000,
time_to_live: Duration::from_secs(24 * 60 * 60), // 24 hours
..Default::default()
};
let config = ClientConfig::new()
.with_cache_config(cache_config);
let client = PmcClient::with_config(config);
// Use the client normally - caching happens automatically
let article = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
§Hybrid Cache with Disk Persistence
#[cfg(not(target_arch = "wasm32"))]
{
use pubmed_client::{PmcClient, ClientConfig};
use pubmed_client::cache::CacheConfig;
use std::time::Duration;
use std::path::PathBuf;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Memory cache configuration
let cache_config = CacheConfig {
max_capacity: 1000,
time_to_live: Duration::from_secs(24 * 60 * 60),
..Default::default()
};
let config = ClientConfig::new()
.with_cache_config(cache_config);
let client = PmcClient::with_config(config);
// Articles are cached in memory
let article = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
}
§Cache Management
use pubmed_client::{PmcClient, ClientConfig};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfig::new().with_cache();
let client = PmcClient::with_config(config);
// Fetch some articles
client.fetch_full_text("PMC7906746").await?;
client.fetch_full_text("PMC10618641").await?;
// Check cache statistics
let count = client.cache_entry_count();
println!("Cached items: {}", count);
// Clear the cache when needed
client.clear_cache().await;
Ok(())
}
Re-exports§
pub use config::ClientConfig;
pub use error::PubMedError;
pub use error::Result;
pub use pmc::ExtractedFigure;
pub use pmc::PmcClient;
pub use pmc::PmcTarClient;
pub use pubmed::ArticleType;
pub use pubmed::Language;
pub use pubmed::PubMedClient;
pub use pubmed::SearchQuery;
pub use pubmed::SortOrder;
pub use rate_limit::RateLimiter;
pub use time::Duration;
pub use time::Instant;
pub use time::sleep;
Modules§
- cache
- common
- Common data structures and utilities shared between PubMed and PMC modules
- config
- error
- export
- Citation export formats for PubMed articles
- pmc
- PMC (PubMed Central) client for fetching full-text articles
- pubmed
- PubMed client for searching and fetching article metadata
- rate_limit
- Rate limiting implementation for NCBI API compliance
- retry
- Retry logic with exponential backoff for handling transient network failures
- time
- Internal time management module for cross-platform compatibility
Structs§
- AbstractSection
- A labeled section within a structured abstract
- Affiliation
- Represents an author’s institutional affiliation
- ArticleSummary
- Lightweight article summary from the ESummary API
- Author
- Represents a detailed author with enhanced metadata
- CitationMatch
- Result of a single citation match from the ECitMatch API
- CitationMatches
- Results from ECitMatch API for batch citation matching
- CitationQuery
- Input for a single citation match query
- Citations
- Citation information from ELink API
- Client
- Convenience client that combines both PubMed and PMC functionality
- DatabaseCount
- Record count for a single NCBI database from the EGQuery API
- DatabaseInfo
- Database information from EInfo API
- EPostResult
- Result from EPost API for uploading PMIDs to the NCBI History server
- FieldInfo
- Information about a database search field
- Figure
- Figure.
- FundingInfo
- Funding information.
- GlobalQueryResults
- Results from EGQuery API for global database search
- HistorySession
- History server session information for paginated fetching
- JournalMeta
- Journal metadata.
- LinkInfo
- Information about database links
- MarkdownConfig
- Configuration options for Markdown conversion
- OaSubsetInfo
- Information about OA (Open Access) subset availability for a PMC article
- PmcArticle
- PMC full-text article.
- PmcId
- A validated PubMed Central ID (PMC ID)
- PmcLinks
- PMC links discovered through ELink API
- PmcMarkdownConverter
- PMC to Markdown converter
- PubMedArticle
- Represents a PubMed article with metadata
- PubMedId
- A validated PubMed ID (PMID)
- Reference
- Reference citation.
- RelatedArticles
- Results from ELink API for related article discovery
- SearchResult
- Search result with WebEnv session information for history server pagination
- Section
- Article section.
- SpellCheckResult
- Result from the ESpell API providing spelling suggestions
- Table
- Table wrapper.
Enums§
- CitationMatchStatus
- Status of a citation match result
- HeadingStyle
- Heading style options
- ParseError
- Error types for PubMed/PMC parsing operations
- ReferenceStyle
- Reference formatting style
- SpelledQuerySegment
- Represents a segment of the spelled query from the ESpell API
Traits§
- ExportFormat
- Trait for exporting PubMed articles to various citation formats
Functions§
- parse_article_from_xml
- Parse article from EFetch XML response
- parse_pmc_xml
- Parse PMC XML content into a `PmcArticle` domain model.