Crate pubmed_client

Crate pubmed_client 

Source
Expand description

§PubMed Client

A Rust client library for accessing the PubMed and PMC (PubMed Central) APIs. This crate provides easy-to-use interfaces for searching, fetching, and parsing biomedical research articles.

§Features

  • PubMed API Integration: Search and fetch article metadata
  • PMC Full Text: Retrieve and parse structured full-text articles
  • Markdown Export: Convert PMC articles to well-formatted Markdown
  • Response Caching: Reduce API quota usage with intelligent caching
  • Async Support: Built on tokio for use with async/await
  • Error Handling: Comprehensive error types for robust error handling
  • Type Safety: Strongly typed data structures for all API responses

§Quick Start

§Searching for Articles

use pubmed_client::PubMedClient;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PubMedClient::new();

    // Search for articles using the query builder
    let articles = client
        .search()
        .query("covid-19 treatment")
        .free_full_text_only()
        .published_after(2020)
        .limit(10)
        .search_and_fetch(&client)
        .await?;

    for article in articles {
        println!("Title: {}", article.title);
        let author_names: Vec<&str> = article.authors.iter().map(|a| a.full_name.as_str()).collect();
        println!("Authors: {}", author_names.join(", "));
    }

    Ok(())
}

§Fetching Full Text from PMC

use pubmed_client::PmcClient;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();

    // Check if PMC full text is available
    if let Some(pmcid) = client.check_pmc_availability("33515491").await? {
        // Fetch structured full text
        let full_text = client.fetch_full_text(&pmcid).await?;

        println!("Title: {}", full_text.title);
        println!("Sections: {}", full_text.sections.len());
        println!("References: {}", full_text.references.len());
    }

    Ok(())
}

§Converting PMC Articles to Markdown

use pubmed_client::{PmcClient, PmcMarkdownConverter, HeadingStyle, ReferenceStyle};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();

    // Fetch and parse a PMC article
    if let Ok(full_text) = client.fetch_full_text("PMC1234567").await {
        // Create a markdown converter with custom configuration
        let converter = PmcMarkdownConverter::new()
            .with_include_metadata(true)
            .with_include_toc(true)
            .with_heading_style(HeadingStyle::ATX)
            .with_reference_style(ReferenceStyle::Numbered);

        // Convert to markdown
        let markdown = converter.convert(&full_text);
        println!("{}", markdown);

        // Or save to file
        std::fs::write("article.md", markdown)?;
    }

    Ok(())
}

§Downloading and Extracting PMC Articles as TAR files

use pubmed_client::PmcClient;
use std::path::Path;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();
    let output_dir = Path::new("./extracted_articles");

    // Download and extract a PMC article as tar.gz from the OA API
    let files = client.download_and_extract_tar("PMC7906746", output_dir).await?;

    println!("Extracted {} files:", files.len());
    for file in files {
        println!("  - {}", file);
    }

    Ok(())
}

§Extracting Figures with Captions

use pubmed_client::PmcClient;
use std::path::Path;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();
    let output_dir = Path::new("./extracted_articles");

    // Extract figures and match them with captions from XML
    let figures = client.extract_figures_with_captions("PMC7906746", output_dir).await?;

    for figure in figures {
        println!("Figure {}: {}", figure.figure.id, figure.figure.caption);
        println!("File: {}", figure.extracted_file_path);
        if let Some(dimensions) = figure.dimensions {
            println!("Dimensions: {}x{}", dimensions.0, dimensions.1);
        }
    }

    Ok(())
}

§Response Caching

The library supports intelligent caching to reduce API quota usage and improve performance.

§Basic Caching

use pubmed_client::{PmcClient, ClientConfig};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Enable default memory caching
    let config = ClientConfig::new().with_cache();
    let client = PmcClient::with_config(config);

    // First fetch - hits the API
    let article1 = client.fetch_full_text("PMC7906746").await?;

    // Second fetch - served from cache
    let article2 = client.fetch_full_text("PMC7906746").await?;

    Ok(())
}

§Advanced Caching Options

use pubmed_client::{PmcClient, ClientConfig};
use pubmed_client::cache::CacheConfig;
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Memory cache with custom settings
    let cache_config = CacheConfig {
        max_capacity: 5000,
        time_to_live: Duration::from_secs(24 * 60 * 60), // 24 hours
        ..Default::default()
    };

    let config = ClientConfig::new()
        .with_cache_config(cache_config);
    let client = PmcClient::with_config(config);

    // Use the client normally - caching happens automatically
    let article = client.fetch_full_text("PMC7906746").await?;

    Ok(())
}

§Hybrid Cache with Disk Persistence

#[cfg(not(target_arch = "wasm32"))]
{
use pubmed_client::{PmcClient, ClientConfig};
use pubmed_client::cache::CacheConfig;
use std::time::Duration;
use std::path::PathBuf;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Memory cache configuration
    let cache_config = CacheConfig {
        max_capacity: 1000,
        time_to_live: Duration::from_secs(24 * 60 * 60),
        ..Default::default()
    };

    let config = ClientConfig::new()
        .with_cache_config(cache_config);
    let client = PmcClient::with_config(config);

    // Articles are cached in memory
    let article = client.fetch_full_text("PMC7906746").await?;

    Ok(())
}
}

§Cache Management

use pubmed_client::{PmcClient, ClientConfig};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ClientConfig::new().with_cache();
    let client = PmcClient::with_config(config);

    // Fetch some articles
    client.fetch_full_text("PMC7906746").await?;
    client.fetch_full_text("PMC10618641").await?;

    // Check cache statistics
    let count = client.cache_entry_count();
    println!("Cached items: {}", count);

    // Clear the cache when needed
    client.clear_cache().await;

    Ok(())
}

Re-exports§

pub use config::ClientConfig;
pub use error::PubMedError;
pub use error::Result;
pub use pmc::ExtractedFigure;
pub use pmc::PmcClient;
pub use pmc::PmcTarClient;
pub use pubmed::ArticleType;
pub use pubmed::Language;
pub use pubmed::PubMedClient;
pub use pubmed::SearchQuery;
pub use pubmed::SortOrder;
pub use rate_limit::RateLimiter;
pub use time::Duration;
pub use time::Instant;
pub use time::sleep;

Modules§

cache
common
Common data structures and utilities shared between the PubMed and PMC modules
config
error
export
Citation export formats for PubMed articles
pmc
PMC (PubMed Central) client for fetching full-text articles
pubmed
PubMed client for searching and fetching article metadata
rate_limit
Rate limiting implementation for NCBI API compliance
retry
Retry logic with exponential backoff for handling transient network failures
time
Internal time management module for cross-platform compatibility

Structs§

AbstractSection
A labeled section within a structured abstract
Affiliation
Represents an author’s institutional affiliation
ArticleSummary
Lightweight article summary from the ESummary API
Author
Represents a detailed author with enhanced metadata
CitationMatch
Result of a single citation match from the ECitMatch API
CitationMatches
Results from ECitMatch API for batch citation matching
CitationQuery
Input for a single citation match query
Citations
Citation information from ELink API
Client
Convenience client that combines both PubMed and PMC functionality
DatabaseCount
Record count for a single NCBI database from the EGQuery API
DatabaseInfo
Database information from EInfo API
EPostResult
Result from EPost API for uploading PMIDs to the NCBI History server
FieldInfo
Information about a database search field
Figure
Figure.
FundingInfo
Funding information.
GlobalQueryResults
Results from EGQuery API for global database search
HistorySession
History server session information for paginated fetching
JournalMeta
Journal metadata.
LinkInfo
Information about database links
MarkdownConfig
Configuration options for Markdown conversion
OaSubsetInfo
Information about OA (Open Access) subset availability for a PMC article
PmcArticle
PMC full-text article.
PmcId
A validated PubMed Central ID (PMC ID)
PmcLinks
PMC links discovered through ELink API
PmcMarkdownConverter
PMC to Markdown converter
PubMedArticle
Represents a PubMed article with metadata
PubMedId
A validated PubMed ID (PMID)
Reference
Reference citation.
RelatedArticles
Results from ELink API for related article discovery
SearchResult
Search result with WebEnv session information for history server pagination
Section
Article section.
SpellCheckResult
Result from the ESpell API providing spelling suggestions
Table
Table wrapper.

Enums§

CitationMatchStatus
Status of a citation match result
HeadingStyle
Heading style options
ParseError
Error types for PubMed/PMC parsing operations
ReferenceStyle
Reference formatting style
SpelledQuerySegment
Represents a segment of the spelled query from the ESpell API

Traits§

ExportFormat
Trait for exporting PubMed articles to various citation formats

Functions§

parse_article_from_xml
Parse article from EFetch XML response
parse_pmc_xml
Parse PMC XML content into a PmcArticle domain model.