puremoe provides a unified interface to PubMed and NLM data. Search with search_pubmed(), then retrieve data from any of five endpoints with get_records().

search_pubmed() accepts standard PubMed query syntax and returns a vector of PMIDs.

# Run the query against PubMed and keep the matching PMIDs.
pmids <- '("political ideology"[TiAb])' |>
  puremoe::search_pubmed()
length(pmids)
#> [1] 936
# Work with at most the first 50 PMIDs in the examples that follow.
pmids_sub <- pmids |> head(50L)

Abstracts

# Fetch abstract records for the PMID subset and store pmid as character.
abstracts <- pmids_sub |>
  puremoe::get_records(
    endpoint = "pubmed_abstracts",
    cores    = 1L,
    sleep    = 0.5
  ) |>
  mutate(pmid = as.character(pmid))

# Display the identifying columns as an interactive table.
abstracts |>
  select(pmid, year, journal, articletitle) |>
  DT::datatable(rownames = FALSE)

The annotations column is a list of per-article data frames containing MeSH terms, chemical names, and keywords.

# Stack the per-article annotation data frames and preview the first 20 rows.
abstracts$annotations |>
  bind_rows() |>
  head(20) |>
  DT::datatable(rownames = FALSE)

Affiliations

# Affiliation records for the first 25 PMIDs of the subset.
affiliations <- pmids_sub |>
  head(25L) |>
  puremoe::get_records(
    endpoint = "pubmed_affiliations",
    cores    = 1L,
    sleep    = 0.5
  )

DT::datatable(affiliations, rownames = FALSE)

iCite metrics

# iCite records for the PMID subset.
icites <- pmids_sub |>
  puremoe::get_records(
    endpoint = "icites",
    cores    = 1L,
    sleep    = 0.25
  )

# Leave out citation_net and cited_by_clin, store pmid as character, and
# render the remaining columns in a horizontally scrollable table.
icites |>
  select(-c(citation_net, cited_by_clin)) |>
  mutate(pmid = as.character(pmid)) |>
  DT::datatable(rownames = FALSE, options = list(scrollX = TRUE))

PubTator annotations

# PubTator records for the first 30 PMIDs of the subset.
pubtations <- pmids_sub |>
  head(30L) |>
  puremoe::get_records(
    endpoint = "pubtations",
    cores    = 1L
  )

DT::datatable(pubtations, rownames = FALSE)

Full text

Full-text retrieval requires open-access PMC articles. pmid_to_ftp() converts PMIDs to FTP URLs, filtering to only those with open-access full text available.

# Map the PMID subset to full-text FTP URLs and show the lookup table.
ftp <- puremoe::pmid_to_ftp(pmids = pmids_sub)
DT::datatable(ftp, rownames = FALSE, options = list(scrollX = TRUE))

# Retrieve full text for the first two URLs only.
fulltext <- ftp$url |>
  head(2L) |>
  puremoe::get_records(
    endpoint = "pmc_fulltext",
    cores    = 1L
  )

# Preview the first five rows, truncating each row's text to its first 15
# whitespace-separated words followed by "...".
# - slice() runs before mutate() so only the displayed rows are tokenized.
# - vapply(..., character(1)) always yields a character vector; sapply() would
#   return an empty list (and turn `text` into a list column) if `fulltext`
#   had no rows.
fulltext |>
  slice(1:5) |>
  mutate(
    text = vapply(
      strsplit(text, "\\s+"),
      function(words) paste0(paste(head(words, 15), collapse = " "), "..."),
      character(1)
    )
  ) |>
  DT::datatable(rownames = FALSE, options = list(scrollX = TRUE))

Endpoint schemas

endpoint_info() called with no arguments lists the available endpoints; called with an endpoint name, it returns column definitions, rate limits, and notes for that endpoint.

# With no argument, endpoint_info() prints the names of the available endpoints.
puremoe::endpoint_info()
#> [1] "pubmed_abstracts"    "pubmed_affiliations" "icites"             
#> [4] "pubtations"          "pmc_fulltext"
# With an endpoint name, it returns a list holding that endpoint's description,
# return type, columns, parameters, rate limit, and notes.
puremoe::endpoint_info("icites")
#> $description
#> [1] "NIH iCite citation metrics and influence scores"
#> 
#> $returns
#> [1] "data.frame"
#> 
#> $columns
#> $columns$pmid
#> [1] "PubMed ID - join key to link with pubmed_abstracts (character)"
#> 
#> $columns$citation_count
#> [1] "Total citations received (integer)"
#> 
#> $columns$relative_citation_ratio
#> [1] "RCR: field-adjusted citation rate comparing to NIH baseline (numeric)"
#> 
#> $columns$nih_percentile
#> [1] "Percentile rank vs NIH-funded publications (numeric)"
#> 
#> $columns$field_citation_rate
#> [1] "Expected citation rate for article's co-citation field (numeric)"
#> 
#> $columns$is_research_article
#> [1] "Flag for primary research articles (logical)"
#> 
#> $columns$is_clinical
#> [1] "Flag for clinical articles (logical)"
#> 
#> $columns$provisional
#> [1] "Flag indicating RCR is provisional due to recent publication (logical)"
#> 
#> $columns$citation_net
#> [1] "Citation network edge list: 'from' and 'to' PMIDs within result set (list-column)"
#> 
#> $columns$cited_by_clin
#> [1] "PMIDs of clinical articles citing this paper (character/list)"
#> 
#> 
#> $parameters
#> $parameters$cores
#> [1] "parallel workers"
#> 
#> $parameters$sleep
#> [1] "delay between requests"
#> 
#> 
#> $rate_limit
#> [1] "Relatively permissive"
#> 
#> $notes
#> [1] "Join to pubmed_abstracts on pmid for complete metadata (title, journal, authors, etc. not included to avoid redundancy). citation_net enables intra-corpus network analysis."