Last active
September 21, 2025 18:26
-
-
Save janxkoci/f6538194706103447b9826b653e6d7db to your computer and use it in GitHub Desktop.
Getting BibTeX bibliography from PubMed with R, rentrez, and glue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## Getting BibTeX bibliography from PubMed with R, rentrez, and glue | |
| library(rentrez) | |
| library(tidyverse) | |
| library(magrittr) | |
| library(glue) | |
## search term (PubMed query string; edit to select other records)
search_term <- "\"flegontov p\"[au] OR \"flegontov pn\"[au]"

## number of publications
## retmax = 0: only the hit count is read from this query, so no ids need
## to be downloaded.
npubs <- entrez_search(db = "pubmed", term = search_term, retmax = 0)$count

## Fail early with a readable message instead of sending an id-less
## summary request further down.
if (npubs == 0) {
  stop("PubMed returned no records for: ", search_term, call. = FALSE)
}

## get ncbi uids (retmax is raised to the hit count so the id list is
## not truncated)
ncbi_ids <- entrez_search(
  db = "pubmed",
  term = search_term,
  retmax = npubs
)$ids

## get all publications as json, save into list object
## always_return_list = TRUE keeps the result a list of records even when
## the search matched a single publication; without it a lone record is
## returned bare and the per-record lapply() calls below would iterate over
## its fields instead of over publications.
## NOTE(review): very large id sets may need batching or web_history --
## confirm against the rentrez documentation if npubs grows into the hundreds.
esummary <- entrez_summary(
  db = "pubmed",
  id = ncbi_ids,
  retmode = "json",
  always_return_list = TRUE
)
## extract items into table
## One row per summary record. Fields are picked with `[` directly rather
## than through `extract`, whose meaning depends on whether magrittr or
## tidyr was attached last.
summary_fields <- c(
  "uid", "title", "fulljournalname", "source", "pubdate",
  "sortpubdate", "issue", "volume", "pages"
)

papers <- esummary %>%
  lapply(function(record) record[summary_fields]) %>%
  bind_rows() %>%
  mutate(
    # journal name with everything from the first colon onward removed
    journalname = str_remove(fulljournalname, ":.*"),
    # first run of four digits in the sortable publication date
    year = str_extract(sortpubdate, "\\d{4}"),
    # double the first hyphen so a page range is written as "12--34"
    pages = str_replace(pages, "-", "--")
  )

## DOI of each record as a plain character column.
## A record without a DOI yields "" so the field renders as `doi={}` and is
## dropped by the empty-field cleanup; a list column would instead be
## rendered as the literal text "character(0)".
papers$doi <- vapply(
  esummary,
  function(record) {
    ids <- record$articleids
    doi <- ids[["value"]][ids[["idtype"]] %in% "doi"]
    if (length(doi) == 0) "" else as.character(doi[[1]])
  },
  character(1),
  USE.NAMES = FALSE
)

## Author list of each record as a single string.
## BibTeX separates the names in an author field with " and ", not commas.
## Names are kept in the form PubMed supplies them; a record without
## authors yields "".
papers$authors <- vapply(
  esummary,
  function(record) paste(record$authors[["name"]], collapse = " and "),
  character(1),
  USE.NAMES = FALSE
)
#papers %>% glimpse

## Render one @article entry per row of `papers`.
## In the glue template a doubled brace is a literal brace and {name}
## inserts the column of that name, so `{{{uid},` becomes `{<uid>,`.
bibtex <- papers %>%
  glue_data("
@article{{{uid},
author={{{authors}}},
year={{{year}}},
title={{{title}}},
journal={{{source}}},
number={{{issue}}},
volume={{{volume}}},
pages={{{pages}}},
doi={{{doi}}}
}}
")

## remove empty fields
## Every line containing "{}" is dropped. This is done in R before writing
## rather than by shelling out to `sed -i`, whose in-place flag is
## GNU-specific and which is not available on every platform.
bib_lines <- as.character(
  unlist(strsplit(as.character(bibtex), "\n", fixed = TRUE))
)
bib_lines <- bib_lines[!grepl("{}", bib_lines, fixed = TRUE)]

## write the bibtex string into a file
writeLines(bib_lines, "papers.bib")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Conda environment for the PubMed-to-BibTeX R script: R 4.x from conda-forge
# together with the tidyverse and rentrez packages the script loads.
| name: pubmed2bibtex | |
| channels: | |
| - conda-forge | |
| dependencies: | |
| - r-base=4 | |
| - r-tidyverse # includes magrittr & glue | |
| - r-rentrez | |
# Optional front ends; uncomment the one you want installed in the environment.
| ## pick an IDE | |
| # - rstudio-desktop | |
| # - jupyterlab | |
| # - r-irkernel |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment