如何根据 PubMed ID 从 NCBI 下载摘要

# Submitting too many IDs in one request also raises an error, so the ID
# list was split beforehand with the shell command:
#   split -l 600 pubmed_id_uniq.txt split_pubmed_ids.txt
# which produced three chunk files (suffixes aa, ab, ac).

# One row per run: the ID file to read and the CSV the results go to.
# The last row is the small test run.
run_table <- data.frame(
  input = c(
    "split_pubmed_ids.txtaa",
    "split_pubmed_ids.txtab",
    "split_pubmed_ids.txtac",
    "test_id.txt"
  ),
  output = c("result1.csv", "result2.csv", "result3.csv", "test.csv"),
  stringsAsFactors = FALSE
)

# The last (test) pair is the one left in effect; point `active_run` at
# another row to process one of the chunk files instead.
active_run <- nrow(run_table)
input_file <- run_table$input[active_run]
result_file <- run_table$output[active_run]

对上面每一组 input_file 和 result_file,分别重复运行下面的流程:

library(rentrez)
library(XML)


# Download title and abstract for every PubMed ID listed in `input_file`
# and write them to `result_file` as a CSV with the columns
# pubmedID, title, abstract.

# The IDs are taken from the first column of the input file and passed to
# NCBI as character strings.
test_ids <- read.table(input_file, stringsAsFactors = FALSE)
your.ids <- as.character(test_ids[, 1])
stopifnot(length(your.ids) > 0)

# rentrez call that fetches the records from the pubmed db as parsed XML.
fetch.pubmed <- entrez_fetch(
  db = "pubmed", id = your.ids, rettype = "xml", parsed = TRUE
)

# Work article by article so that PMID, title and abstract always come from
# the same <PubmedArticle> node instead of being aligned by position across
# three independent document-wide queries.
article_nodes <- getNodeSet(fetch.pubmed, "//PubmedArticle")

# Text of the first node matching `path` below `node`, or NA when the
# article has no such element (e.g. a record without an abstract).
first_value <- function(node, path) {
  hits <- xpathSApply(node, path, xmlValue)
  if (length(hits) == 0L) NA_character_ else as.character(hits[[1L]])
}

# One value per fetched article for the given relative XPath.
extract_field <- function(path) {
  vapply(
    article_nodes, first_value, character(1),
    path = path, USE.NAMES = FALSE
  )
}

pubmed_info <- data.frame(
  pubmedID = extract_field(".//MedlineCitation/PMID"),
  title = extract_field(".//Article/ArticleTitle"),
  abstract = extract_field(".//Article/Abstract"),
  stringsAsFactors = FALSE
)

# Keep the row order the previous merge()-based version produced
# (sorted by pubmedID as character).
pubmed_info <- pubmed_info[order(pubmed_info$pubmedID), , drop = FALSE]

write.csv(pubmed_info, file = result_file, row.names = FALSE)

最后将各次运行生成的 result*.csv 文件合并,得到完整结果。

个人公众号,比较懒,很少更新,可以在上面提问题:

更多精彩,请移步公众号阅读:

Sam avatar
About Sam
专注生物信息 专注转化医学