【1.6.3】如何根据pubmed id从ncbi上下载摘要

October 01, 2017 biodatabase 阅读量：次

## Submitting too many ids in a single request also fails, so the id list
## was cut into chunks of 600 lines beforehand with the shell command:
##
##   split -l 600 pubmed_id_uniq.txt split_pubmed_ids.txt
##
## That gave three chunk files (suffixes aa, ab, ac). Each entry below pairs
## one input file with the CSV its results are written to; "test" is a small
## file used for trying the pipeline out.
pubmed_runs <- list(
  part1 = c(input = "split_pubmed_ids.txtaa", output = "result1.csv"),
  part2 = c(input = "split_pubmed_ids.txtab", output = "result2.csv"),
  part3 = c(input = "split_pubmed_ids.txtac", output = "result3.csv"),
  test  = c(input = "test_id.txt", output = "test.csv")
)

## Name of the entry to process in this run; change it and re-run the rest
## of the script to handle another chunk.
active_run <- "test"

input_file <- pubmed_runs[[active_run]][["input"]]
result_file <- pubmed_runs[[active_run]][["output"]]

## Re-run everything below once for each input_file / result_file pair.

library(rentrez)
library(XML)


# Read the PubMed ids to fetch from the first column of `input_file`.
# colClasses = "character" keeps every id exactly as written in the file
# instead of letting read.table() guess a numeric type and converting back.
test_ids <- read.table(input_file, colClasses = "character")
your.ids <- as.character(test_ids[[1]])

# Download the records for all ids in one request to the "pubmed" database.
# parsed = TRUE asks rentrez to hand back a parsed XML document rather than
# raw text, which is what the xpathApply() calls further down operate on.
# (TRUE is spelled out because `T` is an ordinary, reassignable variable.)
fetch.pubmed <- entrez_fetch(
  db = "pubmed",
  id = your.ids,
  rettype = "xml",
  parsed = TRUE
)

# Pull three parallel lists out of the fetched XML document. Each
# xpathApply() call returns a list with one entry per matched node:
#   PMID         - text of the <PMID> child of every <MedlineCitation>
#   ArticleTitle - text of the <ArticleTitle> child of every <Article>
#   abstracts    - text of the <Abstract> child of every <Article>
# NOTE(review): for an article without an <Abstract> child, xmlValue() is
# called on NULL; presumably that yields NA -- confirm against the XML docs.

PMID <- xpathApply(
  fetch.pubmed,
  "//PubmedArticle//MedlineCitation",
  function(citation) {
    citation_children <- xmlChildren(citation)
    xmlValue(citation_children$PMID)
  }
)

ArticleTitle <- xpathApply(
  fetch.pubmed,
  "//PubmedArticle//Article",
  function(article) {
    article_children <- xmlChildren(article)
    xmlValue(article_children$ArticleTitle)
  }
)

abstracts <- xpathApply(
  fetch.pubmed,
  "//PubmedArticle//Article",
  function(article) {
    article_children <- xmlChildren(article)
    xmlValue(article_children$Abstract)
  }
)

## Combine the extracted PMIDs, titles and abstracts into one table with the
## columns pubmedID, title, abstract and write it to `result_file` as CSV.
##
## `PMID`, `ArticleTitle` and `abstracts` are lists extracted from the same
## parsed document, so entry i of each list is expected to describe the same
## article. The table is therefore built column by column; joining the
## columns back together on the PMID key would duplicate rows whenever a
## PMID occurs more than once.

# Flatten a list of extracted values into a character vector with exactly
# one entry per list element; anything that is not a single value becomes NA.
flatten_field <- function(values) {
  vapply(
    values,
    function(v) if (length(v) == 1L) as.character(v) else NA_character_,
    character(1),
    USE.NAMES = FALSE
  )
}

pubmed.id <- flatten_field(PMID)
article_title <- flatten_field(ArticleTitle)
article_abstract <- flatten_field(abstracts)

# Fail loudly if the three lists do not line up instead of silently pairing
# a title or abstract with the wrong PMID.
stopifnot(
  length(article_title) == length(pubmed.id),
  length(article_abstract) == length(pubmed.id)
)

pubmed_info <- data.frame(
  pubmedID = pubmed.id,
  title = article_title,
  abstract = article_abstract,
  stringsAsFactors = FALSE
)

# Rows are sorted by PMID (compared as text), the same order the previous
# merge()-based version produced.
pubmed_info <- pubmed_info[order(pubmed_info$pubmedID), , drop = FALSE]

write.csv(pubmed_info, file = result_file, row.names = FALSE)

药企，独角兽，苏州。团队长期招人，感兴趣的都可以发邮件聊聊：tiehan@sina.cn

个人公众号，比较懒，很少更新，可以在上面提问题，如果回复不及时，可发邮件给我： tiehan@sina.cn