【1.6.3】如何根据 PubMed ID 从 NCBI 上下载摘要
## Submitting too many IDs in one request also raises an error, so the ID
## list was split beforehand into files of 600 lines each:
#   split -l 600 pubmed_id_uniq.txt split_pubmed_ids.txt
## That produced 3 groups (suffixes aa, ab, ac).
##
## Each pair below overwrites the previous one, so when these lines are run
## top to bottom only the LAST pair is in effect. Run just the pair you want
## (or comment out the others) before running the pipeline.

# Group 1
input_file <- "split_pubmed_ids.txtaa"
result_file <- "result1.csv"

# Group 2
input_file <- "split_pubmed_ids.txtab"
result_file <- "result2.csv"

# Group 3
input_file <- "split_pubmed_ids.txtac"
result_file <- "result3.csv"

# Test input
input_file <- "test_id.txt"
result_file <- "test.csv"

## Re-run the pipeline below once for every input_file / result_file pair.
library(rentrez)
library(XML)
# Download title and abstract for every PubMed ID listed in `input_file`
# and write them to `result_file` as CSV with the columns
# pubmedID, title, abstract (one row per returned PubmedArticle record).
#
# Reads : input_file  - text file, one PubMed ID per line (first column used)
# Writes: result_file - CSV, rows sorted by pubmedID
# Leaves `your.ids`, `fetch.pubmed`, `pubmed.id` and `pubmed_info` in the
# workspace for interactive inspection.

# Read the PubMed IDs from the first column of the input file.
test_ids <- read.table(input_file)
your.ids <- as.character(test_ids[, 1])

# rentrez call that retrieves the records from the pubmed db as parsed XML.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
fetch.pubmed <- entrez_fetch(
  db = "pubmed", id = your.ids, rettype = "xml", parsed = TRUE
)

# Children of an XML node as a named list; an absent (NULL) node has none.
children_of <- function(node) {
  if (is.null(node)) list() else xmlChildren(node)
}

# Text content of an XML node, or NA when the node is absent
# (e.g. a record that has no <Abstract> element).
node_text <- function(node) {
  if (is.null(node)) {
    return(NA_character_)
  }
  xmlValue(node)
}

# Pull PMID, title and abstract out of ONE <PubmedArticle> node.
# Taking all three fields from the same record keeps them aligned by
# construction, instead of pairing the results of two independent XPath
# queries by position.
record_fields <- function(record) {
  citation <- children_of(record)$MedlineCitation
  article <- children_of(citation)$Article
  c(
    pubmedID = node_text(children_of(citation)$PMID),
    title = node_text(children_of(article)$ArticleTitle),
    abstract = node_text(children_of(article)$Abstract)
  )
}

record_list <- xpathApply(fetch.pubmed, "//PubmedArticle", record_fields)

# Extract one named field from every record as a character vector
# (length 0 when no record was returned).
field_column <- function(field) {
  vapply(record_list, function(fields) fields[[field]], character(1))
}

pubmed.id <- field_column("pubmedID")

if (length(pubmed.id) == 0) {
  warning(
    "No PubmedArticle records were returned for the IDs in ", input_file,
    "; writing a header-only CSV.",
    call. = FALSE
  )
}

# Report requested IDs that did not come back, rather than silently
# producing a shorter table.
missing_ids <- setdiff(your.ids, pubmed.id)
if (length(pubmed.id) > 0 && length(missing_ids) > 0) {
  warning(
    length(missing_ids), " requested ID(s) have no PubmedArticle record: ",
    paste(missing_ids, collapse = ", "),
    call. = FALSE
  )
}

# Build the table directly. The former merge() of a title frame with an
# abstract frame multiplied rows whenever a PMID occurred more than once.
pubmed_info <- data.frame(
  pubmedID = pubmed.id,
  title = field_column("title"),
  abstract = field_column("abstract"),
  stringsAsFactors = FALSE
)

# Keep the row order the merge() used to give: sorted by pubmedID.
pubmed_info <- pubmed_info[order(pubmed_info$pubmedID), , drop = FALSE]

write.csv(pubmed_info, file = result_file, row.names = FALSE)
这里是一个广告位,感兴趣的都可以发邮件聊聊:tiehan@sina.cn
个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn
个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn