從GEO數(shù)據(jù)庫下載數(shù)據(jù)并處理

rm(list=ls())

Version info: R 3.2.3, Biobase 2.30.0, GEOquery 2.40.0, limma 3.26.8

R scripts generated Sun Aug 11 23:08:50 EDT 2019

################################################################

rm(list=ls())

#  使用limma包對數(shù)據(jù)分析

library(Biobase)

library(GEOquery)

library(limma)

# 從GEO加載series與platform數(shù)據(jù)

gset <- getGEO("GSE101728", GSEMatrix =TRUE, AnnotGPL=FALSE)##series

if (length(gset) > 1) idx <- grep("GPL21047", attr(gset, "names")) else idx <- 1##platform

gset <- gset[[idx]]##由于該整個數(shù)據(jù)是一個list,一個元素,所以需要取該數(shù)據(jù)的第一個list的第一個元素

# 從下載的平臺獲取ID名字

fvarLabels(gset) <- make.names(fvarLabels(gset))

# group names for all samples

gsms <- "1010101XX0XXXX"

sml <- c()

for (i in 1:nchar(gsms)) { sml[i] <- substr(gsms,i,i) }

# eliminate samples marked as "X"

sel <- which(sml != "X")

sml <- sml[sel]

gset <- gset[ ,sel]

# log2 transform

ex <- exprs(gset)

qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))

LogC <- (qx[5] > 100) ||

  (qx[6]-qx[1] > 50 && qx[2] > 0) ||

  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

if (LogC) { ex[which(ex <= 0)] <- NaN

exprs(gset) <- log2(ex) }

# set up the data and proceed with analysis

sml <- paste("G", sml, sep="")    # set group names

fl <- as.factor(sml)

gset$description <- fl

design <- model.matrix(~ description + 0, gset)

colnames(design) <- levels(fl)

fit <- lmFit(gset, design)

cont.matrix <- makeContrasts(G1-G0, levels=design)

fit2 <- contrasts.fit(fit, cont.matrix)

fit2 <- eBayes(fit2, 0.01)

tT <- topTable(fit2, adjust="fdr", sort.by="B", number=250)
##這里需要注意的是只選取了前250個數(shù)據(jù)

tT <- subset(tT, select=c("ID","adj.P.Val","P.Value","t","B","logFC","RANGE_START",
"RANGE_END","RANGE_STRAND","SEQUENCE","GB_ACC","ORF","SPOT_ID"))

write.table(tT, file=stdout(), row.names=F, sep="\t")

################################################################

#  Boxplot for selected GEO samples

library(Biobase)

library(GEOquery)

# load series and platform data from GEO

gset <- getGEO("GSE101728", GSEMatrix =TRUE, getGPL=FALSE)

if (length(gset) > 1) idx <- grep("GPL21047", attr(gset, "names")) else idx <- 1

gset <- gset[[idx]]

# group names for all samples in a series

gsms <- "1010101XX0XXXX"

sml <- c()

for (i in 1:nchar(gsms)) { sml[i] <- substr(gsms,i,i) }

sml <- paste("G", sml, sep="")  #set group names

# eliminate samples marked as "X"

sel <- which(sml != "X")

sml <- sml[sel]

gset <- gset[ ,sel]

# order samples by group

ex <- exprs(gset)[ , order(sml)]

sml <- sml[order(sml)]

fl <- as.factor(sml)

labels <- c("cancer+tissue","adjacent+tissue")

# set parameters and draw the plot

palette(c("#dfeaf4","#f4dfdf", "#AABBCC"))

dev.new(width=4+dim(gset)[[2]]/5, height=6)

par(mar=c(2+round(max(nchar(sampleNames(gset)))/2),4,2,1))

title <- paste ("GSE101728", '/', annotation(gset), " selected samples", sep ='')

boxplot(ex, boxwex=0.6, notch=T, main=title, outline=FALSE, las=2, col=fl)

legend("topleft", labels, fill=palette(), bty="n")

write.table(tT,file = '下載處理數(shù)據(jù).RData')
?著作權歸作者所有,轉載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內(nèi)容

  • mean to add the formatted="false" attribute?.[ 46% 47325/...
    ProZoom閱讀 3,219評論 0 3
  • 31 go gopath32 gopath33 cd /var/www/html/34 ls35 ls...
    Helen_Cat閱讀 717評論 0 3
  • Linux Shell常用shell命令 一、文件、目錄操作命令 1、ls命令 功能:顯示文件和目錄的信息 ls以...
    楓葉魚水閱讀 644評論 0 2
  • Linux常用命令大全(非常全?。。。┰逆溄樱篽ttp://www.cnblogs.com/yjd_hycf_s...
    JokerJin閱讀 702評論 0 3
  • 春來大地萬物新, 花鳥蟲樹共雨晴, 蝴蝶蜜蜂同翻飛, 春光有限景無盡。
    明月映我心閱讀 178評論 0 5

友情鏈接更多精彩內(nèi)容