WGCNA(加權(quán)基因共表達(dá)網(wǎng)絡(luò)分析) - 簡(jiǎn)書
WGCNA分析,簡(jiǎn)單全面的最新教程 - 簡(jiǎn)書
http://pages.stat.wisc.edu/~yandell/statgen/ucla/WGCNA/wgcna.html
STEP6:WGCNA相關(guān)性分析 - 簡(jiǎn)書
學(xué)習(xí)WGCNA總結(jié) | Public Library of Bioinformatics


rm(list = ls())#清空環(huán)境變量
getwd()
setwd("D:/AAA-臺(tái)式機(jī)-LY-WorkSpace/data-wrangling-with-R/")
library(WGCNA)
library(reshape2)
library(stringr)
options(stringsAsFactors = F)
#打開多路線程
allowWGCNAThreads()#enableWGCNAThreads()
mycount <- read.csv("D:/AAA-臺(tái)式機(jī)-LY-WorkSpace/data-wrangling-with-R/htseq-count-readcount - 無注釋.csv", row.names=1, header=T, check.names=F)
dim(mycount)
data <- log2(mycount+1)
##篩選方差前25%的基因,上一步是為了減少運(yùn)算量,因?yàn)橐粋€(gè)測(cè)序數(shù)據(jù)可能會(huì)有好幾萬個(gè)探針,
#而可能其中很多基因在各個(gè)樣本中的表達(dá)情況并沒有什么太大變化,為了減少運(yùn)算量,這里我們篩選方差前25%的基因。
m.vars=apply(data,1,var)
data.upper <- data[which(m.vars > quantile(m.vars, probs = seq(0,1,0.25))[4]),]
datExpr <- as.data.frame(t(data.upper))#行和列轉(zhuǎn)換位置
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
##樣本聚類檢查離群值##
gsg <- goodSamplesGenes(datExpr, verbose = 3)
gsg$allOK
sampleTree <- hclust(dist(datExpr), method = "average")
plot(sampleTree, main = "sample clustering to detect outliers", sub = "", xlab = "")
save(datExpr, file = "readcount-01-dataInput.RData")
##軟閾值篩選##
# 橫軸是Soft threshold (power),縱軸是無標(biāo)度網(wǎng)絡(luò)的評(píng)估參數(shù),數(shù)值越高,
# 網(wǎng)絡(luò)越符合無標(biāo)度特征 (non-scale)
powers <- c(seq(1,10,by=1), seq(12,20,by=2))
sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
par(mfrow=c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
? ? xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit,signed R^2", type = "n",
? ? main = paste("scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
? ? labels = powers, cex = cex1, col = "red")
abline(h=0.90, col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
? ? xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
? ? main = paste("Mean Connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels = powers, cex = cex1, col = "red")
# 運(yùn)行下列代碼,讓程序推薦你一個(gè)power, 數(shù)據(jù)質(zhì)量太差啦,程序給了我"NA",自己設(shè)定power=14
sft$powerEstimate
#推薦的power是4
##一步法網(wǎng)絡(luò)構(gòu)建:One-step network construction and module detection##
net <- blockwiseModules(datExpr,power = 4,maxBlockSize = nGenes,
? ? ? ? ? ? ? ? ? ? ? ? TOMType = "unsigned", minModuleSize = 30,
? ? ? ? ? ? ? ? ? ? ? ? reassignThreshold = 0, mergeCutHeight = 0.25,
? ? ? ? ? ? ? ? ? ? ? ? numericLabels = T, pamRespectsDendro = F,
? ? ? ? ? ? ? ? ? ? ? ? saveTOM = T,
? ? ? ? ? ? ? ? ? ? ? ? saveTOMFileBase = "AS-green-readcount-TOM",
? ? ? ? ? ? ? ? ? ? ? ? verbose = 3)
table(net$colors)
#層級(jí)聚類樹展示各個(gè)模塊
## 灰色的為**未分類**到模塊的基因。
# Convert labels to colors for plotting
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
# Plot the dendrogram and the module colors underneath
# 如果對(duì)結(jié)果不滿意,還可以recutBlockwiseTrees,節(jié)省計(jì)算時(shí)間
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
? ? ? ? ? ? ? ? ? ? "Module colors",
? ? ? ? ? ? ? ? ? ? dendroLabels = F, hang = 0.03,
? ? ? ? ? ? ? ? ? ? addGuide = T, guideHang = 0.05)
table(moduleColors)
MEs = net$MEs
geneTree = net$dendrograms[[1]]
save(MEs, moduleLabels, geneTree,
? ? file = "AS-green-readcount-02-networkConstruction-auto.RData")##結(jié)果保存###
##表型與模塊相關(guān)性##(沒有,下一步)
moduleLabelsAutomatic = net$colors
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
moduleColorsWW = moduleColorsAutomatic
MEs0 = moduleEigengenes(datExpr, moduleColorsWW)$eigengenes
MEsWW = orderMEs(MEs0)
samples <- read.csv()
modTraitor = cor(MEsWW, )
#繪制模塊之間相關(guān)性圖
# module eigengene, 可以繪制線圖,作為每個(gè)模塊的基因表達(dá)趨勢(shì)的展示
MEs = net$MEs
### 不需要重新計(jì)算,改下列名字就好
### 官方教程是重新計(jì)算的,其實(shí)可以不用這么麻煩
MEs_col = MEs
colnames(MEs_col) = paste0("ME", labels2colors(as.numeric(str_replace_all(colnames(MEs),"ME", ""))))
MEs_col = orderMEs(MEs_col)
# 根據(jù)基因間表達(dá)量進(jìn)行聚類所得到的各模塊間的相關(guān)性圖
# marDendro/marHeatmap 設(shè)置下、左、上、右的邊距
plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
? ? ? ? ? ? ? ? ? ? ? marDendro = c(3,3,2,4),
? ? ? ? ? ? ? ? ? ? ? marHeatmap = c(3,4,2,2), plotDendrograms = T,
? ? ? ? ? ? ? ? ? ? ? xLabelsAngle = 90)
#可視化基因網(wǎng)絡(luò) (TOM plot)
# 如果采用分步計(jì)算,或設(shè)置的blocksize>=總基因數(shù),直接load計(jì)算好的TOM結(jié)果
# 否則需要再計(jì)算一遍,比較耗費(fèi)時(shí)間
# TOM = TOMsimilarityFromExpr(dataExpr, power=power, corType=corType, networkType=type)
load(net$TOMFiles[1], verbose = T)
TOM <- as.matrix(TOM)
dissTOM = 1-TOM
# Transform dissTOM with a power to make moderately strong
# connections more visible in the heatmap
plotTOM = dissTOM^7
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA
#call the plot function
# 這一部分特別耗時(shí),行列同時(shí)做層級(jí)聚類
#TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#all genes 耗時(shí)太長(zhǎng)了,隨便選取1000個(gè)基因來可視化
nSelect = 1000
# For reproducibility, we set the random seed
set.seed(10)
select = sample(nGenes, size = nSelect)
selectTOM = dissTOM[select, select]
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select]
# Open a graphical window
sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")