單細(xì)胞之軌跡分析-2:monocle2 原理解讀+實(shí)操 - 簡(jiǎn)書 (jianshu.com)
跟著Cell學(xué)單細(xì)胞轉(zhuǎn)錄組分析(九):Monocle2擬時(shí)分析演示(上) - 簡(jiǎn)書 (jianshu.com)
加載R包,構(gòu)建擬時(shí)分析文件
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("monocle")
library(monocle)#monocle構(gòu)建CDS需要3個(gè)矩陣:expr.matrix、pd、featuredata
sample_ann <- Macro@meta.data
#構(gòu)建featuredata,一般featuredata需要兩個(gè)col,一個(gè)是gene_id,一個(gè)是gene_short_name,row對(duì)應(yīng)counts的rownames
gene_ann <- data.frame(gene_short_name = rownames(Macro@assays$RNA),
row.names = rownames(Macro@assays$RNA))
#head(gene_ann)
pd <- new("AnnotatedDataFrame",data=sample_ann)
fd <- new("AnnotatedDataFrame",data=gene_ann)
#構(gòu)建matrix
ct=as.data.frame(Macro@assays$RNA@counts)#單細(xì)胞counts矩陣
有了matrix,pd,fd,就可以構(gòu)建monocle對(duì)象,進(jìn)行預(yù)處理。
#構(gòu)建monocle對(duì)象
sc_cds <- newCellDataSet(
as.matrix(ct),
phenoData = pd,
featureData =fd,
expressionFamily = negbinomial.size(),
lowerDetectionLimit=1)
sc_cds <- detectGenes(sc_cds, min_expr = 1)
sc_cds <- sc_cds[fData(sc_cds)$num_cells_expressed > 10, ]
cds <- sc_cds
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)
基因篩選
disp_table <- dispersionTable(cds)
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
cds <- setOrderingFilter(cds, unsup_clustering_genes$gene_id)
plot_ordering_genes(cds)
plot_pc_variance_explained(cds, return_all = F)

image.png
數(shù)據(jù)降維
cds <- reduceDimension(cds, max_components = 2, num_dim = 20,
reduction_method = 'tSNE', verbose = T)
cds <- clusterCells(cds, num_clusters = 5)
plot_cell_clusters(cds, 1, 2 )
table(pData(cds)$Cluster)
colnames(pData(cds))
將擬時(shí)與seurat分群對(duì)應(yīng),并挑選顯著性基因可視化
table(pData(cds)$Cluster)
table(pData(cds)$Cluster,pData(cds)$celltype)
pData(cds)$Cluster=pData(cds)$celltype
diff_test_res <- differentialGeneTest(cds, fullModelFormulaStr = "~Cluster")
sig_genes <- subset(diff_test_res, qval < 0.1)
sig_genes=sig_genes[order(sig_genes$pval),]
head(sig_genes[,c("gene_short_name", "pval", "qval")] )
cg=as.character(head(sig_genes$gene_short_name))
# 挑選差異最顯著的基因可視化
plot_genes_jitter(cds[cg,],
grouping = "Cluster",
color_by = "Cluster",
nrow= 3,
ncol = NULL )
cg2=as.character(tail(sig_genes$gene_short_name))
plot_genes_jitter(cds[cg2,],
grouping = "Cluster",
color_by = "Cluster",
nrow= 3,
ncol = NULL )

image.png
前面差異基因篩選后,開始擬時(shí)推測(cè)
# 第一步: 挑選合適的基因
ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))
ordering_genes
cds <- setOrderingFilter(cds, ordering_genes)
plot_ordering_genes(cds)
#第二步降維
cds <- reduceDimension(cds, max_components = 2,
method = 'DDRTree')
# 第三步: 對(duì)細(xì)胞進(jìn)行排序
cds <- orderCells(cds)
#可視化細(xì)胞分化軌跡
plot_cell_trajectory(cds, color_by = "Cluster")

image.png
可視化基因時(shí)序圖
plot_genes_in_pseudotime(cds[cg,],
color_by = "Cluster")

image.png
保存擬時(shí)cds文件,這將是后期可視化的基礎(chǔ)
可視化
跟著Cell學(xué)單細(xì)胞轉(zhuǎn)錄組分析(十):Monocle2擬時(shí)分析演示之結(jié)果可視化(下) - 簡(jiǎn)書 (jianshu.com)
先做一個(gè)細(xì)胞群的譜系分化圖。從這個(gè)圖可以看出我們關(guān)注的細(xì)胞分化軌跡
library(ggsci)
plot_cell_trajectory(cds, color_by = "Cluster") + scale_color_nejm()

image.png
plot_cell_trajectory(cds, color_by = "State") + scale_color_npg()

image.png
plot_cell_trajectory(cds, color_by = "Pseudotime")
image.png
如果細(xì)胞軌跡全部在一起,很難看出不同細(xì)胞狀態(tài)在分支上的位置,這時(shí),我們可以將每個(gè)狀態(tài)單獨(dú)畫出來(lái),看起來(lái)比較清晰
plot_cell_trajectory(cds, color_by = "State") +
facet_wrap(~State, nrow = 1)

image.png
處理細(xì)胞譜系擬時(shí)可視化,我們還關(guān)注分化軌跡過(guò)程中基因的情況。選定關(guān)注的基因,看看其在擬時(shí)中的表達(dá)
pData(cds)$TGFBR2 = log2( exprs(cds)['TGFBR2',]+1)
image.png
通過(guò)擬時(shí)基因表達(dá)模式聚類
cds$id <- rownames(cds)
library(dplyr)
cds %>% arrange(qval) %>% head(10) %>% select(id) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster$id
my_pseudotime_cluster <- plot_pseudotime_heatmap(cds[gene_to_cluster,],
num_clusters = 3,
cores = 8,
show_rownames = TRUE)

image.png
BEAM進(jìn)行統(tǒng)計(jì)分析
BEAM_res <- BEAM(my_cds_subset, branch_point = 1, cores = 8)
BEAM_res <- BEAM_res[order(BEAM_res$qval),]
BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
head(BEAM_res)
table(BEAM_res$qval < 1e-4)
plot_genes_branched_heatmap(my_cds_subset[row.names(subset(BEAM_res, qval < 1e-4)),],
branch_point = 1,
num_clusters = 4,
cores = 8,
use_gene_short_name = TRUE,
show_rownames = TRUE)

image.png