前面我們剛更新完CytoTRACE2,那么也是想著用將其與monocle2分析結合起來,一方面看看效果,一方面輔助起點確定。同時近期不止有一個小伙伴在跑monocle2分析,但是還是因為包出了問題,我們之前發(fā)布過一個終集修改版2.26.0(Monocle2終極修改版),使用起來是沒有問題的,但是小伙伴確有問題,這讓我下定決心要找出問題。此外,也想將整個流程做一個視頻講解,彌補之前沒有視頻教程的遺憾,有很多小伙伴有此需求。
首先說結論,為什么終極修改包有些人出錯,有些人不出錯。經過我們的測試發(fā)現(xiàn),安裝monocle 2.26.0修改版在R 4.2中,無論是服務器還是本地電腦的運行中,ordercell和BEAM都不會出此案錯誤,但是在R 4.3中,ordercell沒有問題,BEAM依然報錯,可見2.26.0修改版與R版本有些關系。那么如果是R4.3,總不能為了monocle降級吧,我們測試發(fā)現(xiàn)monocle 2.32.0 在R 4.3中Ordercell沒有問題,BEAM有問題,所以對2.32.0也做了修改版,讓其在R 4.3中運行無障礙。(注意了:包修改的是它本身的問題,如果是因為流程或者數(shù)據的錯誤,需要自行檢查)


接下來走走monocle2的流程吧,這里為了方便,數(shù)據也小,我們直接包裝了一個函數(shù),實際運行我們還是建議自己按照流程,因為需要中間調整參數(shù)。如果你的參數(shù)流程已經探索沒有問題了,那么可以包裝函數(shù),方便分析:
library(Seurat)
library(monocle)
library(viridis)
#========================================================================================
setwd("D:\\KS項目\\公眾號文章\\視頻教程---monocle2分析測試")
#load data-加載之前做了cytroTRACE2的那個obj
#可以結合輔助看看cell起點
load("D:/KS項目/公眾號文章/CytroTRACE2:單細胞轉錄組數(shù)據推斷細胞發(fā)育潛能/sce_trace.RData")
DimPlot(cytotrace2_sce, label = T, pt.size = 1)
#這個數(shù)據是大群里面直接提取了目標細胞進行分析
#========================================================================================
#monocle pieline function
#pay attention---Seuratobject version 5.0.0
ks_run_Monocle2 <- function(object, #seurat obj or expression matrix (建議數(shù)據格式轉為matrix,如果數(shù)據量大轉化為稀疏矩陣as(as.matrix(data), "sparseMatrix"))
layer, #used when object is a seurat obj
assay, #used when object is a seurat obj
lowerDetectionLimit = 0.1,
VARgenesM=c("dispersionTable","seurat","differentialGeneTest"),
cellAnno=NULL,
define_root=F,
root_state,
reverse=NULL
){
if(class(object)[1] == 'Seurat') {
data <- GetAssayData(object=object, layer=layer, assay=assay)#get expression matrix data
pd <- new("AnnotatedDataFrame", data = object@meta.data)
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new("AnnotatedDataFrame", data = fData)
if(all(data == floor(data))) {
expressionFamily <- negbinomial.size()
} else if(any(data < 0)){
expressionFamily <- uninormal()
} else {
expressionFamily <- tobit()
}
#Creates a new CellDateSet object.
monocle_cds <- newCellDataSet(data,
phenoData = pd,
featureData = fd,
lowerDetectionLimit=0.1,
expressionFamily=expressionFamily)
}else{
print("This fucntions only apply for a seurat obj")
}
#Estimate size factors and dispersions
#數(shù)據處理
monocle_cds <- estimateSizeFactors(monocle_cds)#size facotr標準化細胞之間的mRNA差異
monocle_cds <- estimateDispersions(monocle_cds)
#質量控制-filter cells
monocle_cds <- detectGenes(monocle_cds, min_expr = 0.1)
# print(head(fData(monocle_cds)))
# print(head(pData(monocle_cds)))
# expressed_genes <- row.names(subset(fData(mouse_monocle), num_cells_expressed >= 10))
monocle_cds <- monocle_cds[fData(monocle_cds)$num_cells_expressed >= 10, ]
#select methods for VariableFeatures
if(VARgenesM=="dispersionTable"){
disp_table <- dispersionTable(monocle_cds)
ordering_genes <- subset(disp_table,
mean_expression >= 0.1 &
dispersion_empirical >= 1.5* dispersion_fit)$gene_id
}
if(VARgenesM=="seurat"){
ordering_genes <- VariableFeatures(FindVariableFeatures(object, assay = "RNA"), assay = "RNA")
}
if(VARgenesM=="differentialGeneTest"){
diff_test_res <- differentialGeneTest(monocle_cds,fullModelFormulaStr = paste0("~",cellAnno))##~后面是表示對誰做差異分析的變量
diff_test_res_sig <- diff_test_res[order(diff_test_res$qval,decreasing=F),]
ordering_sce <- diff_test_res_sig[diff_test_res_sig$qval< 0.01,]
if(nrow(ordering_sce)>3000){
ordering_genes <- ordering_sce$gene_short_name[1:3000]
}else{
ordering_genes <- rdering_sce$gene_short_name
}
}
#Marks genes for clustering
monocle_cds <- setOrderingFilter(monocle_cds, ordering_genes)
plot_ordering_genes(monocle_cds)
#cluster
monocle_cds <- reduceDimension(monocle_cds, max_components = 2,reduction_method = 'DDRTree')
#order cells
monocle_cds <- orderCells(monocle_cds, reverse=reverse)
if(define_root){
monocle_cds <- monocle_cds <- orderCells(monocle_cds,root_state = root_state)
}
return(monocle_cds)
}
分析數(shù)據以及一些基本可視化修飾:
#run monocle2
sce_CDS <- ks_run_Monocle2(object = cytotrace2_sce,
layer = 'counts',
assay = "RNA",
VARgenesM="dispersionTable",
cellAnno = "cluster")
#可視化擬時
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "Pseudotime",cell_size = 3)+
scale_color_gradientn(colours = colorRampPalette(c('blue','green','yellow','red'))(100))

#按照celltype著色
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "cluster",cell_size = 3)+
theme_dr(arrow = grid::arrow(length = unit(0, "inches")))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
scale_color_manual(values = c("#E69253", "#EDB931", "#E4502E", "#4378A0"))+
geom_curve(aes(x=9,y=2,yend=0,xend=5),
arrow=arrow(length=unit(0.03,"npc")),
size=1,curvature=0,
color="black")+
annotate("text", x=6.5, y=1.5, label = "bold(YSMP)", parse = TRUE, angle=45, size=4)+
geom_curve(aes(x=-4,y=-1,yend=3,xend=-6),
arrow=arrow(length=unit(0.03,"npc")),
size=1,curvature=0,
color="black")+
annotate("text", x=-4, y=1, label = "bold(Monocyte~Fate)", parse = TRUE, angle=-75, size=4)+
geom_curve(aes(x=-5,y=-3,yend=-5,xend=-6),
arrow=arrow(length=unit(0.03,"npc")),
size=1,curvature=0,
color="black")+
annotate("text", x=-6.5, y=-4, label = "bold(Neu~Fate)", parse = TRUE, angle=75, size=4)

#結合可視化cytroTRACE2結果
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "CytoTRACE2_Relative",cell_size = 3)+
scale_colour_gradientn(colours = (c( "#000004FF", "#3B0F70FF", "#8C2981FF", "#DE4968FF", "#FE9F6DFF", "#FCFDBFFF")),
na.value = "transparent",
limits=c(0,1),
breaks = c(0,1),
labels=c("(More diff.)", "(Less diff.)"),
name = "Relative\norder \n" ,
guide = guide_colorbar(frame.colour = "black", ticks.colour = "black"))+
theme_dr(arrow = grid::arrow(length = unit(0, "inches")))+#坐標軸主題修改
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())

#=======================================================================================
#擬時差異基因
expressed_genes=row.names(subset(fData(sce_CDS),use_for_ordering=="TRUE"))
peu_gene <- differentialGeneTest(sce_CDS[expressed_genes,],fullModelFormulaStr = "~sm.ns(Pseudotime)",cores = 1)
# write.csv(peu_gene,file='peu_gene.csv')#保存好文件,這個分析過程挺費時間
peu_gene <- peu_gene[which(peu_gene$qval<0.01 & peu_gene$num_cells_expressed>100),]#篩選顯著是的
# peu_gene %>% arrange(qval) -> peu_gene#按照qval排個序
# peu_gene <- peu_gene[1:100,] #這里我們取前100個基因演示
#改造熱圖函數(shù),展示需要的基因
source('./new_heatmap.R')
library(pheatmap)
library(grid)
p1 <-plot_pseudotime_heatmap(sce_CDS[peu_gene$gene_short_name,],
num_clusters = 4,
cores = 2,
show_rownames = T,return_heatmap =T,
hmcols = viridis(256),
use_gene_short_name = T,
clustercolor = c("#d2981a", "#a53e1f", "#457277", "#8f657d"))
p1
#只展示感興趣的基因
genes <- c("C1QB","C1QC","C1QA","MRC1","LGMN","MS4A7","MAF","FOLR2",
"HLA-DPA1","CLEC10A","IL10RA","CD163","KCTD12","CLEC7A","MS4A6A","CD14",
"ITM2A","CYTL1","MDK","SELP","CD24",
"S100A8","S100A9","S100A12")
source('./add.flag.R')
add.flag(p1, kept.labels = genes, repel.degree = 0.2)

library(RColorBrewer)
plot_cell_trajectory(sce_CDS,show_branch_points = T,color_by = "State",cell_size = 3)
#BEAM
#查看在分叉處的差異基因
BEAM_res <- BEAM(sce_CDS, branch_point = 3, cores=2)
BEAM_res <- BEAM_res[order(BEAM_res$qval),]
BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
# write.csv(BEAM_res,file = "BEAM_res.csv")
branch_p <- plot_genes_branched_heatmap(sce_CDS[row.names(subset(BEAM_res, qval < 1e-8)),],
branch_point = 3,
num_clusters = 4,
cores = 2,
hmcols = colorRampPalette(c("steelblue", "white","darkorange2", "orangered4"))(100),
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)

總之,整個流程是比較簡單的,最重要的自己對于結果的把握和解釋,以及一些新的發(fā)現(xiàn)。希望我們的流程對您有所幫助!