單細(xì)胞轉(zhuǎn)錄組之?dāng)M時(shí)序分析

1.數(shù)據(jù)的準(zhǔn)備

1.1選擇想要進(jìn)行時(shí)間軌跡分析的seurat亞群,,并將亞群提取出來

#走一遍seurat標(biāo)準(zhǔn)流程后,得到數(shù)據(jù),提取亞群
table( Idents(sce ))
table(sce@meta.data$seurat_clusters) 
table(sce@meta.data$orig.ident) 

# 取子集
levels(Idents(sce))
sce = sce[, Idents(sce) %in% c( "FCGR3A+ Mono", "CD14+ Mono"  )] 
#sce[基因,細(xì)胞]
levels(Idents(sce))

markers_df <- FindMarkers(object = sce, ident.1 = 'FCGR3A+ Mono',ident.2 = 'CD14+ Mono', #logfc.threshold = 0,min.pct = 0.25)
head(markers_df)
挑選差異基因
cg_markers_df=markers_df[abs(markers_df$avg_log2FC) >1,]

1.2 創(chuàng)建CellDataSet對(duì)象及monocle標(biāo)準(zhǔn)流程

詳見此鏈接:http://www.itdecent.cn/p/34c23dbd9dc1

創(chuàng)建CellDataSet對(duì)象

library(monocle)

sample_ann <-  sce@meta.data  

sample_ann$celltype=Idents(sce)

#準(zhǔn)備newCellDataSet函數(shù)的輸入文件
gene_ann <- data.frame(gene_short_name = rownames(sce@assays$RNA,  row.names =  rownames(sce@assays$RNA))
pd <- new("AnnotatedDataFrame",
          data=sample_ann)
fd <- new("AnnotatedDataFrame",
          data=gene_ann)
ct=as.data.frame(sce@assays$RNA@counts)

sc_cds <- newCellDataSet(as.matrix(ct), phenoData = pd,featureData =fd,expressionFamily = negbinomial.size(),lowerDetectionLimit=1)

monocle標(biāo)準(zhǔn)流程

sc_cds <- detectGenes(sc_cds, min_expr = 1) 

sc_cds <- sc_cds[fData(sc_cds$num_cells_expressed > 10, ]

cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds) 

disp_table <- dispersionTable(cds)

unsup_clustering_genes <- subset(disp_table,mean_expression >= 0.1)

cds <- setOrderingFilter(cds,unsup_clustering_genes$gene_id)

cds <- reduceDimension(cds, max_components = 2, num_dim = 6,reduction_method = 'tSNE', verbose = T)

cds <- clusterCells(cds, num_clusters = 6) 

2.查找差異基因及時(shí)間軌跡分析

2.1 查找差異基因

根據(jù)自己生物學(xué)意圖,選擇查看哪個(gè)性狀的軌跡

將monocle的分群改為seurat分群
pData(cds)$Cluster=pData(cds)$celltype
table(pData(cds1)$Cluster)

#測(cè)試每個(gè)基因作為偽時(shí)間函數(shù)或根據(jù)指定的其他協(xié)變量的差異表達(dá)。fullModelFormulaStr 選擇按照什么找差異
diff_test_res <- differentialGeneTest(cds,fullModelFormulaStr = "~Cluster")

# 選擇 FDR < 10% 的基因作為差異基因
sig_genes <- subset(diff_test_res, qval < 0.1)

sig_genes<-sig_genes[order(sig_genes$pval),]

head(sig_genes[,c("gene_short_name", "pval", "qval")] ) 

cg=as.character(head(sig_genes$gene_short_name)) 
#  挑選差異最顯著的基因可視化,將一個(gè)或多個(gè)基因的表達(dá)繪制點(diǎn)圖

plot_genes_jitter(cds[cg,], grouping = "Cluster", color_by = "Cluster",nrow= 3,ncol = NULL )

cg2=as.character(tail(sig_genes$gene_short_name)) 
plot_genes_jitter(cds[cg2,],grouping = "Cluster",color_by = "Cluster",nrow= 3,ncol = NULL )

2.2 時(shí)間軌跡分析

第一步: 挑選合適的基因. 有多個(gè)方法,例如提供已知的基因集,這里選取統(tǒng)計(jì)學(xué)顯著的差異基因列表。

ordering_genes <- row.names (subse(diff_test_res, qval < 0.01))

ordering_genes

#準(zhǔn)備聚類基因名單
cds <- setOrderingFilter(cds, ordering_genes)
plot_ordering_genes

第二步: 降維。降維的目的是為了更好的展示數(shù)據(jù)。函數(shù)里提供了很多種方法,不同方法的最后展示的圖都不太一樣, 其中“DDRTree”是Monocle2使用的默認(rèn)方法。

cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')

第三步: 對(duì)細(xì)胞進(jìn)行排序 學(xué)習(xí)描述細(xì)胞正在經(jīng)歷的生物過程的“軌跡”,并計(jì)算每個(gè)細(xì)胞在該軌跡內(nèi)的位置。

cds <- orderCells(cds)

最后: 可視化
注意,偽時(shí)序的解讀需要結(jié)合生物學(xué)意義

plot_cell_trajectory(cds, color_by = "Cluster")  

#繪制一個(gè)或多個(gè)基因的表達(dá)作為偽時(shí)序。
plot_genes_in_pseudotime(cds[cg,],color_by = "Cluster") 

前面根據(jù)差異基因,推斷好了擬時(shí)序,也就是說把差異基因動(dòng)態(tài)化了,后面就可以具體推斷哪些基因隨著擬時(shí)序如何的變化

my_cds_subset=cds
# 擬時(shí)序數(shù)據(jù)和細(xì)胞位置在pData 中
head(pData(my_cds_subset))

# 這個(gè)differentialGeneTest會(huì)比較耗費(fèi)時(shí)間,測(cè)試每個(gè)基因的擬時(shí)序表達(dá)
my_pseudotime_de <- differentialGeneTest(my_cds_subset,fullModelFormulaStr = "~sm.ns(Pseudotime)",cores = 4 )#cores調(diào)用的核心數(shù)

head(my_pseudotime_de)

3.分析結(jié)果的精致可視化

library(Seurat)
library(gplots)
library(ggplot2)
library(monocle)
library(dplyr)

cds=my_cds_subset
phe=pData(cds)
colnames(phe)
library(ggsci)

#繪制最小生成樹
p1=plot_cell_trajectory(cds, color_by = "Cluster")  + scale_color_nejm() 
p1
ggsave('trajectory_by_cluster.pdf')
plot_cell_trajectory(cds, color_by = "celltype")  

p2=plot_cell_trajectory(cds, color_by = "Pseudotime")  
p2
ggsave('trajectory_by_Pseudotime.pdf')

p3=plot_cell_trajectory(cds, color_by = "State")  + scale_color_npg()
p3
ggsave('trajectory_by_State.pdf')
library(patchwork)#拼圖
p1+p2/p3

以qval前六的基因做圖

phe=pData(cds)
head(phe)
table(phe$State,phe$Cluster) 

library(dplyr)  
#%>% 管道函數(shù) 把左邊的值發(fā)送給右邊的表達(dá)式,并作為右件表達(dá)式函數(shù)的第一個(gè)參數(shù)
my_pseudotime_de %>% arrange(qval) %>% head() 

# 保存前六的基因
my_pseudotime_de %>% arrange(qval) %>% head() %>% select(gene_short_name) -> my_pseudotime_gene
my_pseudotime_gene=my_pseudotime_gene[,1]
my_pseudotime_gene

#繪制一個(gè)或多個(gè)基因的擬時(shí)序
plot_genes_in_pseudotime(my_cds_subset[my_pseudotime_gene,])+ scale_color_npg()
ggsave('monocle_top6_pseudotime_by_state.pdf')

將一個(gè)或多個(gè)基因的表達(dá)繪制點(diǎn)圖

plot_genes_jitter(my_cds_subset[my_pseudotime_gene,],grouping = "Cluster",color_by = "Cluster", nrow= 3,ncol = NULL )+ scale_color_nejm()
ggsave('monocle_top6_pseudotime_by_cluster.pdf')

將前50個(gè)隨擬時(shí)序變化的基因做聚類熱圖

# cluster the top 50 genes that vary as a function of pseudotime
my_pseudotime_de %>% arrange(qval) %>% head(50) %>% select(gene_short_name) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster[,1]
gene_to_cluster
colnames(pData(my_cds_subset))
table(pData(my_cds_subset)$Cluster,pData(my_cds_subset)$State) 
ac=pData(my_cds_subset)[c('celltype','State','Pseudotime')]
head(ac)

# 這個(gè)熱圖繪制的并不是純粹的細(xì)胞基因表達(dá)量矩陣,而是被 Pseudotime 好了的100列,50行的矩陣

my_pseudotime_cluster <- plot_pseudotime_heatmap(my_cds_subset[gene_to_cluster,],# num_clusters = 2, # add_annotation_col = ac,show_rownames = TRUE,
return_heatmap = TRUE)

my_pseudotime_cluster

pdf('monocle_top50_heatmap.pdf')
print(my_pseudotime_cluster)
dev.off()

分支表達(dá)分析建模 識(shí)別具有分支依賴性表達(dá)的基因。

#計(jì)算建模分支節(jié)點(diǎn)
BEAM_branch1 <- BEAM(my_cds_subset, branch_point = 1, cores = 4)

head(BEAM_branch1)
colnames(BEAM_branch1)

BEAM_branch1 <- BEAM_branch1[order(BEAM_branch1$qval),]

BEAM_branch1 <- BEAM_branch1[,c("gene_short_name", "pval", "qval")]
head(BEAM_branch1) 
  
BEAM_branch2 <- BEAM(my_cds_subset, branch_point = 2, cores = 20)

BEAM_branch2 <- BEAM_branch2[order(BEAM_branch2$qval),]
BEAM_branch2 <- BEAM_branch2[,c("gene_short_name", "pval", "qval")]
head(BEAM_branch2)

使用全部的基因進(jìn)行繪圖 創(chuàng)建一個(gè)熱圖來展示基因表達(dá)沿兩個(gè)分支的分叉

BEAM_res = BEAM_branch1
my_branched_heatmap <- plot_genes_branched_heatmap( my_cds_subset[row.names(subset(BEAM_res, qval < 1e-4)),], branch_point = 1,num_clusters = 4, e_short_name = TRUE,show_rownames = F,return_heatmap = TRUE)

pdf('monocle_BEAM_branch1_heatmap.pdf')
print(my_branched_heatmap$ph)
dev.off()
BEAM_res = BEAM_branch2

my_branched_heatmap <- plot_genes_branched_heatmap(my_cds_subset[row.names(subset(BEAM_res, qval < 1e-4)),],branch_point = 1,num_clusters = 4, use_gene_short_name = TRUE,show_rownames = F,return_heatmap = TRUE)

pdf('monocle_BEAM_branch2_heatmap.pdf')
print(my_branched_heatmap$ph)
dev.off()
#將所做熱圖的基因和cluster提取出來
head(my_branched_heatmap$annotation_row)
table(my_branched_heatmap$annotation_row$Cluster) 
my_row <- my_branched_heatmap$annotation_row
my_row <- data.frame(cluster = my_row$Cluster,
                     gene = row.names(my_row),
                     stringsAsFactors = FALSE)

head(my_row[my_row$cluster == 3,'gene']) 

my_gene <- row.names(subset(fData(my_cds_subset),
                            gene_short_name %in% head(my_row[my_row$cluster == 2,'gene'])))
my_gene

# 繪制分支處的基因擬時(shí)序軌跡
#分支1
plot_genes_branched_pseudotime(my_cds_subset[my_gene,],
                               branch_point = 1,
                               ncol = 1)
#分支2
plot_genes_branched_pseudotime(my_cds_subset[my_gene,],
                               branch_point = 2,
                               ncol = 1)
分支1
分支2

做熱圖查看擬時(shí)序基因在兩個(gè)亞群的表達(dá)

cds=my_cds_subset
phe=pData(cds)
colnames(phe)
plot_cell_trajectory(cds)
counts = Biobase::exprs(cds)
dim(counts)

library(dplyr) 
my_pseudotime_de %>% arrange(qval) %>% head(100) %>% select(gene_short_name) -> my_pseudotime_gene
my_pseudotime_gene=my_pseudotime_gene[,1]
my_pseudotime_gene


library(pheatmap)
#數(shù)據(jù)中心化和歸一化
n=t(scale(t( counts[my_pseudotime_gene,] ))) # 'scale'可以對(duì)log-ratio數(shù)值進(jìn)行歸一化
n[n>2]=2 
n[n< -2]= -2
n[1:4,1:4]
pheatmap(n,show_colnames =F,show_rownames = F)
ac=phe[,c(10,16,17)]
head(ac)
rownames(ac)=colnames(n)
dim(n)
n[1:4,1:4]
pheatmap(n,show_colnames =F,
         show_rownames = F,
         annotation_col=ac)
od=order(ac$Pseudotime)
pheatmap(n[,od],show_colnames =F,
         show_rownames = F,cluster_cols = F,
         annotation_col=ac[od,])


參考來源

#section 3已更新#「生信技能樹」單細(xì)胞公開課2021_嗶哩嗶哩_bilibili

致謝

I thank Dr.Jianming Zeng(University of Macau), and all the members of his bioinformatics team, biotrainee, for generously sharing their experience and codes.

THE END

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容