分析目的
- 不同于PCA聚類,geneNMF算法更適合局部聚類,比如epithelial cell做進(jìn)一步的亞群分類。此外,類似的軟件還有Hotspot, NMF等;
- 分出module可根據(jù)其代表性gene做功能富集分析,結(jié)果更加聚焦;
- 代表性gene也是后續(xù)靶向分析的gene,與cell function相關(guān);
- 不同的module可作為不同的cluster,用作后續(xù)擬時(shí)序等分析。
與harmony等聚類方法不同:
- 它并不是聚類,比如像harmony一樣可以畫Umap聚類圖;
- 它可以給出共表達(dá)的gene module,這點(diǎn)用harmony是做不到的;
-
二者的算法有所不同
image.png
代碼及結(jié)果展示
1.#加載數(shù)據(jù)
load("/data/wanghao/yangjie_project/Project/1.OC_multi_sites/2.analysis/2.2_cell_cluster_merge/scdata1.Rdata")
scdata3 <- scdata1[,scdata1@meta.data$celltype2 == "Meso-like Fibroblast"]
2.#設(shè)置默認(rèn)變量
Idents(scdata3) = scdata3$celltype2
DefaultAssay(scdata3) <- 'RNA'
3.#run NMF
seu.list <- SplitObject(scdata3, split.by = "orig.ident")
geneNMF.programs <- multiNMF(seu.list, assay="RNA",
k=2:10,
min.exp = 0.05,
nfeatures=2000)
4.#merge
geneNMF.metaprograms <- getMetaPrograms(geneNMF.programs,
metric = "cosine",
weight.explained = 0.5,
nMP=9,
max.genes=200)
5.#check
geneNMF.metaprograms$metaprograms.metrics
6.#plot
pdf('./geneNMF_program.pdf')
GeneNMF::plotMetaPrograms(geneNMF.metaprograms,similarity.cutoff = c(0.1,1))
dev.off()
7.#module genes
MP_genes <- unlist(geneNMF.metaprograms$metaprograms.genes)MP <- rep(names(geneNMF.metaprograms$metaprograms.genes), lengths(geneNMF.metaprograms$metaprograms.genes))
MP_genes <- data.frame(gene = MP_genes, MPs = MP)
聚類的module再做功能富集分析
run GO enrichment
1.#GO enrich功能富集
library(clusterProfiler)
Gene_ID <- bitr(MP_genes$gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")#構(gòu)建文件并分析
data <- merge(Gene_ID,MP_genes,by.x='SYMBOL',by.y='gene')
data_GO <- compareCluster(
ENTREZID~MPs,
data=data,
fun="enrichGO",
OrgDb="org.Hs.eg.db",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05
)
2.#去除冗余terms
data_GO_sim <- simplify(data_GO,
cutoff=0.7,
by="p.adjust",
select_fun=min)
3.#作圖及輸出表格
p <- dotplot(data_GO_sim, showCategory=5,font.size = 8)
ggsave(p,filename='./MP_enrich.png',height=10,width=8) #需要自己挑選與課題相關(guān)的通路
data_GO_sim_fil <- data_GO_sim@compareClusterResult %>% dplyr::filter(p.adjust <=0.05)
library(openxlsx)
write.xlsx(data_GO_sim_fil,'Fibro_MP_GOenrich.xlsx')
run GSEA
library(msigdbr)
library(fgsea)
top_p <- lapply(geneNMF.metaprograms$metaprograms.genes, function(program) {
runGSEA(program, universe=rownames(scdata3), category = "C5", subcategory = "GO:BP")
})
