本筆記來源于B站@生信技能樹-jimmy;學(xué)習(xí)視頻鏈接: 「生信技能樹」單細(xì)胞數(shù)據(jù)挖掘
- Empty and double droplets
load("../../scRNA.Rdata")
library(Seurat)
# 1.1 empty droplet
# BiocManager::install("DropletUtils")
library(DropletUtils)
e.out <- emptyDrops(GetAssayData(scRNA,slot="counts",assay="RNA"))
# Error in testEmptyDrops(m, lower = lower, ...) :
# no counts available to estimate the ambient profile
## https://support.bioconductor.org/p/123554/#123562
# 如上回答所說,empty droplet往往在第一步就已經(jīng)過濾掉了,而一般上傳到GEO的也都是過濾掉空液滴的。
# 1.2 double droplet
# https://osca.bioconductor.org/doublet-detection.html
# BiocManager::install("scran")
head(scRNA@meta.data)
library(scran)
# GetAssayData(scRNA,slot="counts",assay="RNA")[1:8,1:4]
?doubletCluster #檢查有無double droplet聚在一起的類
db.test <- doubletCluster(GetAssayData(scRNA,slot="counts",assay="RNA"),
clusters=scRNA@meta.data$seurat_clusters)
head(db.test)
table(scRNA@meta.data$seurat_clusters)
library(scater)
chosen.doublet <- rownames(db.test)[isOutlier(db.test$N,
type="lower", log=TRUE)]
chosen.doublet #結(jié)果顯示沒有:character(0)
# 還有其它多種方法
2.周期判斷
# 在挑選hvg gene那一步里,可能會找到一些細(xì)胞周期相關(guān)基因;
# 它們會導(dǎo)致細(xì)胞聚類發(fā)生一定的偏移,即相同類型的細(xì)胞在聚類時(shí)會因?yàn)榧?xì)胞周期的不同而分開。
# 因此有必要查看是否有細(xì)胞周期相關(guān)基因的存在;若有,則剔除
# 細(xì)胞周期有關(guān)基因
?cc.genes
head(c(cc.genes$s.genes,cc.genes$g2m.genes))
length(c(cc.genes$s.genes,cc.genes$g2m.genes))
# [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
# 97
# 查看我們選擇的高變基因中有哪些細(xì)胞周期相關(guān)基因,并打分
CaseMatch(c(cc.genes$s.genes,cc.genes$g2m.genes),VariableFeatures(scRNA))

SingleCell7_1.jpeg
# 在scRNA@meta.data中添加S.Score、G2M.Score和Phase三列有關(guān)細(xì)胞周期的信息。
g2m_genes = cc.genes$g2m.genes
g2m_genes = CaseMatch(search = g2m_genes, match = rownames(scRNA))
s_genes = cc.genes$s.genes
s_genes = CaseMatch(search = s_genes, match = rownames(scRNA))
scRNA <- CellCycleScoring(object=scRNA, g2m.features=g2m_genes, s.features=s_genes)
head(scRNA@meta.data) #此時(shí)可以看到,meta.data增加了3列
# 觀察細(xì)胞周期相關(guān)基因是否影響聚類
scRNA <- RunPCA(scRNA, features = c(s_genes, g2m_genes))
p1 <- DimPlot(scRNA, reduction = "pca", group.by = "Phase")
p1
ggsave("../../out/3.2cell-cycle.pdf", plot = p1)
# 影響不大,基本重合在一起了

SingleCell7_2.jpeg