本筆記來源于B站@生信技能樹-jimmy;學(xué)習(xí)視頻鏈接: 「生信技能樹」單細胞數(shù)據(jù)挖掘
- 構(gòu)建seurat對象、質(zhì)控、繪圖
# 2.1 構(gòu)建seurat對象,質(zhì)控
# In total, 2,343 cells from tumor cores were included in this analysis.
# quality control standards:
# 1) genes detected in < 3 cells were excluded; 篩選基因
# 2) cells with < 50 total detected genes were excluded; 篩選細胞
# 3) cells with ≥ 5% of mitochondria-expressed genes were excluded. 篩選細胞
sessionInfo()
library("Seurat")
?CreateSeuratObject
sce.meta <- data.frame(Patient_ID=group$Patient_ID,
row.names = group$sample)
head(sce.meta)
table(sce.meta$Patient_ID)
SingleCell2_1.JPG
# 函數(shù) CreateSeuratObject 有多種多樣的執(zhí)行方式
scRNA = CreateSeuratObject(counts=a.filt,
meta.data = sce.meta,
min.cells = 3,
min.features = 50)
# counts:a matrix-like object with unnormalized data with cells as columns and features as rows
# meta.data:Additional cell-level metadata to add to the Seurat object
# min.cells: features detected in at least this many cells.
# min.features:cells where at least this many features are detected.
head(scRNA@meta.data)
# nCount_RNA:the number of cell total counts
# nFeature_RNA:the number of cell's detected gene
summary(scRNA@meta.data)
scRNA@assays$RNA@counts[1:4,1:4]
# 可以看到,之前的counts矩陣存儲格式發(fā)生了變化:4 x 4 sparse Matrix of class "dgCMatrix"
SingleCell2_2.JPG
dim(scRNA)
# 20047 2342 -- 僅過濾掉1個細胞
# 接下來根據(jù)線粒體基因表達篩選低質(zhì)量細胞
# Calculate the proportion of transcripts mapping to mitochondrial genes
table(grepl("^MT-",rownames(scRNA)))
# > FALSE
# > 20047 沒有線粒體基因
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")
# 在meta.data中增加了一列“percent.mt”
head(scRNA@meta.data)
summary(scRNA@meta.data)
# 結(jié)果顯示沒有線粒體基因,因此這里過濾也就沒有意義,但是代碼留在這里
# 萬一大家的數(shù)據(jù)里面有線粒體基因,就可以如此這般進行過濾啦。
pctMT=5 # ≥ 5% of mitochondria-expressed genes
scRNA <- subset(scRNA, subset = percent.mt < pctMT)
dim(scRNA)
table(grepl("^ERCC-",rownames(scRNA)))
# >FALSE TRUE
# >19961 86 #發(fā)現(xiàn)是有ERCC基因
# External RNA Control Consortium,是常見的已知濃度的外源RNA分子spike-in的一種
# 指標(biāo)含義類似線粒體含量,ERCC含量大,則說明total sum變小
scRNA[["percent.ERCC"]] <- PercentageFeatureSet(scRNA, pattern = "^ERCC-")
head(scRNA@meta.data)
summary(scRNA@meta.data)
rownames(scRNA)[grep("^ERCC-",rownames(scRNA))]
# 可以看到有不少ERCC基因
sum(scRNA$percent.ERCC< 40)
# 較接近原文過濾后的數(shù)量2149,但感覺條件有點寬松了
# 網(wǎng)上看了相關(guān)教程,一般ERCC占比不高于10%!
sum(scRNA$percent.ERCC< 10) #就只剩下460個cell,明顯低于文獻中的數(shù)量
pctERCC=40
scRNA <- subset(scRNA, subset = percent.ERCC < pctERCC)
dim(scRNA)
# >20047 2142 #原文為19752 2149
dim(a.filt)
# >23460 2343 未過濾前
# 2.2 可視化
# 圖A:觀察不同組cell的counts、feature分布
col.num <- length(unique(scRNA@meta.data$Patient_ID))
library(ggplot2)
p1_1.1 <- VlnPlot(scRNA,
features = c("nFeature_RNA"),
group.by = "Patient_ID",
cols =rainbow(col.num)) +
theme(legend.position = "none") +
labs(tag = "A")
p1_1.1
p1_1.2 <- VlnPlot(scRNA,
features = c("nCount_RNA"),
group.by = "Patient_ID",
cols =rainbow(col.num)) +
theme(legend.position = "none")
p1_1.2
p1_1 <- p1_1.1 | p1_1.2
p1_1

Fig1.A.jpeg
# Quality Control figure
VlnPlot(scRNA,
features = c("nFeature_RNA","nCount_RNA","percent.ERCC"))

QualityControl.jpeg
#圖B:nCount_RNA與對應(yīng)的nFeature_RNA關(guān)系
p1_2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",
group.by = "Patient_ID",pt.size = 1.3) +
labs(tag = "B")
p1_2

Fig1.B.jpeg
FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.ERCC")
sessionInfo()

FeatureScatter.jpeg