5.細(xì)胞類型注釋
多樣本的注釋和單樣本的是一樣的。
用的之前的自動(dòng)注釋
library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
ref <- celldex::BlueprintEncodeData()
save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = sce.all
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
pred.scRNA$pruned.labels
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
save(scRNA,file = "scRNA.Rdata")
p2 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p1+p2

6.分組可視化及組間細(xì)胞比例比較
示例中:54 55 56是實(shí)驗(yàn)組,57 58 59是對(duì)照
如果看不到,需要自己去提取,方法如下:
library(tinyarray)
edat = geo_download("GSE231920")
pd = edat$pd
從右邊點(diǎn)pd可以看到pd的內(nèi)容

6.1 ==和%in%
==表示判斷是否相等,會(huì)把x和y一一比較,相等返回TRUE,不相等返回FALSE。且==只會(huì)把x的第一個(gè)位置和y的第一個(gè)位置比較,x的第二個(gè)位置和y的第二個(gè)位置比較,不會(huì)錯(cuò)位來比較。
%in%可以實(shí)現(xiàn)錯(cuò)位比較!只要x里面的元素在y里面是有的,就可以返回TRUE,不受位置限制。當(dāng)x和y的長度(也就是元素個(gè)數(shù))不一樣時(shí),%in%也同樣好用(這時(shí)候就不考慮==了)。
x <- c('a','b','c')
y <- c('c','b','a')
x == y
## [1] FALSE TRUE FALSE
x %in% y #x里的每個(gè)元素在y里面有沒有
## [1] TRUE TRUE TRUE
y %in% x #y里的每個(gè)元素在x里面有沒有
## [1] TRUE TRUE TRUE
#產(chǎn)生出來的邏輯值會(huì)和%in% 前面的那個(gè)數(shù)據(jù)一一對(duì)應(yīng)。
a = c(1,4,6,2,5)
b = c(2,5,7)
a %in% b #5個(gè)邏輯值,和a一一對(duì)應(yīng),a里的每個(gè)元素在b里面有沒有
## [1] FALSE FALSE FALSE TRUE TRUE
b %in% a #3個(gè)邏輯值,和b一一對(duì)應(yīng),b里的每個(gè)元素在a里面有沒有
## [1] TRUE TRUE FALSE
6.2 ifelse
可以根據(jù)邏輯值是T還是F產(chǎn)生不同的值
age <- 0:28
ifelse(age>18,'+','-')
## [1] "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-" "-"
## [20] "+" "+" "+" "+" "+" "+" "+" "+" "+" "+"
#結(jié)合前面的%in%
ifelse(a %in% b,"在","不在")
## [1] "不在" "不在" "不在" "在" "在"
6.3 添加分組信息
scRNA$group = ifelse(scRNA$orig.ident %in% c("sample1","sample2","sample3"), "treat","control")
DimPlot(scRNA, reduction = "umap", group.by = "group")
這里寫scRNAgroup其實(shí)是一樣的,都會(huì)給meta.data添加一列,名為group。

6.4 計(jì)算每個(gè)亞群的細(xì)胞數(shù)量和占全部細(xì)胞的比例
# 每種細(xì)胞的數(shù)量和比例
cell_counts <- table(Idents(scRNA))
cell.all <- cbind(cell_counts = cell_counts,
cell_Freq = round(prop.table(cell_counts)*100,2))
#各組中每種細(xì)胞的數(shù)量和比例
cell.num.group <- table(Idents(scRNA), scRNA$group)
cell.freq.group <- round(prop.table(cell.num.group, margin = 2) *100,2)
cell.all = cbind(cell.all,cell.num.group,cell.freq.group)
cell.all = cell.all[,c(1,3,4,2,5,6)]
colnames(cell.all) = paste(rep(c("all","control","treat"),times = 2),
rep(c("count","freq"),each = 3),sep = "_")
cell.all
## all_count control_count treat_count all_freq control_freq treat_freq
##CD4+ T-cells 1007 283 724 24.98 13.88 36.35
##Fibroblasts 762 664 98 18.90 32.56 4.92
##B-cells 667 183 484 16.55 8.97 24.30
##CD8+ T-cells 547 204 343 13.57 10.00 17.22
##Neutrophils 378 260 118 9.38 12.75 5.92
##Monocytes 311 208 103 7.72 10.20 5.17
##Adipocytes 244 164 80 6.05 8.04 4.02
##NK cells 87 51 36 2.16 2.50 1.81
##Endothelial cells 28 22 6 0.69 1.08 0.30
7.差異分析
7.1 找某一細(xì)胞內(nèi)部的組間差異基因
sub.obj = subset(scRNA,idents = "NK cells")
dfmarkers <- FindMarkers(scRNA, ident.1 = "treat", group.by = "group",min.pct = 0.25, logfc.threshold = 0.25,verbose = F)
head(dfmarkers)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## XIST 0.000000e+00 -13.156832 0.000 0.655 0.000000e+00
## RPS4Y1 0.000000e+00 12.921082 0.650 0.000 0.000000e+00
## DDX3Y 0.000000e+00 12.553759 0.531 0.000 0.000000e+00
## RPS26 2.582808e-301 1.878636 0.905 0.733 6.290945e-297
## IGHG4 2.033975e-197 6.489441 0.444 0.037 4.954154e-193
## CFD 2.462775e-197 -4.424015 0.142 0.575 5.998581e-193
7.2 找conserved marker基因
西湖鏡像
options(BioC_mirror="https://mirrors.westlake.edu.cn/bioconductor")
if(!require("multtest"))BiocManager::install('multtest')
if(!require("metap"))install.packages('metap')
sub.markers <- FindConservedMarkers(scRNA, ident.1 = "NK cells", grouping.var = "group", min.pct = 0.25, logfc.threshold = 0.25,verbose = F)
head(sub.markers)
FindConservedMarkers和前面的FindMarkers 不大一樣。它結(jié)合了“分組”和“細(xì)胞類型”,是找出不同細(xì)胞類型之間在不同條件(treat和control)下都有差異的基因。結(jié)合上面的代碼來看,就是找出在control組內(nèi)部NK和其他細(xì)胞的差異基因,再找出treat組內(nèi)部NK和其他細(xì)胞的差異基因,二者取交集,就是NK和其他細(xì)胞的差異保守的基因,即conservedmarkers。

7.3 組間比較的氣泡圖
markers.to.plot = c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5",
"CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
"GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "PRSS57") #一組感興趣的基因
#如果idents有NA會(huì)報(bào)錯(cuò)https://github.com/satijalab/seurat/issues/8772
#scRNA <- subset(scRNA, seurat_annotations %in% na.omit(scRNA$seurat_annotations))
DotPlot(scRNA, features = markers.to.plot, cols = c("blue", "red"),
dot.scale = 8, split.by = "group") +
RotatedAxis()

7.4 FeaturePlot
一行是一個(gè)基因,一列是一個(gè)分組
FeaturePlot(scRNA, features = c("CD3D", "GNLY", "IFI6", "PRSS57"), split.by = "group", max.cutoff = 3, cols = c("grey",
"red"), reduction = "umap")

7.5 VlnPlot
把每種細(xì)胞類型分成兩組來展示
plots <- VlnPlot(scRNA, features = c("LYZ", "ISG15", "MS4A1", "IFI6"),
split.by = "group", group.by = "seurat_annotations",
pt.size = 0, combine = FALSE)
library(patchwork)
wrap_plots(plots = plots, ncol = 2)

8.偽bulk 轉(zhuǎn)錄組差異分析
8.1 單細(xì)胞樣本整合為偽bulk轉(zhuǎn)錄組樣本
每個(gè)組要有多個(gè)樣本才能做。
AggregateExpression是把單細(xì)胞數(shù)據(jù)整合為常規(guī)轉(zhuǎn)錄組數(shù)據(jù)的方式。group.by = c("seurat_annotations","orig.ident", "group")就是同一細(xì)胞類型、同一樣本、同一分組的細(xì)胞匯總在一起成為一個(gè)“樣本”。
bulk <- AggregateExpression(scRNA, return.seurat = T, slot = "counts", assays = "RNA", group.by = c("seurat_annotations","orig.ident", "group"))
colnames(bulk)#整合成了多個(gè)“樣本”
可以只取出一種細(xì)胞,然后找treat和control之間的差異基因,這和常規(guī)轉(zhuǎn)錄組的差異分析是一樣的。
sub <- subset(bulk, seurat_annotations == "Monocytes")
colnames(sub)
##[1] "Monocytes_sample1_treat" "Monocytes_sample2_treat" "Monocytes_sample3_treat"
##[4] "Monocytes_sample4_control" "Monocytes_sample5_control" "Monocytes_sample6_control"
Idents(sub) <- "group"
de_markers <- FindMarkers(sub, ident.1 = "treat", ident.2 = "control", slot = "counts", test.use = "DESeq2",
verbose = F)
de_markers$gene <- rownames(de_markers)
8.2 邏輯值連接符號(hào)
&是AND,|(shift+回車上方的)是OR
8.3 火山圖
k1 = de_markers$avg_log2FC< -1 & de_markers$p_val <0.01
k2 = de_markers$avg_log2FC> 1 & de_markers$p_val <0.01
de_markers$change <- ifelse(k1,"down",ifelse(k2,"up","not"))
#滿足k1就是down,不滿足的話再看是否滿足k2,滿足k2就是up,不滿足就是not。
library(ggplot2)
ggplot(de_markers, aes(avg_log2FC, -log10(p_val),color = change)) +
geom_point(size = 2, alpha = 0.5) +
geom_vline(xintercept = c(1,-1),linetype = 4)+
geom_hline(yintercept = -log10(0.01),linetype = 4)+
scale_color_manual(values = c("#2874C5", "grey", "#f87669"))+
theme_bw() +
ylab("-log10(unadjusted p-value)")
