單細(xì)胞轉(zhuǎn)錄組測(cè)序---多樣本整合分析完整代碼

最近做了天然胸腺單細(xì)胞測(cè)序結(jié)果的分析,我在GEO數(shù)據(jù)庫中下載了10個(gè)樣本(GSE182135),完整的的代碼如下,做一個(gè)記錄,還在摸索階段,如果有不對(duì)的地方也請(qǐng)大佬們指正。
后續(xù)想再寫一下每個(gè)步驟的知識(shí)點(diǎn),也有助于自己的學(xué)習(xí)梳理。

1. 首先將十個(gè)樣本的測(cè)序數(shù)據(jù)下載到本地。過濾后合并

#GEO:GSE182135, 10個(gè)樣本合并
library("scde")
library("DEsingle")
library("Seurat")
library("dplyr")
library("magrittr")
library("SingleCellExperiment")
library("patchwork")
library("cowplot")
library("stringr")
library("tidyverse")
library("export")

# step1:導(dǎo)入10個(gè)樣本數(shù)據(jù),并過濾
setwd("E:/scRNA-seq/20221010/原始數(shù)據(jù)/1")
rm(list=ls())#清空環(huán)境變量
#1
E9_5_rep2 <- Read10X( data.dir="E9_5_rep2_2018AUG07/")
#將矩陣轉(zhuǎn)換為Seurat對(duì)象
E9_5_rep2 <- CreateSeuratObject(counts = E9_5_rep2, project = "E9_5_rep2", min.cells = 3, min.features = 200)
E9_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E9_5_rep2, pattern = "^MT-") #計(jì)算線粒體RNA比例
VlnPlot(E9_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) #數(shù)值分布的小提琴圖
## 過濾
E9_5_rep2 <- subset(E9_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E9_5_rep2)
#2
E9_5_rep1 <- Read10X( data.dir="E9_5_rep1_2018JULY18/")
E9_5_rep1 <- CreateSeuratObject(counts = E9_5_rep1, project = "E9_5_rep1", min.cells = 3, min.features = 200)
E9_5_rep1[["percent.mt"]] <- PercentageFeatureSet(E9_5_rep1, pattern = "^MT-")
VlnPlot(E9_5_rep1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E9_5_rep1 <- subset(E9_5_rep1, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E9_5_rep1)
#3
E10_5_rep7 <- Read10X( data.dir="E10_5_rep7_2018AUG16/")
E10_5_rep7 <- CreateSeuratObject(counts = E10_5_rep7, project = "E10_5_rep7", min.cells = 3, min.features = 200)
E10_5_rep7[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep7, pattern = "^MT-")
VlnPlot(E10_5_rep7, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E10_5_rep7 <- subset(E10_5_rep7, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep7)
#4
E10_5_rep6 <- Read10X( data.dir="E10_5_rep6_2018SEP22/")
E10_5_rep6 <- CreateSeuratObject(counts = E10_5_rep6, project = "E10_5_rep6", min.cells = 3, min.features = 200)
E10_5_rep6[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep6, pattern = "^MT-")
VlnPlot(E10_5_rep6, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E10_5_rep6 <- subset(E10_5_rep6, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep6)
#5
E10_5_rep5 <- Read10X( data.dir="E10_5_rep5_2018AUG16/")
E10_5_rep5 <- CreateSeuratObject(counts = E10_5_rep5, project = "E10_5_rep5", min.cells = 3, min.features = 200)
E10_5_rep5[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep5, pattern = "^MT-")
VlnPlot(E10_5_rep5, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E10_5_rep5 <- subset(E10_5_rep5, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep5)
#6
E11_5_rep3 <- Read10X( data.dir="E11_5_rep3_2018SEP22/")
E11_5_rep3 <- CreateSeuratObject(counts = E11_5_rep3, project = "E11_5_rep3", min.cells = 3, min.features = 200)
E11_5_rep3[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep3, pattern = "^MT-")
VlnPlot(E11_5_rep3, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E11_5_rep3 <- subset(E11_5_rep3, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E11_5_rep3)
#7
E11_5_rep2B <- Read10X( data.dir="E11_5_rep2B_2018JULY27/")
E11_5_rep2B <- CreateSeuratObject(counts = E11_5_rep2B, project = "E11_5_rep2B", min.cells = 3, min.features = 200)
E11_5_rep2B[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep2B, pattern = "^MT-")
VlnPlot(E11_5_rep2B, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E11_5_rep2B <- subset(E11_5_rep2B, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E11_5_rep2B)
#8
E11_5_rep2 <- Read10X( data.dir="E11_5_rep2_2018JUNE26/")
E11_5_rep2 <- CreateSeuratObject(counts = E11_5_rep2, project = "E11_5_rep2", min.cells = 3, min.features = 200)
E11_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep2, pattern = "^MT-")
VlnPlot(E11_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E11_5_rep2 <- subset(E11_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E11_5_rep2)
#9
E12_5_rep2 <- Read10X( data.dir="E12_5_rep2_2018JUNE28/")
E12_5_rep2 <- CreateSeuratObject(counts = E12_5_rep2, project = "E12_5_rep2", min.cells = 3, min.features = 200)
E12_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E12_5_rep2, pattern = "^MT-")
VlnPlot(E12_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E12_5_rep2 <- subset(E12_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E12_5_rep2)
#10
E12_5_rep1 <- Read10X( data.dir="E12_5_rep1_2018JUNE28/")
E12_5_rep1 <- CreateSeuratObject(counts = E12_5_rep1, project = "E12_5_rep1", min.cells = 3, min.features = 200)
E12_5_rep1[["percent.mt"]] <- PercentageFeatureSet(E12_5_rep1, pattern = "^MT-")
VlnPlot(E12_5_rep1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
E12_5_rep1 <- subset(E12_5_rep1, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E12_5_rep1)

多樣本整合分析

##########################################################
# 提取每個(gè)數(shù)據(jù)集中篩選到的高變基因
alldata.list<- c(E9_5_rep2,E9_5_rep1,E10_5_rep7,E10_5_rep6,E10_5_rep5,E11_5_rep3,E11_5_rep2B,E11_5_rep2,E12_5_rep2,E12_5_rep1)
hvgs_per_dataset <- lapply(alldata.list, function(x) {
  x@assays$RNA@var.features
})
head(hvgs_per_dataset)
#install.packages("venn") #安裝
library(venn) #導(dǎo)
# 繪制venn圖查看不同樣本中高變基因的重疊情況
venn::venn(hvgs_per_dataset, opacity = 0.4, zcolor = (scales::hue_pal())(3), cexsn = 1, 
           cexil = 1, lwd = 1, col = "white", frame = F, borders = NA)

### Integration ----
testAB.anchors <- FindIntegrationAnchors(object.list = list(E9_5_rep2,E9_5_rep1,E12_5_rep2,E12_5_rep1,E11_5_rep3,E11_5_rep2B,E11_5_rep2,E10_5_rep7,E10_5_rep6,E10_5_rep5), dims = 1:30)
testAB.integrated <- IntegrateData(anchorset = testAB.anchors, dims = 1:30, new.assay.name = "CCA")
#這一步之后就多了一個(gè)整合后的assay(原先有一個(gè)RNA的assay),整合前后的數(shù)據(jù)分別存儲(chǔ)在這兩個(gè)assay中
testAB.integrated ##57717samples
names(testAB.integrated@assays)
## [1] "RNA" "CCA"
# by default, Seurat now sets the integrated assay as the default assay, so any
# operation you now perform will be on the ingegrated data.
testAB.integrated@active.assay
## [1] "CCA"

dim(testAB.integrated[["RNA"]]@counts)
dim(testAB.integrated[["RNA"]]@data)
dim(testAB.integrated[["CCA"]]@counts) #因?yàn)槭菑腞NA這個(gè)assay的data矩陣開始整合的,所以這個(gè)矩陣為空
dim(testAB.integrated[["CCA"]]@data)
##后續(xù)仍然是標(biāo)準(zhǔn)流程,基于上面得到的整合data矩陣
# Run the standard workflow for visualization and clustering
testAB.integrated <- ScaleData(testAB.integrated, features = rownames(testAB.integrated))
testAB.integrated <- RunPCA(testAB.integrated, npcs = 100, verbose = FALSE)#npcs : 要計(jì)算和存儲(chǔ)的PC總數(shù)(默認(rèn)為50
ElbowPlot(testAB.integrated)
DimHeatmap(testAB.integrated, dims = 1:100, cells = 500, balanced = TRUE)
library(data.table)
saveRDS(testAB.integrated,file = "1011seurat-all.rds")
##或者直接讀入之前保存的數(shù)據(jù)
testAB.integrated <- readRDS(file = "1011seurat-all.rds")
class(testAB.integrated)

testAB.integrated <- FindNeighbors(testAB.integrated, dims = 1:50)
testAB.integrated <- FindClusters(testAB.integrated, resolution = 2)
testAB.integrated <- RunUMAP(testAB.integrated, dims = 1:50)
testAB.integrated <- RunTSNE(testAB.integrated, dims = 1:50)

##
#library(cowplot)
#library(stringr)
#library(tidyverse)
testAB.integrated$patient=str_replace(testAB.integrated$orig.ident,"_.*$","")

#后面兩個(gè)參數(shù)用來添加文本標(biāo)簽
p1 <- DimPlot(testAB.integrated, reduction = "tsne", group.by = "orig.ident", pt.size=0.5) 
p1
p2 <- DimPlot(testAB.integrated, reduction = "tsne", group.by = "ident", pt.size=0.5, label = TRUE,repel = TRUE) 
p2
fig_umap <- plot_grid(p1, p2, labels = c('orig.ident','ident'),align = "v",ncol = 2) 
fig_umap
ggsave(filename = "0916整合tsne.pdf", plot = fig_umap, device = 'pdf', width = 30, height = 15, units = 'cm')

p3 <- DimPlot(testAB.integrated, reduction = "umap", group.by = "orig.ident", pt.size=0.5) 
p3
p4 <- DimPlot(testAB.integrated, reduction = "umap", group.by = "ident", pt.size=0.5, label = TRUE,repel = TRUE) 
p4
fig_umap <- plot_grid(p3, p4, labels = c('patient','ident'),align = "v",ncol = 2) 
fig_umap
ggsave(filename = "0916整合umap.pdf", plot = fig_umap, device = 'pdf', width = 30, height = 15, units = 'cm')

saveRDS(testAB.integrated,file = "0916umap.rds")
testAB.integrated <- readRDS(file = "0916umap.rds")

區(qū)分一下常見marker,文章中的marker

celltype_marker=c("Pax9", "Neurod1","Hoxa3","Pdgfra"," Dlx5","Esam",
                  "Epcam","Rxrg","Sox10","Fstl1")
##做小提琴圖
VlnPlot(testAB.integrated,features = celltype_marker,pt.size = 0,ncol = 2)

table(testAB.integrated@meta.data$seurat_clusters)
#####################################################################

提取Pax9+Epcam+的單細(xì)胞亞群重新分析

pax9_sce1 = testAB.integrated[,!(testAB.integrated@meta.data$seurat_clusters %in% c(6,26,39,40,43,45))]
pax9_sce1##53693
saveRDS(pax9_sce1,file = "0916刪減umap.rds")
pax9_sce1 <- readRDS(file = "0916刪減umap.rds")

cd4_sce1=pax9_sce1

cd4_sce1 <- ScaleData(cd4_sce1, features = rownames(cd4_sce1))
cd4_sce1 <- RunPCA(cd4_sce1, features = VariableFeatures(cd4_sce1),npcs = 100)
DimHeatmap(cd4_sce1, dims =1:30, cells = 500, balanced = TRUE)
ElbowPlot(cd4_sce1, ndims =100)##碎石圖
cd4_sce1 <- FindNeighbors(cd4_sce1, dims = 1:60)
cd4_sce1 <- FindClusters(cd4_sce1, resolution = 0.5)
cd4_sce1 <- RunUMAP(cd4_sce1, dims = 1:60)
cd4_sce1 <- RunTSNE(cd4_sce1, dims = 1:60)
library(stringr)
library(tidyverse)
library(export)
library(cowplot)
p1 <- DimPlot(cd4_sce1, reduction = "tsne", group.by = "orig.ident", pt.size=0.5)
p2 <- DimPlot(cd4_sce1, reduction = "tsne", group.by = "ident",   pt.size=0.5, label = TRUE,repel = TRUE)

fig_tsne <- plot_grid(p1, p2, labels = c('orig.ident','ident'),align = "v",ncol = 2)
fig_tsne
ggsave(filename = "0916刪減_tsne.pdf", plot = fig_tsne, device = 'pdf', width = 27, height = 12, units = 'cm')

p3 <- DimPlot(cd4_sce1, reduction = "umap", group.by = "orig.ident", pt.size=0.5) 
p3
p4 <- DimPlot(cd4_sce1, reduction = "umap", group.by = "ident", pt.size=0.5, label = TRUE,repel = TRUE) 
p4
fig_umap <- plot_grid(p3, p4, labels = c('orig.ident','ident'),align = "v",ncol = 2) 
fig_umap
ggsave(filename = "0916刪減_umap.pdf", plot = fig_umap, device = 'pdf', width = 30, height = 15, units = 'cm')

saveRDS(cd4_sce1,file = "0916刪減-60pctest.rds")
cd4_sce1 <- readRDS(file = "0916刪減-60pctest.rds")

###########################################################

細(xì)胞類型注釋

####區(qū)分一下常見marker,文章中的marker
PPE_marker <- c("Eya1","Pax9","Hoxa3","Foxn1","Bmp4","Six1","Pax1","Tbx1")
##做小提琴圖
VlnPlot(cd4_sce1,features = PPE_marker,pt.size = 0,ncol = 2)
##鑒定完大類,把注釋信息添加上去
table(cd4_sce1@meta.data$seurat_clusters)
Third_pharyngeal_pouch <- c(0,1,5,9,16,20)
Unknown <- c(2,3,4,6,7,8,10,11,12,13,14,15,17,18,19,21,22,23,24)
current.cluster.ids <- c(Third_pharyngeal_pouch,
                         Unknown)
new.cluster.ids <- c(rep("Third pharyngeal pouch",length(Third_pharyngeal_pouch)),
                     rep("Unknown",length(Unknown)))

cd4_sce1@meta.data$celltype <- plyr::mapvalues(x = as.integer(as.character(cd4_sce1@meta.data$seurat_clusters)), 
                                          from = current.cluster.ids, to = new.cluster.ids)

##新的cd4_sce1@meta.data是這樣的
##統(tǒng)計(jì)一下各種細(xì)胞的數(shù)目

head(cd4_sce1@meta.data)
##,畫最后的tsne圖

plotCB=as.data.frame(cd4_sce1@meta.data%>%filter(seurat_clusters!="13" &
                                                   seurat_clusters!="15"))[,"CB"]
colors <-  c("#FFD700","#DCDCDC")#
DimPlot(cd4_sce1, reduction = "tsne", group.by = "celltype", pt.size=0.5,cols = colors,label = TRUE,repel = TRUE)

p1 <- DimPlot(cd4_sce1, reduction = "tsne", group.by = "orig.ident", pt.size=0.5)
p2 <- DimPlot(cd4_sce1, reduction = "tsne", group.by = "celltype", pt.size=0.5,cols = colors,repel = TRUE)

fig_tsne <- plot_grid(p1, p2, labels = c('orig.ident','celltype'),align = "v",ncol = 2)
fig_tsne
ggsave(filename = "注釋_tsne.pdf", plot = fig_tsne, device = 'pdf', width = 30, height = 12, units = 'cm')


saveRDS(cd4_sce1,file = "注釋.rds") #保存test.seu對(duì)象,下次可以直接調(diào)用,不用重復(fù)以上步驟,cells = plotCB
cd4_sce1 <- readRDS(file ="E:/scRNA-seq/20221010/原始數(shù)據(jù)/1/注釋.rds")

提取3PPE細(xì)胞群

PPE <- cd4_sce1[,(cd4_sce1@meta.data$seurat_clusters %in% c(0,1,5,9,16,20))]
table(PPE@meta.data$seurat_clusters)
Idents(PPE) <- "orig.ident"  ##細(xì)胞身份,屬于哪個(gè)亞群

小提琴圖

vln_df = PPE[,PPE@meta.data$orig.ident %in% c("E9_5_rep1",'E10_5_rep7',"E11_5_rep2","E12_5_rep2")]

table(vln_df@meta.data$orig.ident)
###"Eya1","Six1","Pax9","Hoxa3","Epcam"
marker=c("Eya1","Six1","Pax9","Epcam","Hoxa3")
VlnPlot(vln_df,features = marker,pt.size = 0,adjust = 1,ncol = 2)
#我們知道一個(gè)關(guān)鍵的參數(shù)scale = "width"導(dǎo)致了這種局面,其他沒有出現(xiàn)小提琴的應(yīng)該是零值比例太多
#數(shù)據(jù)過濾。
VlnPlot(subset(vln_df,Hoxa3 > 0.2 ), "Hoxa3",adjust = .5,pt.size=0)+ theme_bw()+NoLegend()
#改腰圍adjust = .5

p1 <- VlnPlot(subset(vln_df,Hoxa3 > 0.2 ),"Hoxa3",adjust = .8,pt.size=0)+ theme_bw()+geom_boxplot(width=.2,col="black",fill="white")+  NoLegend()
p2 <- VlnPlot(subset(vln_df,Eya1 > 0.2 ),"Eya1",adjust = .8,pt.size=0)+ theme_bw()+geom_boxplot(width=.2,col="black",fill="white")+  NoLegend()
p3 <- VlnPlot(subset(vln_df,Six1 > 0.2 ),"Six1",adjust = .8,pt.size=0)+ theme_bw()+geom_boxplot(width=.2,col="black",fill="white")+  NoLegend()
p4 <- VlnPlot(subset(vln_df,Epcam > 0.2 ),"Epcam",adjust = .8,pt.size=0)+ theme_bw()+geom_boxplot(width=.2,col="black",fill="white") +NoLegend()
p5 <- VlnPlot(subset(vln_df,Pax9 > 0.2 ),"Pax9",adjust = .8,pt.size=0)+ theme_bw()+geom_boxplot(width=.2,col="black",fill="white") +NoLegend()

library(patchwork)##拼圖
#p1+p2+p3+p4+p5+plot_layout(nrow=5)
(p1|p2)/(p3|p4)/(p5|plot_spacer())
ggsave(filename = "3PPE小提琴.pdf", width = 22, height = 20, 
       device = 'pdf', units = 'cm')

批量畫圖-單基因的umap圖

gene <- c("Epcam","Pax9","Bmp4","Hoxa3","Pax1","Ptprc","Six1","Tbx1") #Figure1_A和Figure1_D
for(i in gene){
  print(paste0("the current column is ",i))
  FeaturePlot(cd4_sce1,features = i,reduction = "tsne", min.cutoff = "q10", max.cutoff = "q90", pt.size = 1, repel = F,label = F,
              order = T,cols = c("#DCDCDC", "#DC143C"))+
    scale_x_continuous("")+scale_y_continuous("")+
    theme_bw()+ 
    theme( 
      panel.grid.major = element_blank(),panel.grid.minor = element_blank(), 
      axis.ticks = element_blank(),axis.text = element_blank(), 
      plot.title = element_text(hjust = 0.5,size=14)
    )
  ggsave(filename = paste0(i,"_tsne.pdf"), width = 10, height = 8, 
         device = 'pdf', units = 'cm')
}

#Figure1_E批量畫圖
F1_E <- c("Aire","Cldn4","Cldn3","Foxn1","Ly75","Psmb11","Krt5","Krt8") 
for(i in F1_E){
  print(paste0("the current column is ",i))
  FeaturePlot(cd4_sce1,features = i,reduction = "tsne", min.cutoff = "q10", max.cutoff = "q90", pt.size = 1, repel = F,label = F,
              order = T,cols = c('#D5F0E2','#4682B4'))+
    scale_x_continuous("")+scale_y_continuous("")+
    theme_bw()+ 
    theme( 
      panel.grid.major = element_blank(),panel.grid.minor = element_blank(), 
      axis.ticks = element_blank(),axis.text = element_blank(), 
      plot.title = element_text(hjust = 0.5,size=14)
    )
  ggsave(filename = paste0(i,"_tsne.pdf"), width = 10, height = 8, 
         device = 'pdf', units = 'cm')
}

##
gene <- c("Gcm2","Rhox4") 
for(i in gene){
  print(paste0("the current column is ",i))
  FeaturePlot(cd4_sce1,features = i,reduction = "tsne", min.cutoff = "q10", max.cutoff = "q90", pt.size = 1, repel = F,label = F,
              order = T,cols = c("#DCDCDC", "#DC143C"))+
    scale_x_continuous("")+scale_y_continuous("")+
    theme_bw()+ 
    theme( 
      panel.grid.major = element_blank(),panel.grid.minor = element_blank(), 
      axis.ticks = element_blank(),axis.text = element_blank(), 
      plot.title = element_text(hjust = 0.5,size=14)
    )
  ggsave(filename = paste0(i,"_tsne.pdf"), width = 10, height = 8, 
         device = 'pdf', units = 'cm')
}


?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容