運(yùn)行SCENIC參考文獻(xiàn)
Cytoscape視頻教程
本教包括:
一、pyscenic對(duì)單細(xì)胞進(jìn)行轉(zhuǎn)錄因子的分析;
二、篩選不同細(xì)胞類型差異表達(dá)轉(zhuǎn)錄因子;
三、通過Cytoscape對(duì)不用細(xì)胞類型差異轉(zhuǎn)錄因子進(jìn)行可視化。
1、pyscenic對(duì)單細(xì)胞進(jìn)行轉(zhuǎn)錄因子的分析
需要一些依賴
conda create -n pyscenic python=3.7
conda activate pyscenic
mamba install -y numpy scanpy
mamba install -y -c anaconda cytoolz
pip install pyscenic
數(shù)據(jù)庫配置
轉(zhuǎn)錄因子分析還需要自行下載最新更新的數(shù)據(jù)庫(https://resources.aertslab.org/cistarget/databases/):數(shù)據(jù)庫儲(chǔ)存在/data/index_genome/cisTarget_databases/
mkdir /data/index_genome/cisTarget_databases/
cd /data/index_genome/cisTarget_databases/
cat >download.bash
##1, For human:
wget -c https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather
wget -c https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather
##2, For mouse:
wget -c https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather
wget -c https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather
wget https://github.com/aertslab/pySCENIC/blob/master/resources/hs_hgnc_tfs.txt
wget https://resources.aertslab.org/cistarget/motif2tf/motifs-v9-nr.hgnc-m0.001-o0.0.tbl
## 下載
bash download.bash
Step1.提取單細(xì)胞表達(dá)量矩陣
library(SCENIC)
packageVersion("SCENIC")
setwd("D:/D/temp/PySCENIC/hexiaoying")
sce <- readRDS("D:/D/metropolypus/FastMNN/Mmerge.rds")
###根據(jù)colname名字區(qū)分BD單細(xì)胞和10X單細(xì)胞測(cè)序,提取10X單細(xì)胞測(cè)序數(shù)據(jù)
colnames(sce)
sce$tech <- sapply(colnames(x = sce), function(x) unlist(strsplit(x, "_"))[1])
sce$IF=sapply(sce$tech, function(x) unlist(nchar(x)>10))
table(sce$IF)
Idents(sce)<-sce$IF
ten=subset(sce,idents = c("TRUE"))
table(ten$sample)
#由于pySCENIC運(yùn)行比較慢,隨機(jī)提取2萬個(gè)細(xì)胞進(jìn)行分析
subcell <- sample(colnames(ten),20000)
End.combinedsub <- ten[,subcell]
write.csv(t(as.matrix(End.combinedsub@assays$RNA@counts)),file = "for.scenic.data.csv")
saveRDS(End.combinedsub, "SCENIC.sub.rds")
Step2.pySCENIC常規(guī)運(yùn)行(Python+Linux環(huán)境)
cd /mnt/d/D/temp/PySCENIC/hexiaoying
cat >change.py
import os,sys
os.getcwd()
os.listdir(os.getcwd())
import loompy as lp;
import numpy as np;
import scanpy as sc;
x=sc.read_csv("for.scenic.data.csv",first_column_names=True);
row_attrs = {"Gene": np.array(x.var_names),};
col_attrs = {"CellID": np.array(x.obs_names)};
lp.create("sample.loom",x.X.transpose(),row_attrs,col_attrs);
python change.py
##這一步可能會(huì)出現(xiàn)的問題
##這一步pyscenic aucell 會(huì)報(bào)錯(cuò)AttributeError: 'Series' object has no attribute 'iteritems'只要把"count in values.value_counts().iteritems():" to "count in values.value_counts().items():" 解決辦法參考https://github.com/dusty-nv/jetson-inference/issues/1640
cat >scenic.sh
#!/bin/sh
##運(yùn)行pySCENIC
# 不同物種的數(shù)據(jù)庫不一樣,這里是人類是human
dir=/mnt/d/D/linux/cisTarget_databases/ #改成自己的目錄
tfs=$dir/hs_hgnc_tfs.txt
feather=$dir/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather
tbl=$dir/motifs-v9-nr.hgnc-m0.001-o0.0.tbl
# 一定要保證上面的數(shù)據(jù)庫文件完整無誤哦
input_loom=/mnt/d/D/temp/PySCENIC/hexiaoying/sample.loom
ls $tfs $feather $tbl
#2.1 grn
pyscenic grn \
--num_workers 20 \
--output adj.sample.tsv \
--method grnboost2 \
sample.loom \
$tfs #轉(zhuǎn)錄因子文件,1839個(gè)基因的名字列表
#2.2 cistarget
pyscenic ctx \
adj.sample.tsv $feather \
--annotations_fname $tbl \
--expression_mtx_fname $input_loom \
--mode "dask_multiprocessing" \
--output reg.csv \
--num_workers 20 \
--mask_dropouts
#2.3 AUCell
pyscenic aucell \
$input_loom \
reg.csv \
--output out_SCENIC.loom \
--num_workers 20
bash scenic.sh
輸出結(jié)果

image.png
Step3.sample_SCENIC.loom導(dǎo)入R里面進(jìn)行可視化
##可視化
rm(list=ls())
library(Seurat)
library(SCopeLoomR)
library(AUCell)
library(SCENIC)
library(dplyr)
library(KernSmooth)
library(RColorBrewer)
library(plotly)
library(BiocParallel)
library(grid)
library(ComplexHeatmap)
library(data.table)
library(scRNAseq)
library(patchwork)
library(ggplot2)
library(stringr)
library(circlize)
#### 1.提取 out_SCENIC.loom 信息
pyScenicLoomFile <- file.path(getwd(), "out_SCENIC.loom")
loom <- open_loom(pyScenicLoomFile)
regulons_incidMat <- get_regulons(loom, column.attr.name="Regulons")
regulons_incidMat[1:4,1:4]
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonAUC <- get_regulons_AUC(loom,column.attr.name='RegulonsAUC')
regulonAucThresholds <- get_regulon_thresholds(loom)
tail(regulonAucThresholds[order(as.numeric(names(regulonAucThresholds)))])
embeddings <- get_embeddings(loom)
close_loom(loom)
rownames(regulonAUC)
names(regulons)
#### 2.加載SeuratData
seurat.data <- readRDS("D:/D/temp/PySCENIC/hexiaoying/SCENIC.sub.rds")
Idents(seurat.data)<-seurat.data$celltype
DimPlot(seurat.data,label = T)
#可視化
sub_regulonAUC <- regulonAUC[,match(colnames(seurat.data),colnames(regulonAUC))]
dim(sub_regulonAUC)
seurat.data
#確認(rèn)是否一致
identical(colnames(sub_regulonAUC), colnames(seurat.data))
cellClusters <- data.frame(row.names = colnames(seurat.data),
seurat_clusters = as.character(seurat.data$celltype))
cellTypes <- data.frame(row.names = colnames(seurat.data),
celltype = seurat.data$celltype)
head(cellTypes)
head(cellClusters)
sub_regulonAUC[1:4,1:4]
#保存一下
save(sub_regulonAUC,cellTypes,cellClusters,seurat.data,
file = 'for_rss_and_visual.Rdata')
##
#B細(xì)胞有兩個(gè)非常出名的轉(zhuǎn)錄因子,TCF4(+) 以及NR2C1(+)
regulonsToPlot = c('TAL1(+)','GATA1(+)')
regulonsToPlot %in% row.names(sub_regulonAUC)
seurat.data@meta.data = cbind(seurat.data@meta.data ,t(assay(sub_regulonAUC[regulonsToPlot,])))
# Vis
p1 = DotPlot(seurat.data, features = unique(regulonsToPlot)) + RotatedAxis()
p1
p2 = RidgePlot(seurat.data, features = regulonsToPlot , ncol = 2)
p2
p3 = VlnPlot(seurat.data, features = regulonsToPlot,pt.size = 0)
p3
p4 = FeaturePlot(seurat.data,features = regulonsToPlot)
p4
wrap_plots(p1,p2,p3,p4)
#可選
#scenic_res = assay(sub_regulonAUC) %>% as.matrix()
#seurat.data[["scenic"]] <- SeuratObject::CreateAssayObject(counts = scenic_res)
#seurat.data <- SeuratObject::SetAssayData(seurat.data, slot = "scale.data",
# new.data = scenic_res, assay = "scenic")
###B細(xì)胞有兩個(gè)非常出名的轉(zhuǎn)錄因子,TCF4(+) 以及NR2C1(+)
regulonsToPlot = c('TCF4(+)','NR2C1(+)')
regulonsToPlot %in% row.names(sub_regulonAUC)
seurat.data@meta.data = cbind(seurat.data@meta.data ,t(assay(sub_regulonAUC[regulonsToPlot,])))
# Vis
p1 = DotPlot(seurat.data, features = unique(regulonsToPlot)) + RotatedAxis()
p2 = RidgePlot(seurat.data, features = regulonsToPlot , ncol = 2)
p3 = VlnPlot(seurat.data, features = regulonsToPlot,pt.size = 0)
p4 = FeaturePlot(seurat.data,features = regulonsToPlot)
wrap_plots(p1,p2,p3,p4)
#可選
#scenic_res = assay(sub_regulonAUC) %>% as.matrix()
#seurat.data[["scenic"]] <- SeuratObject::CreateAssayObject(counts = scenic_res)
#seurat.data <- SeuratObject::SetAssayData(seurat.data, slot = "scale.data",
# new.data = scenic_res, assay = "scenic")
##4.1 TF活性均值
### 4.1. TF活性均值
# 看看不同單細(xì)胞亞群的轉(zhuǎn)錄因子活性平均值
# Split the cells by cluster:
selectedResolution <- "celltype" # select resolution
cellsPerGroup <- split(rownames(cellTypes),
cellTypes[,selectedResolution])
# 去除extened regulons
sub_regulonAUC <- sub_regulonAUC[onlyNonDuplicatedExtended(rownames(sub_regulonAUC)),]
dim(sub_regulonAUC)
# Calculate average expression:
regulonActivity_byGroup <- sapply(cellsPerGroup,
function(cells)
rowMeans(getAUC(sub_regulonAUC)[,cells]))
# Scale expression.
# Scale函數(shù)是對(duì)列進(jìn)行歸一化,所以要把regulonActivity_byGroup轉(zhuǎn)置成細(xì)胞為行,基因?yàn)榱?# 參考:http://www.itdecent.cn/p/115d07af3029
regulonActivity_byGroup_Scaled <- t(scale(t(regulonActivity_byGroup),
center = T, scale=T))
# 同一個(gè)regulon在不同cluster的scale處理
dim(regulonActivity_byGroup_Scaled)
#[1] 209 9
regulonActivity_byGroup_Scaled=na.omit(regulonActivity_byGroup_Scaled)
#熱圖查看每個(gè)單細(xì)胞亞群都是有 自己的特異性的激活的轉(zhuǎn)錄因子
p6<-Heatmap(
regulonActivity_byGroup_Scaled,
name = "z-score",
col = colorRamp2(seq(from=-2,to=2,length=11),rev(brewer.pal(11, "Spectral"))),
show_row_names = TRUE,
show_column_names = TRUE,
row_names_gp = gpar(fontsize = 6),
clustering_method_rows = "ward.D2",
clustering_method_columns = "ward.D2",
row_title_rot = 0,
cluster_rows = TRUE,
cluster_row_slices = FALSE,
cluster_columns = FALSE)
pdf("TF.activity1.heatmap.pdf",width = 14,height = 27);p6;dev.off()
#4.2 rss查看特異TF
### 4.2. rss查看特異TF
rss <- calcRSS(AUC=getAUC(sub_regulonAUC),
cellAnnotation=cellTypes[colnames(sub_regulonAUC), selectedResolution])
rss=na.omit(rss)
rssPlot <- plotRSS(rss)
plotly::ggplotly(rssPlot$plot)
ggsave("TF_activity.pdf",width = 14,height = 27)
#4.3其他方式查看TF
rss=regulonActivity_byGroup_Scaled
head(rss)
df = do.call(rbind,
lapply(1:ncol(rss), function(i){
dat= data.frame(
path = rownames(rss),
cluster = colnames(rss)[i],
sd.1 = rss[,i],
sd.2 = apply(rss[,-i], 1, median)
)
}))
df$fc = df$sd.1 - df$sd.2
top5 <- df %>% group_by(cluster) %>% top_n(5, fc)
rowcn = data.frame(path = top5$cluster)
n = rss[top5$path,]
#rownames(rowcn) = rownames(n)
p7<-pheatmap(n,
annotation_row = rowcn,
show_rownames = T)
pdf("TF.activity2.heatmap.pdf",width = 14,height = 14);p7;dev.off()
##建立可以用于轉(zhuǎn)錄因子調(diào)控基因Cytoscape網(wǎng)絡(luò)圖的表格
#melt的用法https://www.cnblogs.com/liujiaxin2018/p/16673493.html
library(reshape2)
edges <- melt(regulons,id="regulons")
colnames(edges) <- c("to","from")
##提取參與調(diào)節(jié)的差異轉(zhuǎn)錄因子進(jìn)行分析
##############------------------------------------pySCENIC
edges.DEG<-edges[edges$to %in% DEG.A$gene,]
levels(factor(edges.tf$from))
DEG.A.tf<-DEG.A[DEG.A$gene %in% str_trim(str_extract(edges$from, '[^(]*')),]
##str_trim(str_extract(edges$from, '[^(]*')) 刪除ATF3(+)后面的(+)
###寫入上調(diào)和下調(diào)標(biāo)記
DEG.A.tf$change <- ifelse((DEG.A.tf$avg_log2FC>0),'up',
ifelse((DEG.A.tf$avg_log2FC<0),'down','nochange'))
##提取細(xì)胞對(duì)應(yīng)的基因
DEG.tf<-dplyr::select(DEG.A.tf,c("cluster","gene"))
write.csv(DEG.tf,file = "DEG.tf.csv",row.names = F)
##提取基因和上下調(diào)標(biāo)記
label.tf<-dplyr::select(DEG.A.tf,c("gene","change"))
write.csv(label.tf,file = "label.tf.csv",row.names = F)
##提取基因FC
fc.tf<-dplyr::select(DEG.A.tf,c("gene","avg_log2FC"))
write.csv(fc.tf,file = "fc.tf.csv",row.names = F)
##統(tǒng)計(jì)上下調(diào)轉(zhuǎn)錄因子在細(xì)胞類型中的個(gè)數(shù)
celltype.tf.chang<-dcast(celltype.tf,cluster~change)
write.csv(celltype.tf.chang,file = "celltype.tf.change.csv",row.names = F)
Step4 導(dǎo)入Cytoscape進(jìn)行可視化(按住control鍵可以對(duì)結(jié)點(diǎn)進(jìn)行選擇)
1、輸入文件

image.png
通過file-import-Network from File選擇文件

image.png
2、其他參數(shù)全部是通過table導(dǎo)入
輸入文件

image.png
通過file-import-Table from File選擇文件

image.png

image.png
3、給細(xì)胞類型設(shè)置分組

image.png
分組的樣式

image.png
4、根據(jù)第一步的分組,對(duì)選中的結(jié)點(diǎn)設(shè)置顏色

image.png

image.png
5、根據(jù)avg_log2FC定義node的顏色
這是avg_log2FC的表格

image.png

image.png
6、輸入細(xì)胞上下調(diào)轉(zhuǎn)錄因子

image.png
細(xì)胞轉(zhuǎn)錄差異可視化

image.png
結(jié)果

image.png
7、最后導(dǎo)出結(jié)果

image.png
操作技巧
過濾的方式選擇結(jié)點(diǎn):比如直接選擇之前設(shè)置的type結(jié)點(diǎn)

image.png
選中的結(jié)點(diǎn)如果疊加在一起,通過這樣設(shè)置可以把結(jié)點(diǎn)分散開來

image.png