pySCENIC下游可視化(python版本)

## Regulon specificity scores (RSS) across predicted cell types
from math import sqrt
import numpy as np
import pandas as pd
from scipy.spatial.distance import jensenshannon
def regulon_specificity_scores(auc_mtx, cell_type_series):
    """
    Calculates the Regulon Specificty Scores (RSS). [doi: 10.1016/j.celrep.2018.10.045]
    :param auc_mtx: The dataframe with the AUC values for all cells and regulons (n_cells x n_regulons).
    :param cell_type_series: A pandas Series object with cell identifiers as index and cell type labels as values.
    :return: A pandas dataframe with the RSS values (cell type x regulon).
    """
    cell_types = list(cell_type_series.unique())
    n_types = len(cell_types)
    regulons = list(auc_mtx.columns)
    n_regulons = len(regulons)
    rss_values = np.empty(shape=(n_types, n_regulons), dtype=float)
    def rss(aucs, labels):
        # jensenshannon function provides distance which is the sqrt of the JS divergence.
        return 1.0 - jensenshannon(aucs / aucs.sum(), labels / labels.sum())
    for cidx, regulon_name in enumerate(regulons):
        for ridx, cell_type in enumerate(cell_types):
            rss_values[ridx, cidx] = rss(auc_mtx[regulon_name], (cell_type_series == cell_type).astype(int))
    return pd.DataFrame(data=rss_values, index=cell_types, columns=regulons)

# Calculate the regulon specificity scores
rss = regulon_specificity_scores(df_aucell, myeloid_cells_har.obs['cell_type'])

# RSS plot
from math import ceil, floor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

def plot_rss(rss, cell_type, top_n=5, max_n=None, ax=None):
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(4, 4))
    if max_n is None:
        max_n = rss.shape[1]
    data = rss.T[cell_type].sort_values(ascending=False)[0:max_n]
    ax.plot(np.arange(len(data)), data, ".")
    ax.set_ylim([floor(data.min() * 100.0) / 100.0, ceil(data.max() * 100.0) / 100.0])
    ax.set_ylabel("RSS")
    ax.set_xlabel("Regulon")
    ax.set_title(cell_type)
    ax.set_xticklabels([])
    font = {"color": "red", "weight": "normal", "size": 4}
    for idx, (regulon_name, rss_val) in enumerate(
        zip(data[0:top_n].index, data[0:top_n].values)
    ):
        ax.plot([idx, idx], [rss_val, rss_val], "r.")
        ax.text(idx + (max_n / 25), rss_val, regulon_name, fontdict=font, horizontalalignment="left", verticalalignment="center")

## Select the top 5 regulons from each cell type
for cell_type in rss.index:
    plot_rss(rss, cell_type=cell_type, top_n=5)
    plt.savefig(cell_type+"-RSS-top5.pdf", dpi=300, bbox_inches = "tight")
    plt.close()

# Visualize the top regulons for each cell type
for cell_type in rss.index:
    print(rss.T[cell_type].sort_values(ascending=False).head(5))
# Plot a heatmap of the regulon specificity scores
import seaborn as sns
sns.heatmap(rss, cmap="viridis", annot=False)
plt.title("Regulon Specificity Scores")
plt.xlabel("Cell Type")
plt.ylabel("Regulon")
plt.savefig("RSS_heatmap.pdf")
plt.close()

Python版SCENIC轉錄因子分析(四)一文就夠了-騰訊云開發(fā)者社區(qū)-騰訊云 (tencent.com)
單細胞轉錄組實戰(zhàn)06: pySCENIC轉錄因子分析(可視化)-騰訊云開發(fā)者社區(qū)-騰訊云 (tencent.com)

?著作權歸作者所有,轉載或內容合作請聯(lián)系作者
【社區(qū)內容提示】社區(qū)部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發(fā)布,文章內容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容