關鍵詞:互作,弦圖,python
摘要:本帖分享的是簡單的弦圖實現
#輸入數據每列意義:"基因/蛋白","互作基因/互作蛋白","第一列基因分類","pvalue","color(同一分類顏色一致)"
gene1 gene2 fam1 0.5 red
gene2 gene3 fam1 1 red
gene3 gene1 fam2 0.1 blue
gene4 gene5 fam2 0.3 blue
gene5 gene2 fam2 0.9 blue
gene6 gene3 fam3 1.8 green
gene7 gene1 fam3 1.771428571 green
gene8 gene5 fam3 1.742857143 green
gene9 gene2 fam3 1.714285714 green
gene10 gene3 fam5 1.685714286 gold
gene11 gene1 fam5 1.657142857 gold
gene12 gene3 fam5 1.628571429 gold
gene13 gene1 fam5 1.6 gold
gene14 gene5 fam5 1.571428571 gold
gene15 gene2 fam4 1.542857143 orange
gene16 gene3 fam4 1.514285714 orange
gene17 gene1 fam4 1.485714286 orange
gene18 gene5 fam4 1.457142857 orange
gene19 gene2 fam4 1.428571429 orange
gene20 gene3 fam4 1.4 orange
cell1 cell2 fam3 0.7 green
cell2 cell1 fam3 0.2 green
cell3 cell4 fam3 5 green
cell4 cell6 fam6 0.7 grey
cell5 cell6 fam6 0.1 grey
cell6 cell4 fam6 0.3 grey
cell7 cell4 fam6 2 grey
#############################################代碼#############################################
#Usage:python gene_act.py 半徑(10) gene_act.xls(上述輸入文件) gene_act1.pdf(輸出文件)
import sys
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import seaborn as sns
import pandas as pd
def getB(i, degree=None, t=None):
    """Evaluate the i-th Bernstein basis polynomial of a Bezier curve.

    Computes ``C(degree, i) * t**i * (1 - t)**(degree - i)`` for every sample
    in ``t`` and duplicates the result into two identical columns, so it can
    be multiplied directly with an ``(x, y)`` control point.

    Args:
        i: Index of the basis polynomial, ``0 <= i <= degree``.
        degree: Degree of the curve. Defaults to the module-level ``n`` so
            that existing one-argument ``getB(i)`` calls keep working.
        t: 1-D array of curve parameters. Defaults to the module-level
            ``init_t``.

    Returns:
        Array of shape ``(len(t), 2)``; both columns hold the basis weights.
    """
    # Local import: the file's import header does not bring in ``math``.
    import math

    if degree is None:
        degree = n
    if t is None:
        t = init_t
    t = np.asarray(t, dtype=float)
    # math.comb gives the exact binomial coefficient. The ``np.math`` alias
    # used before was deprecated in NumPy 1.25 and removed in NumPy 2.0.
    weight = math.comb(degree, i) * t**i * (1 - t) ** (degree - i)
    return np.array([weight, weight]).T
# Load the input.
# Command line: <radius> <tab-separated interaction table> <output pdf>
if len(sys.argv) < 4:
    sys.exit("Usage: python gene_act.py <radius> <input table> <output.pdf>")
try:
    r = float(sys.argv[1])
except ValueError:
    sys.exit("radius must be a number, got {!r}".format(sys.argv[1]))
inputfile = sys.argv[2]
outpdf = sys.argv[3]

# The table has no header row; columns are named here.
head_lst = ["gene", "act_gene", "family", "pvalue", "color"]
df = pd.read_csv(inputfile, names=head_lst, header=None, sep="\t")
# Sort so that rows of the same family are contiguous (and ordered by pvalue
# inside each family); the index is rebuilt to match the new row order.
df = df.sort_values(by=['family', 'pvalue'], ascending=[True, True]).reset_index(drop=True)
# Families in order of first appearance in the sorted table.
famlist = df["family"].drop_duplicates().to_list()
# Divide the circle evenly and compute the X/Y coordinates: one slot per
# table row plus one extra slot per family.
slot_total = len(df) + len(famlist)
sample_angle = np.linspace(0, 2 * np.pi, slot_total, endpoint=False)
data = pd.DataFrame(
    {
        "X": r * np.cos(sample_angle),
        "Y": r * np.sin(sample_angle),
        "angle": sample_angle * 180 / np.pi,  # radians -> degrees
    }
)

# Index of the slot that follows each family's run of rows. Dropping these
# slots leaves a gap on the circle after every family.
space_index = []
gap_slot = -1
for family in famlist:
    family_rows = int((df["family"] == family).sum())
    gap_slot += family_rows + 1
    space_index.append(gap_slot)
data = data.drop(space_index).reset_index(drop=True)
fig, ax = plt.subplots()
# fonttype 42 makes matplotlib embed TrueType fonts in PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.ylim(-2.5*r, 2*r)
plt.xlim(-2.5*r, 2*r)

# Attach each row's position on the circle to the interaction table.
df.loc[:, 'X'] = data["X"]
df.loc[:, 'Y'] = data["Y"]
df.loc[:, 'angle'] = data["angle"]

# Distinct colours in row order.
cmap = df["color"].drop_duplicates().to_list()

# gene -> (X, Y) of its first row, built once instead of filtering the whole
# table for every chord.
node_xy = {}
for gene, gene_x, gene_y in zip(df["gene"], df["X"], df["Y"]):
    node_xy.setdefault(gene, (gene_x, gene_y))

# getB() reads the curve degree and the parameter samples from the
# module-level names ``n`` and ``init_t``. Each chord is a quadratic Bezier
# curve (3 control points), so both are fixed for the whole loop.
n = 2
init_t = np.linspace(0, 1, 1000)

for row in df.itertuples(index=False):
    # act_gene may list several partners separated by commas.
    for act in row.act_gene.split(","):
        if act not in node_xy:
            print(
                "warning: interaction partner {!r} of {!r} is not in the gene "
                "column; chord skipped".format(act, row.gene),
                file=sys.stderr,
            )
            continue
        e_x, e_y = node_xy[act]
        # Control points: start node, circle centre, end node. Change the
        # middle point here to alter how the chord bends.
        points = np.array([[row.X, row.Y], [0, 0], [e_x, e_y]])
        P = np.zeros((init_t.size, 2))
        # Distinct index name so the row loop's state is never clobbered.
        for k in range(n + 1):
            P += getB(k) * points[k]
        # Chord: coloured by the row's colour, line width taken from pvalue.
        plt.plot(P[:, 0], P[:, 1], marker="None", color=row.color, zorder=0, linewidth=row.pvalue)
        # Control points are drawn fully transparent (alpha=0).
        plt.plot(points[:, 0], points[:, 1], 'r.', zorder=0, alpha=0, linewidth=row.pvalue)
# Dump the table, including the computed coordinates, to a fixed path.
df.to_csv("test.out", sep="\t")
# fonttype 42 makes matplotlib embed TrueType fonts in PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.ylim(-1.5*r, 1.5*r)
plt.xlim(-1.5*r, 1.5*r)

# Colour every family with the colour given in the table. seaborn maps hue
# levels to colours through ``palette``; ``cmap`` is a matplotlib colormap
# argument and does not control the categorical hue colours.
family_colors = dict(zip(df["family"], df["color"]))
sns.scatterplot(
    data=df,
    x="X",
    y="Y",
    size="pvalue",
    hue="family",
    hue_order=famlist,
    palette=family_colors,
    alpha=1,
    zorder=3,
    ax=ax,
)

# Gene labels: placed at 1.5x the node position, rotated by the node's angle
# and drawn in the row's colour.
for row in df.itertuples(index=False):
    ax.text(
        row.X * 1.5,
        row.Y * 1.5,
        row.gene,
        ha="center",
        va="center",
        rotation=row.angle,
        color=row.color,
        size=10,
        zorder=3,
    )

ax.axis('off')
ax.set_aspect('equal')
ax.margins(x=0.1, y=0.1)
plt.legend(loc='upper left', ncol=2, prop={'size': 5})
plt.savefig(outpdf)
plt.close()

結果圖展示