關于招聘信息簡單分析第三彈,對招聘信息中融資情況、工作經驗、學歷要求和薪酬之間的關系進行簡單分析和數據可視化操作(本例中數據主要集中為Python相關職位分析)
分析工具
import pandas as pd
from pyecharts import Boxplot
import matplotlib.pyplot as plt
import seaborn as sns
# Load the job-posting table; the path literal looks like a placeholder
# ("data file") -- TODO confirm and point it at the real CSV.
df = pd.read_csv(filepath_or_buffer="數(shù)據(jù)文件")
# Make matplotlib use the SimHei font so Chinese labels can be displayed.
plt.rcParams.update({'font.sans-serif': ['SimHei']})
def avg_salary(salary):
    """Return the midpoint of a salary range string.

    The text is split on ``'-'``; the last character of each bound (the
    unit suffix, e.g. ``k``/``K``) is dropped and the two integers are
    averaged.

    Args:
        salary: Range text such as ``"10k-20k"``.

    Returns:
        float: ``(low + high) / 2.0``.

    Raises:
        IndexError: If ``salary`` contains no ``'-'`` separator.
        ValueError: If a bound, minus its last character, is not an integer.
    """
    bounds = salary.split('-')
    # Strip padding first so a stray space is not mistaken for the unit suffix.
    low = bounds[0].strip()[:-1]
    high = bounds[1].strip()[:-1]
    return (int(low) + int(high)) / 2.0
北京部分區縣薪酬比較
薪酬和區域之間關系數據處理
# Midpoint salary per posting, truncated to an integer by astype(int).
# NOTE(review): the column labels below contain what looks like
# transliteration residue (e.g. "(qū)") -- confirm they match the CSV header.
df["salary_avg"] = df["工資"].apply(avg_salary).astype(int)
# Group by the plain column label rather than a one-element list: recent
# pandas releases deprecate scalar get_group() keys for list-style grouping.
groupy_by_dist = df.groupby("區(qū)域")["salary_avg"]
count_by_dist = df.groupby("區(qū)域")["區(qū)域"].count()
# One array of salaries per district, in the order of count_by_dist.index.
df_dist = [groupy_by_dist.get_group(group).values for group in count_by_dist.index]
- seaborn工具實現數據展示
# Box plot of the per-district salary arrays on a 10x6 inch figure.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
# seaborn draws onto the current axes, i.e. the subplot created above.
sns.boxplot(data=df_dist)
# Use the district names as tick labels, rotated by -30 degrees.
ax.set_xticklabels(
    count_by_dist.index,
    rotation=-30,
    fontsize=18,
)
ax.set_title('北京部分地區(qū)薪酬比較', fontsize=22)
ax.set_ylabel('薪酬K/月', fontsize=20)
# Save before show(); bbox_inches='tight' trims surrounding whitespace.
plt.savefig('北京部分地區(qū)薪酬比較', bbox_inches='tight')
plt.show()
- pyecharts工具實現數據展示
# Same district comparison rendered with pyecharts (0.5.x-style Boxplot API).
boxplot = Boxplot("北京部分城區(qū)薪酬比較")
boxplot.use_theme("dark")
# prepare_data turns the raw salary arrays into the values the chart plots
# (presumably the five-number summary per box -- see the pyecharts docs).
_yaxis = boxplot.prepare_data(df_dist)
# Display options, collected in one place and passed through unchanged.
district_chart_options = dict(
    is_area_show=True,
    yaxis_name="薪酬(k/月)",
    is_label_show=True,
    is_axisline_show=True,
    is_fill=True,
    mark_point=["max", "min", "average"],
)
boxplot.add("", count_by_dist.index, _yaxis, **district_chart_options)
# No path given, so the library's default output file is used.
boxplot.render()


朝陽區薪酬最高,海淀區緊跟其后。并且北京各區Python職位薪資水平普遍還是較高的,薪酬分布最大值為25k(石景山區)、最小值為2K(海淀區)、平均值為11K(東城區)。
不同工作經驗的薪酬情況
薪酬和工作經驗之間關系數據處理
# Number of postings per experience level.
# NOTE(review): the column label and some mapping keys contain what looks like
# transliteration residue (e.g. "(jīng)") -- confirm they match the CSV data.
count_by_experience = df.groupby('工作經(jīng)驗(yàn)')['工作經(jīng)驗(yàn)'].count()
# Two-column frame (experience, counts) built from the counts series.
value_by_experience = pd.DataFrame([count_by_experience.index, count_by_experience.values], index=['experience', 'counts']).T
sort_by_experience = value_by_experience.copy()
# Explicit rank for each experience level; labels missing from the mapping
# become NaN and are placed last by sort_values.
mappings = {'應(yīng)屆畢業(yè)生':1, '1年以下':2, '1-3年':3, '3-5年':4, '5-10年':5, '10年以上':6,'不限':7}
sort_by_experience['sortby'] = sort_by_experience['experience'].map(mappings)
sort_by_experience.sort_values(by='sortby', inplace=True)
# Midpoint salary per posting, truncated to an integer by astype(int).
df["salary_avg"] = df["工資"].apply(avg_salary).astype(int)
# Group by the plain column label rather than a one-element list: recent
# pandas releases deprecate scalar get_group() keys for list-style grouping.
groupy_by_experience = df.groupby('工作經(jīng)驗(yàn)')["salary_avg"]
# One array of salaries per experience level, in ranked order.
df_exper = [groupy_by_experience.get_group(group).values for group in sort_by_experience["experience"]]
- seaborn工具實現數據展示
# Box plot of salary by experience level on a 10x6 inch figure with a peach
# background. ``facecolor`` replaces the ``axisbg`` keyword, which Matplotlib
# deprecated in 2.0 and later removed.
ax = plt.figure(figsize=(10, 6)).add_subplot(111, facecolor='#FFDAB9')
# seaborn draws onto the current axes, i.e. the subplot created above.
sns.boxplot(data=df_exper)
# Tick labels follow the ranked experience order used to build df_exper.
ax.set_xticklabels(sort_by_experience["experience"], fontsize=18)
ax.set_title('不同工作經(jīng)驗(yàn)的薪酬分布', fontsize=20)
ax.set_ylabel('薪酬K/月', fontsize=20)
# Save before show() so the figure is written while it is still current.
plt.savefig('不同工作經(jīng)驗(yàn)的薪酬分布')
plt.show()
- Pyecharts工具實現數據展示
# Salary by experience level rendered with pyecharts (0.5.x-style Boxplot API).
from pyecharts import Boxplot

boxplot = Boxplot("不同工作經(jīng)驗(yàn)的薪酬分布")
boxplot.use_theme("dark")
# prepare_data turns the raw salary arrays into the values the chart plots.
_yaxis = boxplot.prepare_data(df_exper)
# Display options, collected in one place and passed through unchanged.
experience_chart_options = dict(
    is_area_show=True,
    yaxis_name="薪酬(k/月)",
    is_label_show=True,
    is_axisline_show=True,
    mark_point=["max", "min", "average"],
    is_geo_effect_show=True,
)
boxplot.add("", sort_by_experience["experience"], _yaxis, **experience_chart_options)
# Write the chart to an HTML file.
boxplot.render("exp_sal.html")


工作經驗和薪資呈正相關分布,而3-5年和5-10年工作經驗基本代表Python職位薪資普遍薪酬。薪酬分布最大值為40k(10年以上)、最小值為2K(應屆畢業生)、平均值為11K(3-5年)。
不同學歷要求的薪酬分布
學歷要求和薪酬之間關系數據處理
# Number of postings per degree requirement.
# NOTE(review): the column label and some mapping keys contain what looks like
# transliteration residue (e.g. "(xué)") -- confirm they match the CSV data.
count_by_degree = df.groupby('學(xué)歷要求')['學(xué)歷要求'].count()
# Two-column frame (degree, counts) built from the counts series.
value_by_degree = pd.DataFrame([count_by_degree.index, count_by_degree.values], index=['degree', 'counts']).T
sort_by_degree = value_by_degree.copy()
# Explicit rank for each degree level; labels missing from the mapping
# become NaN and are placed last by sort_values.
degree_mappings = {'不限':1, '大專(zhuān)':2, '本科':3, '碩士':4,'博士':5}
sort_by_degree['sortby'] = sort_by_degree['degree'].map(degree_mappings)
sort_by_degree.sort_values(by='sortby', inplace=True)
# Midpoint salary per posting, truncated to an integer by astype(int).
df["salary_avg"] = df['工資'].apply(avg_salary).astype(int)
# Group by the plain column label rather than a one-element list: recent
# pandas releases deprecate scalar get_group() keys for list-style grouping.
group_by_degree = df.groupby('學(xué)歷要求')['salary_avg']
# One array of salaries per degree level, in ranked order.
df_deg = [group_by_degree.get_group(group).values for group in sort_by_degree['degree']]
- seaborn工具實現數據展示
# Box plot of salary by degree requirement on a 10x8 inch figure with a peach
# background. ``facecolor`` replaces the ``axisbg`` keyword, which Matplotlib
# deprecated in 2.0 and later removed.
ax = plt.figure(figsize=(10, 8)).add_subplot(111, facecolor='#FFDAB9')
# seaborn draws onto the current axes, i.e. the subplot created above.
sns.boxplot(data=df_deg)
# Tick labels follow the ranked degree order used to build df_deg.
ax.set_xticklabels(sort_by_degree['degree'], fontsize=18)
ax.set_title('不同學(xué)歷的薪酬分布', fontsize=22)
ax.set_ylabel('薪酬K/月', fontsize=20)
# Save before show() so the figure is written while it is still current.
plt.savefig('不同學(xué)歷的薪酬分布')
plt.show()
- Pyecharts工具實現數據展示
# Salary by degree requirement rendered with pyecharts (0.5.x-style Boxplot
# API). The statements were collapsed onto one line; they are separated here.
from pyecharts import Boxplot

boxplot = Boxplot("不同學(xué)歷要求的薪酬分布")
boxplot.use_theme("dark")
# prepare_data turns the raw salary arrays into the values the chart plots.
_yaxis = boxplot.prepare_data(df_deg)
boxplot.add(
    "",
    sort_by_degree["degree"],
    _yaxis,
    is_area_show=True,
    yaxis_name="薪酬(k/月)",
    is_label_show=True,
    is_axisline_show=True,
    mark_point=["max", "min", "average"],
    is_geo_effect_show=True,
)
# Write the chart to an HTML file.
boxplot.render("deg_sal.html")


同樣學歷和薪資呈正相關分布,而大專和本科基本代表Python職位薪資普遍薪酬。薪酬分布最大值為35k(博士)、最小值為2K(碩士)、平均值為7K(不限)。而由于數據樣本過于集中導致數據分析結果出現極值,本科占整個數據比例為90%左右。
公司融資情況和薪酬之間關系
公司融資情況和薪酬之間關系數據處理
# Number of postings per company financing stage.
count_by_financing = df.groupby('融資階段')['融資階段'].count()
# Two-column frame (financing, counts) built from the counts series.
value_by_financing = pd.DataFrame([count_by_financing.index, count_by_financing.values], index=['financing', 'counts']).T
sort_by_financing = value_by_financing.copy()
# Explicit rank for each financing stage; labels missing from the mapping
# become NaN and are placed last by sort_values.
mappings = {'未融資':1, '天使輪':2, 'A輪':3, 'B輪':4, 'C輪':5,'D輪及以上':6, '上市公司':7,'不需要融資':8}
sort_by_financing['sortby'] = sort_by_financing['financing'].map(mappings)
sort_by_financing.sort_values(by='sortby', inplace=True)
# Midpoint salary per posting, truncated to an integer by astype(int).
df["salary_avg"] = df["工資"].apply(avg_salary).astype(int)
# Group by the plain column label rather than a one-element list: recent
# pandas releases deprecate scalar get_group() keys for list-style grouping.
groupy_by_financing = df.groupby('融資階段')["salary_avg"]
# One array of salaries per financing stage, in ranked order.
df_financing = [groupy_by_financing.get_group(group).values for group in sort_by_financing["financing"]]
- seaborn工具實現數據展示
# Box plot of salary by financing stage on a 10x6 inch figure with a peach
# background. ``facecolor`` replaces the ``axisbg`` keyword, which Matplotlib
# deprecated in 2.0 and later removed.
ax = plt.figure(figsize=(10, 6)).add_subplot(111, facecolor='#FFDAB9')
# seaborn draws onto the current axes, i.e. the subplot created above.
sns.boxplot(data=df_financing)
# Tick labels follow the ranked financing order, rotated by -45 degrees.
ax.set_xticklabels(sort_by_financing["financing"], rotation=-45, fontsize=18)
ax.set_title('融資情況和薪酬之間的關(guān)系', fontsize=20)
ax.set_ylabel('薪酬K/月', fontsize=20)
# Save before show() so the figure is written while it is still current.
plt.savefig('融資情況和薪酬之間的關(guān)系')
plt.show()
- Pyecharts工具實現數據展示
# Salary by financing stage rendered with pyecharts (0.5.x-style Boxplot API).
# The statements were collapsed onto one line; they are separated here.
from pyecharts import Boxplot

boxplot = Boxplot("融資情況和薪酬之間的關(guān)系")
boxplot.use_theme("dark")
# prepare_data turns the raw salary arrays into the values the chart plots.
_yaxis = boxplot.prepare_data(df_financing)
boxplot.add(
    "",
    sort_by_financing["financing"],
    _yaxis,
    is_area_show=True,
    yaxis_name="薪酬(k/月)",
    is_label_show=True,
    is_axisline_show=True,
    mark_point=["max", "min", "average"],
    is_geo_effect_show=True,
)
# Write the chart to an HTML file.
boxplot.render("financing_sal.html")


公司融資和薪資關系并不是很緊密,公司獲得天使輪投資的Python職位薪資相對較高。薪酬分布最大值為6k(天使輪)、最小值為2K(A輪)、平均值為3K(B輪)。