全部代碼都已上傳至我的KLab——《嗨,你有一份微信好友報告待查收~》,Fork后可運行生成你自己的微信好友報告~
本次項目統計展示了如下信息:
- 好友地域分布
- 性別統計
- 備注比例
- 首字母統計
- 使用最多的emoji
- 簽名詞云
其他可視化報告:
登錄微信
因為在KLab里面沒法調起其他應用來打開二維碼圖片,所以這邊是通過多線程來處理:
- 線程1:itchat獲取二維碼圖片,等待掃碼完成;
- 線程2:讀取本地二維碼圖片然后通過matplotlib加載到KLab;
具體代碼如下,不算復雜~
注意:之前有小伙伴遇到不能掃碼登錄的,是因為微信那邊做了限制,對于有些賬號(特別是新注冊的賬號)不能在網頁端登錄;
# Where itchat writes the login QR code; the display thread polls this path.
code_path = os.path.join('/home/kesci/work', 'QR.png')


def show_qrcode():
    """Wait for the QR code image to appear on disk, then display it once.

    Polls ``code_path`` until the file exists, then renders it with
    matplotlib with the axes hidden.
    """
    # Give the download a head start before the first check.
    time.sleep(3)
    while not os.path.exists(code_path):
        # Pause between checks so the wait does not spin a CPU core.
        time.sleep(0.5)
    img = Image.open(code_path)
    plt.figure(figsize=(15, 8))
    plt.imshow(img)
    plt.axis('off')
    plt.show()


# Display thread. Daemon, so the main thread never has to wait on it.
qr_thread = threading.Thread(target=show_qrcode, daemon=True)
qr_thread.start()

# Login thread. Pass the callable and its arguments separately: writing
# ``target=itchat.login(picDir=code_path)`` would run the login right here
# and hand the thread ``None`` as its target.
t = threading.Thread(target=itchat.login, kwargs={'picDir': code_path})
t.start()
# Block until the login call returns, as the original eager call did.
t.join()
地域分布
微信返回的好友信息中包括了Province和City兩個字段,不過有兩點要注意的:
對于北京等四個直轄市,
Province中是存的城市名,City中是行政區;另外地域信息是國外的我這邊是都歸到一類下面了,二級分類用的
Province的信息;
數據處理
# Fetch the friend list and load it into a DataFrame.
friends = itchat.get_friends(update=True)
df_friends = pd.DataFrame(list(friends))

# Count friends per (Province, City) pair.
f_loc = df_friends.groupby(
    ['Province', 'City'])['UserName'].count().reset_index()

# Drop rows with no province. The explicit copy lets the assignments below
# write to f_loc itself rather than to a view of the grouped frame.
# (The original also had an "empty province -> '其他'" branch, but it could
# never run after this filter, so it is omitted.)
f_loc = f_loc[f_loc.Province != ''].copy()

# Provinces that start with a Latin letter are treated as overseas: the
# original province name becomes the second level under one '國外' parent.
is_foreign = f_loc.Province.str.match('[a-zA-Z]', na=False)
f_loc.loc[is_foreign, 'City'] = f_loc.loc[is_foreign, 'Province']
f_loc.loc[is_foreign, 'Province'] = '國外'

# For the four municipalities City holds the district, so collapse it to the
# municipality name. A single .loc[mask, column] assignment replaces the
# chained `f_loc['City'].loc[...] = ...` form, which may write to a temporary.
municipalities = ['北京', '上海', '重慶', '天津']
is_municipality = f_loc.Province.isin(municipalities)
f_loc.loc[is_municipality, 'City'] = f_loc.loc[is_municipality, 'Province']

# Re-aggregate, since the relabelling above can create duplicate pairs.
f_loc = f_loc.groupby(['Province', 'City'])['UserName'].sum().reset_index()
f_loc.columns = ['Province', 'City', 'num']
# Build the nested province -> city structure passed to the Sunburst chart.
data_pair = []
parent_data = f_loc.Province.unique().tolist()
for province in parent_data:
    # Sort into a new frame instead of sorting a filtered slice in place;
    # after reset_index, index 0 is the city with the most friends.
    t_data = (f_loc[f_loc.Province == province]
              .sort_values(by="num", ascending=False)
              .reset_index(drop=True))
    t_dict = {"name": province,
              # Parent level: label shown only above 15 friends in total.
              "label": {"show": bool(t_data.num.sum() > 15)},
              "children": []}
    else_num = 0
    for idx, row in t_data.iterrows():
        # Showing every city is too cluttered. A city gets its own slice
        # when it is the province's top city or has more than 10 friends;
        # all remaining cities are folded into '其他城市'.
        if idx == 0 or row.num > 10:
            t_dict['children'].append({
                "name": row.City,
                "value": row.num,
                # Child level: label shown only above 10 friends.
                "label": {"show": bool(row.num > 10)},
            })
        else:
            else_num += row.num
    if else_num:
        t_dict['children'].append({
            "name": '其他城市',
            "value": else_num,
            "label": {"show": bool(else_num > 10)},
        })
    data_pair.append(t_dict)
可視化
# Sunburst chart of friends by region, built from the nested data_pair list.
c = (
    Sunburst(
        init_opts=opts.InitOpts(
            theme='light',
            width="1000px",
            height="1000px"))
    .add(
        "",
        data_pair=data_pair,
        highlight_policy="ancestor",
        radius=[0, "100%"],
        sort_='null',
        # Per-level styling: the first entry is left empty, the next two set
        # the radii and borders of the two rings.
        levels=[
            {},
            {
                "r0": "20%",
                "r": "45%",
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 2},
            },
            {
                "r0": "45%",
                "r": "80%",
                "label": {"align": "right"},
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 1},
            },
        ],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="好 友\n\n地 域 分 布",
            pos_left="center",
            pos_top="center",
            title_textstyle_opts=opts.TextStyleOpts(
                font_style='oblique', color="black", font_size=30),
        ))
    # "{b}" is the item name and "{c}" its value. The name placeholder was
    # missing, so labels rendered as ": <value>" with no region name.
    .set_series_opts(label_opts=opts.LabelOpts(font_size=18, formatter="{b}: {c}"))
)
c.render_notebook()

好友性別占比
# Count friends per value of the Sex field.
f_sex = df_friends.groupby(['Sex'])['UserName'].count().reset_index()
# Map the Sex codes to display labels in one whole-value replacement
# (1 -> male, 2 -> female, 0 -> missing); other values pass through.
sex_labels = {'1': '男', '2': '女', '0': '信息缺失'}
f_sex['f_sex'] = f_sex['Sex'].astype(str).replace(sex_labels)

# Radial-gradient background snippet; kept for compatibility, it is not
# referenced by the chart built below.
background_color_js = """new echarts.graphic.RadialGradient(0.5, 0.5, 1, [{
offset: 0,
color: '#696969'
}, {
offset: 1,
color: '#000000'
}])"""

# Donut chart of the gender split.
pie = (
    Pie(init_opts=opts.InitOpts(theme='light', width='1000px', height='800px'))
    .add(
        'WeChat?',
        list(zip(f_sex['f_sex'].tolist(), f_sex['UserName'].tolist())),
        radius=["50%", "75%"])
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="好友性別占比",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(color="black", font_size=20),
        ),
        legend_opts=opts.LegendOpts(is_show=True, pos_top='5%'))
    .set_series_opts(
        # ECharts placeholders: {a} series name, {b} item name, {c} value,
        # {d} percentage. The original strings had lost {b} and carried
        # garbage text where {d} belongs.
        label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=18),
        tooltip_opts=opts.TooltipOpts(
            trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"),
    )
)
pie.render_notebook()

好友備注比例
你有給好友備注的習慣嗎?
def _remark_counts(frame, sex):
    """Return ``(remarked, total)`` friend counts for rows whose Sex is *sex*."""
    of_sex = frame.Sex == sex
    has_remark = frame.RemarkName != ''
    return int((of_sex & has_remark).sum()), int(of_sex.sum())


def _ratio(part, whole):
    """Return ``part / whole``, or 0.0 when *whole* is zero."""
    return part / whole if whole else 0.0


def _remark_liquid(ratio, title, center, title_left):
    """Build one Liquid gauge showing *ratio* as a percentage under *title*."""
    liquid = Liquid(
        init_opts=opts.InitOpts(
            theme='light',
            width='1000px',
            height='800px'))
    liquid.add(
        "",
        [ratio],
        center=center,
        label_opts=opts.LabelOpts(
            font_size=50,
            # Shows the value as a percentage truncated to two decimals.
            formatter=JsCode(
                """function (param) {
    return (Math.floor(param.value * 10000) / 100) + '%';
}"""),
            position="inside",
        ))
    liquid.set_global_opts(
        title_opts=opts.TitleOpts(
            title=title,
            pos_left=title_left,
            pos_top='8%'))
    liquid.set_series_opts(tooltip_opts=opts.TooltipOpts(is_show=False))
    return liquid


# Sex == 2 feeds the gauge titled for female friends, Sex == 1 the male one.
remark_num_f, total_num_f = _remark_counts(df_friends, 2)
remark_num_m, total_num_m = _remark_counts(df_friends, 1)

# _ratio guards against a friend list with nobody of a given sex, which
# previously raised ZeroDivisionError.
l1 = _remark_liquid(_ratio(remark_num_f, total_num_f),
                    "女性好友備注比例", ["70%", "50%"], '62%')
l2 = _remark_liquid(_ratio(remark_num_m, total_num_m),
                    "男性好友備注比例", ["25%", "50%"], '16%')

# Put both gauges into one Grid.
grid = Grid().add(
    l1, grid_opts=opts.GridOpts()).add(
    l2, grid_opts=opts.GridOpts())
grid.render_notebook()

首字母分布
這個統計與微信-聯系人里面的歸類有點不一樣,微信-聯系人里面是優先使用備注名的,這里只與好友的微信昵稱有關;
# Bucket every friend by the first letter of the PYQuanPin field.
first_letter = []
for pinyin in df_friends.PYQuanPin:
    # Remove the emoji placeholder text before taking the first character.
    cleaned = re.sub('spanclassemojiemoji[a-z0-9]{5}?|span', '', pinyin)
    # Slicing yields '' for an empty name rather than raising IndexError.
    initial = cleaned.upper()[:1]
    # Anything that is not A-Z, including an empty name, goes under '#'.
    first_letter.append(initial if re.match('[A-Z]', initial) else '#')

# Buckets in display order: A..Z followed by '#'.
letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)] + ['#']

# (letter, count) pairs, largest first; equal counts keep bucket order.
data_pair = sorted(
    ((letter, first_letter.count(letter)) for letter in letters),
    key=lambda pair: pair[1],
    reverse=True,
)
# Donut chart of nickname first letters, using the (letter, count) pairs.
pie = (
    Pie(init_opts=opts.InitOpts(theme='light', width='1000px', height='800px'))
    .add("Wechat", data_pair,
         radius=["50%", "75%"])
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="微信名首字母",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(color="black", font_size=20),
        ),
        legend_opts=opts.LegendOpts(is_show=False, pos_top='5%'))
    .set_series_opts(
        # ECharts placeholders: {a} series name, {b} item name, {c} value,
        # {d} percentage. The original strings had lost {b} and carried
        # garbage text where {d} belongs.
        label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=18),
        tooltip_opts=opts.TooltipOpts(
            trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"),
    )
)
pie.render_notebook()

Emoji表情
包括了微信昵稱和簽名中的emoji表情~
# Collect emoji that appear as literal characters in nicknames. The
# character class matches any code point from U+10000 upward.
emoji_list = []
for name in df_friends.NickName:
    emoji_list.extend(re.findall(u'[\U00010000-\U0010ffff]', name))

# Load the emoji lookup table. The encoding is given explicitly so decoding
# does not depend on the platform's default locale.
with open('/home/kesci/input/emoji6441/emoji.json', 'r', encoding='utf-8') as f:
    emoji_code = json.load(f)
def find_emoji(code, table=None):
    """Return the emoji character whose ``codes`` entry equals *code*.

    Args:
        code: Code string; it is upper-cased before comparison.
        table: Iterable of dicts with ``codes`` and ``char`` keys. Defaults
            to the module-level ``emoji_code`` table.

    Returns:
        The ``char`` of the first matching entry, or None when none matches.
    """
    if table is None:
        table = emoji_code
    wanted = code.upper()
    for item in table:
        if item['codes'] == wanted:
            return item['char']
    return None
# Signatures carry emoji as "emoji" followed by a 5-character code; resolve
# each code to its character through find_emoji.
for sig in df_friends.Signature:
    codes = re.findall('emoji([a-z0-9]{5})', sig)
    resolved = (find_emoji(code) for code in codes)
    # find_emoji returns None for codes missing from the table. Skip those
    # so None is never counted or charted as if it were an emoji.
    emoji_list.extend(char for char in resolved if char is not None)

# The 18 most frequent emoji as (emoji, count) pairs, most common first.
counter = Counter(emoji_list).most_common(18)
# Horizontal bar chart of the most-used emoji. The ranking is fed in
# reverse order and the axes are swapped by reversal_axis().
ranked = counter[::-1]
bar = (
    Bar(init_opts=opts.InitOpts(theme='light', width='1000px', height='800px'))
    .add_xaxis([emoji for emoji, _ in ranked])
    # The series name had extraction garbage embedded ('使用次數(shù)').
    .add_yaxis('使用次數', [count for _, count in ranked])
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="使用最多的emoji表情",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(color="black",
                                                    font_size=20)),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False,),
        yaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False)))
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True,
                                  position='right',
                                  font_style='italic'),
        itemstyle_opts={"normal": {
            # Gradient fill for the bars, evaluated by ECharts in the browser.
            "color": JsCode(
                """new echarts.graphic.LinearGradient(1, 1, 0, 0, [{
    offset: 0,
    color: 'rgba(0, 244, 255, 1)'
}, {
    offset: 1,
    color: 'rgba(0, 77, 167, 1)'
}], false)"""
            ),
            "barBorderRadius": [30, 30, 30, 30],
            "shadowColor": "rgb(0, 160, 221)",
        }},
    )
    .reversal_axis()
)
bar.render_notebook()

簽名詞云圖
簽名說的最多的詞語是什么呢?
# Image used both as the cloud's mask and, later, as its colour source.
back_color = imread('/home/kesci/work/font/wechat_logo.jpeg')
wc = WordCloud(background_color='white',  # canvas background colour
               max_words=1000,  # upper bound on the number of words drawn
               mask=back_color,  # draw inside this image; per the original note, width/height are ignored when a mask is set
               max_font_size=100,  # largest font size used
               font_path="/home/kesci/work/font/simhei.ttf",  # font with Chinese glyphs; avoids box-shaped placeholders
               random_state=42,  # fixed seed, presumably for a reproducible layout
               )

# Keep only characters in the CJK range U+4E00..U+9FA5 from each signature.
# A single join replaces the repeated `text = text + ...` concatenation.
pattern = u"[\u4e00-\u9fa5]"
text = ''.join(
    char
    for x in df_friends['Signature']
    for char in re.findall(pattern, x)
)
def word_cloud(texts):
    """Segment *texts* with jieba and return the words joined by spaces.

    Segments of a single character are dropped.
    """
    segments = jieba.cut(texts, cut_all=False)
    return ' '.join(segment for segment in segments if len(segment) > 1)
# Segment the collected signature text and lay out the cloud from it.
text = word_cloud(text)
wc.generate(text)

# Colour generator built from the same image that serves as the mask.
image_colors = ImageColorGenerator(back_color)

plt.figure(figsize=(15, 15))
plt.axis('off')
# Recolour the cloud with the image colours and draw it.
recolored = wc.recolor(color_func=image_colors)
plt.imshow(recolored)
plt.axis('off')
# Display the figure.
plt.show()

- 整理不易,歡迎大家點贊支持~