# Word-frequency analysis of the "Three Kingdoms" novel: segment the text with
# jieba, count every multi-character token, merge aliases of the same
# character, drop filler words and print the ten most frequent entries.
import jieba

# Frequent tokens that are not character names and must be removed from the
# ranking (titles, place names, filler words).
excludes = {"將軍", "卻說", "丞相", "二人", "不可", "荊州", "不能", "如此", "商議",
            "如何", "主公", "軍士", "軍馬", "左右", "次日", "引兵", "大喜", "天下",
            "東吳", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人馬", "不知"}

# 1. Read the novel. The file is only needed for this single read, so the
#    `with` block is kept as small as possible.
#    (Path fixed from 'threeekingdom.txt' to match the word-cloud script below.)
with open('./novel/threekingdom.txt', 'r', encoding='utf-8') as f:
    words = f.read()

# 2. Segment the text and count tokens longer than one character.
counts = {}
words_list = jieba.lcut(words)
for word in words_list:
    if len(word) <= 1:
        continue
    # dict.get() returns the supplied default (0) for a key that is not
    # present yet, so no KeyError is raised on the first occurrence.
    counts[word] = counts.get(word, 0) + 1
print(len(counts))

# 3. Filtering: fold aliases of the same character into one entry, then drop
#    the excluded words.
#    NOTE: the last canonical key used to be '關(guān)公' (a stray pinyin gloss
#    inside the string), which can never match a segmented token.
alias_groups = (
    ('孔明', ('孔明曰',)),
    ('玄德', ('玄德曰', '劉備')),
    ('關公', ('云長',)),
)
for canonical, aliases in alias_groups:
    # pop() both reads and removes the alias so it cannot show up in the
    # ranking a second time; a missing alias simply contributes 0.
    merged = counts.get(canonical, 0) + sum(counts.pop(alias, 0) for alias in aliases)
    if merged:
        counts[canonical] = merged
for word in excludes:
    # pop() with a default tolerates excluded words that never occurred.
    counts.pop(word, None)

# 4. Sort the (word, count) pairs by count, highest first.
items = list(counts.items())
print(items)


def sort_by_count(x):
    """Return the count component of a (word, count) pair."""
    return x[1]


items.sort(key=sort_by_count, reverse=True)
# Slicing copes with texts that yield fewer than ten distinct words.
for role, count in items[:10]:
    print(role, count)
# 5. Conclusion: the lines printed above are the top-10 ranking.
# Lambda syntax:  lambda x1, x2, ..., xn: expression
sum_num = lambda x1, x2: x1 + x2
print(sum_num(2, 3))
# A lambda may take any number of parameters, but its body is exactly one
# expression.

# (name, amount) pairs, sorted in place by the amount, largest first.
name_info_list = [
    ('張三', 4500),
    ('李四', 9900),
    ('王五', 2000),
    ('趙六', 5500),
]
name_info_list.sort(key=lambda x: x[1], reverse=True)
print(name_info_list)

# Student records, sorted in place by ascending age.
stu_info = [
    {"name": 'zhangsan', "age": 18},
    {"name": 'lisi', "age": 30},
    {"name": 'wangwu', "age": 99},
    {"name": 'zhaoliu', "age": 3},
]
stu_info.sort(key=lambda i: i['age'])
import jieba
from wordcloud import WordCloud
# Word cloud of the ten most frequent names in the "Three Kingdoms" novel.
# 1. Read the novel. Only the read itself needs the open file, so everything
#    else happens after the `with` block has closed it.
with open('./novel/threekingdom.txt', 'r', encoding='utf-8') as f:
    words = f.read()

counts = {}  # word -> occurrences, e.g. {'曹操': 234, '回寨': 56}
# Tokens to drop from the ranking: filler words plus the aliases that are
# merged into their canonical names in step 3.
excludes = {"將軍", "卻說", "丞相", "二人", "不可", "荊州", "不能", "如此", "商議",
            "如何", "主公", "軍士", "軍馬", "左右", "次日", "引兵", "大喜", "天下",
            "東吳", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人馬", "不知",
            "孔明曰", "玄德曰", "劉備", "云長"}

# 2. Segment the text and count tokens longer than one character.
words_list = jieba.lcut(words)
for word in words_list:
    if len(word) <= 1:
        continue
    # dict.get() returns the supplied default (0) for a key that is not
    # present yet, so no KeyError is raised on the first occurrence.
    counts[word] = counts.get(word, 0) + 1
print(len(counts))

# 3. Filtering: fold aliases of the same character into one entry, then drop
#    the excluded words.
#    NOTE: the last canonical key used to be '關(guān)公' (a stray pinyin gloss
#    inside the string), which can never match a segmented token.
alias_groups = (
    ('孔明', ('孔明曰',)),
    ('玄德', ('玄德曰', '劉備')),
    ('關公', ('云長',)),
)
for canonical, aliases in alias_groups:
    # A name or alias that never occurs contributes 0 instead of raising.
    merged = counts.get(canonical, 0) + sum(counts.get(alias, 0) for alias in aliases)
    if merged:
        counts[canonical] = merged
for word in excludes:
    # pop() with a default tolerates excluded words that never occurred.
    counts.pop(word, None)

# 4. Sort the (word, count) pairs by count, highest first.
items = list(counts.items())
print(items)


def sort_by_count(x):
    """Return the count component of a (word, count) pair."""
    return x[1]


items.sort(key=sort_by_count, reverse=True)
# Each top-10 name is repeated `count` times, e.g. ['孔明', '孔明', ..., '曹操', ...],
# so the word cloud sizes it by its frequency.
li = []
# Slicing copes with texts that yield fewer than ten distinct words.
for role, count in items[:10]:
    print(role, count)
    li.extend([role] * count)

# 5. Conclusion: render the ranking as a word cloud image.
text = ' '.join(li)
WordCloud(
    font_path='msyh.ttc',
    background_color='white',
    width=800,
    height=600,
    # Disable collocation (bigram) detection so that adjacent repetitions of
    # the same name are not paired up into two-word phrases.
    collocations=False
).generate(text).to_file('TOP10.png')
# matplotlib demo: sine and cosine drawn on the same axes.
import numpy as np
from matplotlib import pyplot as plt

# Select the SimHei font for the Chinese labels below and keep the minus sign
# as a plain ASCII hyphen.
plt.rcParams.update({
    "font.sans-serif": ['SimHei'],
    'axes.unicode_minus': False,
})

# 100 evenly spaced sample points over [0, 2π]; np.linspace includes both
# endpoints of the interval.
x = np.linspace(0, 2 * np.pi, num=100)
print(x)
y = np.sin(x)
cosy = np.cos(x)

# Draw both curves in one coordinate system, each with its own line style.
for samples, line_style in (
    (y, {'color': 'g', 'linestyle': '--', 'label': 'sin(x)'}),
    (cosy, {'color': 'r', 'label': 'cos(x)'}),
):
    plt.plot(x, samples, **line_style)

plt.xlabel('時間(s)')
plt.ylabel('電壓(V)')
plt.title('歡迎來到python世界')
# Show the legend built from the `label` of each curve.
plt.legend()
plt.show()
# matplotlib setup for the chart examples that follow.
import numpy as np
from matplotlib import pyplot as plt

# Select the SimHei font and keep the minus sign as a plain ASCII hyphen.
plt.rcParams.update({
    "font.sans-serif": ['SimHei'],
    'axes.unicode_minus': False,
})
# # Plot the sine curve over [0, 2π] using 100 points
# # np.linspace returns an evenly spaced sequence that includes both endpoints
# x = np.linspace(0, 2*np.pi, num=100)
# print(x)
# y = np.sin(x)
# # Sine and cosine in the same coordinate system
# cosy = np.cos(x)
# plt.plot(x, y, color='g', linestyle='--',label='sin(x)')
# plt.plot(x, cosy, color='r',label='cos(x)')
# plt.xlabel('時間(s)')
# plt.ylabel('電壓(V)')
# plt.title('歡迎來到python世界')
# # Legend
# plt.legend()
# plt.show()
# Bar chart
# import string
# from random import randint
# # print(string.ascii_uppercase[0:6])
# # ['A', 'B', 'C'...]
# x = ['口紅{}'.format(x) for x in string.ascii_uppercase[:5] ]
# y = [randint(200, 500) for _ in range(5)]
# print(x)
# print(y)
# plt.xlabel('口紅品牌')
# plt.ylabel('價格(元)')
# plt.bar(x, y)
# plt.show()
# Pie chart

# Scatter plot
# Normally distributed data with mean 0 and standard deviation 1
# x = np.random.normal(0, 1, 100)
# y = np.random.normal(0, 1, 100)
# plt.scatter(x, y)
# plt.show()
# Scatter plot of one million points whose coordinates are drawn
# independently from a normal distribution with mean 0 and standard
# deviation 1.
x = np.random.normal(loc=0, scale=1, size=1000000)
y = np.random.normal(loc=0, scale=1, size=1000000)
# `alpha` sets the marker transparency, so overlapping points in dense
# regions add up to a darker colour.
plt.scatter(x, y, alpha=0.1)
plt.show()
# Draw a pie chart of the Three Kingdoms top 10
# Dream of the Red Chamber: top-10 character analysis