# 1. Read the novel text.
# NOTE(review): `jieba` is used below but no import is visible in this part of
# the file - confirm it is imported before this point.
with open('./novel/threekingdom.txt', 'r', encoding='utf-8') as f:
    words = f.read()
# Everything below works on the in-memory text, so the file is closed first.

# Words removed from the ranking after counting. The last four entries are
# the alias spellings that get folded into another key in step 3.
excludes = {"將軍", "卻說", "丞相", "二人", "不可", "荊州", "不能", "如此", "商議",
            "如何", "主公", "軍士", "軍馬", "左右", "次日", "引兵", "大喜", "天下",
            "東吳", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人馬", "不知",
            "孔明曰", "玄德曰", "劉備", "云長"}

# 2. Word segmentation.
words_list = jieba.lcut(words)

counts = {}  # word -> number of occurrences, e.g. {'曹操': 234, '回寨': 56}
for word in words_list:
    # Tokens of length 0 or 1 are not counted.
    if len(word) <= 1:
        continue
    # get(word, 0) supplies 0 the first time a word is seen; a plain
    # counts[word] lookup would raise KeyError for a new key.
    counts[word] = counts.get(word, 0) + 1
print(len(counts))

# 3. Filter: merge alias spellings into one key, then drop unrelated words.
# Lookups use get(..., 0) so a spelling that never occurs in the text does
# not abort the script with KeyError.
aliases = {
    '孔明': ('孔明曰',),
    '玄德': ('玄德曰', '劉備'),
    '關公': ('云長',),
}
for canonical, others in aliases.items():
    total = counts.get(canonical, 0) + sum(counts.get(a, 0) for a in others)
    if total:
        counts[canonical] = total
for word in excludes:
    # pop with a default tolerates excluded words that never appeared.
    counts.pop(word, None)

# 4. Sort the (word, count) pairs by count, highest first.
items = list(counts.items())
print(items)


def sort_by_count(x):
    """Return the count element of a ``(word, count)`` pair (sort key)."""
    return x[1]


items.sort(key=sort_by_count, reverse=True)

# li repeats each of the top words once per occurrence,
# e.g. ['孔明', '孔明', ..., '曹操', ...].
li = []
# Slicing yields at most 10 pairs and never indexes past the end of items.
for role, count in items[:10]:
    print(role, count)
    li.extend([role] * count)
# 5. Lambda syntax:  lambda x1, x2, ..., xn: expression
# A lambda may take any number of parameters, but its body is exactly one
# expression, whose value is returned.
sum_num = lambda x1, x2: x1 + x2
print(sum_num(2, 3))
# Sample records as (name, number) tuples.
# NOTE(review): the meaning of the number is not shown in this part of the
# file - confirm against the code that consumes this list.
name_info_list = [
    ('張三', 4500),
    ('李四', 9900),
    ('王五', 2000),
    ('趙六', 5500),
]
# NOTE(review): `plt` is presumably matplotlib.pyplot; no import is visible in
# this part of the file - confirm it is imported before this point.
# Put the SimHei font first in the sans-serif family (presumably so Chinese
# labels render instead of empty boxes).
plt.rcParams["font.sans-serif"] = ['SimHei']
# Draw minus signs as the ASCII hyphen rather than the Unicode minus sign,
# which the selected font may not contain.
plt.rcParams['axes.unicode_minus'] = False
#7. matplotlib可視化圖形
# 7.1 曲線圖


# 曲線圖
# 7.2 餅狀圖


# 7.3 柱狀圖


# 7.4 散點圖

