數據可視化
原始
# Scrape Douban's "coming soon" listing for Chengdu and print the parsed rows.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors right here
# instead of as a confusing failure further down while parsing.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")  # outermost container div

# One dict per movie: name / date / type / area / lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> in the item is used for both the title text and the link.
    movie_name = all_a_tag[1].text
    moive_href = all_a_tag[1]['href']  # not used below; original name kept
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        # Strip the "人想看" suffix so only the number remains.
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # Some entries show no release date, which shifts every <li> up by
        # one; indexing [3] unconditionally raised IndexError for them.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

print(all_movies_info)  # sanity check: show the collected rows
關注者排行榜柱狀圖
# Scrape Douban's "coming soon" listing for Chengdu and draw a horizontal
# bar chart ranking the movies by follower count.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors right here
# instead of as a confusing failure further down while parsing.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")  # outermost container div

# One dict per movie: name / date / type / area / lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> in the item is used for both the title text and the link.
    movie_name = all_a_tag[1].text
    moive_href = all_a_tag[1]['href']  # not used below; original name kept
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        # Strip the "人想看" suffix so only the number remains.
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # Some entries show no release date, which shifts every <li> up by
        # one; indexing [3] unconditionally raised IndexError for them.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Follower ranking chart.
# 'lovers' is stored as text, so sort on its integer value (ascending).
sort_by_lovers = sorted(all_movies_info, key=lambda x: int(x['lovers']))
# Names and counts are taken from the same sorted list so they stay paired.
all_names = [i['name'] for i in sort_by_lovers]
all_lovers = [i['lovers'] for i in sort_by_lovers]

# The title literal previously contained a stray "(guān)" annotation that
# would have been rendered verbatim in the chart title.
lovers_rank_bar = Bar('電影關注者排行榜')
# is_convert=True swaps the x and y axes (horizontal bars);
# is_label_show=True prints each value; label_pos='right' puts it to the
# right of the bar.
lovers_rank_bar.add(
    '', all_names, all_lovers,
    is_convert=True, is_label_show=True, label_pos='right',
)
lovers_rank_bar  # in Jupyter, a trailing bare expression renders the chart
電影類型占比圖
# Scrape Douban's "coming soon" listing for Chengdu and draw a pie chart of
# how often each genre appears.
from collections import Counter

import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors right here
# instead of as a confusing failure further down while parsing.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")  # outermost container div

# One dict per movie: name / date / type / area / lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> in the item is used for both the title text and the link.
    movie_name = all_a_tag[1].text
    moive_href = all_a_tag[1]['href']  # not used below; original name kept
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        # Strip the "人想看" suffix so only the number remains.
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # Some entries show no release date, which shifts every <li> up by
        # one; indexing [3] unconditionally raised IndexError for them.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Genre share chart.
all_types = [i['type'] for i in all_movies_info]
# A movie may list several genres joined by ' / ' (e.g. "愛情 / 奇幻");
# count each genre separately. Counter keeps first-seen order, so the
# keys()/values() lists below line up exactly as the manual dict did.
type_count = Counter()
for each_types in all_types:
    type_count.update(each_types.split(' / '))

# title_top=20 moves the title down 20px so the many legend entries do not
# collide with it.
type_pie = Pie('上映類型占比', title_top=20)
type_pie.add('', list(type_count.keys()), list(type_count.values()), is_label_show=True)
type_pie  # in Jupyter, a trailing bare expression renders the chart
上映日期圖
# Scrape Douban's "coming soon" listing for Chengdu and draw a bar chart of
# how many movies open on each release date.
from collections import Counter

import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors right here
# instead of as a confusing failure further down while parsing.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")  # outermost container div

# One dict per movie: name / date / type / area / lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> in the item is used for both the title text and the link.
    movie_name = all_a_tag[1].text
    moive_href = all_a_tag[1]['href']  # not used below; original name kept
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        # Strip the "人想看" suffix so only the number remains.
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # Some entries show no release date, which shifts every <li> up by
        # one; indexing [3] unconditionally raised IndexError for them.
        # Such movies are grouped under the "未知" date.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Release-date chart.
all_dates = [i['date'] for i in all_movies_info]
# Counter tallies occurrences and keeps first-seen order, so the
# keys()/values() lists below line up exactly as the manual dict did.
dates_count = Counter(all_dates)

dates_bar = Bar('上映日期占比')
dates_bar.add('', list(dates_count.keys()), list(dates_count.values()), is_label_show=True)
dates_bar  # in Jupyter, a trailing bare expression renders the chart
完整數據可視化
# Scrape Douban's "coming soon" listing for Chengdu and render three charts
# (follower ranking, genre share, release dates) on a single page.
from collections import Counter

import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status surfaces HTTP errors right here
# instead of as a confusing failure further down while parsing.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")  # outermost container div

# One dict per movie: name / date / type / area / lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> in the item is used for both the title text and the link.
    movie_name = all_a_tag[1].text
    moive_href = all_a_tag[1]['href']  # not used below; original name kept
    # Indexing four <li> unconditionally raised "index out of range" because
    # some movies show no release date.
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        # Strip the "人想看" suffix so only the number remains.
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # No date <li>: the remaining fields shift up by one position.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

page = Page()  # container that shows several charts on one page

# --- Follower ranking chart ---
# 'lovers' is stored as text, so sort on its integer value (ascending).
sort_by_lovers = sorted(all_movies_info, key=lambda x: int(x['lovers']))
# Names and counts are taken from the same sorted list so they stay paired.
all_names = [i['name'] for i in sort_by_lovers]
all_lovers = [i['lovers'] for i in sort_by_lovers]
# The title literal previously contained a stray "(guān)" annotation that
# would have been rendered verbatim in the chart title.
lovers_rank_bar = Bar('電影關注者排行榜')
lovers_rank_bar.add(
    '', all_names, all_lovers,
    is_convert=True, is_label_show=True, label_pos='right',
)
page.add(lovers_rank_bar)

# --- Genre share chart ---
all_types = [i['type'] for i in all_movies_info]
# A movie may list several genres joined by ' / ' (e.g. "愛情 / 奇幻");
# count each genre separately. Counter keeps first-seen order, so the
# keys()/values() lists line up exactly as the manual dict did.
type_count = Counter()
for each_types in all_types:
    type_count.update(each_types.split(' / '))
type_pie = Pie('上映類型占比', title_top=20)
type_pie.add('', list(type_count.keys()), list(type_count.values()), is_label_show=True)
page.add(type_pie)

# --- Release-date chart ---
all_dates = [i['date'] for i in all_movies_info]
dates_count = Counter(all_dates)
dates_bar = Bar('上映日期占比')
dates_bar.add('', list(dates_count.keys()), list(dates_count.values()), is_label_show=True)
page.add(dates_bar)

page  # in Jupyter, a trailing bare expression renders the whole page
數據分析
- 關注者排行榜圖里,網絡迷蹤,12.14上映,美國犯罪嫌疑劇情片,關注人數123710;狗十三,12.07上映,大陸家庭劇情片,關注人數106787;龍貓,12.14上映,日漫,關注人數98370;這三部電影的受歡迎度遠遠超過其他電影,最受大眾期待。
- 上映電影類型圖里,最多的是劇情類,其次是愛情和喜劇。
- 上映日期也表明了,12月14日上映的電影最多,其中最受歡迎的兩部網絡迷蹤和龍貓就在當天上映。
- 龍貓 如果你在下雨天的車站,遇到被淋濕的妖怪,請把雨傘借給它,你會得到森林的通行證哦