豆瓣電影數據可視化12-02

數據可視化

原始

# Step 1 of the visualization tutorial: scrape the listing and print the raw
# records, so the data can be checked before any chart is drawn.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes used by the later steps

# Request the listing page.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail with a clear HTTP error instead of a confusing parse error further down.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')

all_movies = soup.find('div', id="showing-soon")  # outermost container div

# Collect one dict per movie in this list.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    movie_name = all_a_tag[1].text  # the second <a> carries the title text
    if len(all_li_tag) == 4:
        # Full entry: date, type, area, "want to see" count.
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        lovers_tag = all_li_tag[3]
    else:
        # Some entries have no date <li>; indexing [3] would raise IndexError,
        # so the remaining fields shift up by one position.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        lovers_tag = all_li_tag[2]
    # Strip the "人想看" suffix so only the number remains.
    movie_lovers = lovers_tag.text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})
print(all_movies_info)  # check that the data was collected correctly

關注者排行榜柱狀圖

# Visualize the scraped results: horizontal bar chart ranking movies by the
# number of people who want to see them.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Request the listing page.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail with a clear HTTP error instead of a confusing parse error further down.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')

all_movies = soup.find('div', id="showing-soon")  # outermost container div

# Collect one dict per movie in this list.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    movie_name = all_a_tag[1].text  # the second <a> carries the title text
    if len(all_li_tag) == 4:
        # Full entry: date, type, area, "want to see" count.
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        lovers_tag = all_li_tag[3]
    else:
        # Some entries have no date <li>; indexing [3] would raise IndexError,
        # so the remaining fields shift up by one position.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        lovers_tag = all_li_tag[2]
    # Strip the "人想看" suffix so only the number remains.
    movie_lovers = lovers_tag.text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Draw the ranking chart.

# Sort ascending by the numeric count; 'lovers' is stored as a string, hence int().
sort_by_lovers = sorted(all_movies_info, key=lambda x: int(x['lovers']))
# [i['name'] for i in ...] is a list comprehension: it walks the list and
# picks the 'name' value out of every element.
all_names = [i['name'] for i in sort_by_lovers]
all_lovers = [i['lovers'] for i in sort_by_lovers]

lovers_rank_bar = Bar('電影關注者排行榜')  # create the chart and give it a title
# all_names (movie titles) and all_lovers (counts) are parallel lists, one
# entry per movie. is_convert=True swaps the x and y axes, is_label_show=True
# shows the value labels, label_pos='right' puts each label to the right of its bar.
lovers_rank_bar.add('', all_names, all_lovers, is_convert=True, is_label_show=True, label_pos='right')
lovers_rank_bar  # in Jupyter the chart is rendered directly in the output cell

電影類型占比圖

# Visualize the scraped results: pie chart of how often each genre occurs.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Request the listing page.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail with a clear HTTP error instead of a confusing parse error further down.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')

all_movies = soup.find('div', id="showing-soon")  # outermost container div

# Collect one dict per movie in this list.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    movie_name = all_a_tag[1].text  # the second <a> carries the title text
    if len(all_li_tag) == 4:
        # Full entry: date, type, area, "want to see" count.
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        lovers_tag = all_li_tag[3]
    else:
        # Some entries have no date <li>; indexing [3] would raise IndexError,
        # so the remaining fields shift up by one position.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        lovers_tag = all_li_tag[2]
    # Strip the "人想看" suffix so only the number remains.
    movie_lovers = lovers_tag.text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Draw the genre share chart.
all_types = [i['type'] for i in all_movies_info]
type_count = {}
for each_types in all_types:
    # A movie may list several genres joined by ' / '; count each one separately.
    for e_type in each_types.split(' / '):
        type_count[e_type] = type_count.get(e_type, 0) + 1

# title_top=20 moves the title down by 20px; with many genres the legend
# would otherwise crowd it.
type_pie = Pie('上映類型占比', title_top=20)
# Genre names and their counts, converted to parallel lists.
type_pie.add('', list(type_count.keys()), list(type_count.values()), is_label_show=True)
type_pie  # in Jupyter the chart is rendered directly in the output cell

上映日期圖

# Visualize the scraped results: bar chart of how many movies open on each date.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar  # charting classes

# Request the listing page.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail with a clear HTTP error instead of a confusing parse error further down.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')

all_movies = soup.find('div', id="showing-soon")  # outermost container div

# Collect one dict per movie in this list.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    movie_name = all_a_tag[1].text  # the second <a> carries the title text
    if len(all_li_tag) == 4:
        # Full entry: date, type, area, "want to see" count.
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        lovers_tag = all_li_tag[3]
    else:
        # Some entries have no date <li>; indexing [3] would raise IndexError,
        # so the remaining fields shift up by one position. Such movies are
        # counted under the "未知" (unknown) date bucket.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        lovers_tag = all_li_tag[2]
    # Strip the "人想看" suffix so only the number remains.
    movie_lovers = lovers_tag.text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

# Draw the release date bar chart.
all_dates = [i['date'] for i in all_movies_info]
dates_count = {}
for date in all_dates:
    dates_count[date] = dates_count.get(date, 0) + 1

dates_bar = Bar('上映日期占比')
dates_bar.add('', list(dates_count.keys()), list(dates_count.values()), is_label_show=True)
dates_bar  # in Jupyter the chart is rendered directly in the output cell

完整數據可視化

# Visualize the scraped results: the complete script, rendering the ranking
# bar chart, the genre pie chart and the release date bar chart on one page.
import requests
from bs4 import BeautifulSoup  # HTML parser
from pyecharts import Page, Pie, Bar

# Request the listing page.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail with a clear HTTP error instead of a confusing parse error further down.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')

all_movies = soup.find('div', id="showing-soon")  # outermost container div

all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):  # one div per movie
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    movie_name = all_a_tag[1].text  # the second <a> carries the title text
    # Indexing four <li> tags unconditionally raised "index out of range"
    # because some movies are listed without a release date.
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        lovers_tag = all_li_tag[3]
    else:
        # No date <li>: the remaining fields shift up by one position, and
        # the movie is counted under the "未知" (unknown) date bucket.
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        lovers_tag = all_li_tag[2]
    # Strip the "人想看" suffix so only the number remains.
    movie_lovers = lovers_tag.text.replace('人想看', '')
    all_movies_info.append({'name': movie_name, 'date': movie_date, 'type': movie_type,
                            'area': movie_area, 'lovers': movie_lovers})

page = Page()  # container that shows several charts on the same web page

# --- Ranking chart ---
# Sort ascending by the numeric count; 'lovers' is stored as a string, hence int().
sort_by_lovers = sorted(all_movies_info, key=lambda x: int(x['lovers']))
# [i['name'] for i in ...] is a list comprehension: it walks the list and
# picks the 'name' value out of every element.
all_names = [i['name'] for i in sort_by_lovers]
all_lovers = [i['lovers'] for i in sort_by_lovers]
lovers_rank_bar = Bar('電影關注者排行榜')
lovers_rank_bar.add('', all_names, all_lovers, is_convert=True, is_label_show=True, label_pos='right')
page.add(lovers_rank_bar)

# --- Genre share chart ---
all_types = [i['type'] for i in all_movies_info]
type_count = {}
for each_types in all_types:
    # A movie may list several genres joined by ' / '; count each one separately.
    for e_type in each_types.split(' / '):
        type_count[e_type] = type_count.get(e_type, 0) + 1

type_pie = Pie('上映類型占比', title_top=20)
type_pie.add('', list(type_count.keys()), list(type_count.values()), is_label_show=True)
page.add(type_pie)

# --- Release date bar chart ---
all_dates = [i['date'] for i in all_movies_info]
dates_count = {}
for date in all_dates:
    dates_count[date] = dates_count.get(date, 0) + 1

dates_bar = Bar('上映日期占比')
dates_bar.add('', list(dates_count.keys()), list(dates_count.values()), is_label_show=True)
page.add(dates_bar)

page  # in Jupyter the page is rendered automatically

數據分析

  • 關注者排行榜圖里,網絡迷蹤,12.14上映,美國犯罪嫌疑劇情片,關注人數123710;狗十三,12.07上映,大陸家庭劇情片,關注人數106787;龍貓,12.14上映,日漫,關注人數98370;這三部電影的受歡迎度遠遠超過其他電影,最受大眾期待。
  • 上映電影類型圖里,最多的是劇情類,其次是愛情和喜劇。
  • 上映日期也表明了,12月14日上映的電影最多,其中最受歡迎的兩部網絡迷蹤和龍貓就在當天上映。
  • 龍貓 如果你在下雨天的車站,遇到被淋濕的妖怪,請把雨傘借給它,你會得到森林的通行證哦
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

  • 【飏飔妍兒20161115學而思D94】靜怡帶讀《愛彌兒》D2興趣是最好的老師,嘿嘿,想更科學的育兒就成了香知悅讀...
    飏飔妍兒閱讀 229評論 0 0
  • 在我眼里 浪費是個中性詞 像夏日的午后 風扇對著小腿呼呼的吹 冰鎮飲料和熟透了的西瓜 時間和陽光一起從窗口流走 就...
    余余余余余余余_閱讀 294評論 1 0
  • 我相信了一種觀點,就是人面臨生命的最終點,腦海中會不自覺地閃現出人生歷史的重要片段。幾乎是毫無阻擋地迅速...
    古月xv閱讀 172評論 0 0
  • 我眼中的保險 保險是國內最主要的三個金融行業之一。三個金融行業分別是:銀行、證券、保險。有句話是這么說的:你不理財...
    A_JEAN閱讀 4,312評論 0 0

友情鏈接更多精彩內容