Python:定時獲取通知內容并發送到郵箱

由于需要看通知內容,但是每次從手機上看需要輸入賬號密碼之后才能看,于是便萌生了用 Python 獲取通知內容并定時發送到自己郵箱的想法。

實現并不算復雜,用 BeautifulSoup 抓取內容,Redis 記錄文章是否閱讀過,Jinja2 是郵件內容的模板引擎。用和風天氣的 API 在郵件正文前加了個天氣預報。

只是有一個點要注意,啟動程序前要先留意 locale (Linux 命令)輸出的內容是否為 zh_CN.UTF-8。最后我是寫了個 shell 腳本啟動,并在運行前 export LC_ALL=zh_CN.UTF-8。

Redis
Welcome to Jinja2 — Jinja2 Documentation (2.9)
yagmail 0.10.190 : Python Package Index
API說明文檔 | 和風天氣
Beautiful Soup 4.4.0 文檔 — beautifulsoup 4.4.0 文檔

程序在啟動的時候加 -t 的參數只會給自己的郵箱發郵件,用作測試(當然需要提前配置好)
實現如下:

主文件

#!/usr/bin/python3
# -*- coding:utf-8 -*-

'''
【留意!!】
啟動程序前要先留意 locale (Linux 命令)輸出的內容是否為 zh_CN.UTF-8
建議寫 shell 腳本啟動并在運行前 export LC_ALL=zh_CN.UTF-8
'''

from conf import *
from sys import argv
from urllib.parse import unquote
from bs4 import BeautifulSoup
from jinja2 import  Environment,FileSystemLoader,select_autoescape
import re,os,json,time,redis,yagmail,requests

# One shared HTTP session: get_index() logs in through it and parse_html()
# reuses it for the article detail requests.
session = requests.Session()
# The header name must be spelled 'User-Agent'. The previous key 'UserAgent'
# was sent as an unrelated custom header, so the default python-requests
# agent string was still being presented to the server.
session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0'})

# Templates are looked up in ./template under the current working directory
# first (the original behavior) and then next to this script, so starting
# the program from another directory (e.g. from cron) still finds them.
jinja2_env = Environment(
    loader=FileSystemLoader([
        os.path.join(os.getcwd(), 'template'),
        os.path.join(os.path.dirname(os.path.abspath(__file__)), 'template'),
    ]),
    autoescape=select_autoescape(['html'])
)


def printf(string):
    """Print ``string`` to stdout prefixed with the current local time.

    Falsy input (empty string, ``None``) is ignored and nothing is printed.
    """
    if not string:
        return
    stamp = time.strftime("%Y-%m-%d %H:%M:%S : ", time.localtime())
    print(stamp + string)


def article_id_exist(id):
    """Report whether an article id has been seen before, recording it if not.

    Args:
        id: Article id as a string of digits.

    Returns:
        True when the id is already stored in Redis. False when the id is
        empty or not numeric, or when it was unseen; in the unseen case the
        id is stored so the next call returns True.
    """
    if not id:
        printf('empty article id')
        return False

    if not id.isdigit():
        printf('need number instead of other value type')
        return False

    store = redis.StrictRedis(host='localhost', port=6379, db=0)

    already_seen = bool(store.get(id))
    if not already_seen:
        # First sighting: remember the id so it is reported as seen next time.
        store.set(id, "True")
    return already_seen


def get_weather_data(timeout=10):
    """Fetch the weather report for WEATHER_API_CITY from the weather API.

    Args:
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the whole run.

    Returns:
        The decoded JSON payload on success, or an empty list when the
        request fails, the status is not 200, or the body cannot be decoded.
    """
    # Let requests build the query string; the hand-built version contained
    # a stray '&&' and did no escaping of its own.
    query = {'city': WEATHER_API_CITY, 'key': WEATHER_API_KEY}
    try:
        weather_request = requests.get(WEATHER_API_URL, params=query, timeout=timeout)
    except requests.RequestException as error:
        printf('get weather data failed: %s' % error)
        return []

    if weather_request.status_code != 200:
        printf('get weather data failed ' + str(weather_request.status_code))
        return []

    try:
        return json.loads(weather_request.content.decode(encoding='utf-8'))
    except ValueError as error:
        # Covers both invalid UTF-8 and invalid JSON (both are ValueError subclasses).
        printf('weather data is not valid JSON: %s' % error)
        return []


def get_index(timeout=10):
    """Log in to the notice site and fetch the category-4 article list page.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The raw response body (bytes) of the article list page, or None when
        the login is rejected, a request fails, or the list page does not
        answer with status 200. Every failure path is logged.
    """
    login_url = ROOT_URL + '/UserLogin.aspx'
    try:
        # Load the login page before posting the form.
        # NOTE(review): presumably this lets the session pick up cookies the
        # server expects on the POST - confirm against the site.
        session.get(url=login_url, timeout=timeout)
        result = session.post(login_url, data=LOGIN_DATA, timeout=timeout)

        # A successful login ends up on the site root after the redirect.
        if result.status_code != 200 or result.url != ROOT_URL + '/':
            printf('login failed ' + str(result.status_code))
            return None

        category = session.get(ROOT_URL + '/ArticleList.aspx?category=4', timeout=timeout)
    except requests.RequestException as error:
        printf('request failed: %s' % error)
        return None

    if category.status_code != 200:
        printf('get article list failed ' + str(category.status_code))
        return None
    return category.content


# Attachment links live under this prefix; the pattern's second group is the
# file name. Compiled once here instead of once per article.
_ATTACHMENT_URL_PREFIX = 'http://news.gdut.edu.cn/DepartmentUploadFiles'
_ATTACHMENT_URL_PATTERN = re.compile(r'http://news.gdut.edu.cn/DepartmentUploadFiles/(.+)/files/(.+)')


def _collect_attachments(article_content):
    """Return one {'attach_name', 'attach_url'} dict per upload link in the body."""
    attachments = []
    for link in article_content.find_all('a'):
        href = link.get('href')
        if not href:
            # Anchors without an href cannot point at an attachment.
            continue

        match = None
        if _ATTACHMENT_URL_PREFIX in href:
            match = _ATTACHMENT_URL_PATTERN.match(href)
        if not match:
            printf('%s do not have file attachment' % (href))
            continue

        # unquote() leaves names without percent-escapes untouched.
        attachments.append({'attach_name': unquote(match.group(2)), 'attach_url': href})
    return attachments


def _build_excerpt(article_content, article_title, article_author):
    """Return up to 150 characters of body text without whitespace, title or author line."""
    info = ''.join(article_content.getText().split())
    # The body text has all whitespace removed, so strip it from the title
    # and author as well; otherwise values containing spaces never match.
    info = info.replace(''.join(article_title.split()), '')
    info = info.replace('單位:' + ''.join(article_author.split()), '')
    return info[:150]


def parse_html(html, timeout=10):
    """Extract the unread articles from the article list page.

    Args:
        html: Raw HTML of the article list page.
        timeout: Seconds to wait for each article detail request.

    Returns:
        A list of dicts with the keys 'url', 'date', 'title', 'author',
        'excerpt' and 'attachment' (a list of attachment dicts), one per
        article not seen before. Returns None when ``html`` is empty or the
        page has no ``div.articles`` container.
    """
    if not html:
        printf('empty html')
        return None

    html_soup = BeautifulSoup(html, 'lxml')
    articles = html_soup.find('div', attrs={'class': 'articles'})

    if not articles:
        printf('article not found')
        return None

    article_result = []

    for val in articles.find_all('p'):
        anchor = val.find('a')
        spans = val.find_all('span')
        # Each entry needs a link plus two spans (author, date); skip
        # anything else instead of crashing the whole run.
        if anchor is None or not anchor.get('href') or len(spans) < 2:
            printf('skip malformed article entry')
            continue

        href = anchor['href']
        article_id = href[-6:]
        article_url = ROOT_URL + href[1:]
        article_date = spans[1].getText()[:-1]
        article_title = anchor.get('title', '')
        article_author = spans[0].get('title', '')

        # NOTE(review): article_id_exist() records the id as seen on first
        # sight, so an article whose detail request fails below is not
        # retried on the next run.
        if article_id_exist(article_id):
            printf('article exist in database %s' % (article_title))
            continue

        try:
            article_detail = session.get(url=article_url, timeout=timeout)
        except requests.RequestException as error:
            printf('get article detail error %s: %s' % (article_id, error))
            continue

        if article_detail.status_code != 200:
            printf('get article detail error %s' % (article_id))
            continue

        article_soup = BeautifulSoup(article_detail.content, 'lxml')
        article_content = article_soup.find('div', attrs={'id': 'articleBody'})

        if article_content is None:
            # Still report the article (title and link) even without a body.
            printf('article body not found %s' % (article_id))
            article_attachment = []
            article_excerpt = ''
        else:
            article_attachment = _collect_attachments(article_content)
            article_excerpt = _build_excerpt(article_content, article_title, article_author)

        article_result.append(
            {
                'url': article_url,
                'date': article_date,
                'title': article_title,
                'author': article_author,
                'excerpt': article_excerpt,
                'attachment': article_attachment,
            }
        )
    return article_result


def main():
    """Build the daily digest mail and send it to the configured recipients.

    The mail body is: weekday greeting, weather section, changelog line and
    the article section. When ``-t`` is given on the command line the mail
    goes to SEND_TO_LIST_TEST instead of SEND_TO_LIST.
    """
    # Indexed by strftime('%w'): 0 is Sunday ... 6 is Saturday.
    # The literals below had stray pinyin fragments from a corrupted copy
    # (one of them broke the trailing '\n' escape); they are restored here.
    welcome_string = [
        '周日:今天是周末的最后一天,好好珍惜時間\n',
        '周一:你從周末的作息里調整過來了嗎?把上周的通知郵件都刪了吧\n',
        '周二:吾日三省吾身\n',
        '周三:生活仍將繼續\n',
        '周四:未來近在咫尺\n',
        '周五:明天就是周末了,加油!\n',
        '周六:你今天打算做什么?別浪費時間\n',
    ]

    welcome_content = welcome_string[int(time.strftime('%w', time.localtime()))]
    update_content = '最近更新:' + VERSION + ':' + ANNOUNCEMENT + '\n'

    weather_data = get_weather_data()
    printf('get weather data finish')

    # get_weather_data() returns an empty list on failure; send the mail
    # without a weather section instead of crashing before anything is sent.
    weather_content = ''
    try:
        report = weather_data['HeWeather5'][0]
        now = report['now']
        forecast = report['hourly_forecast']
    except (KeyError, IndexError, TypeError):
        printf('weather data unavailable, weather section skipped')
    else:
        weather_render = jinja2_env.get_template('weather.html')
        weather_content = weather_render.render(now=now, forecast=forecast)

    index = get_index()
    article_data = parse_html(index)

    # The template shows its "no unread articles" text for an empty list.
    article_render = jinja2_env.get_template('article.html')
    article_content = article_render.render(articles=article_data or [])

    mail_client = yagmail.SMTP(user=SEND_MAIL_USER, password=SEND_MAIL_PWD, host=SEND_MAIL_HOST, port=SEND_MAIL_PORT)
    mail_content = welcome_content + weather_content + update_content + article_content

    if '-t' in argv[1:]:
        recipients = SEND_TO_LIST_TEST
        log_prefix = 'sending[test user]: '
    else:
        recipients = SEND_TO_LIST
        log_prefix = 'sending : '

    for addr in recipients:
        printf(log_prefix + addr)
        mail_client.send(addr, subject=SEND_MAIL_SUBJECT, contents=mail_content)
        # Pause between sends.
        time.sleep(1)


if __name__ == '__main__':
    main()

同級目錄下的 conf.py 的配置文件

#!/usr/bin/python3
# -*- coding:utf-8 -*-

import time

# Base URL of the notice site
ROOT_URL = 'http://test.com'

# Form fields submitted with the login request
LOGIN_DATA = {}
LOGIN_DATA['__VIEWSTATE'] = '/wEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ='
LOGIN_DATA['__EVENTVALIDATION'] = '/wEWBQKb37HjDwLgvLy9BQKi4MPwCQL+zqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c='

# Ask campus staff for a real account; the credentials below are invalid placeholders
LOGIN_DATA['ctl00$ContentPlaceHolder1$userEmail'] = 'test'
LOGIN_DATA['ctl00$ContentPlaceHolder1$userPassWord'] = 'test'

LOGIN_DATA['ctl00$ContentPlaceHolder1$CheckBox1'] = 'on'
# NOTE(review): this value is already percent-encoded; if it is sent as form
# data it will likely be encoded a second time - confirm the server accepts it.
LOGIN_DATA['ctl00$ContentPlaceHolder1$Button1'] = '%E7%99%BB%E5%BD%95'

# Sender mailbox account
SEND_MAIL_USER = 'account'
# Password of the sender mailbox
SEND_MAIL_PWD = 'password'
# SMTP host (Tencent Exmail)
SEND_MAIL_HOST = 'smtp.exmail.qq.com'
# SMTP port
SEND_MAIL_PORT = 465
# Mail subject: today's date followed by a fixed title.
# The title literal had stray pinyin fragments from a corrupted copy; restored.
SEND_MAIL_SUBJECT = time.strftime("%Y-%m-%d", time.localtime()) + '@今日校內通知'
# Recipients of the mail
SEND_TO_LIST = [
   'mail@mail.com',
]
# Recipients used for testing; selected with the -t option
SEND_TO_LIST_TEST = ['mail@mail.com']
# HeWeather API endpoint
WEATHER_API_URL = 'https://free-api.heweather.com/v5/weather?'
# City for the weather API; pinyin or Chinese characters both work
WEATHER_API_CITY = 'guangzhou'
# Free-tier key (4000 calls per day), available after registration
WEATHER_API_KEY = 'key'

# Changelog line shown in every mail (corrupted characters restored)
ANNOUNCEMENT = '重構,使用模板引擎取代字符串拼接生成郵件內容(https://github.com/ypingcn/)'
VERSION = '2017.09.26'

template 文件夾的內容是郵件正文的模板

  • article.html
{#- Article section: one list item per unread article, or a placeholder line when there are none. -#}
{%- if articles %}
<p> 今日的新聞通知如下 </p>
<ul>
    {%- for article in articles %}
    <li>
        <a href='{{article.url}}'>
            <font color="red"> {{ article.title }} </font>
        </a>
        {{ article.author }} - {{ article.date }}
        {{ article.excerpt }}
        {#- Attachment links found in the article body -#}
        {%- for link in article.attachment %}
        <a href='{{link.attach_url}}'>{{ link.attach_name }}</a>
        {%- endfor %}
    </li>
    {%- endfor %}
</ul>
{%- else %}
<p> 暫無未讀的新聞通知 </p>
{%- endif %}
  • weather.html
{#- Weather section: current conditions, then one line per forecast entry. -#}
<p>天氣:{{ now.cond.txt }},氣溫:{{ now.tmp }}℃,體感溫度:{{ now.fl }}攝氏度</p>
<br>未來幾個小時內的天氣預報為:
{%- for hour in forecast %}
<br>{{ hour.date }} : {{ hour.cond.txt }}
{%- endfor %}

寫的不是太好,還是有很多需要改正的地方。以后再作修改。

來自個人 Python 文集

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容