抓取并推送厦大经济学院招生新闻更新

下面是一段简单的 Python 代码：用 requests 和 BeautifulSoup 抓取新闻列表，用 MongoDB 记录已推送过的标题，并通过邮件推送新增的条目。

import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

# mail
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from email.mime.multipart import MIMEMultipart

# MongoDB connection; the ``news`` collection of this database is used below
# to remember which titles have already been pushed.
database = "mongodb://10.211.55.5:27017"
client = MongoClient(database)
# Subscript access selects the same database as ``client.xmunv``.
db = client["xmunv"]


def get_list(timeout=10):
    """Fetch the enrollment news page and return its list entries.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The ``<li>`` tags found inside the element whose class is
        ``module paged_list``, or an empty list when the page contains no
        such element.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        response = session.get(
            'http://economic.xmu.edu.cn/info/enrollment/', timeout=timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        html = response.text

    soup = BeautifulSoup(html, "html.parser")
    container = soup.find(class_='module paged_list')
    if container is None:
        # find() returns None when nothing matches; report it instead of
        # crashing with an AttributeError on the next line.
        print("news container not found; the page layout may have changed")
        return []
    return container.find_all("li")


def print_list(news):
    """Print every news entry as two lines: its title, then its absolute URL.

    Args:
        news: Iterable of entries exposing ``.text`` and an ``.a`` element
            whose ``get('href')`` yields the site-relative link.
    """
    site_root = "http://economic.xmu.edu.cn%s"
    for item in news:
        headline = item.text.strip()
        print(headline)
        href = item.a.get('href')
        print(site_root % href)


def lookup_database(news):
    """Push every news entry whose title is not yet stored in ``db.news``.

    Args:
        news: Iterable of parsed list entries exposing ``.text`` and an
            ``.a`` anchor element (as returned by ``get_list``).

    An entry is recorded only after its notification was sent, so a failed
    send is retried on the next run instead of being silently lost.
    """
    for each in news:
        link = each.a
        if link is None:
            # Entries without an anchor have no URL to push; skip them
            # rather than aborting the whole run.
            continue

        title = each.text.strip()
        if db.news.find_one({"title": title}) is not None:
            # Already pushed on an earlier run.
            continue

        url = "http://economic.xmu.edu.cn%s" % link.get('href')
        # Send first, record second: if push() raises, the title stays
        # unknown to the database and will be pushed again next time.
        push(title, url)
        db.news.insert_one({"title": title})


def push(title, url):
    """Build the HTML notification for one news entry and e-mail it.

    Args:
        title: Headline of the news entry (scraped text).
        url: Absolute link to the news entry.

    The title and URL come from a scraped page, so both are HTML-escaped
    before being interpolated; otherwise characters such as ``&``, ``<`` or
    ``"`` would corrupt the markup or the ``href`` attribute.
    """
    # Local import so the block does not depend on the file's import header.
    from html import escape

    safe_url = escape(url)  # quote=True by default, safe inside href="..."
    text = """

    <h2>%s</h2><br>
    <a href="%s" >%s</a>

    """ % (escape(title), safe_url, safe_url)
    print("pushing", title)
    send_mail(text)


def send_mail(data):
    """Send *data* as an HTML e-mail through the 163.com SMTP server.

    Args:
        data: HTML markup used as the message body.

    Raises:
        smtplib.SMTPException: If login or delivery is rejected.
        OSError: If the connection to the SMTP server cannot be established.
    """
    # NOTE(review): credentials are hard-coded in the source (masked here);
    # consider loading them from the environment or a config file.
    sender = '*********@163.com'
    smtp_server = 'smtp.163.com'
    username = sender
    password = '*********'

    receiver = ['chych123@foxmail.com']
    subject = '厦大经济学院新闻更新'

    # Build the complete message before opening the connection.
    msg = MIMEMultipart('alternative')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['from'] = sender
    # Without a To header the message has no visible recipient.
    msg['To'] = ', '.join(receiver)
    msg.attach(MIMEText(data, 'html', 'utf-8'))

    # SMTP(host) connects immediately; the context manager issues QUIT and
    # closes the socket even when login or sendmail raises.
    with smtplib.SMTP(smtp_server) as smtp:
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, msg.as_string())


# Script entry point: fetch the current news list and push every entry whose
# title is not yet recorded in the database. Runs at import time.
lookup_database(get_list())

发表评论

电子邮件地址不会被公开。