一共两个脚本:
第一个是【借书完毕以及借书信息有变更(续借等)】的时候需要执行的脚本:实现模拟登陆,同时把最新的借书信息下载到本地文本;之所以没有让每天的检查脚本直接去爬取,是因为如果每次都爬取一遍的话,就需要每次输入一遍验证码,自动执行就没有意义了;现在只需要输入一次验证码,把借书信息导出到文本上,以后直接读取就行了。
第二个脚本,需要添加到启动计划中每天自动执行的(每天自动检查有没有超期书籍)
__author__ = 'Oscar_Yang'
#-*- coding= utf-8 -*-
import subprocess
import sys
import os
import requests
import re
import random
import time
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText
from email.header import Header
import pytesseract
from PIL import Image


def login(name, password):
    """Log in to the library site and return the parsed loan-list page.

    Downloads a captcha image to ``captcha.png``, opens it with the
    platform's default viewer, asks the user to type the code, posts the
    login form and finally fetches ``book_lst.php``.

    Relies on a module-level ``session`` object (created in the script's
    ``__main__`` section) for all HTTP requests, so the login cookie is
    shared between the captcha, login and loan-list requests.

    Args:
        name: Value sent as the ``number`` field of the login form.
        password: Value sent as the ``passwd`` field of the login form.

    Returns:
        A ``BeautifulSoup`` document built from the loan-list page.
    """
    # A random number is appended as the query string of the captcha URL.
    random_num = random.random()
    url = 'http://202.206.242.99//reader/captcha.php?' + str(random_num)
    get_captcha = session.get(url).content
    with open('captcha.png', 'wb') as f:
        f.write(get_captcha)

    # Open the image directly so the user does not have to look for it in
    # the file system; the command depends on the operating system.
    if sys.platform.find('darwin') >= 0:
        subprocess.call(['open', 'captcha.png'])
    elif sys.platform.find('linux') >= 0:
        subprocess.call(['xdg-open', 'captcha.png'])
    else:
        os.startfile('captcha.png')

    # The captcha is typed in by hand (an OCR attempt with pytesseract was
    # left disabled in the original script).
    input_captcha = input('请输入验证码:')

    # Login form: reader number, password, captcha and the login type.
    post_data = {
        'number': name,
        'passwd': password,
        'captcha': input_captcha,
        'select': 'cert_no'
    }
    login_url = 'http://202.206.242.99/reader/redr_verify.php'
    # The response body is not inspected; the request is made for its
    # effect on the session.
    # NOTE(review): a failed login is not detected here - confirm whether
    # the loan-list page needs to be checked for that case.
    session.post(login_url, data=post_data)

    book_hist_url = 'http://202.206.242.99/reader/book_lst.php'
    content = session.get(book_hist_url).content.decode('utf-8')
    return BeautifulSoup(content, "lxml")


def get_data(soup):
    """Collect title, due date and detail URL for every listed book.

    Args:
        soup: Parsed loan-list page as returned by ``login``.

    Returns:
        A list of dicts with the keys ``title``, ``deadline`` and
        ``item_url``, one per ``a.blue`` link paired with a ``font`` element.
    """
    # Each "a.blue" link supplies both the title text and the detail href.
    links = soup.select("a.blue")
    # The first <font> element is skipped - presumably it is not a due
    # date; verify against the page markup.
    deadlines = soup.select("font")[1:]
    return [
        {
            "title": link.text,
            "deadline": deadline.text.strip(),
            "item_url": "http://202.206.242.99/" + link["href"],
        }
        for link, deadline in zip(links, deadlines)
    ]
def get_detail_data(item_url):
    """Fetch a book's detail page and extract the links in its sharing area.

    Args:
        item_url: URL of the book's detail page.

    Returns:
        A list with the ``href`` values found inside the elements whose
        class is ``sharing_zy``. The list is also printed, as before.
    """
    # A timeout keeps an unresponsive server from blocking the script forever.
    res = requests.get(item_url, timeout=10)
    res.encoding = "utf8"
    soup = BeautifulSoup(res.text, "lxml")
    sharing_nodes = soup.find_all(class_="sharing_zy")
    # The hrefs are pulled out of the serialized markup of those elements.
    intro = re.findall(r'href="(.*?)"', str(sharing_nodes))
    print(intro)
    return intro
def send_email(deadline, title, item_url, day, name):
    """Send a due-date reminder e-mail for one borrowed book.

    Args:
        deadline: Due date text shown in the message body.
        title: Book title shown in the message body.
        item_url: Link to the book's detail page, appended to the body.
        day: Number of days left until the due date.
        name: Reader name used in the From/To display headers.
    """
    # Placeholders: fill in the real account data before running.
    from_addr = '###'
    password = '###'
    to_addr = '###'
    smtp_server = '###'

    msg = MIMEText('''hello:\n\n 《{}》这本书还有{}天到期,deadline为{},尽快去还吧!\n\n 注意:为了防止被识别为垃圾邮件,以下内容为自动添加,同时供您查看!\n\n点击链接查看图书详情{}'''.format(title, day, deadline, item_url), 'plain', 'utf-8')
    msg['From'] = Header("{}请注意借书到期通知".format(name), 'utf-8')
    msg['To'] = Header("{}同学".format(name), 'utf-8')
    msg['Subject'] = Header('hello', 'utf-8')

    # The context manager sends QUIT and closes the connection even when
    # login or sending raises.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())


def data1file(data, path=r"C:\Users\Oscar\Desktop\数据.txt"):
    """Append ``str(data)`` to the data file, preceded by a newline.

    Args:
        data: Object to store; its ``str()`` form is written.
        path: Target file. Defaults to the original hard-coded location.
    """
    with open(path, "a", encoding="utf-8") as file:
        file.write("\n")
        file.write(str(data))


def days_until_due(deadline, today=None):
    """Return the number of days from ``today`` until ``deadline``.

    Uses real calendar arithmetic, so month lengths and year changes are
    handled correctly. The result is negative when the date has passed.

    Args:
        deadline: Due date as a ``YYYY-MM-DD`` string.
        today: ``datetime.date`` to measure from; defaults to the current
            local date.

    Raises:
        ValueError: If ``deadline`` is not in ``YYYY-MM-DD`` form.
    """
    import datetime

    due = datetime.datetime.strptime(deadline.strip(), "%Y-%m-%d").date()
    if today is None:
        today = datetime.date.today()
    return (due - today).days


if __name__ == '__main__':
    # Reminders are sent for books due in fewer than this many days.
    reminder_days = 7

    # login() reads this module-level session.
    session = requests.Session()
    session.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
    }

    name = input("请输入用户名:")
    password = input("请输入密码:")
    soup = login(name, password)

    base_data_list = get_data(soup)
    for base_data in base_data_list:
        deadline = base_data["deadline"]  # due date
        title = base_data["title"]  # book title
        item_url = base_data["item_url"]  # detail page on the library site
        data_all = {
            "deadline": deadline,
            "title": title,
            "item_url": item_url,
            "name": name
        }
        # Stored so the record can be read back later without logging in.
        data1file(data_all)

        try:
            day = days_until_due(deadline)
        except ValueError:
            print("无法解析应还日期,已跳过:{}".format(deadline))
            continue
        if day < reminder_days:
            send_email(deadline, title, item_url, day, name)
import subprocess
import sys
import os
import requests
import re
import random
import time
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText
from email.header import Header


def send_email(deadline, title, item_url, day, name):
    """Send a due-date reminder e-mail for one borrowed book.

    Args:
        deadline: Due date text shown in the message body.
        title: Book title shown in the message body.
        item_url: Link to the book's detail page, appended to the body.
        day: Number of days left until the due date.
        name: Reader name used in the From/To display headers.
    """
    # Placeholders: fill in the real account data before running.
    from_addr = '###@qq.com'
    password = '###'
    to_addr = '###@qq.com'
    smtp_server = 'smtp.qq.com'

    msg = MIMEText('''hello:\n\n 《{}》这本书还有{}天到期,deadline为{},尽快去还吧!\n\n 注意:为了防止被识别为垃圾邮件,以下内容为自动添加,同时供您查看!\n\n点击链接查看图书详情{}'''.format(title, day, deadline, item_url), 'plain', 'utf-8')
    msg['From'] = Header("还书通知:{}请注意".format(name), 'utf-8')
    msg['To'] = Header("{}".format(name), 'utf-8')
    msg['Subject'] = Header('hello', 'utf-8')

    # The context manager sends QUIT and closes the connection even when
    # login or sending raises.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())


def parse_record(line):
    """Parse one line of the data file into a record dict.

    Each non-blank line is expected to hold the ``str()`` form of a dict
    with the keys ``deadline``, ``title``, ``item_url`` and ``name``.
    Parsing the literal, instead of matching quotes with regular
    expressions, keeps values that contain an apostrophe intact.

    Args:
        line: One raw line read from the data file.

    Returns:
        The record dict, or ``None`` for a blank line.

    Raises:
        ValueError: If the line is not a dict literal with the expected keys.
    """
    import ast

    text = line.strip()
    if not text:
        return None
    try:
        record = ast.literal_eval(text)
    except (ValueError, SyntaxError) as err:
        raise ValueError("malformed record: {!r}".format(text)) from err
    required = ("deadline", "title", "item_url", "name")
    if not isinstance(record, dict) or any(key not in record for key in required):
        raise ValueError("malformed record: {!r}".format(text))
    return record


def days_until_due(deadline, today=None):
    """Return the number of days from ``today`` until ``deadline``.

    Uses real calendar arithmetic, so month lengths and year changes are
    handled correctly. The result is negative when the date has passed.

    Args:
        deadline: Due date as a ``YYYY-MM-DD`` string.
        today: ``datetime.date`` to measure from; defaults to the current
            local date.

    Raises:
        ValueError: If ``deadline`` is not in ``YYYY-MM-DD`` form.
    """
    import datetime

    due = datetime.datetime.strptime(deadline.strip(), "%Y-%m-%d").date()
    if today is None:
        today = datetime.date.today()
    return (due - today).days


if __name__ == '__main__':
    # Reminders are sent for books due in fewer than this many days.
    reminder_days = 7

    # Read the loan records that were saved to the data file earlier.
    path = r"C:\Users\Oscar\Desktop\数据.txt"
    with open(path, encoding="utf8") as f:
        lines = f.readlines()

    for item in lines:
        try:
            record = parse_record(item)
            # Blank lines (the file starts with one) carry no record.
            if record is None:
                continue
            day = days_until_due(record["deadline"])
        except ValueError:
            print("无法解析借书记录,已跳过:{}".format(item.strip()))
            continue

        # Decide whether a reminder has to be sent.
        if day < reminder_days:
            send_email(record["deadline"], record["title"],
                       record["item_url"], day, record["name"])
最后效果