爬zol村壁纸篇

时间:2023-03-09 12:59:28
爬zol村壁纸篇
# -*- coding: utf-8 -*-
# @Author : Jackzz
import os

import requests
from pyquery import PyQuery as pq


def get_request(url):
    """Fetch one wallpaper listing page and print the title of every cover.

    The page is downloaded with a browser-like User-Agent, decoded, parsed
    with pyquery, and every element matching ``.photo-list-padding`` is
    inspected for its ``<img src>`` and ``<span title>`` attributes.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None. The titles are written to standard output.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # Build the request headers.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # The page declares gb2312 (see the page source). Decode with GBK, which
    # also covers characters outside plain GB2312, and replace undecodable
    # bytes instead of aborting the whole page with UnicodeDecodeError.
    html = response.content.decode("gbk", errors="replace")
    doc = pq(html)
    for item in doc('.photo-list-padding').items():
        # Cover image URL; only needed by the disabled download step below.
        url_img = item.find('img').attr('src')
        # Title of the cover.
        name = item.find('span').attr('title')
        print(name)
        # Download step, left disabled as in the original script:
        # download_url = requests.get(url=url_img,headers=headers).content
        # print("下载的图片: %s 路径: %s"%(name,url_img))
        # try:
        #     with open("./imgs/"+name+".jpg","wb") as file:
        #         file.write(download_url)
        # except OSError as e:
        #     continue


if __name__ == '__main__':
    # Absolute path of the output directory, under the current working dir.
    file_path = os.path.join(os.getcwd(), "imgs")
    # Create the directory when it is missing; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(file_path, exist_ok=True)
    # Crawl listing pages one to five. range() excludes its upper bound, so
    # the stop value must be 6 to include page 5.
    for i in range(1, 6):
        url = "http://desk.zol.com.cn/pc/%s.html" % i
        get_request(url)