老司机带你30行代码爬糗事百科成人版

时间:2025-05-12 08:20:46
import os
import re
import urllib.request

import requests
from requests.exceptions import RequestException

# NOTE(review): the scheme and host of the target site are missing from this
# file -- only the '/<page>.html' path survived. Set BASE_URL before running;
# while it is empty every page request fails and is reported as 'its error'.
BASE_URL = ''

# Listing pages to crawl. The original comment promised 20 pages for the
# example, but range(1, 20) stops at page 19, so the upper bound is 21 here.
PAGE_NUMBERS = range(1, 21)

# Directory the downloaded images are written to.
SAVE_DIR = "f:/qiushibaike/"

# Seconds to wait on any single network call before giving up.
REQUEST_TIMEOUT = 10

# Captures the src attribute of every <img alt=... src="..." /> tag.
# NOTE(review): the flags argument was garbled in the source; re.S (let '.'
# span newlines) is assumed -- confirm against the site's markup.
IMG_PATTERN = re.compile('<img alt=.*? src="(.*?)".*? />', re.S)


def get_page_index(url):
    """Fetch *url* and return the page source as text.

    Returns None when the request raises a RequestException or the server
    answers with a status other than 200.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except RequestException:
        print('its error')
        return None
    if response.status_code != 200:
        return None
    # NOTE(review): the decode call was garbled in the source; only the
    # 'gbk' argument survived. Decoding the raw body as GBK is assumed.
    return response.content.decode('gbk')


def download_img(html, start=0):
    """Download every image referenced in *html* into SAVE_DIR.

    Files are named '<index>.jpg'. Numbering begins at *start* so that
    successive pages do not overwrite each other's files.

    Args:
        html: Page source, or None if the page could not be fetched.
        start: Index used for the first image saved by this call.

    Returns:
        The index the next saved image should use.
    """
    if not html:
        # A failed page fetch yields None; there is nothing to scan.
        return start

    os.makedirs(SAVE_DIR, exist_ok=True)
    index = start
    for img_url in IMG_PATTERN.findall(html):
        print('正在下载中....')
        try:
            with urllib.request.urlopen(img_url, timeout=REQUEST_TIMEOUT) as resp:
                data = resp.read()
        except (OSError, ValueError):
            # URLError/timeouts are OSError; a malformed or relative src
            # raises ValueError. Skip this image and keep going.
            print('its error')
            continue
        with open(SAVE_DIR + str(index) + '.jpg', 'wb') as out:
            out.write(data)
        index += 1
    return index


def main():
    """Crawl every page in PAGE_NUMBERS and save the images found."""
    next_index = 0
    for page in PAGE_NUMBERS:
        url = BASE_URL + '/' + str(page) + '.html'
        html = get_page_index(url)
        next_index = download_img(html, next_index)


if __name__ == '__main__':
    main()