python图片小爬虫

时间:2023-03-09 22:11:10
python图片小爬虫
 import re
import os
import urllib
from contextlib import closing

try:
    # Python 3 keeps urlopen in urllib.request.
    from urllib.request import urlopen
except ImportError:
    # Python 2 exposes urlopen directly on the urllib module.
    urlopen = urllib.urlopen

# Matches the .jpg URL of every <img> tag whose src attribute is directly
# followed by a pic_ext attribute. Compiled once instead of on every call.
IMG_PATTERN = re.compile(r'src="(.+?\.jpg)" pic_ext')

# Directory the original script saved into.
DEFAULT_SAVE_DIR = "E:\\pic"


def rename(name):
    """Return *name* with the '.jpg' extension appended."""
    return name + '.jpg'


def getHtml(url):
    """Download *url* and return the raw response body.

    The response object is always closed, even if reading fails.
    """
    with closing(urlopen(url)) as page:
        return page.read()


def getImg(html, save_dir=DEFAULT_SAVE_DIR):
    """Download every image referenced in *html* into *save_dir*.

    Images are saved as 1.jpg, 2.jpg, ... in the order they appear in the
    page, and each file name is printed once its image has been fetched.

    Args:
        html: Page source, either text or the raw bytes from getHtml().
        save_dir: Target directory; created if it does not exist. Files are
            written by full path, so the process working directory is left
            untouched.

    Returns:
        The list of file paths written, in download order.
    """
    # On Python 3 the page body is bytes, which a text pattern cannot search.
    # On Python 2 bytes is str, so this branch is skipped there.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'ignore')

    imglist = IMG_PATTERN.findall(html)

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    saved = []
    for x, imgurl in enumerate(imglist, 1):
        with closing(urlopen(imgurl)) as img:
            data = img.read()
        name = rename(str(x))
        print(name)
        target = os.path.join(save_dir, name)
        # The with-block closes the file even when the write raises.
        with open(target, 'wb') as f:
            f.write(data)
        saved.append(target)
    return saved


def main():
    """Scrape the sample Tieba thread and save its images."""
    html = getHtml("http://tieba.baidu.com/p/3553148164")
    getImg(html)
    print('pic save!')


# Guarded so that importing this module does not start a network download.
if __name__ == "__main__":
    main()

爬取的网页是:http://tieba.baidu.com/p/3553148164

图片保存在 E 盘的 pic 文件夹(E:\pic)下,文件按 1.jpg、2.jpg…… 依次命名。

爬取结果如下:

python图片小爬虫