python3+ 简单爬虫笔记

 import urllib.request

 import re

 def getHtml(url):
     """Fetch ``url`` and return the raw response body as bytes.

     The response object is used as a context manager so the underlying
     connection is closed as soon as the body has been read, instead of
     being left open until the object happens to be garbage collected.

     :param url: URL string (or ``urllib.request.Request``) to open.
     :return: The undecoded response body (``bytes``).
     :raises urllib.error.URLError: if the URL cannot be opened.
     """
     with urllib.request.urlopen(url) as response:
         return response.read()

 def getImg(html):
     """Download every ``.jpg`` image referenced in ``html``.

     An image is recognised by the pattern ``src="<url>.jpg" pic_ext``.
     Each match is saved in the current working directory under a numbered
     file name (``0.jpg``, ``1.jpg``, ...) in the order it appears.

     :param html: Page content, either raw UTF-8 ``bytes`` (as returned by
         ``getHtml``) or an already-decoded ``str``.
     :return: List of the image URLs that were found (empty if none).
     :raises UnicodeDecodeError: if ``html`` is bytes that are not UTF-8.
     :raises urllib.error.URLError: if an image cannot be downloaded.
     """
     # Only decode when needed so callers may pass text as well as bytes.
     if not isinstance(html, str):
         html = html.decode('utf-8')

     imgre = re.compile(r'src="(.+?\.jpg)" pic_ext')
     imglist = imgre.findall(html)

     # NOTE(review): the original counter lost its literals ("x = " and
     # "x +="); numbering from 0 in steps of 1 is assumed here -- confirm.
     for x, imgurl in enumerate(imglist):
         urllib.request.urlretrieve(imgurl, '%s.jpg' % x)

     return imglist

 # Fetch the Tieba thread page, download the images it references and
 # print the list of image URLs that were found.
 html = getHtml("http://tieba.baidu.com/p/2460150866")
 found_images = getImg(html)
 print(found_images)

秒客网

python3+ 简单爬虫笔记

相关文章