python3 spider [ urllib.request ]

# # Import the urlopen function from the urllib library

# from urllib.request import urlopen

# # Send the request and fetch the HTML

# html = urlopen("https://www.baidu.com/")

# # The fetched HTML content is bytes; convert it to a string

# html_text = bytes.decode(html.read())

# # Print the HTML content

# print(html_text)

from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup as bf

# Page to scrape; also used as the base for resolving the logo's `src`.
page_url = "https://www.baidu.com/"

# Fetch and parse the page. The response is used as a context manager so the
# underlying connection is closed as soon as the body has been read.
with urlopen(page_url) as html:
    obj = bf(html.read(), 'html.parser')

# <title> element of the page, or None when the document has no <head>/<title>.
title = obj.head.title if obj.head is not None else None

# All <img> tags carrying the CSS class used for the logo on the home page.
logo_pic_info = obj.find_all('img', class_="index-logo-src")

if not logo_pic_info:
    # The site may serve a reduced page to non-browser clients; fail with a
    # clear message instead of an anonymous "list index out of range".
    raise IndexError(
        "no <img class=\"index-logo-src\"> element found on " + page_url
    )

# The `src` may be protocol-relative ("//host/path"), absolute, or relative to
# the page; urljoin resolves every one of those forms against the page URL.
logo_url = urljoin(page_url, logo_pic_info[0]['src'])

# download the image
urlretrieve(logo_url, 'logo.png')

秒客网

python3 spider [ urllib.request ]

相关文章