python爬取某美食数据-全民厨子美食系列

时间:2022-11-26 18:54:50

1、分析网页,爬取美食数据

https://mip.xiachufang.com/explore/?page=2

https://mip.xiachufang.com/explore/?page=3

翻页时只有 page 参数发生变化,因此可以用占位符 {0} 来构造每一页的地址:

url = "https://mip.xiachufang.com/explore/?page={0}"

for page in range(1, 4):

    resp = requests.get(url=url.format(page), headers=headers)

2、网页解析库BeautifulSoup

bs = BeautifulSoup(resp.text, 'html.parser')

recipes = bs.find('section', class_='recipes')

article = recipes.find_all('article', class_='recipe-332-horizon pos-r')

for item in article:

    count += 1

    recipe_name = item.find('div', class_='recipe-name bold ellipsis').text

    author_name = item.find('div', class_='author-name font15 ellipsis').text

    font14 = item.find('div', class_='font14').text

    click_expand = item.find('a', class_='click-expand')['href']

    lst_food = "https://mip.xiachufang.com" + click_expand

    lst.append([count, recipe_name, author_name, font14, lst_food])

3、数据的存储

wk = openpyxl.Workbook()

sheet = wk.active

for item in lst: sheet.append(item)

wk.save('12-全民美食.xlsx')


import requests
from bs4 import BeautifulSoup
import time
import openpyxl

# Scrape pages 1-3 of the xiachufang "explore" listing, collect one row per
# recipe card, and write all rows to an Excel workbook.

# Listing URL template; {0} is replaced with the page number.
url = "https://mip.xiachufang.com/explore/?page={0}"
headers = {
    # Browser-style User-Agent. The original literal was malformed
    # ("KABUL" instead of "KHTML", missing separators), so it is corrected here.
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
# Collected rows: [index, recipe name, author name, 'font14' div text, detail URL].
lst = []
count = 0  # running number of recipes collected so far

for page in range(1, 4):
    # A timeout keeps a stalled connection from hanging the script forever.
    resp = requests.get(url=url.format(page), headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()

    bs = BeautifulSoup(resp.text, 'html.parser')
    recipes = bs.find('section', class_='recipes')
    if recipes is None:
        # find() returns None when the section is absent; skip this page
        # rather than crash with an AttributeError on the next line.
        print("page {0}: no <section class='recipes'> found, skipping".format(page))
        continue

    article = recipes.find_all('article', class_='recipe-332-horizon pos-r')
    for item in article:
        count += 1
        recipe_name = item.find('div', class_='recipe-name bold ellipsis').text
        author_name = item.find('div', class_='author-name font15 ellipsis').text
        font14 = item.find('div', class_='font14').text
        # The href is site-relative, so prefix the host to get a full link.
        click_expand = item.find('a', class_='click-expand')['href']
        lst_food = "https://mip.xiachufang.com" + click_expand
        lst.append([count, recipe_name, author_name, font14, lst_food])

    # Pause between page requests to avoid hammering the site.
    # NOTE(review): the pasted source lost its indentation; a per-page sleep
    # is assumed here (not per-recipe) -- confirm against the original script.
    time.sleep(5)

# Write every collected row to the active sheet and save the workbook.
wk = openpyxl.Workbook()
sheet = wk.active
for item in lst:
    sheet.append(item)
wk.save('12-全民美食.xlsx')


python爬取某美食数据-全民厨子美食系列