# Python爬虫-第三章-5-利用xpath爬取某八戒网相关词语公司的信息和价格
#
# 时间: 2023-01-12 07:13:38
# Demo description: 数据解析 (data parsing) with XPath

import requests
from lxml import etree
from fake_useragent import UserAgent

'''
company
title
price
'''

# Scrape the zbj.com search results for one keyword and print, for every
# result card, the company name, the service title and the price.

# Search keyword. To prompt for it instead, use:
#     keyword = input('输入想要爬取的词语: ')
keyword = 'saas'
domain = f'https://www.zbj.com/search/f/?kw={keyword}'

# Send a randomly chosen browser User-Agent with the request.
ua = UserAgent()
user_agent = ua.random
headers = {
    'user-agent': user_agent
}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early on an HTTP error instead of parsing an
# error page and silently printing nothing.
resp = requests.get(domain, headers=headers, timeout=10)
resp.raise_for_status()

# Parse the returned HTML so it can be queried with XPath.
html = etree.HTML(resp.text)

# One <div> per search result. The absolute path is tied to the current page
# layout; an empty list here most likely means the layout has changed.
divs = html.xpath('/html/body/div[6]/div/div/div[2]/div[5]/div')
for element in divs:
    company = ''.join(element.xpath('./div/div/div/a[1]/div[1]/p/text()'))
    # NOTE(review): the title text appears to come back as fragments split
    # around the highlighted search keyword, so the fragments are re-joined
    # with the keyword itself (the previous separator 'sass' was a typo).
    title = keyword.join(element.xpath('./div/div/div/a[2]/div[2]/div[2]/p/text()'))
    price = ''.join(element.xpath('./div/div/div/a[2]/div[2]/div[1]/span[1]/text()'))
    print(company, title, price)