Python爬虫:爬取豌豆荚APP第一页数据信息(Selenium)

时间:2022-09-18 17:29:22
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Scrape the app cards shown on the first page of a Wandoujia category
# listing and append one formatted record per app to a text file.

# Template for one scraped record. The text, including its leading
# whitespace, is printed and written to the output file verbatim.
APP_TEMPLATE = """
            app名字:{}
            详情页url:{}
            下载人数:{}
            app大小:{}
            \n
        """

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# calling convention; newer Selenium 4 releases expect a Service object
# instead -- confirm against the installed Selenium version.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')

try:
    # Element lookups below will poll for up to 20 s before failing.
    driver.implicitly_wait(20)
    driver.get('https://www.wandoujia.com/category/6001')

    time.sleep(5)
    # Scroll the window to y=5000; presumably this triggers loading of the
    # cards further down the page -- verify against the live site.
    js_code = '''
                   window.scrollTo(0,5000)
                   '''
    driver.execute_script(js_code)

    time.sleep(5)  # wait 5 s for the listing to finish loading
    app_list = driver.find_elements(By.CLASS_NAME, 'card')

    # Open the output file once for the whole run rather than once per card.
    with open('豌豆荚app.txt', 'a', encoding='utf-8') as f:
        for app in app_list:
            # The title link carries both the app name and the detail-page
            # URL, so it is looked up a single time.
            title_link = app.find_element(By.CSS_SELECTOR, '.app-title-h2 a')
            app_name = title_link.text
            detail_url = title_link.get_attribute('href')

            # The "meta" element's text holds both the download count and
            # the app size.
            download_num_size = app.find_element(By.CLASS_NAME, 'meta').text

            # NOTE(review): the fixed-width slices assume the first 8
            # characters are the download count and that the size starts at
            # index 11 -- confirm against the actual text of the meta element.
            app_content = APP_TEMPLATE.format(
                app_name,
                detail_url,
                download_num_size[:8],
                download_num_size[11:],
            )
            print(app_content)
            f.write(app_content)
    time.sleep(3)


finally:
    # quit() ends the whole WebDriver session and shuts down the driver
    # process; close() would only close the current browser window.
    driver.quit()