Python简单爬虫获取岗位招聘人数

#encoding=utf-8

import selenium

import selenium.webdriver

import re

import time

# pip install selenium

# 需要下载火狐浏览器的 webdriver 驱动放到 d:/python27目录下，即python的安装目录

def getnumberbyname(searchname, driver=None):
    """Return the first number shown in the 51job result header for a keyword.

    Loads the 51job search URL built from ``searchname`` in a browser, takes
    the first ``<div class="rt">`` element of the rendered page source and
    returns the first run of digits found inside it (presumably the number of
    matching postings -- confirm against the live page layout).

    Args:
        searchname: Keyword string inserted verbatim into the search URL; it
            is not percent-encoded here.
        driver: Optional, already started WebDriver-like object providing
            ``get()`` and ``page_source``.  When given, it is used for the
            request and left open, so one browser session can serve several
            lookups.  When omitted, a Firefox session is started for this
            call and shut down before returning.

    Returns:
        The matched digits as a text string (not converted to ``int``).

    Raises:
        IndexError: If the page has no ``<div class="rt">`` element, or that
            element contains no digits.
    """
    url = (
        "https://search.51job.com/list/040000,000000,0000,00,9,99,"
        + searchname
        + ",2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99"
        "&degreefrom=99&jobterm=99&companysize=99&providesalary=99"
        "&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType="
        "&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
    )

    # Only a browser started here is ours to shut down; a caller-supplied
    # driver stays open for reuse.
    owns_driver = driver is None
    if owns_driver:
        driver = selenium.webdriver.Firefox()
    try:
        driver.get(url)
        pagesource = driver.page_source
    finally:
        # quit() (rather than close()) ends the whole session, and doing it
        # in ``finally`` releases the browser even when loading fails.
        if owns_driver:
            driver.quit()

    # First <div class="rt"> block; [\s\S] lets the match span line breaks.
    container = re.search(
        r'<div class="rt">([\s\S]*?)</div>', pagesource, re.IGNORECASE
    )
    if container is None:
        # IndexError is kept as the failure type the original code raised.
        raise IndexError(
            'no <div class="rt"> element in the result page for %r'
            % (searchname,)
        )

    # First run of digits inside that block.
    digits = re.search(r"\d+", container.group(1))
    if digits is None:
        raise IndexError(
            'no number inside <div class="rt"> for %r' % (searchname,)
        )
    return digits.group(0)

# Search keywords to look up, one 51job query per entry.
jobs = ['python', 'python 数据', 'python 运维', 'python web', 'linux 运维']

for work_name in jobs:
    # A single pre-formatted argument keeps this line valid on Python 3 while
    # printing the same "keyword number" text as the Python 2 statement
    # ``print a, b``.  str() keeps the number the same string type as the
    # keyword so the formatting never mixes byte and unicode strings.
    print("%s %s" % (work_name, str(getnumberbyname(work_name))))
秒客网

Python简单爬虫获取岗位招聘人数

相关文章