Beautiful Soup 解析 HTML 表格

from bs4 import BeautifulSoup

import urllib.request

# Download the declaration listing page. The context manager closes the
# HTTP response even when reading or decoding fails.
with urllib.request.urlopen(
        'http://www.bkzy.org/Index/Declaration?intPageNo=1') as response:
    doc = response.read().decode('utf-8')

soup = BeautifulSoup(doc, "html.parser")

# Positions of the <td> cells read from each table row.
# NOTE(review): the meaning of each column is inferred from the variable
# names only ("xuewei" is pinyin for "degree") -- verify against the page.
school = 0
pro_code = 1
pro_name = 2
xuewei = 3
pdf = 4  # highest index used; this cell is searched for an <a> link

# find_all returns every <tr> element in the document.
for tr in soup.find_all('tr'):
    # Collect the <td> cells of this row.
    td = tr.find_all('td')

    # Rows with fewer cells than the script reads carry no record. Skipping
    # them explicitly replaces the former silent `except IndexError: pass`.
    if len(td) <= pdf:
        continue

    # A cell without an <a>, or an <a> without an href, has nothing to
    # print; skip the row instead of aborting the whole run.
    link = td[pdf].find('a')
    href = link.get('href') if link is not None else None
    if href is None:
        continue

    # Output: "<school>_<code>_<name>_<degree>.pdf <href>" on one line.
    print(
        '%s_%s_%s_%s.pdf' % (
            td[school].text.strip(),
            td[pro_code].text.strip(),
            td[pro_name].text.strip(),
            td[xuewei].text.strip(),
        ),
        href,
    )

秒客网

Beautiful Soup 解析 HTML 表格

相关文章