爬虫_豆瓣全部正在热映电影（xpath）

单纯地练习一下xpath

 import requests

 from lxml import etree

 def get_url(url, headers=None, timeout=10):
     """Download ``url`` with an HTTP GET and return the body as text.

     Args:
         url: Address of the page to fetch.
         headers: Optional dict of HTTP request headers passed through to
             ``requests.get``. ``None`` sends the library defaults.
             NOTE(review): some sites may reject the default client
             headers -- pass a browser-like ``User-Agent`` if needed.
         timeout: Seconds to wait for the server before giving up. Without
             a timeout ``requests`` may block indefinitely.

     Returns:
         The decoded response body (``Response.text``).

     Raises:
         requests.RequestException: On connection failure, timeout, or a
             4xx/5xx status code.
     """
     response = requests.get(url, headers=headers, timeout=timeout)
     # Fail loudly on HTTP errors instead of handing an error page to the
     # parser as if it were real content.
     response.raise_for_status()
     return response.text

 def parse_html(html):
     """Extract movie entries from the page markup, print and return them.

     The first ``<ul class="lists">`` element in the document is located and
     one record is built per direct ``<li>`` child of that list.

     Args:
         html: Page markup as a string.

     Returns:
         A list of dicts with the keys ``href``, ``title``, ``mark``,
         ``actors`` and ``director``. ``title`` is a whitespace-normalized
         string; the other values are the raw XPath result lists. The list
         is empty when the page holds no ``<ul class="lists">`` element.
     """
     html_element = etree.HTML(html)

     # Defensive: tolerate a parser that yields no tree (e.g. empty input),
     # and a page that lacks the expected list container.
     if html_element is None:
         containers = []
     else:
         containers = html_element.xpath('//ul[@class="lists"]')

     informations = []
     if containers:
         # Select only direct <li> children; iterating the <ul> element
         # itself would also visit comment nodes and other non-item children.
         for li in containers[0].xpath('./li'):
             informations.append({
                 'href': li.xpath('.//li[@class="poster"]/a/@href'),
                 # normalize-space() strips line breaks and extra whitespace.
                 'title': li.xpath(
                     'normalize-space(.//li[@class="stitle"]/a/@title)'
                 ),
                 'mark': li.xpath('.//span[@class="subject-rate"]/text()'),
                 'actors': li.xpath('@data-actors'),
                 'director': li.xpath('@data-director'),
             })

     print(informations)
     # Also hand the records back so callers can use them programmatically.
     return informations

 def main():
     """Fetch the Beijing now-playing page and pass its markup to the parser."""
     parse_html(get_url('https://movie.douban.com/cinema/nowplaying/beijing/'))

 # Run the scraper only when this file is executed as a script, not on import.
 if __name__ == '__main__':

     main()

秒客网

爬虫_豆瓣全部正在热映电影（xpath）

相关文章

爬虫_豆瓣全部正在热映电影（xpath）

相关文章

爬虫_豆瓣全部正在热映电影（xpath）