爬取网易云音乐飙升榜(100首)

时间:2024-02-25 17:05:24

爬取网易云音乐飙升榜(100首)

目的

  • 学习requests模块的使用
  • 练习爬虫的机制
  • 仅供学习使用,不用于任何非法用途

爬取页面

  • 页面地址:https://music.163.com/discover/toplist ;网易云音乐飙升榜会不定期更新,榜单内容以爬取时为准

代码

import requests
from lxml import etree
import time


class DownLoad(object):
    """Interactive downloader for the NetEase Cloud Music top-list page.

    The chart page is fetched once, every listed song is recorded as a
    (name, download URL) pair, and the user then picks songs by number to
    save them as ``./musics/<name>.mp3``.
    """

    # Characters that cannot appear in a file name on common platforms; a
    # song title containing one of them (e.g. "A/B") would otherwise be
    # interpreted as a path or rejected by the OS.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    # Seconds to wait for the server before giving up on a request.
    _TIMEOUT = 10
    # Directory the downloaded files are written to.
    _SAVE_DIR = './musics'

    def __init__(self):
        self.url = 'https://music.163.com/discover/toplist'
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)',
                        'referer': 'https://music.163.com/'}
        # Parallel lists: song_list[i] is the title for url_list[i].
        self.song_list = []
        self.url_list = []

    @staticmethod
    def _safe_filename(name):
        """Return *name* with characters unsafe for file names replaced by '_'."""
        return str(name).translate(DownLoad._UNSAFE_CHARS)

    def project(self):
        """Run the whole program: scrape the chart, then start the menu loop."""
        self.getresponse()
        self.downsong()

    def getresponse(self):
        """Fetch the chart page and collect every song listed in it.

        Raises:
            requests.RequestException: if the page cannot be fetched or the
                server answers with an error status.
        """
        response = requests.get(url=self.url, headers=self.headers,
                                timeout=self._TIMEOUT)
        response.raise_for_status()
        html_page = etree.HTML(response.content.decode())
        song_info_list = html_page.xpath('//div/ul[@class="f-hide"]/li')
        self.getdownurl(song_info_list)

    def getdownurl(self, url):
        """Append the title and download URL of each song element.

        Args:
            url: iterable of ``<li>`` elements (anything exposing ``xpath``),
                each expected to contain one ``<a href="...id=N">title</a>``.
                Elements without a usable link or title are skipped.
        """
        for item in url:
            hrefs = item.xpath("./a/@href")
            names = item.xpath("./a/text()")
            if not hrefs or not names:
                continue
            parts = hrefs[0].split('=')
            if len(parts) < 2:
                # No "id=..." query in the link, so there is nothing to download.
                continue
            download_url = 'http://music.163.com/song/media/outer/url?id=' + str(parts[1])
            self.song_list.append(names[0])
            self.url_list.append(download_url)

    def downsong(self):
        """Show the menu repeatedly and download the song the user selects.

        Entering ``0`` leaves the loop. Anything that is not a number within
        the displayed range is rejected with a hint; network and file errors
        are reported separately instead of being mistaken for bad input.
        """
        import os

        while True:
            print('编号\t\t', '名称')
            self.display()
            choice = input('请输入歌曲编号(退出请按:0): ')
            try:
                nums = int(choice)
            except ValueError:
                nums = -1
            if nums == 0:
                break
            # Reject negatives explicitly: a negative index would otherwise
            # silently pick a song counted from the end of the list.
            if not 1 <= nums <= len(self.song_list):
                print('请输入有效数字编号......')
                time.sleep(3)
                continue

            song_name = self.song_list[nums - 1]
            target = os.path.join(self._SAVE_DIR,
                                  self._safe_filename(song_name) + '.mp3')
            try:
                # Download first so a failed request leaves no empty file behind.
                response = requests.get(url=self.url_list[nums - 1],
                                        headers=self.headers,
                                        timeout=self._TIMEOUT)
                response.raise_for_status()
                os.makedirs(self._SAVE_DIR, exist_ok=True)
                with open(target, mode='wb') as f:
                    f.write(response.content)
            except (requests.RequestException, OSError) as exc:
                print('{}\t\t\t下载失败: {}'.format(song_name, exc))
            else:
                print('{}\t\t\t下载成功......'.format(song_name))
            time.sleep(3)

    def display(self):
        """Print every collected song with its 1-based menu number."""
        # enumerate keeps numbering correct even when two songs share a title.
        for number, item in enumerate(self.song_list, start=1):
            print(number, '\t\t', item)


if __name__ == '__main__':
    # Script entry point: scrape the chart and start the interactive menu.
    down = DownLoad()
    down.project()

结果演示


代码升级

import requests
from bs4 import BeautifulSoup
import threading


class Music(object):
    """Bulk downloader for the NetEase Cloud Music top-list page.

    Every song found on the chart page gets its own worker thread that saves
    the track as ``./music/<name>.mp3``.
    """

    # Characters that cannot appear in a file name on common platforms; a
    # title such as "A/B" would otherwise be treated as a nested path.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    # Seconds to wait for the server before giving up on a request.
    _TIMEOUT = 10
    # Directory the downloaded files are written to.
    _SAVE_DIR = './music'

    def __init__(self):
        self.url = 'https://music.163.com/discover/toplist'
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
        # Download threads queued by get_song_url and not yet started.
        self.threads = []

    @staticmethod
    def _safe_filename(name):
        """Return *name* with characters unsafe for file names replaced by '_'."""
        return str(name).translate(Music._UNSAFE_CHARS)

    def get_song(self):
        """Scrape the chart page, download every song, and wait for completion.

        Raises:
            requests.RequestException: if the chart page cannot be fetched or
                the server answers with an error status.
        """
        response = requests.get(self.url, headers=self.headers,
                                timeout=self._TIMEOUT)
        response.raise_for_status()

        self.soup = BeautifulSoup(response.text, 'html.parser')
        for song_ul in self.soup.find_all('ul', class_='f-hide'):
            for song_a in song_ul.find_all('a'):
                self.get_song_url(song_a['href'], song_a.get_text())

        # Take ownership of the queue before starting: a Thread object can be
        # started only once, so a second get_song() call must not see these.
        pending, self.threads = self.threads, []
        for thread in pending:
            thread.start()
        # Block until every download finished so callers know the files exist.
        for thread in pending:
            thread.join()

    def get_song_url(self, song_url, song_name):
        """Queue a download thread for one song.

        Args:
            song_url: link of the form ``...?id=<song id>`` taken from the page.
            song_name: song title, used as the file name.
        """
        song_id = song_url.split('=')[1]
        play_url = 'http://music.163.com/song/media/outer/url?id=' + song_id
        self.threads.append(threading.Thread(target=self.download_song, args=(play_url, song_name)))

    def download_song(self, song_url, song_name):
        """Download *song_url* and write it to ``./music/<song_name>.mp3``.

        Raises:
            requests.RequestException: on network failure or error status.
            OSError: if the file cannot be written.
        """
        import os

        response = requests.get(song_url, headers=self.headers,
                                timeout=self._TIMEOUT)
        response.raise_for_status()
        # exist_ok makes this safe when several worker threads race to create it.
        os.makedirs(self._SAVE_DIR, exist_ok=True)
        target = os.path.join(self._SAVE_DIR, f'{self._safe_filename(song_name)}.mp3')
        with open(target, 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    # Script entry point: download every song on the chart.
    app = Music()
    app.get_song()