Python爬虫-第三章-2-requests 进阶• 爬取某视频网相关视频

时间:2022-12-31 14:16:01

Python-requests 进阶

1.浏览器登录-cookie

2.防盗链处理-爬取某视频网相关视频

3.使用代理访问网站

# Demo description: advanced usage of requests (cookies, anti-hotlinking, proxies)

import requests
from fake_useragent import UserAgent

# start--------1,浏览器登录-cookie----------------------
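'''
Simple examples of accessing a site as a logged-in user:
method 1 sends the Cookie header copied from the browser;
method 2 posts the login form through a requests session and uses the session's cookies.
'''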
# 方式一
# domain = 'https://qifenglou.co/login'
# ua = UserAgent()
# user_agent = ua.random
# headers = {
# 'user-agent': user_agent,
# 'Cookie': 'BDUSS_BFESS=FiWm9iT3hld3Z6d2ZTflRzODhsYXA5LUtwaDk5YllDU3MxY2RLbjh-UXZiLUpnRVFBQUFBJCQAAAAAAAAAAAEAAAAuIOxHutrRqr3Mu8q36M7eycsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC~iumAv4rpgSX; HMACCOUNT_BFESS=86F86115EC09848E; BAIDUID_BFESS=4B5BB612ABD0FC92E176FB000C54FBE5:FG=1'
# }
# resp = requests.get(domain,headers=headers)
# print(resp.text)

# 方式二
# session = requests.session()
# data = {
# 'key1':'val',
# 'key2':'val'
# }
#
# domain = 'https://qifenglou.co/login'
# ua = UserAgent()
# user_agent = ua.random
# headers = {
# 'user-agent': user_agent
# }
# resp = session.post(domain,headers=headers,data=data)
# print(resp.text)

# end--------1,浏览器登录-cookie----------------------

# start--------2. Hotlink protection: download a video from pearvideo.com--------
# Page URL: https://www.pearvideo.com/video_1760587 -- the number after the
# underscore is the video id (contId).
#
# Approach
# 1. Take the contId from the URL of the playable video page.
# 2. Request the videoStatus JSON for that contId to get the protected link.
# 3. Compare the playable link with the protected one: the site puts the
#    systemTime timestamp where "cont-<contId>" belongs, so swap it back.
# 4. Download the video.
ua = UserAgent()
user_agent = ua.random
url = 'https://www.pearvideo.com/video_1760587'
headers = {
    'user-agent': user_agent,
    # Anti-hotlinking: Referer tells the server which page the request came from.
    'Referer': url,
}

# 1. Take the contId from the page URL.
contId = url.split('_')[1]

# 2. Fetch the video's JSON metadata, which holds the protected link.
# NOTE(review): mrd is copied verbatim from an observed request; presumably a
# random cache-busting value -- confirm.
videoStatusUrl = f'https://www.pearvideo.com/videoStatus.jsp?contId={contId}&mrd=0.15584035416211606'
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early instead of parsing an HTTP error page as JSON.
resp = requests.get(videoStatusUrl, headers=headers, timeout=10)
resp.raise_for_status()
# Sample of resp.json():
# {
#     'resultCode': '1',
#     'resultMsg': 'success',
#     'reqId': '4f35e83e-84bf-401e-ba72-a463a2318a80',
#     'systemTime': '1651285616400',
#     'videoInfo': {
#         'playSta': '1',
#         'video_image': 'https://image1.pearvideo.com/cont/20220429/cont-1760587-12673456.jpg',
#         'videos': {
#             'hdUrl': '', 'hdflvUrl': '', 'sdUrl': '', 'sdflvUrl': '',
#             'srcUrl': 'https://video.pearvideo.com/mp4/adshort/20220429/1651285616400-15871613_adpkg-ad_hd.mp4'}}}

# 3. Undo the protection: replace the timestamp in srcUrl with "cont-<contId>".
dic = resp.json()
srcUrl = dic['videoInfo']['videos']['srcUrl']
systemTime = dic['systemTime']
realVideoSrcUrl = srcUrl.replace(systemTime, f'cont-{contId}')
# For the sample response above:
#   srcUrl          = https://video.pearvideo.com/mp4/adshort/20220429/1651285616400-15871613_adpkg-ad_hd.mp4
#   realVideoSrcUrl = https://video.pearvideo.com/mp4/adshort/20220429/cont-1760587-15871613_adpkg-ad_hd.mp4

# 4. Download the video.
# Stream the body in chunks so the whole file is never held in memory, and
# check the status before opening the output file so a failed request does not
# leave an error page (or an empty file) saved as demo.mp4.
with requests.get(realVideoSrcUrl, headers=headers, stream=True, timeout=10) as video_resp:
    video_resp.raise_for_status()
    with open('../FileForDemo/MP4pearvideo/demo.mp4', mode='wb') as file:
        for chunk in video_resp.iter_content(chunk_size=64 * 1024):
            file.write(chunk)

# end--------2. Hotlink protection: download a video from pearvideo.com--------