用requests库和BeautifulSoup4库爬取新闻列表

时间:2024-01-06 16:47:20
import requests
from bs4 import BeautifulSoup

# Scrape the campus news list page and print, for every news entry,
# its publish time, title, article link and thumbnail node.
url = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=10)
# Force UTF-8 so the Chinese text is decoded correctly.
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')

for news in soup.select('li'):
    # Only <li> elements that contain a title node are news entries;
    # other list items on the page are skipped.
    titles = news.select('.news-list-title')
    if not titles:
        continue
    # NOTE(review): the numeric indices were lost in the original text;
    # index 0 (first match / first child) is assumed here - confirm
    # against the live page markup.
    title = titles[0].text
    time = news.select('.news-list-info')[0].contents[0].text
    # Use a separate name so the list-page `url` above is not overwritten.
    link = news.select('a')[0]['href']
    thumb = news.select('.news-list-thumb')[0].contents[0]
    print(time)
    print(title)
    print(link)
    print(thumb)

用requests库和BeautifulSoup4库爬取新闻列表

import requests
from bs4 import BeautifulSoup
# Scrape the Gamersky PC game page and print the text and link of the
# first anchor found inside every list item.
jq = 'http://www.gamersky.com/pcgame/'
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(jq, timeout=10)
# Force UTF-8 so the Chinese text is decoded correctly.
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')

for news in soup.select('li'):
    # Skip list items that contain no link at all.
    anchors = news.select('a')
    if not anchors:
        continue
    # NOTE(review): the numeric index was lost in the original text;
    # index 0 (first anchor) is assumed - confirm against the page markup.
    first_anchor = anchors[0]
    title = first_anchor.text
    url = first_anchor['href']
    # The original had publish-time extraction (from the first <span>)
    # commented out, so only title and link are printed.
    print(title, url)

用requests库和BeautifulSoup4库爬取新闻列表