import pymongo

# MongoDB client for the server at localhost:27017.
client = pymongo.MongoClient('localhost', 27017)
# Handle to the database named 'walden'.
walden = client['walden']
# Handle to the 'sheet_tab' collection (the "table") inside that database.
sheet_tab = walden['sheet_tab']

# Demo code 1
# path = '/Users/qiongyanzhu/Documents/Plan-for-combating-master/week2/2_1/2_1code_of_video/walden.txt'
# with open(path, 'r') as f:
#     lines = f.readlines()
#     for index, line in enumerate(lines):
#         data = {
#             'index': index,
#             'line': line,
#             'words': len(line.split())
#         }
#         print(data)
#         sheet_tab.insert_one(data)

# Demo code 2
# for item in sheet_tab.find({'words': 0}):
#     print(item)

# Demo code 3
# Comparison query operators: $lt/$lte/$gt/$gte/$ne
# Print every document whose 'words' field is less than 5.
for item in sheet_tab.find({'words': {'$lt': 5}}):
    print(item)

# Demo code 4
# Print the 'line' field of every document in the collection.
for item in sheet_tab.find():
    print(item['line'])
from bs4 import BeautifulSoup
import requests
import pymongo

# MongoDB client for the server at localhost:27017.
# NOTE: this rebinds the `client` name created at the top of the file.
client = pymongo.MongoClient('localhost', 27017)
# Handle to the database named 'xiaozhu'.
xiaozhu = client['xiaozhu']
# Collection that receives the scraped listings.
# NOTE: this rebinds the `sheet_tab` name used by the earlier demo code.
sheet_tab = xiaozhu['sheet_tab']

# Search-result page URLs for page numbers 1 through 3.
url_as = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(number) for number in range(1, 4)]


def insert_house_info(url_s, timeout=10):
    """Scrape each search page in *url_s* and store its listings in ``sheet_tab``.

    Every URL is downloaded and parsed with the lxml parser; one document
    with ``price``, ``title`` and ``url`` keys is inserted per listing found.

    Args:
        url_s: Iterable of search-page URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If a page cannot be fetched (including
            when the timeout expires).
        ValueError: If a price text cannot be converted to ``int``.
    """
    for url_a in url_s:
        # Fetch the page; the timeout keeps one unresponsive server from
        # blocking the whole run indefinitely.
        wb_data = requests.get(url_a, timeout=timeout)
        # Parse the HTML with the lxml parser.
        soup = BeautifulSoup(wb_data.text, 'lxml')
        prices = soup.select('span.result_price')
        titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
        urls = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname')

        # zip() stops at the shortest list, so any unmatched trailing
        # elements are ignored.
        for price, title, url in zip(prices, titles, urls):
            price_text = price.get_text()
            info = {
                # Drop the first character and the last two before converting.
                # Presumably these are the currency sign and a two-character
                # unit suffix - TODO confirm against the live page markup.
                'price': int(price_text[1:-2]),
                'title': title.get_text(),
                'url': url.get('detailurl'),
            }
            sheet_tab.insert_one(info)


def find_house(min_price=500):
    """Print every document in ``sheet_tab`` whose ``price`` exceeds *min_price*.

    Args:
        min_price: Exclusive lower bound used in the ``$gt`` price filter.
    """
    for info in sheet_tab.find({'price': {'$gt': min_price}}):
        print(info)


# Scrape the configured pages, then print the listings above the threshold.
insert_house_info(url_as)
find_house()