# coding:utf-8 import urllib2
from bs4 import BeautifulSoup
import json
import sys reload(sys) sys.setdefaultencoding('utf-8')
class dataBean(object) : def __init__(self, title, url,date):
self.date = date
self.url = url
self.title = title
def obj_2_json(obj):
return {
"title":obj.title,
"url":obj.url,
"date":obj.date
}
url = "http://localhost:8088/news.html"
response3 = urllib2.urlopen(url)
soup = BeautifulSoup(response3.read(), 'html.parser', from_encoding='utf-8')
links = soup.find_all('a',class_='')
data=[]
contents = soup.find('ul', class_="w_newslistpage_list").findAll("li") for content in contents:
bean = dataBean(content.find("span").find("a").get_text(), content.find("span").find("a")['href'],
content.find('span', class_="date").get_text())
data.append(dataBean(content.find("span").find("a").get_text(), content.find("span").find("a")['href'],
content.find('span', class_="date").get_text())) jsondata= json.dumps(data,default=dataBean.obj_2_json, ensure_ascii=False,encoding='utf-8')
fileObject = open('data.json', 'w')
fileObject.write(jsondata)
fileObject.close()
print jsondata
java中调用,借助jython.jar,并将bs4文件拷贝在当前文件夹下即可
import org.python.core.Py;
import org.python.core.PyString;
import org.python.util.PythonInterpreter; public class Main {
//jython安装
public static void main(String[] args) {
String code = "# -*- coding: utf-8 -*-\n" +
"import sys\n" +
"reload(sys)\n" + "import urllib2\n" +
"sys.setdefaultencoding('utf-8')\n" +
"import json\n";
new Thread(new Runnable() {
@Override
public void run() {
PythonInterpreter interpreter = new PythonInterpreter();
interpreter.exec("from bs4 import BeautifulSoup");
PyString code2 = Py.newStringUTF8(code);
interpreter.exec(code2);
interpreter.execfile("D:\\java\\test\\src\\GetNewsDataToLocal.py"); }
}
).start();
} }
可在当前文件夹看到json文件