Jsoup Element网页信息采集

时间:2023-12-20 21:22:50
package zeze;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Minimal jsoup scraping demo: downloads one weather.com.cn forecast page and
 * prints the fields of the forecast panel selected by {@code data-dn=7d1}.
 */
public class TestJsoup {

    /** Page that {@link #main(String[])} scrapes. */
    private static final String FORECAST_URL =
            "http://www.weather.com.cn/html/weather/101280101.shtml";

    /** Printed in place of a field that is not present in the page. */
    private static final String MISSING = "N/A";

    /**
     * Downloads and parses the page at {@code url}.
     *
     * @param url address of the page to fetch
     * @return the parsed document, or {@code null} if the request failed with an
     *         {@link IOException} (the failure is reported on standard error)
     */
    public Document getDocument(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Keep the null-on-failure contract, but say which URL failed.
            System.err.println("Failed to fetch " + url + ": " + e);
            return null;
        }
    }

    /**
     * Fetches the forecast page and prints the selected panel's fields, one per line.
     * Exits with status 1 when the page cannot be downloaded.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TestJsoup t = new TestJsoup();
        Document doc = t.getDocument(FORECAST_URL);
        if (doc == null) {
            // getDocument already reported the cause; previously this path ended in a
            // NullPointerException on doc.select(...).
            System.exit(1);
        }
        printForecast(doc, "[class=dn on][data-dn=7d1]");
    }

    /**
     * Prints the forecast fields found inside the panel matched by {@code panelQuery}.
     * Field meanings follow the labels used in the original listing.
     */
    private static void printForecast(Document doc, String panelQuery) {
        // Locate the target HTML fragment.
        Elements panel = doc.select(panelQuery);
        if (panel.isEmpty()) {
            System.err.println("Forecast panel not found for selector " + panelQuery);
            return;
        }

        // Day label ("today").
        System.out.println(textAt(panel.select("h1"), 0, ""));
        // Day of the month.
        System.out.println(textAt(panel.select("h2"), 0, ""));
        // Weather description (whether it rains).
        System.out.println(textAt(panel.select("[class=wea]"), 0, ""));

        // High and low temperature are the first two <span> elements of the panel.
        Elements temperatures = panel.select("span");
        System.out.println(textAt(temperatures, 0, "°C"));
        System.out.println(textAt(temperatures, 1, "°C"));

        // Wind force: the third <i> element of the panel.
        System.out.println(textAt(panel.select("i"), 2, ""));
    }

    /**
     * Returns the text of the element at {@code index} followed by {@code suffix}, or
     * {@link #MISSING} when {@code elements} has no element at that position, so a
     * changed page layout degrades the output instead of throwing
     * {@link IndexOutOfBoundsException}.
     */
    private static String textAt(Elements elements, int index, String suffix) {
        if (index < 0 || index >= elements.size()) {
            return MISSING;
        }
        return elements.get(index).text() + suffix;
    }
}
package zeze;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Minimal jsoup scraping demo: downloads one weather.com.cn forecast page, prints
 * the fields of the forecast panel selected by {@code data-dn=7d2}, then lists every
 * link found inside elements whose class attribute is {@code Lcontent}.
 */
public class TestJsoup {

    /** Page that {@link #main(String[])} scrapes. */
    private static final String FORECAST_URL =
            "http://www.weather.com.cn/html/weather/101280101.shtml";

    /** Printed in place of a field that is not present in the page. */
    private static final String MISSING = "N/A";

    /**
     * Downloads and parses the page at {@code url}.
     *
     * @param url address of the page to fetch
     * @return the parsed document, or {@code null} if the request failed with an
     *         {@link IOException} (the failure is reported on standard error)
     */
    public Document getDocument(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Keep the null-on-failure contract, but say which URL failed.
            System.err.println("Failed to fetch " + url + ": " + e);
            return null;
        }
    }

    /**
     * Fetches the forecast page, prints the selected panel's fields one per line,
     * then prints the href and text of each link in the {@code Lcontent} section.
     * Exits with status 1 when the page cannot be downloaded.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TestJsoup t = new TestJsoup();
        Document doc = t.getDocument(FORECAST_URL);
        if (doc == null) {
            // getDocument already reported the cause; previously this path ended in a
            // NullPointerException on doc.select(...).
            System.exit(1);
        }
        printForecast(doc, "[class=dn][data-dn=7d2]");
        printLinks(doc, "[class=Lcontent]");
    }

    /**
     * Prints the forecast fields found inside the panel matched by {@code panelQuery}.
     * Field meanings follow the labels used in the original listing.
     */
    private static void printForecast(Document doc, String panelQuery) {
        // Locate the target HTML fragment.
        Elements panel = doc.select(panelQuery);
        if (panel.isEmpty()) {
            // Report and return so the independent link listing still runs.
            System.err.println("Forecast panel not found for selector " + panelQuery);
            return;
        }

        // Day label.
        System.out.println(textAt(panel.select("h1"), 0, ""));
        // Day of the month.
        System.out.println(textAt(panel.select("h2"), 0, ""));
        // Weather description (whether it rains).
        System.out.println(textAt(panel.select("[class=wea]"), 0, ""));

        // High and low temperature are the first two <span> elements of the panel.
        Elements temperatures = panel.select("span");
        System.out.println(textAt(temperatures, 0, "°C"));
        System.out.println(textAt(temperatures, 1, "°C"));

        // Wind force: the third <i> element of the panel.
        System.out.println(textAt(panel.select("i"), 2, ""));
    }

    /**
     * Prints, for every {@code <a>} inside the elements matched by
     * {@code containerQuery}, its raw {@code href} attribute and its text on
     * consecutive lines. Prints nothing when there is no match.
     */
    private static void printLinks(Document doc, String containerQuery) {
        for (Element container : doc.select(containerQuery)) {
            for (Element link : container.getElementsByTag("a")) {
                String linkHref = link.attr("href");
                String linkText = link.text();
                System.out.println(linkHref + "\n" + linkText);
            }
        }
    }

    /**
     * Returns the text of the element at {@code index} followed by {@code suffix}, or
     * {@link #MISSING} when {@code elements} has no element at that position, so a
     * changed page layout degrades the output instead of throwing
     * {@link IndexOutOfBoundsException}.
     */
    private static String textAt(Elements elements, int index, String suffix) {
        if (index < 0 || index >= elements.size()) {
            return MISSING;
        }
        return elements.get(index).text() + suffix;
    }
}

  http://www.cnblogs.com/xiaoMzjm/p/3899366.html?utm_source=tuicool&utm_medium=referral