Java HTML页面抓取实例

时间:2023-03-10 00:00:54
Java HTML页面抓取实例
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL; public class Url { public static void main(String[] args) throws Exception{
String html = getURLContent();
System.out.println(html);
} /**
* 获取网页内容
*/
private static String getURLContent() throws MalformedURLException, IOException, UnsupportedEncodingException {
URL urlmy = new URL("http://www.baidu.com"); HttpURLConnection con = (HttpURLConnection) urlmy.openConnection();
HttpURLConnection.setFollowRedirects(true);
con.setInstanceFollowRedirects(false);
con.connect(); BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream(),"UTF-8")); String s = ""; StringBuffer sb = new StringBuffer(); while ((s = br.readLine()) != null) {
sb.append(s+"\r\n");
} return sb.toString();
} }