1、该网站的PPT链接全部都在页面上,先用正则表达式手动提取所有链接,以txt文件形式保存到指定位置(本例为 E:\test\downinfo.txt,UTF-8编码)。文件中每个条目占两行:一行以“【标题】”开头,后面是PPT标题;一行以“【下载地址】”开头,后面是下载链接。
2、编写一个Java程序处理该文件:逐行读取,解析出标题和下载地址,获取跳转后的真实地址,再把文件下载到指定目录并保存为“标题.pdf”,代码如下:
package platform; import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map; import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;

/**
 * Batch downloader for slide decks listed in a plain-text index file.
 *
 * <p>The index file is read line by line. A line starting with {@code 【标题】}
 * sets the title used for the next download; a line starting with
 * {@code 【下载地址】} carries a link whose redirect target is resolved and then
 * downloaded to {@code <download dir>/<title>.pdf}.
 */
public class TestQConDownload {

    /** Index file produced by the manual link-extraction step. */
    private static final String INDEX_FILE = "E:\\test\\downinfo.txt";

    /** Directory (with trailing separator) that receives the downloaded files. */
    private static final String DOWNLOAD_DIR = "E:\\test\\download\\";

    /** Marker that introduces a title line in the index file. */
    private static final String TITLE_PREFIX = "【标题】";

    /** Marker that introduces a download-link line in the index file. */
    private static final String URL_PREFIX = "【下载地址】";

    /** Characters that are not allowed in Windows file names. */
    private static final String ILLEGAL_FILE_NAME_CHARS = "[\\\\/:*?\"<>|]";

    /**
     * Reads the index file and downloads every listed deck.
     *
     * <p>A failure on one entry is reported and the remaining entries are
     * still processed. A failure to read the index file itself ends the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        BufferedReader reader = null;
        // Fallback name used when a link appears before any title line.
        String title = "1";
        try {
            reader = readTxtFile(INDEX_FILE);
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // The UTF-8 decoder does not strip a byte-order mark, so a
                    // file saved "with BOM" would hide the first prefix.
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1);
                    }
                }
                if (line.startsWith(TITLE_PREFIX)) {
                    title = sanitizeFileName(line.substring(TITLE_PREFIX.length()));
                    System.out.println(title);
                } else if (line.startsWith(URL_PREFIX)) {
                    downloadEntry(title, line.substring(URL_PREFIX.length()));
                }
            }
        } catch (IOException e) {
            // Covers a missing index file, an unsupported encoding and read errors.
            e.printStackTrace();
        } finally {
            closeSafely(reader);
        }
    }

    /**
     * Resolves one link from the index file and downloads its target.
     * Problems are reported on stderr instead of being propagated, so that a
     * single broken entry does not abort the whole batch.
     */
    private static void downloadEntry(String title, String link) {
        String trimmed = link.trim();
        if (trimmed.length() == 0) {
            System.err.println("Skipping entry without a download link: " + title);
            return;
        }
        try {
            String resolved = getRedirectLocation(trimmed);
            System.out.println(resolved);
            downloadFile(resolved, DOWNLOAD_DIR + title + ".pdf");
        } catch (Exception e) {
            // Per-entry boundary: I/O errors and invalid-URL runtime errors
            // are both treated as "this entry failed".
            System.err.println("Failed to download \"" + title + "\" from " + trimmed);
            e.printStackTrace();
        }
    }

    /** Removes the characters that Windows forbids in file names. */
    private static String sanitizeFileName(String rawTitle) {
        return rawTitle.replaceAll(ILLEGAL_FILE_NAME_CHARS, "");
    }

    /**
     * Requests {@code url} and returns the value of the {@code Location}
     * header of the response, i.e. the address the server redirects to.
     *
     * <p>NOTE(review): the request is sent as a POST; presumably this is what
     * keeps the client from following the redirect on its own so that the
     * header can be read here. Confirm before switching to a GET.
     *
     * @param url the link to resolve
     * @return the redirect target announced by the server
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException if the request fails or the response carries no
     *         {@code Location} header
     */
    public static String getRedirectLocation(String url) throws ClientProtocolException, IOException {
        HttpPost request = new HttpPost(url);
        // NOTE(review): a personal session cookie/token is hard-coded here; it
        // will expire and should not live in source control.
        request.setHeader("Cookie", "dx_un=%E5%B9%B4%E8%BD%BB%E7%9A%84%E7%96%AF%E5%AD%90; dx_avatar=http%3A%2F%2F7xil0e.com1.z0.glb.clouddn.com%2Fuser_580d84f25ea61.png; dx_token=0c6b719ffff50f3746b64f058cb4e719");
        request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        // The value is a language list; it was previously sent under the
        // unrelated "Accept-Encoding" header name.
        request.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        request.setHeader("Connection", "keep-alive");
        // NOTE(review): fixed host name; assumes every link points at this host.
        request.setHeader("Host", "ppt.geekbang.org");
        request.setHeader("Referer", "http://2016.qconshanghai.com/schedule");
        request.setHeader("Upgrade-Insecure-Requests", "1");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36");

        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(request);
            if (!response.containsHeader("Location")) {
                throw new IOException("No Location header (HTTP "
                        + response.getStatusLine().getStatusCode() + ") for " + url);
            }
            return response.getFirstHeader("Location").getValue();
        } finally {
            // Always give the connection back and dispose of the per-call client.
            request.releaseConnection();
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads a remote file and saves it locally.
     *
     * <p>Best effort: any failure is printed to stderr and the method returns
     * normally. The target directory is created when it does not exist yet.
     *
     * @param remoteFilePath URL of the remote file
     * @param localFilePath  path of the local file to write
     */
    public static void downloadFile(String remoteFilePath, String localFilePath) {
        HttpURLConnection connection = null;
        BufferedInputStream in = null;
        BufferedOutputStream out = null;
        try {
            connection = (HttpURLConnection) new URL(remoteFilePath).openConnection();
            connection.connect();
            in = new BufferedInputStream(connection.getInputStream());

            // Open the target only after the remote stream is available, so a
            // failed request does not leave an empty file behind.
            File target = new File(localFilePath);
            File parent = target.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            out = new BufferedOutputStream(new FileOutputStream(target));

            byte[] buffer = new byte[2048];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Each resource is released independently and only if it was opened.
            closeSafely(in);
            closeSafely(out);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Opens a text file for line-by-line reading, decoding it as UTF-8.
     * The caller is responsible for closing the returned reader.
     *
     * @param filePath path of the file to open
     * @return a buffered reader positioned at the start of the file
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public static BufferedReader readTxtFile(String filePath) throws UnsupportedEncodingException, FileNotFoundException {
        String encoding = "UTF-8";
        File file = new File(filePath);
        InputStreamReader decoder = new InputStreamReader(new FileInputStream(file), encoding);
        return new BufferedReader(decoder);
    }

    /** Closes {@code resource} if it is non-null, reporting (not throwing) any error. */
    private static void closeSafely(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}