腾讯微博数据抓取(java实现)

时间:2022-09-18 11:47:44
 不多说,同样贴出相关代码
参数实体:

package token.def; import java.io.Serializable;
import java.util.Properties;

/**
 * Serializable carrier for the state of one login attempt.
 *
 * <p>It holds the values needed to resume a login that was interrupted by a
 * captcha (salt, redirect data, login signature, login page URL, captcha image
 * URL) together with the final authorization properties once they are known.
 */
public class TLoginParams implements Serializable {

    private static final long serialVersionUID = 6120319409538285515L;

    private String saltUin;
    private String dataRedirect;
    private String loginSig;
    private String loginUrl;
    private String imgURl;
    private String imgCookie;
    /** Starts out as {@code true}; callers flip it with {@link #setLogin(boolean)}. */
    private boolean isLogin = true;
    private Properties prop;

    // ---- salt ----

    public String getSaltUin() {
        return this.saltUin;
    }

    public void setSaltUin(String saltUin) {
        this.saltUin = saltUin;
    }

    // ---- redirect data ----

    public String getDataRedirect() {
        return this.dataRedirect;
    }

    public void setDataRedirect(String dataRedirect) {
        this.dataRedirect = dataRedirect;
    }

    // ---- login signature ----

    public String getLoginSig() {
        return this.loginSig;
    }

    public void setLoginSig(String loginSig) {
        this.loginSig = loginSig;
    }

    // ---- login page URL ----

    public String getLoginUrl() {
        return this.loginUrl;
    }

    public void setLoginUrl(String loginUrl) {
        this.loginUrl = loginUrl;
    }

    // ---- captcha image URL ----

    public String getImgURl() {
        return this.imgURl;
    }

    public void setImgURl(String imgURl) {
        this.imgURl = imgURl;
    }

    // ---- captcha image cookie ----

    public String getImgCookie() {
        return this.imgCookie;
    }

    public void setImgCookie(String imgCookie) {
        this.imgCookie = imgCookie;
    }

    // ---- login flag ----

    public boolean isLogin() {
        return this.isLogin;
    }

    public void setLogin(boolean isLogin) {
        this.isLogin = isLogin;
    }

    // ---- authorization result ----

    public Properties getProp() {
        return this.prop;
    }

    public void setProp(Properties prop) {
        this.prop = prop;
    }

    /** Renders every field as {@code name=value}, in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TLoginParams [");
        text.append("saltUin=").append(saltUin);
        text.append(", dataRedirect=").append(dataRedirect);
        text.append(", loginSig=").append(loginSig);
        text.append(", loginUrl=").append(loginUrl);
        text.append(", imgURl=").append(imgURl);
        text.append(", imgCookie=").append(imgCookie);
        text.append(", isLogin=").append(isLogin);
        text.append(", prop=").append(prop);
        text.append("]");
        return text.toString();
    }
}

加密实现:

package token.exe; import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;

/**
 * Computes the hashed password value that is sent as the {@code p} parameter
 * of the login request.
 *
 * <p>All intermediate binary values are carried in {@code String}s using
 * ISO-8859-1, which maps every byte value 0-255 to exactly one character and
 * back, so no information is lost between the hashing steps.
 */
public class TencentWeiboEncryption {

    private static final String HEXSTRING = "0123456789ABCDEF";

    /**
     * Returns the MD5 digest of the given text as 32 upper-case hex characters.
     *
     * @param originalText text whose ISO-8859-1 bytes are hashed
     * @return upper-case hexadecimal MD5 digest
     * @throws Exception if the charset or the MD5 algorithm is unavailable;
     *         the failure is propagated instead of being replaced by an empty
     *         string, which would silently produce a wrong password hash
     */
    private static String md5(String originalText) throws Exception {
        byte[] input = originalText.getBytes("ISO-8859-1");
        byte[] digest = MessageDigest.getInstance("MD5").digest(input);
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (int i = 0; i < digest.length; i++) {
            int value = digest[i] & 0xFF;
            // Emit upper-case digits directly; no locale-sensitive case conversion needed.
            hex.append(HEXSTRING.charAt(value >>> 4));
            hex.append(HEXSTRING.charAt(value & 0x0F));
        }
        return hex.toString();
    }

    /**
     * Converts a string of upper-case hex digits into the string whose
     * ISO-8859-1 bytes are the decoded values.
     *
     * @param md5str an even number of characters drawn from {@code 0-9A-F}
     * @return one character per decoded byte
     * @throws UnsupportedEncodingException if ISO-8859-1 is unavailable
     * @throws IllegalArgumentException if the input has odd length or
     *         contains a character that is not an upper-case hex digit
     */
    private static String hexchar2bin(String md5str) throws UnsupportedEncodingException {
        if ((md5str.length() & 1) != 0) {
            throw new IllegalArgumentException("Hex string has odd length: " + md5str.length());
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream(md5str.length() / 2);
        for (int i = 0; i < md5str.length(); i += 2) {
            int high = HEXSTRING.indexOf(md5str.charAt(i));
            int low = HEXSTRING.indexOf(md5str.charAt(i + 1));
            if (high < 0 || low < 0) {
                // indexOf returns -1 for unknown characters; writing that would
                // silently emit 0xFF and corrupt the hash.
                throw new IllegalArgumentException("Invalid hex digit at index " + i);
            }
            baos.write(high << 4 | low);
        }
        return new String(baos.toByteArray(), "ISO-8859-1");
    }

    /**
     * Computes the encrypted password.
     *
     * <p>Steps: MD5 the password and decode the hex digest to raw bytes; append
     * the decoded salt and MD5 again; append the upper-cased verification code
     * and MD5 a third time.
     *
     * @param qq the salt, written as hex bytes each prefixed with a literal
     *        backslash-x; the prefixes are stripped before decoding
     * @param password the plain-text password
     * @param verifycode the verification code; upper-cased before hashing
     * @return 32 upper-case hex characters
     * @throws Exception if hashing fails
     * @throws IllegalArgumentException if the salt is not valid hex after the
     *         prefixes are stripped
     */
    public static String getPassword(String qq, String password, String verifycode) throws Exception {
        // Locale.ROOT keeps the case mapping stable (e.g. under a Turkish default locale).
        String saltHex = qq.replace("\\x", "").toUpperCase(java.util.Locale.ROOT);
        String passwordDigest = hexchar2bin(md5(password));
        String salted = md5(passwordDigest + hexchar2bin(saltHex));
        return md5(salted + verifycode.toUpperCase(java.util.Locale.ROOT));
    }
}

微博登陆实现:

package token.exe; import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Scanner; import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager; import org.apache.http.Header;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.params.HttpClientParams;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.params.SyncBasicHttpParams;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import token.TencentWeiboOAuth;
import token.def.TLoginParams; import com.tencent.weibo.beans.RouteCfg; public class TencentWeiboLoginer { private DefaultHttpClient httpClient; //默认连接配置参数
private static final int CONNECT_TIME_OUT = 5000;
private static final int SOCKET_TIME_OUT = 5000;
private static final int MAX_CONNECTIONS_PRE_HOST = 20;
private static final int MAX_TOTAL_CONNECTIONS = 200;

/**
 * Creates a loginer with the default timeouts and connection limits,
 * no per-route overrides and no proxy.
 */
public TencentWeiboLoginer() {
    this(CONNECT_TIME_OUT, SOCKET_TIME_OUT, MAX_CONNECTIONS_PRE_HOST, MAX_TOTAL_CONNECTIONS, null, null);
}

/**
 * Builds the pooled HTTP client used by every request of this class.
 *
 * @param connectTimeOut value for {@code CoreConnectionPNames.CONNECTION_TIMEOUT}
 * @param socketTimeOut value for {@code CoreConnectionPNames.SO_TIMEOUT}
 * @param maxConnectionsPreHost default maximum number of connections per route
 * @param maxTotalConnections maximum number of connections in the pool
 * @param routeCfgs optional per-host connection limits; may be {@code null}
 * @param proxy optional default proxy; may be {@code null}
 * @throws IllegalStateException if the TLS context cannot be created or initialised
 */
public TencentWeiboLoginer(int connectTimeOut, int socketTimeOut, int maxConnectionsPreHost,
        int maxTotalConnections, List<RouteCfg> routeCfgs, HttpHost proxy) {

    // SECURITY NOTE(review): this trust manager accepts every certificate chain and the
    // socket factory below accepts every host name, so HTTPS connections made by this
    // client are not protected against man-in-the-middle attacks. Kept as in the original.
    X509TrustManager x509TrustManager = new X509TrustManager() {
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            // The interface contract asks for a non-null (possibly empty) array.
            return new X509Certificate[0];
        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
            // Intentionally empty: every server chain is accepted.
        }

        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
            // Intentionally empty: every client chain is accepted.
        }
    };

    // Register the https scheme.
    // NOTE(review): no plain "http" scheme is registered, so http:// URLs cannot be
    // requested through this client.
    SchemeRegistry schemeRegistry = new SchemeRegistry();
    try {
        SSLContext ssl = SSLContext.getInstance("TLS");
        ssl.init(null, new TrustManager[]{x509TrustManager}, null);
        SSLSocketFactory sslSocketFactory = new SSLSocketFactory(ssl);
        sslSocketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
        schemeRegistry.register(new Scheme("https", 443, sslSocketFactory));
    } catch (NoSuchAlgorithmException e) {
        // Fail here with the real cause instead of continuing without a usable registry.
        throw new IllegalStateException("TLS is not available", e);
    } catch (KeyManagementException e) {
        throw new IllegalStateException("Could not initialise the TLS context", e);
    }

    // Connection manager (pool) configuration.
    ThreadSafeClientConnManager connManager = new ThreadSafeClientConnManager(schemeRegistry);
    connManager.setDefaultMaxPerRoute(maxConnectionsPreHost);
    connManager.setMaxTotal(maxTotalConnections);

    // Connection parameters.
    HttpParams httpParams = new SyncBasicHttpParams();
    httpParams.setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, connectTimeOut);
    httpParams.setParameter(CoreConnectionPNames.SO_TIMEOUT, socketTimeOut);

    // Protocol parameters.
    HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
    HttpProtocolParams.setUseExpectContinue(httpParams, false);

    // Enable cookies.
    HttpClientParams.setCookiePolicy(httpParams, CookiePolicy.BROWSER_COMPATIBILITY);

    // Per-host overrides of the maximum connection count.
    if (routeCfgs != null) {
        for (RouteCfg routeCfg : routeCfgs) {
            HttpHost host = new HttpHost(routeCfg.getHost(), routeCfg.getPort());
            connManager.setMaxForRoute(new HttpRoute(host), routeCfg.getMaxConnetions());
        }
    }

    // Create the client.
    httpClient = new DefaultHttpClient(connManager, httpParams);

    // Default headers sent with every request.
    List<Header> headers = new ArrayList<Header>();
    headers.add(new BasicHeader(HttpHeaders.ACCEPT, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
    headers.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3"));
    headers.add(new BasicHeader(HttpHeaders.ACCEPT_CHARSET, "UTF-8"));
    headers.add(new BasicHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 5.1; rv:25.0) Gecko/20100101 Firefox/25.0"));
    headers.add(new BasicHeader(HttpHeaders.CONNECTION, "keep-alive"));
    headers.add(new BasicHeader("X-Forwarded-For", "192.168.0.1"));
    headers.add(new BasicHeader("Client-IP", "192.168.0.1"));
    headers.add(new BasicHeader("API-RemoteIP", "192.168.0.1"));
    httpClient.getParams().setParameter("http.default-headers", headers);

    // Optional proxy.
    if (proxy != null) {
        httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
    }
}

/**
 * Simulates a Tencent Weibo login.
 * @return the code value
 */
public TLoginParams doLogin(String username, String password) {
    // Three outcomes, all reported through the returned TLoginParams:
    //  - captcha required: salt, redirect data, login signature, login URL and captcha
    //    image URL are filled in and prop stays null;
    //  - login rejected: isLogin() is false and prop stays null;
    //  - success: prop holds the authorization properties.
    Properties properties = initProperties();
    String clientID = properties.getProperty("client_id");
    String redirectURI = properties.getProperty("redirect_uri");

    HashMap<String, String> urlMap = getUrlMap(clientID, redirectURI);
    String dataRedirect = urlMap.get("data-redirect");

    HashMap<String, String> loginInfoMap = preLogin(urlMap);
    String loginSig = loginInfoMap.get("login_sig");
    String loginUrl = loginInfoMap.get("login_url");

    HashMap<String, String> checkMap = isHasVC(dataRedirect, username, loginSig, loginUrl);
    String isHasVC = checkMap.get("isHasVC");
    String vc = checkMap.get("vc");
    String saltUin = checkMap.get("saltUin");

    TLoginParams tLoginParams = new TLoginParams();
    if (Integer.parseInt(isHasVC) != 0) {
        // A non-zero flag means a captcha is required: hand back everything the
        // caller needs to continue with doLoginByVC.
        tLoginParams.setDataRedirect(dataRedirect);
        tLoginParams.setLoginSig(loginSig);
        tLoginParams.setLoginUrl(loginUrl);
        tLoginParams.setSaltUin(saltUin);
        tLoginParams.setImgURl(getVCode(username));
        return tLoginParams;
    }

    String checkSigUrl = finalLogin(vc, saltUin, dataRedirect, username,
            password, loginSig, loginUrl);
    // finalLogin returns the check URL on success and the server's error text
    // otherwise; an error text must not be requested as if it were a URL.
    if (checkSigUrl == null || !checkSigUrl.startsWith("http")) {
        tLoginParams.setLogin(false);
        return tLoginParams;
    }
    Properties result = authorize(loginUrl, checkSigUrl);
    tLoginParams.setProp(result);
    return tLoginParams;
}

/**
 * Completes the login when a verification code is required.
 * @param vc
 * @param saltUin
 * @param dataRedirect
 * @param username
 * @param password
 * @param loginSig
 * @param loginUrl
 * @return
 */
public TLoginParams doLoginByVC(String vc, String saltUin, String dataRedirect, String username,
        String password, String loginSig, String loginUrl) {
    // On success the returned object carries the authorization properties; on any
    // rejection (wrong captcha, wrong password, ...) isLogin() is false and prop is null.
    TLoginParams tLoginParams = new TLoginParams();

    String checkSigUrl = finalLogin(vc, saltUin, dataRedirect, username, password, loginSig, loginUrl);
    // finalLogin returns the check URL on success and the server's error text
    // otherwise. Testing for a URL covers "您输入的验证码不正确,请重新输入。" as well as
    // every other failure text, none of which may be fetched as a URL.
    if (checkSigUrl == null || !checkSigUrl.startsWith("http")) {
        tLoginParams.setLogin(false);
        return tLoginParams;
    }

    Properties prop = authorize(loginUrl, checkSigUrl);
    tLoginParams.setProp(prop);
    return tLoginParams;
}

/**
 * Starts the login and obtains the submit links that carry the session key.
 * @param clientID application ID
 * @param redirectURI application callback address
 * @return
 */
private HashMap<String, String> getUrlMap(String clientID, String redirectURI) {
    // Requests the OAuth2 authorize page and returns the "data-redirect" and
    // "data-proxy" attributes of its first <noscript> element.
    StringBuilder authorizeUrl = new StringBuilder("https://open.t.qq.com/cgi-bin/oauth2/authorize?");
    authorizeUrl.append("client_id=").append(clientID);
    authorizeUrl.append("&response_type=code");
    authorizeUrl.append("&redirect_uri=").append(redirectURI);
    authorizeUrl.append("&forcelogin=true");

    Header[] requestHeaders = new BasicHeader[]{
        new BasicHeader(HttpHeaders.HOST, "open.t.qq.com")
    };
    String page = httpGetDatas(authorizeUrl.toString(), requestHeaders);

    Element noscript = Jsoup.parse(page).getElementsByTag("noscript").first();

    HashMap<String, String> result = new HashMap<String, String>();
    result.put("data-redirect", noscript.attr("data-redirect"));
    result.put("data-proxy", noscript.attr("data-proxy"));
    return result;
}

/**
 * Pre-login to Tencent Weibo; obtains login_sig.
 * @param urlMap the urlMap returned when the login was started
 * @return
 */
private HashMap<String, String> preLogin(HashMap<String, String> urlMap) {
    // Loads the ptlogin2 login page and cuts the login_sig value out of the first
    // <script> in its <head>. Returns that value under "login_sig" and the page
    // URL that was requested under "login_url".
    String encodedRedirect = null;
    String encodedProxy = null;
    try {
        encodedRedirect = URLEncoder.encode(urlMap.get("data-redirect"), "UTF-8");
        encodedProxy = URLEncoder.encode(urlMap.get("data-proxy"), "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }

    StringBuilder pageUrl = new StringBuilder("https://ui.ptlogin2.qq.com/cgi-bin/login?appid=46000101");
    pageUrl.append("&s_url=").append(encodedRedirect);
    pageUrl.append("&proxy_url=").append(encodedProxy);
    pageUrl.append("&f_url=loginerroralert");
    pageUrl.append("&style=13");
    pageUrl.append("&daid=6");
    pageUrl.append("&pt_no_auth=1");
    pageUrl.append("&hide_close_icon=1");
    pageUrl.append("&link_target=blank");
    pageUrl.append("&target=blank");
    pageUrl.append("&hide_title_bar=1");
    pageUrl.append("&no_drop_domain=1");
    pageUrl.append("&dummy=1");
    pageUrl.append("&bgcolor=ffffff");
    pageUrl.append("&r=").append(Math.random());
    String url = pageUrl.toString();

    Header[] requestHeaders = new BasicHeader[]{
        new BasicHeader(HttpHeaders.HOST, "ui.ptlogin2.qq.com")
    };
    String page = httpGetDatas(url, requestHeaders);

    Element head = Jsoup.parse(page).getElementsByTag("head").first();
    String script = head.getElementsByTag("script").first().html();

    // Take the text from "login_sig:" up to the quote that precedes ",clientip",
    // then keep what follows the first double quote inside that fragment.
    int fragmentStart = script.indexOf("login_sig:");
    int fragmentEnd = script.indexOf("\",clientip");
    String fragment = script.substring(fragmentStart, fragmentEnd);
    String loginSigValue = fragment.substring(fragment.indexOf("\"") + 1);

    HashMap<String, String> loginMap = new HashMap<String, String>();
    loginMap.put("login_sig", loginSigValue);
    loginMap.put("login_url", url);
    return loginMap;
}

/**
 * Checks during pre-login whether a verification code is required.
 * @param dataRedirect the data-redirect value obtained when the login was started
 * @param username user name
 * @param loginSig value sent as the login_sig query parameter
 * @param loginUrl login page URL, sent as the Referer header
 * @return
 */
private HashMap<String, String> isHasVC(String dataRedirect, String username,
        String loginSig, String loginUrl) {
    // Calls the ptlogin2 "check" endpoint and returns the first three quoted values
    // of its reply under "isHasVC", "vc" and "saltUin". doLogin treats an "isHasVC"
    // value other than 0 as "captcha required".
    String url = null;
    try {
        url = "https://ssl.ptlogin2.qq.com/check?"
                + "regmaster="
                // Encoded like in finalLogin, so names with reserved characters
                // (e.g. '@' or '&') cannot break the query string.
                + "&uin=" + URLEncoder.encode(username, "UTF-8")
                + "&appid=46000101"
                + "&js_ver=10052"
                + "&js_type=1"
                + "&login_sig=" + loginSig
                + "&u1=" + URLEncoder.encode(dataRedirect, "UTF-8")
                + "&r=" + Math.random();
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    Header[] headers = new BasicHeader[]{
        new BasicHeader(HttpHeaders.REFERER, loginUrl)
    };
    String htmlDatas = httpGetDatas(url, headers);

    // The reply is a call expression: keep the argument list between "(" and ");"
    // and split it into the individual quoted arguments.
    String str = htmlDatas.substring(htmlDatas.indexOf("(") + 1, htmlDatas.indexOf(");"));
    String[] strs = str.split(",");

    HashMap<String, String> checkVCMap = new HashMap<String, String>();
    String isHasVC = strs[0].substring(strs[0].indexOf("'") + 1, strs[0].lastIndexOf("'"));
    checkVCMap.put("isHasVC", isHasVC);
    String vc = strs[1].substring(strs[1].indexOf("'") + 1, strs[1].lastIndexOf("'"));
    checkVCMap.put("vc", vc);
    String saltUin = strs[2].substring(strs[2].indexOf("'") + 1, strs[2].lastIndexOf("'"));
    checkVCMap.put("saltUin", saltUin);
    return checkVCMap;
}

/**
 * Returns the verification-code image URL needed for the current user's login.
 * @param username user name
 * @return
 */
public String getVCode(String username) {
    // Builds the captcha image URL; a random suffix makes every URL distinct.
    String encodedUser;
    try {
        // Encoded like in finalLogin, so reserved characters in the user name
        // cannot break the query string.
        encodedUser = URLEncoder.encode(username, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is a charset every Java platform must provide.
        throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
    String imageUrl = "https://ssl.captcha.qq.com/getimage?"
            + "uin=" + encodedUser
            + "&aid=46000101"
            + "&" + Math.random();
    return imageUrl;
}

/**
 * Saves the verification-code image.
 * @param url verification-code image URL
 */
public void saveVCodeImg(String url) {
    // Downloads the image at the given URL and writes it to "vc.jpg" in the
    // working directory. Failures are printed, not thrown.
    HttpGet getImages = new HttpGet(url);
    FileOutputStream fileWrite = null;
    try {
        HttpResponse response = httpClient.execute(getImages);
        byte[] imageBytes = EntityUtils.toByteArray(response.getEntity());
        fileWrite = new FileOutputStream("vc.jpg");
        fileWrite.write(imageBytes);
    } catch (ClientProtocolException e) {
        // Abort so the pooled connection is released after a failed request.
        getImages.abort();
        e.printStackTrace();
    } catch (IOException e) {
        getImages.abort();
        e.printStackTrace();
    } finally {
        // Close the file even when the write failed.
        if (fileWrite != null) {
            try {
                fileWrite.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Simulates the final login request.
 * @param vc verification code
 * @param dataRedirect redirect link data
 * @param username user name
 * @param password password
 * @param loginSig value sent as the login_sig query parameter
 * @param loginUrl login page URL, sent as the Referer header
 * @param saltUin salt passed to the password hashing
 * @return
 */
private String finalLogin(String vc, String saltUin, String dataRedirect, String username,
        String password, String loginSig, String loginUrl) {
    // Sends the login request. Returns the third quoted reply value (the check URL)
    // when the fifth one equals "登录成功!", otherwise returns the fifth value
    // (the server's message).
    String hashedPassword = null;
    try {
        hashedPassword = TencentWeiboEncryption.getPassword(saltUin, password, vc);
    } catch (Exception e) {
        e.printStackTrace();
    }

    String url = null;
    try {
        StringBuilder query = new StringBuilder("https://ssl.ptlogin2.qq.com/login?");
        query.append("u=").append(URLEncoder.encode(username, "UTF-8"));
        query.append("&p=").append(hashedPassword);
        query.append("&verifycode=").append(vc);
        query.append("&aid=46000101");
        query.append("&u1=").append(URLEncoder.encode(dataRedirect, "UTF-8"));
        query.append("&h=1");
        query.append("&ptredirect=1");
        query.append("&ptlang=2052");
        query.append("&daid=6");
        query.append("&from_ui=1");
        query.append("&dumy=");
        query.append("&low_login_enable=0");
        query.append("&regmaster=");
        query.append("&fp=loginerroralert");
        query.append("&action=2-20-").append(new Date().getTime());
        query.append("&mibao_css=");
        query.append("&t=1");
        query.append("&g=1");
        query.append("&js_ver=10052");
        query.append("&js_type=1");
        query.append("&login_sig=").append(loginSig);
        query.append("&pt_rsa=0");
        url = query.toString();
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }

    Header[] requestHeaders = new BasicHeader[]{
        new BasicHeader(HttpHeaders.REFERER, loginUrl)
    };
    String reply = httpGetDatas(url, requestHeaders);

    // Keep the argument list between "(" and ");" and split it on commas.
    int argsStart = reply.indexOf("(") + 1;
    int argsEnd = reply.indexOf(");");
    String[] args = reply.substring(argsStart, argsEnd).split(",");

    String urlArg = args[2];
    String checkUrl = urlArg.substring(urlArg.indexOf("'") + 1, urlArg.lastIndexOf("'"));
    String messageArg = args[4];
    String loginResult = messageArg.substring(messageArg.indexOf("'") + 1, messageArg.lastIndexOf("'"));

    return loginResult.equals("登录成功!") ? checkUrl : loginResult;
}

/**
 * Obtains the final authorization.
 * @param loginUrl
 * @param checkSigUrl
 * @return
 */
private Properties authorize(String loginUrl, String checkSigUrl) {
    // Requests checkSigUrl, takes the "content" attribute of the first <meta> tag of
    // the reply and turns the query string after its "?" into Properties (one
    // key=value entry per "&"-separated pair). Returns null when checkSigUrl is null.
    if (checkSigUrl == null) {
        return null;
    }
    Header[] headers = new BasicHeader[]{
        new BasicHeader(HttpHeaders.REFERER, loginUrl)
    };
    String htmlDatas = httpGetDatas(checkSigUrl, headers);

    Document document = Jsoup.parse(htmlDatas);
    Element element = document.getElementsByTag("meta").first();
    if (element == null) {
        // Fail with a readable message instead of a bare NullPointerException.
        throw new IllegalStateException("Authorization response contains no <meta> tag");
    }
    String content = element.attr("content");

    String subContent = content.substring(content.indexOf("?") + 1);
    String propStr = subContent.replace("&", "\n");

    Properties prop = new Properties();
    try {
        // Load from characters directly: going through getBytes() would encode with
        // the platform charset while Properties.load(InputStream) decodes ISO-8859-1.
        prop.load(new java.io.StringReader(propStr));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return prop;
}

/**
 * Submits the URL and returns the page data (GET request).
 * @param url page to request
 * @param headers HTTP request headers
 * @return
 */
private String httpGetDatas(String url, Header[] headers) {
    // Executes a GET request and returns the response body as text, or null when
    // the request fails (the failure is printed). Throws NullPointerException when
    // url is null.
    if (url == null) {
        throw new NullPointerException("URL is null");
    }
    HttpGet httpGet = new HttpGet(url);
    httpGet.setHeaders(headers);

    String response = null;
    try {
        HttpResponse httpResponse = httpClient.execute(httpGet);
        // UTF-8 is only the fallback for responses that declare no charset; without
        // it HttpClient falls back to ISO-8859-1, which would garble the Chinese
        // text that callers compare against.
        response = EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
    } catch (ClientProtocolException e) {
        // Abort so the pooled connection is released after a failed request.
        httpGet.abort();
        e.printStackTrace();
    } catch (IOException e) {
        httpGet.abort();
        e.printStackTrace();
    }
    return response;
}

/**
 * Loads the configuration.
 * @return
 */
public Properties initProperties() {
    // Loads "cfg.properties" from the context class loader. doLogin reads the keys
    // "client_id" and "redirect_uri" from the result.
    Properties properties = new Properties();
    InputStream inputStream = Thread.currentThread().
            getContextClassLoader().getResourceAsStream("cfg.properties");
    if (inputStream == null) {
        // getResourceAsStream returns null for a missing resource; say so explicitly
        // instead of failing inside Properties.load with a bare NullPointerException.
        throw new IllegalStateException("cfg.properties was not found on the classpath");
    }
    try {
        properties.load(inputStream);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            inputStream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return properties;
}

/**
 * Console demo: logs in, and when a verification code is required saves the
 * image to vc.jpg, reads the code from standard input and retries once if the
 * first code is rejected.
 */
public static void main(String[] args) {
    TencentWeiboLoginer loginer = new TencentWeiboLoginer();
    TLoginParams tLoginParams = loginer.doLogin("", "");

    if (!tLoginParams.isLogin()) {
        // The login was rejected outright; there is nothing to continue with.
        System.err.println("登录失败");
        return;
    }

    if (tLoginParams.getProp() == null) {
        // A verification code is required.
        String saltUin = tLoginParams.getSaltUin();
        String dataRedirect = tLoginParams.getDataRedirect();
        String loginSig = tLoginParams.getLoginSig();
        String loginUrl = tLoginParams.getLoginUrl();
        String imgUrl = tLoginParams.getImgURl();
        // The captcha URL that would be handed back to a caller.
        System.err.println(imgUrl);

        // Fetch a captcha again and save it so it can be inspected.
        imgUrl = loginer.getVCode("");
        loginer.saveVCodeImg(imgUrl);

        // One Scanner for all console input: a second Scanner on System.in can miss
        // input that the first one has already buffered.
        Scanner input = new Scanner(System.in);
        String vc = input.nextLine();

        TLoginParams loginresult = loginer.doLoginByVC(vc, saltUin, dataRedirect, "",
                "", loginSig, loginUrl);
        if (!loginresult.isLogin()) {
            // Wrong code: fetch a new captcha and try once more.
            System.err.println("验证码错误!重新录入");
            imgUrl = loginer.getVCode("");
            loginer.saveVCodeImg(imgUrl);
            String vc1 = input.nextLine();
            Properties codeProp = loginer.doLoginByVC(vc1, saltUin, dataRedirect, "",
                    "", loginSig, loginUrl).getProp();
            System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
        } else {
            // Correct code: print the result directly.
            Properties codeProp = loginresult.getProp();
            System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
        }
    } else {
        // No verification code was needed.
        Properties codeProp = tLoginParams.getProp();
        System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
    }
}
}

上述代码完整模拟了腾讯微博的登录过程,并最终获得授权。

腾讯微博数据抓取(java实现)的更多相关文章

  1. 腾讯微博模拟登陆+数据抓取(java实现)

    不多说,贴出相关代码. 参数实体: package token.def; import java.io.Serializable; import java.util.Properties; publi ...

  2. 新浪微博数据抓取(java实现)

    多了不说,直接贴出相关部分的实现代码 加密部分实现: package token.exe; import java.math.BigInteger; import java.util.Random; ...

  3. 新浪微博模拟登陆+数据抓取(java实现)

    模拟登陆部分实现: package token.exe; import java.math.BigInteger; import java.util.Random; import org.apache ...

  4. Java实现多种方式的http数据抓取

    前言: 时下互联网第一波的浪潮已消逝,随着而来的基于万千数据的物联网时代,因而数据成为企业的重要战略资源之一.基于数据抓取技术,本文介绍了java相关抓取工具,并附上demo源码供感兴趣的朋友测试! ...

  5. 大数据抓取采集框架(摘抄至http://blog.jobbole.com/46673/)

    摘抄至http://blog.jobbole.com/46673/ 随着BIG DATA大数据概念逐渐升温,如何搭建一个能够采集海量数据的架构体系摆在大家眼前.如何能够做到所见即所得的无阻拦式采集.如 ...

  6. 网页数据抓取工具,webscraper 最简单的数据抓取教程,人人都用得上

    Web Scraper 是一款免费的,适用于普通用户(不需要专业 IT 技术的)的爬虫工具,可以方便的通过鼠标和简单配置获取你所想要数据.例如知乎回答列表.微博热门.微博评论.淘宝.天猫.亚马逊等电商 ...

  7. Android MaoZhuaWeiBo 好友动态信息列表数据抓取 -3

    前面2篇把大致的开发说的几乎相同了,接下来说说粉丝动态消息列表或时间线数据的抓取与解析显示,我将他所有写在了一个 类里.并以封装类对象的形式存储数据.以下看看基本的服务代码: 粉丝动态消息列表数据抓取 ...

  8. [nodejs,expressjs,angularjs2] LOL英雄列表数据抓取及查询显示应用

    新手练习,尝试使用angularjs2 [angularjs2 数据绑定,监听数据变化自动修改相应dom值,非常方便好用,但与传统js(jquery)的使用方法会很不同,Dom操作也不太习惯] 应用效 ...

  9. [原创.数据可视化系列之十二]使用 nodejs通过async await建立同步数据抓取

    做数据分析和可视化工作,最重要的一点就是数据抓取工作,之前使用Java和python都做过简单的数据抓取,感觉用的很不顺手. 后来用nodejs发现非常不错,通过js就可以进行数据抓取工作,类似jqu ...

随机推荐

  1. Django TemplateView

    主要功能是渲染模板,看官例: from django.views.generic.base import TemplateView from articles.models import Articl ...

  2. VS2015 ASP.NET5 Web项目结构浅析

    前言 本文个人同步博客地址http://aehyok.com/Blog/Detail/76.html 个人网站地址:aehyok.com QQ 技术群号:206058845,验证码为:aehyok 本 ...

  3. Mingyang.net:格式化Hibernate的SQL输出语句

    在sping与hibernate整合中可以这样的设置 <property name="hibernateProperties"> <props> <p ...

  4. HashMap大小选择

    java hashmap,如果确定只装载100个元素,new HashMap(?)多少是最佳的,why? 要回答这个问题,首先得知道影响HashMap性能的参数有哪些.咱们翻翻JDK. 在JDK6中是 ...

  5. 记一次使用搬瓦工VPS的经历

    自己因为有需求上Google,以前是通过修改hosts的方法实现访问Google,但是最近不知道为什么改hosts后还是无法访问Google,于是决定搭建VPS来实现*,看了一下价格,作为穷逼学 ...

  6. 西电2017ACM网络赛

    #include<bits/stdc++.h> using namespace std; typedef long long LL; #define ms(a,x) memset(a,x, ...

  7. 006-Python函数

    Python函数(def) 函数是组织好的,可重复使用的,用来实现单一,或相关联功能的代码段.Python提供了许多内建函数,比如print().但你也可以自己创建函数,这被叫做用户自定义函数.函数能 ...

  8. 解决WPF导入图片不显示的问题

    想在XAML中使用一张图片,得先将其添加到工程中, 方法是: 在项目中双击Resources.resx,选择图像,在添加资源的下拉菜单中选择添加现有文件,然后选择文件,添加图片进来后可以在Resour ...

  9. 2-Twenty Second Scrum Meeting-20151222

    任务安排 成员 今日完成 明日任务 闫昊 服务器关闭,开发停滞……  …… 唐彬 服务器关闭,开发停滞……  …… 史烨轩  服务器关闭,开发停滞……  …… 余帆   路径保存 路径整合 金哉仁   ...

  10. 安卓GreenDao框架一些进阶用法整理(转)

    大致分为以下几个方面: 一些查询指令整理 使用SQL语句进行特殊查询 检测表字段是否存在 数据库升级 数据库表字段赋初始值 一.查询指令整理 1.链式执行的指令 return mDaoSession. ...