安卓Android使用入门(二十四)【爬取网络小说】

时间:2022-01-06 22:43:06

人法地,地法天,天法道,道法自然。——老子


ConActivity.java

package siso.novelreader.activity;

import android.app.Activity;
import android.content.Intent;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.view.View;
import android.view.Window;
import android.widget.Button;
import android.widget.TextView;

import siso.novelreader.R;
import siso.novelreader.Utils.ParserWeb;
import siso.novelreader.bean.NovalContentBean;

/**
* =============================================================================
* Describe : 文章内容
* ==============================================================================
*/


public class ConActivity extends Activity {


private NovalContentBean novalContentBean;
private TextView tv_title;
private TextView tv_name;
private TextView tv_author;
private TextView tv_time;
private TextView tv_wd;
private TextView tv_con;
private Button bt_pre;
private Button bt_next;

//更新文章内容
private Handler mhandler = new Handler() {
@Override
public void handleMessage(Message msg) {
super.handleMessage(msg);
if (msg.what == 111) {

tv_title.setText(novalContentBean.getTitle());
tv_name.setText(novalContentBean.getNovel_name());
tv_author.setText(novalContentBean.getAuthor());
tv_time.setText(novalContentBean.getTime());
tv_wd.setText(novalContentBean.getWdnumber());
tv_con.setText(novalContentBean.getNv_content().toString());
//如果上一章不存在
if (novalContentBean.getPre_link() == "" || novalContentBean.getPre_link() == null) {
bt_pre.setVisibility(View.INVISIBLE);
bt_pre.setClickable(false);
}
//如果下一章不存在
if (novalContentBean.getNext_link() == "" || novalContentBean.getNext_link() == null) {
bt_next.setVisibility(View.INVISIBLE);
bt_next.setClickable(false);
}
//上一章的点击事件
bt_pre.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
Intent intent=new Intent(ConActivity.this,ConActivity.class);
intent.putExtra("path",novalContentBean.getPre_link());
startActivity(intent);
finish();
}
});
//下一章的点击事件
bt_next.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
Intent intent=new Intent(ConActivity.this,ConActivity.class);
intent.putExtra("path",novalContentBean.getNext_link());
startActivity(intent);
finish();
}
});

}
}
};

@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
requestWindowFeature(Window.FEATURE_NO_TITLE);
setContentView(R.layout.activity_content);

tv_title = (TextView) findViewById(R.id.tv_title);
tv_name = (TextView) findViewById(R.id.tv_name);
tv_author = (TextView) findViewById(R.id.tv_author);
tv_time = (TextView) findViewById(R.id.tv_time);
tv_wd = (TextView) findViewById(R.id.tv_wd);
tv_con = (TextView) findViewById(R.id.tv_con);
bt_pre = (Button) findViewById(R.id.bt_pre);
bt_next = (Button) findViewById(R.id.bt_next);

final String path = getIntent().getStringExtra("path");
new Thread() {
@Override
public void run() {
novalContentBean = ParserWeb.parser_nol(path);
if (novalContentBean != null) {
mhandler.sendEmptyMessage(111);
}
}
}.start();
}
}

MainActivity.java代码:

package siso.novelreader.activity;

import android.content.Intent;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.widget.AdapterView;
import android.widget.Button;
import android.widget.ListView;

import siso.novelreader.R;
import siso.novelreader.Utils.ParserWeb;
import siso.novelreader.adapter.NovelAdapter;
import siso.novelreader.bean.NovelBean;
import com.nostra13.universalimageloader.cache.disc.naming.Md5FileNameGenerator;
import com.nostra13.universalimageloader.core.DisplayImageOptions;
import com.nostra13.universalimageloader.core.ImageLoader;
import com.nostra13.universalimageloader.core.ImageLoaderConfiguration;
import com.nostra13.universalimageloader.core.assist.ImageScaleType;
import com.nostra13.universalimageloader.core.assist.QueueProcessingType;
import com.nostra13.universalimageloader.core.display.RoundedBitmapDisplayer;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

import static android.util.Log.e;

/**
 * Main screen: scrapes a novel category page with jsoup and lists the novels found.
 *
 * <p>Tapping a row resolves the novel's "start reading" URL on a worker thread and opens
 * {@link ConActivity} with it.
 */
public class MainActivity extends AppCompatActivity {

    private static final String TAG = MainActivity.class.getSimpleName();

    /** Message code posted by the loader thread once the novel list is complete. */
    private static final int MSG_NOVELS_LOADED = 123;

    /** Category page that is scraped for the novel list. */
    private static final String CATEGORY_URL = "http://www.zongheng.com/category/3.html";

    private ListView listView;

    // Written by the loader thread only after the list is fully built, read on the main thread.
    private volatile List<NovelBean> novels;

    private DisplayImageOptions options;

    private Handler mhandler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            super.handleMessage(msg);
            List<NovelBean> loaded = novels;
            if (msg.what == MSG_NOVELS_LOADED && loaded != null) {
                // The list was fetched successfully: show it.
                listView.setAdapter(new NovelAdapter(MainActivity.this, loaded, options));
            }
        }
    };

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Set up ImageLoader (universalimageloader library).
        ImageLoaderConfiguration configuration = new ImageLoaderConfiguration.Builder(this)
                .diskCacheFileCount(100)
                .diskCacheFileNameGenerator(new Md5FileNameGenerator())
                .diskCacheSize(30 * 1024 * 1024)
                .tasksProcessingOrder(QueueProcessingType.LIFO)
                .build();

        // Display options shared by every cover image.
        options = new DisplayImageOptions.Builder()
                .cacheInMemory(true)
                .cacheOnDisk(true)
                .imageScaleType(ImageScaleType.EXACTLY)
                .displayer(new RoundedBitmapDisplayer(10))
                .build();
        ImageLoader.getInstance().init(configuration);

        listView = (ListView) findViewById(R.id.lv_content);

        // Registered once here; the listener does not depend on the loaded data.
        listView.setOnItemClickListener(new AdapterView.OnItemClickListener() {
            @Override
            public void onItemClick(AdapterView<?> parent, View view, final int position, long id) {
                e(TAG, "onItemClick" + position);
                // Ask the adapter for the row that is actually displayed, so a refresh that is
                // still in flight cannot make us index a different list.
                Object item = parent.getItemAtPosition(position);
                if (!(item instanceof NovelBean)) {
                    return;
                }
                openNovel(((NovelBean) item).getNovel_path());
            }
        });
    }

    @Override
    protected void onResume() {
        super.onResume();
        // Scrape the category page (again) whenever the screen becomes visible.
        http_getnovel();
    }

    /**
     * Resolves the first-chapter URL of a novel in the background and opens the reader.
     *
     * @param url address of the novel's detail page; ignored when null or empty
     */
    private void openNovel(final String url) {
        if (url == null || url.isEmpty()) {
            e(TAG, "openNovel: novel has no detail url");
            return;
        }
        new Thread() {
            @Override
            public void run() {
                final String path = ParserWeb.parser_web(url);
                if (path == null || path.isEmpty()) {
                    // Lookup failed; opening the reader without a URL would only fail there.
                    e(TAG, "openNovel: no start-reading link found for " + url);
                    return;
                }
                runOnUiThread(new Runnable() {
                    @Override
                    public void run() {
                        if (isFinishing()) {
                            return;
                        }
                        Intent intent = new Intent(MainActivity.this, ConActivity.class);
                        intent.putExtra("path", path);
                        startActivity(intent);
                    }
                });
            }
        }.start();
    }

    /**
     * Downloads the category page on a worker thread, builds the novel list and notifies
     * the handler when it is complete.
     *
     * <p>Each novel is expected inside a {@code div.detail} element shaped like:
     * <pre>
     * &lt;div class="detail"&gt;
     *   &lt;a class="mark63" href="..."&gt;&lt;img src="cover.jpg" alt="..."&gt;&lt;/a&gt;
     *   &lt;h3&gt;&lt;a href="..."&gt;novel name&lt;/a&gt;&lt;/h3&gt;
     *   &lt;p&gt;author&lt;/p&gt;
     *   &lt;p&gt;type&lt;/p&gt;
     *   &lt;p class="tr"&gt;&lt;a href="http://book.zongheng.com/book/609737.html"&gt;read&lt;/a&gt;&lt;/p&gt;
     * &lt;/div&gt;
     * </pre>
     */
    private void http_getnovel() {
        new Thread() {
            @Override
            public void run() {
                try {
                    // Open the connection with a 10 second timeout.
                    Connection connect = Jsoup.connect(CATEGORY_URL);
                    connect.timeout(10000);
                    Document document = connect.get();
                    Elements detail = document.select("div.detail");

                    // Build into a local list and publish it only when finished.
                    List<NovelBean> fetched = new ArrayList<>();
                    for (Element element : detail) {
                        Element image = element.getElementsByTag("img").first();
                        Element heading = element.getElementsByTag("h3").first();
                        Elements p_element = element.getElementsByTag("p");
                        if (image == null || heading == null || p_element.size() < 3) {
                            // Skip entries that do not match the expected markup instead of
                            // aborting the whole list.
                            continue;
                        }
                        String novel_image = image.attr("src");
                        String novel_name = heading.text();
                        String novel_autor = p_element.get(0).text();
                        String novel_type = p_element.get(1).text();
                        String novel_path = p_element.get(2).getElementsByTag("a").attr("href");
                        fetched.add(new NovelBean(novel_name, novel_type, novel_autor, novel_path, novel_image));
                    }
                    novels = fetched;
                    mhandler.sendEmptyMessage(MSG_NOVELS_LOADED);
                } catch (Exception ex) {
                    // Thread boundary: log and keep the previous list on any failure.
                    e(TAG, "http_getnovel failed", ex);
                }
            }
        }.start();
    }
}

NovelAdapter.java代码:

package siso.novelreader.adapter;

import android.content.Context;
import android.media.Image;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.BaseAdapter;
import android.widget.ImageView;
import android.widget.TextView;

import siso.novelreader.R;
import siso.novelreader.bean.NovelBean;
import com.nostra13.universalimageloader.core.DisplayImageOptions;
import com.nostra13.universalimageloader.core.ImageLoader;

import java.util.List;

/**
 * List adapter that renders one {@link NovelBean} per row: cover image, name, author,
 * type and a fixed "read" label.
 */
public class NovelAdapter extends BaseAdapter {
    private final Context context;
    private final List<NovelBean> novels;
    private final LayoutInflater inflater;
    private final DisplayImageOptions options;

    /**
     * @param context context used to obtain the layout inflater
     * @param novels  rows to display; {@code null} is treated as an empty list
     * @param options image display options passed to ImageLoader for every cover
     */
    public NovelAdapter(Context context, List<NovelBean> novels, DisplayImageOptions options) {
        this.context = context;
        this.novels = novels != null ? novels : java.util.Collections.<NovelBean>emptyList();
        this.options = options;
        inflater = LayoutInflater.from(context);
    }

    @Override
    public int getCount() {
        return novels.size();
    }

    @Override
    public Object getItem(int position) {
        return novels.get(position);
    }

    @Override
    public long getItemId(int position) {
        return position;
    }

    @Override
    public View getView(int position, View convertView, ViewGroup parent) {
        ViewHolder viewHolder;
        if (convertView == null) {
            viewHolder = new ViewHolder();
            // Pass the parent (without attaching) so the row root keeps its layout params.
            convertView = inflater.inflate(R.layout.novel_item, parent, false);
            viewHolder.imageView = (ImageView) convertView.findViewById(R.id.novel_image);
            viewHolder.name = (TextView) convertView.findViewById(R.id.name);
            viewHolder.type = (TextView) convertView.findViewById(R.id.type);
            viewHolder.author = (TextView) convertView.findViewById(R.id.author);
            viewHolder.path = (TextView) convertView.findViewById(R.id.path);
            convertView.setTag(viewHolder);
        } else {
            // Recycled row: reuse the view lookups cached in the tag.
            viewHolder = (ViewHolder) convertView.getTag();
        }

        NovelBean novel = novels.get(position);
        // Load the cover through the image loading library.
        ImageLoader.getInstance().displayImage(novel.getNovel_image(), viewHolder.imageView, options);
        viewHolder.name.setText(novel.getNovel_name());
        viewHolder.author.setText(novel.getNovel_autor());
        viewHolder.type.setText(novel.getNovel_type());
        viewHolder.path.setText("【阅读】");

        return convertView;
    }

    /** Caches the child views of one row; static so it holds no reference to the adapter. */
    static class ViewHolder {
        ImageView imageView;
        TextView name;
        TextView author;
        TextView type;
        TextView path;
    }
}

NovalContentBean.java代码:

package siso.novelreader.bean;

import java.util.List;

/**
 * Value holder for one parsed chapter: heading data, the paragraph texts and the links
 * to the neighbouring chapters.
 */
public class NovalContentBean {

    // Heading information.
    private String title;
    private String novel_name;
    private String author;
    private String time;
    private String wdnumber;

    // Chapter text.
    private List<String> nv_content;

    // Navigation links.
    private String pre_link;
    private String next_link;

    /** Creates an empty bean; every field starts out as {@code null}. */
    public NovalContentBean() {
    }

    /** Creates a bean with every field supplied. */
    public NovalContentBean(String author, String next_link, String novel_name, List<String> nv_content, String pre_link, String title, String time, String wdnumber) {
        this.title = title;
        this.novel_name = novel_name;
        this.author = author;
        this.time = time;
        this.wdnumber = wdnumber;
        this.nv_content = nv_content;
        this.pre_link = pre_link;
        this.next_link = next_link;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getNovel_name() {
        return novel_name;
    }

    public void setNovel_name(String novel_name) {
        this.novel_name = novel_name;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getWdnumber() {
        return wdnumber;
    }

    public void setWdnumber(String wdnumber) {
        this.wdnumber = wdnumber;
    }

    public List<String> getNv_content() {
        return nv_content;
    }

    public void setNv_content(List<String> nv_content) {
        this.nv_content = nv_content;
    }

    public String getPre_link() {
        return pre_link;
    }

    public void setPre_link(String pre_link) {
        this.pre_link = pre_link;
    }

    public String getNext_link() {
        return next_link;
    }

    public void setNext_link(String next_link) {
        this.next_link = next_link;
    }

    /** Renders every field in a fixed order, mainly for logging. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("NovalContentBean{");
        text.append("author='").append(author).append('\'');
        text.append(", title='").append(title).append('\'');
        text.append(", novel_name='").append(novel_name).append('\'');
        text.append(", time='").append(time).append('\'');
        text.append(", wdnumber='").append(wdnumber).append('\'');
        text.append(", nv_content=").append(nv_content);
        text.append(", pre_link='").append(pre_link).append('\'');
        text.append(", next_link='").append(next_link).append('\'');
        text.append('}');
        return text.toString();
    }
}

NovelBean.java代码:

package siso.novelreader.bean;

/**
 * Value holder for one row of the novel list: name, cover image URL, author, type and
 * the link to the novel's page.
 */
public class NovelBean {

    private String novel_name;
    private String novel_image;
    private String novel_autor;
    private String novel_type;
    private String novel_path;

    /** Creates an empty bean; every field starts out as {@code null}. */
    public NovelBean() {
    }

    /** Creates a bean with every field supplied. */
    public NovelBean(String novel_name, String novel_type, String novel_autor, String novel_path, String novel_image) {
        this.novel_name = novel_name;
        this.novel_image = novel_image;
        this.novel_autor = novel_autor;
        this.novel_type = novel_type;
        this.novel_path = novel_path;
    }

    public String getNovel_name() {
        return novel_name;
    }

    public void setNovel_name(String novel_name) {
        this.novel_name = novel_name;
    }

    public String getNovel_image() {
        return novel_image;
    }

    public void setNovel_image(String novel_image) {
        this.novel_image = novel_image;
    }

    public String getNovel_autor() {
        return novel_autor;
    }

    public void setNovel_autor(String novel_autor) {
        this.novel_autor = novel_autor;
    }

    public String getNovel_type() {
        return novel_type;
    }

    public void setNovel_type(String novel_type) {
        this.novel_type = novel_type;
    }

    public String getNovel_path() {
        return novel_path;
    }

    public void setNovel_path(String novel_path) {
        this.novel_path = novel_path;
    }

    /** Renders every field in a fixed order, mainly for logging. */
    @Override
    public String toString() {
        return String.format(
                "NovelBean{novel_autor='%s', novel_name='%s', novel_image='%s', novel_type='%s', novel_path='%s'}",
                novel_autor, novel_name, novel_image, novel_type, novel_path);
    }
}

ParserWeb.java代码:

package siso.novelreader.Utils;

import android.database.sqlite.SQLiteOpenHelper;
import android.util.Log;

import siso.novelreader.bean.NovalContentBean;
import siso.novelreader.bean.NovelBean;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static android.util.Log.e;

/**
 * jsoup-based scraping helpers for the novel site.
 *
 * <p>Both methods perform blocking network I/O and must be called off the main thread.
 */
public class ParserWeb {

    private static final String TAG = ParserWeb.class.getSimpleName();

    /**
     * Looks up the "start reading" address on a novel's detail page.
     *
     * @param url address of the novel's detail page
     * @return the {@code href} of the first link inside {@code div.book_btn}, or an empty
     *         string when the URL is missing/invalid, the download fails or the page has
     *         no such element
     */
    public static String parser_web(String url) {
        String attr = "";
        if (isBlank(url)) {
            Log.e(TAG, "parser_web: empty url");
            return attr;
        }
        try {
            e(TAG, "parser_web" + url);
            Document document = Jsoup.connect(url).get();
            // first() is null when the page has no div.book_btn.
            Element book_btn = document.select("div.book_btn").first();
            if (book_btn != null) {
                attr = book_btn.getElementsByTag("a").attr("href");
            }
        } catch (IOException | IllegalArgumentException ex) {
            // IllegalArgumentException: jsoup rejects malformed URLs.
            Log.e(TAG, "parser_web failed for " + url, ex);
        }
        return attr;
    }

    /**
     * Downloads a chapter page and extracts its content.
     *
     * <p>Fields whose markup is missing are set to an empty string (or an empty list for
     * the text) instead of aborting the parse.
     *
     * @param url address of the chapter page
     * @return the parsed chapter, or {@code null} when the URL is missing/invalid, the
     *         download fails or the page has no {@code div.pane} container
     */
    public static NovalContentBean parser_nol(String url) {
        if (isBlank(url)) {
            Log.e(TAG, "parser_nol: empty url");
            return null;
        }
        try {
            Document novel = Jsoup.connect(url).get();
            Elements read_con = novel.select("div.pane");
            Element pane = read_con.first();
            if (pane == null) {
                Log.e(TAG, "parser_nol: no div.pane in " + url);
                return null;
            }

            NovalContentBean novalcon = new NovalContentBean();

            // <div class="tc txt"><h1><em itemprop="headline">chapter title</em></h1></div>
            novalcon.setTitle(pane.getElementsByTag("h1").text());

            // The novel name is the fourth link of the breadcrumb trail.
            String novel_name = "";
            Element crumb = read_con.select("div.bread_crumb").first();
            if (crumb != null) {
                Elements crumbLinks = crumb.getElementsByTag("a");
                if (crumbLinks.size() > 3) {
                    novel_name = crumbLinks.get(3).text();
                }
            }
            novalcon.setNovel_name(novel_name);

            novalcon.setAuthor(read_con.select("span.author").text());

            // span.number holds the time and the word count, separated by the character "字".
            String[] strs = read_con.select("span.number").text().split("字");
            novalcon.setTime(strs.length > 0 ? strs[0] : "");
            novalcon.setWdnumber(strs.length > 1 ? "字" + strs[1] : "");

            // One list entry per div.content, holding the text of all its <p> elements.
            List<String> con = new ArrayList<>();
            for (Element ele : read_con.select("div.content")) {
                con.add(ele.getElementsByTag("p").text());
            }
            novalcon.setNv_content(con);

            // The chapter navigation links live in the fourth div.tc.
            String pre = "";
            String next = "";
            Elements select2 = read_con.select("div.tc");
            if (select2.size() > 3) {
                Elements elements = select2.get(3).getElementsByTag("a");

                // Two a.marr links means a previous chapter exists; it is the first one.
                Elements select3 = elements.select("a.marr");
                if (select3.size() == 2) {
                    pre = select3.first().attr("href");
                }

                // select() never returns null, so test for emptiness to detect the last chapter.
                Elements select4 = elements.select("a.next");
                if (!select4.isEmpty()) {
                    next = select4.first().attr("href");
                }
            }
            novalcon.setPre_link(pre);
            novalcon.setNext_link(next);
            return novalcon;
        } catch (IOException | IllegalArgumentException ex) {
            // IllegalArgumentException: jsoup rejects malformed URLs.
            Log.e(TAG, "parser_nol failed for " + url, ex);
            return null;
        }
    }

    /** Returns true for null or whitespace-only text. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }
}

项目运行结果如图:

安卓Android使用入门(二十四)【爬取网络小说】


安卓Android使用入门(二十四)【爬取网络小说】


安卓Android使用入门(二十四)【爬取网络小说】


安卓Android使用入门(二十四)【爬取网络小说】