Lucene参与项目持久层中对于索引库的增删改查

时间:2022-12-26 03:09:51

本文主要介绍Lucene在项目持久层中对索引库进行增删改查的详细用法。(本文使用的版本是Lucene 4.4.0,使用的分词器是IKAnalyzer2012FF)


1、Lucene实现增删改查准备工作
2、Lucene持久层详细实现


1、Lucene实现增删改查准备工作

  • 第一步:创建Java工程,也可以创建Maven工程(通过pom.xml管理依赖),需要五个jar包:
    commons-io.jar
    IKAnalyzer2012FF_u1.jar
    lucene-analyzers-common-4.4.0.jar
    lucene-core-4.4.0.jar
    lucene-queryparser-4.4.0.jar
  • 第二步:我们需要一个JavaBean对象用于存储网上的新闻或者文章;网上的新闻或者文章主要由文章题目、文章作者、文章内容、文章链接四个方面组成,再加上一个用于标识的id,所以创建Article文章Bean类如下:

Article.java

/**
* JAVABEAN用于方便存储抓取的文章索引库中的单个对象
*/

public class Article {

private int id;

private String title;

private String author;

private String content;

private String link;

public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}

public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}

public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}

public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}

public String getLink() {
return link;
}
public void setLink(String link) {
this.link = link;
}

@Override
public String toString() {
return "Article [id=" + id + ", title=" + title + ", author=" + author
+ ", content=" + content + ", link=" + link + "]";
}
}
  • 第三步:创建Lucene工具类(用于准备Lucene中最重要的IndexWriter和IndexSearcher两个对象):

LuceneUtils.java

/**
 * Shared Lucene plumbing for the article index: owns the index
 * {@link Directory}, the analyzer and the match version, and hands out
 * writers and searchers built on top of them.
 */
public class LuceneUtils {

    private static final Version matchVersion = Version.LUCENE_44;

    private static final Analyzer analyzer = new IKAnalyzer();

    private static final Directory directory;

    static {
        try {
            directory = FSDirectory.open(new File(Contants.INDEXURL));
        } catch (IOException e) {
            // Fail fast: without a directory every other method of this class
            // would only fail later with a far less helpful NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Creates a writer for adding, updating and deleting index entries.
     *
     * <p>A fresh {@link IndexWriterConfig} is built on every call. In the
     * Lucene 4.4 line a config instance is bound to the first writer it is
     * passed to, so sharing one static config makes the second call fail.
     *
     * <p>The caller owns the returned writer and must close it; Lucene allows
     * only one open writer per directory at a time.
     *
     * @return a new writer over the shared directory
     * @throws IOException if the index cannot be opened for writing
     */
    public static IndexWriter getIndexWriter() throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(matchVersion, analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * Creates a searcher for querying the index.
     *
     * <p>Every call opens a new {@link IndexReader} over the directory. The
     * reader is not closed here; the caller should close it through
     * {@code searcher.getIndexReader().close()} once the search is finished.
     *
     * @return a new searcher over a freshly opened reader
     * @throws Exception if the index cannot be opened for reading
     */
    public static IndexSearcher getIndexSearcher() throws Exception {
        IndexReader indexReader = DirectoryReader.open(directory);
        return new IndexSearcher(indexReader);
    }

    /**
     * @return the directory that stores the index
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the Lucene version used for the analyzer and query parser
     */
    public static Version getMatchVersion() {
        return matchVersion;
    }

    /**
     * @return the analyzer used for both indexing and query parsing
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

}

Contants.java

/**
 * Constants shared by the Lucene example classes.
 */
public interface Contants {

    /** Path of the index directory; LuceneUtils opens it through {@code FSDirectory}. */
    String INDEXURL = "index/news";

}
  • 第四步:创建Article向Document转化的工具类:

ArticleUtils.java

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

import com.lucene.bean.Article;

/**
 * Conversion helper that turns {@link Article} beans into Lucene documents.
 */
public class ArticleUtils {

    /**
     * Builds the Lucene document for one article.
     *
     * <p>The document gets five stored fields, added in this order:
     * {@code id} ({@code IntField}), {@code title} ({@code StringField}),
     * {@code content} ({@code TextField}), {@code author} and {@code link}
     * (both {@code StringField}).
     *
     * <p>NOTE(review): {@code title} is a {@code StringField}, which Lucene
     * indexes as one untokenized term, while LuceneDao.findIndex queries
     * {@code title} through the analyzer. Confirm that title search matches
     * as intended. Likewise, a plain string {@code Term} presumably cannot
     * match the numeric {@code id} field; verify before deleting or updating
     * by id.
     *
     * @param article the article to convert
     * @return a new document carrying the article's properties
     */
    public static Document articleDocument(Article article) {
        Document doc = new Document();

        doc.add(new IntField("id", article.getId(), Store.YES));
        doc.add(new StringField("title", article.getTitle(), Store.YES));
        doc.add(new TextField("content", article.getContent(), Store.YES));
        doc.add(new StringField("author", article.getAuthor(), Store.YES));
        doc.add(new StringField("link", article.getLink(), Store.YES));

        return doc;
    }

}

2、Lucene持久层详细实现

LuceneDao.java

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import com.lucene.bean.Article;
import com.lucene.utils.ArticleUtils;
import com.lucene.utils.LuceneUtils;

/**
 * DAO-layer access to the Lucene article index: add, delete, update and
 * paged keyword search.
 */
public class LuceneDao {

    // Adding, deleting and updating all go through an IndexWriter obtained
    // from LuceneUtils. Each operation opens its own writer and always closes
    // it, so a failed operation does not leave the writer (and its lock on
    // the index) open.

    /**
     * Adds one article to the index.
     *
     * @param article the article to index
     * @throws IOException if the index cannot be written
     */
    public void addIndex(Article article) throws IOException {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            indexWriter.addDocument(ArticleUtils.articleDocument(article));
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Deletes every document whose field {@code fieldName} contains the exact
     * indexed term {@code fieldValue}.
     *
     * <p>NOTE(review): the match is on the raw indexed term, so this
     * presumably only works for untokenized string fields; confirm before
     * using it with the numeric {@code id} field or the analyzed
     * {@code content} field.
     *
     * @param fieldName  name of the field to match
     * @param fieldValue exact term value to match
     * @throws Exception if the index cannot be written
     */
    public void delIndex(String fieldName, String fieldValue) throws Exception {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            indexWriter.deleteDocuments(new Term(fieldName, fieldValue));
            indexWriter.commit();
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Replaces the documents matching the term {@code fieldName:fieldValue}
     * with a document built from {@code article}.
     *
     * @param fieldName  name of the field identifying the documents to replace
     * @param fieldValue exact term value identifying the documents to replace
     * @param article    the new content
     * @throws IOException if the index cannot be written
     */
    public void updateIndex(String fieldName, String fieldValue, Article article) throws IOException {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            Term term = new Term(fieldName, fieldValue);
            indexWriter.updateDocument(term, ArticleUtils.articleDocument(article));
            indexWriter.commit();
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Searches the {@code title} and {@code content} fields for
     * {@code keywords} and returns one page of results.
     *
     * <p>Paging is offset based: {@code start} is the zero-based index of the
     * first hit to return and {@code row} the maximum number of hits, e.g.
     * {@code (0, 10)} for the first page and {@code (10, 10)} for the second.
     * If fewer than {@code start} hits exist, the result is empty.
     *
     * @param keywords query text, parsed with the query parser syntax
     * @param start    zero-based offset of the first hit to return; must not be negative
     * @param row      maximum number of hits to return; must not be negative
     * @return the requested page of articles, never {@code null}
     * @throws IllegalArgumentException if {@code start} or {@code row} is negative
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public List<Article> findIndex(String keywords, int start, int row) throws Exception {
        if (start < 0 || row < 0) {
            throw new IllegalArgumentException(
                    "start and row must not be negative: start=" + start + ", row=" + row);
        }

        List<Article> articlelist = new ArrayList<Article>();
        if (row == 0) {
            // An empty page needs no search; Lucene also rejects a hit limit of 0.
            return articlelist;
        }

        IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher();
        try {
            // Fields the keywords are matched against.
            String fields[] = {"title", "content"};

            QueryParser queryParser = new MultiFieldQueryParser(
                    LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());

            // Expands to: title:keywords , content:keywords
            Query query = queryParser.parse(keywords);

            // Collect enough top hits to cover the requested page.
            TopDocs topDocs = indexSearcher.search(query, start + row);

            System.out.println("总记录数====total====" + topDocs.totalHits);

            ScoreDoc scoreDocs[] = topDocs.scoreDocs;
            int endResult = Math.min(scoreDocs.length, start + row);

            for (int i = start; i < endResult; i++) {
                articlelist.add(toArticle(indexSearcher.doc(scoreDocs[i].doc)));
            }

            return articlelist;
        } finally {
            // LuceneUtils opens a new reader for every searcher; release it here.
            indexSearcher.getIndexReader().close();
        }
    }

    /** Copies the stored fields of a hit back into an Article bean. */
    private static Article toArticle(Document document) {
        Article article = new Article();
        article.setId(Integer.parseInt(document.get("id")));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        article.setLink(document.get("link"));
        article.setAuthor(document.get("author"));
        return article;
    }

}