Lucene全文检索学习笔记(一):Lucene的应用

时间:2022-04-10 03:10:03

1. Lucene的使用


1.1 引入Jar包

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>6.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>6.6.0</version>
</dependency>
  • lucene-core-6.6.0.jar 包含文档,索引,搜索,存储等常用的核心代码。
  • lucene-queryparser-6.6.0.jar 提供了查询解析相关的代码,用于各种搜索,比如模糊搜索,范围搜索。
  • 注意:Lucene各模块的版本号应保持一致,否则可能出现类或方法不兼容的问题。

1.2 生成索引

package com.infinova.system.service.test;

import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.alibaba.fastjson.JSON;
import com.infinova.system.pojo.ResourceZonning;

/**
 * Builds the on-disk Lucene index that the search example reads.
 */
public class LuceneTest {

    /**
     * Rebuilds the index stored under {@code D:/index/} from the given resources.
     *
     * <p>All existing documents are deleted first. Then one document is added per
     * element, carrying two stored, analyzed text fields: {@code id} (from
     * {@code getResId()}) and {@code json} (the fastjson serialization of the element).
     *
     * <p>{@link IOException}s are not propagated: the stack trace is printed and the
     * method returns. When indexing fails part-way, the writer is rolled back so the
     * index lock is released and the uncommitted delete/add operations are discarded.
     *
     * @param rz resources to index; must not be {@code null}
     */
    public void Index(List<ResourceZonning> rz) {
        IndexWriterConfig cfg = new IndexWriterConfig(new StandardAnalyzer());

        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("D:/index/"))) {
            IndexWriter writer = new IndexWriter(directory, cfg);
            try {
                writer.deleteAll(); // drop the previous index contents
                for (ResourceZonning resourceZonning : rz) {
                    Document doc = new Document();
                    doc.add(new TextField("id", resourceZonning.getResId(), Field.Store.YES));
                    doc.add(new TextField("json", JSON.toJSONString(resourceZonning), Field.Store.YES));
                    writer.addDocument(doc);
                }
                // Closing on the success path is what publishes the new index.
                writer.close();
            } catch (IOException | RuntimeException e) {
                // Without this the writer (and its write lock) would stay open after a
                // failure; rollback closes it without committing the partial rebuild.
                try {
                    writer.rollback();
                } catch (IOException rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1.3 搜索

package com.infinova.system.service.test;

import java.io.IOException;
import java.nio.file.FileSystems;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Command-line example that runs a query against the index under {@code D:/index/}
 * and prints the matching documents.
 */
public class IndexSearch {

    /**
     * Executes {@code query} against the index and prints the total hit count followed
     * by the stored {@code id} and {@code json} fields of up to 20 top-ranked documents.
     *
     * <p>The directory and reader are closed when the method exits, whether or not the
     * search succeeds. {@link IOException}s are reported via their stack trace and are
     * not propagated.
     *
     * @param query the query to run
     */
    private static void doSearch(Query query) {
        // Open the index location and a reader on it; both are released automatically.
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("D:/index/"));
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, 20);

            // Total number of documents matching the query.
            int count = topDocs.totalHits;
            System.out.println("匹配总数:" + count);

            // The matching documents that were actually returned.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println("资源ID:" + doc.get("id"));
                System.out.println("资源json:" + doc.get("json"));
                System.out.println("==========================");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the sample query {@code 1.1.1*} against the {@code json} field and prints
     * the results.
     *
     * @param args unused
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws ParseException {
        Analyzer analyzer = new StandardAnalyzer();
        // QueryParser needs an analyzer, and it must be the same one used at index time.
        QueryParser parser = new QueryParser("json", analyzer);
        // Build the Query through the parser; the query syntax is covered in a later chapter.
        Query query = parser.parse("1.1.1*");
        doSearch(query);
    }
}