Lucene的Query类介绍

时间:2023-03-09 22:08:57
Lucene的Query类介绍

把Lucene的查询当成sql的查询,也许会笼统的明白些query的真相了。

查询分为大致两类,1:精准查询。2,模糊查询。

创建测试数据。

private Directory directory;
private IndexReader reader;
private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
private String[] contents = {
"welcome to visited the space,I like book",
"hello boy, I like pingpeng ball",
"my name is cc I like game",
"I like football",
"I like football and I like basketball too",
"I like movie and swim"
};
private int[] attachs = {2,3,1,4,5,5};
private String[] names = {"zhangsan","lisi","john","jetty","lisi","jake"};

先建立索引。

 private Map<String,Float> scores = new HashMap<String,Float>();

 public SearchUtil(){
try {
directory = FSDirectory.open(Paths.get("D://lucene//index"));
scores.put("itat.org", 1.5f);
scores.put("cc.org", 2.0f);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* 创建索引
*/
@SuppressWarnings("deprecation")
public void index(){
IndexWriter writer = null;
try {
directory = FSDirectory.open(Paths.get("D://lucene//index"));
writer = getWriter();
Document doc = null;
for(int i=0;i<ids.length;i++){
doc = new Document();
doc.add(new Field("id", ids[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("name", names[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("content", contents[i], Field.Store.NO,Field.Index.ANALYZED));
//存储数字
doc.add(new IntField("attach", attachs[i], Field.Store.YES)); // 加权操作
TextField field = new TextField("email", emails[i], Field.Store.YES);
String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
if (scores.containsKey(et)) {
field.setBoost(scores.get(et));
}
doc.add(field);
// 添加文档
writer.addDocument(doc);
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}finally{
try {
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}

索引建立完毕。

Lucene的Query类介绍

构造方法。

/**
* getSearcher
* @return
*/
public IndexSearcher getSearcher(){
try {
directory = FSDirectory.open(Paths.get("D://lucene//index"));
if(reader==null){
reader = DirectoryReader.open(directory);
}else{
reader.close();
}
return new IndexSearcher(reader);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}

一、精准匹配。

1,精准查询

就是查什么给什么。

 /**
* 精准匹配
*/
public void search(String searchField,String field){
// 得到读取索引文件的路径
IndexReader reader = null;
try {
directory = FSDirectory.open(Paths.get("D://lucene//index"));
reader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
// 运用term来查找
Term t = new Term(searchField, field);
Query q = new TermQuery(t);
// 获得查询的hits
TopDocs hits = searcher.search(q, 10);
// 显示结果
System.out.println("匹配 '" + q + "',总共查询到" + hits.totalHits + "个文档");
for (ScoreDoc scoreDoc : hits.scoreDocs){
Document doc = searcher.doc(scoreDoc.doc);
System.out.println("id:"+doc.get("id")+":"+doc.get("name")+",email:"+doc.get("email"));
} } catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
reader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}

Lucene的Query类介绍

2,区间查询。

/**
* between
* @param field
* @param start
* @param end
* @param num
*/
public void searchByTermRange(String field,String start,String end,int num) {
try {
IndexSearcher searcher = getSearcher();
BytesRef lowerTerm = new BytesRef(start.getBytes()) ;
BytesRef upperTerm = new BytesRef(end.getBytes()) ; Query query = new TermRangeQuery(field, lowerTerm , upperTerm, true, true);
TopDocs tds = searcher.search(query, num); System.out.println("一共查询了:"+tds.totalHits);
for(ScoreDoc sd:tds.scoreDocs) {
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach"));
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}

Lucene的Query类介绍

3、匹配其索引开始以指定的字符串的文档

 /**
* 匹配其索引开始以指定的字符串的文档
* @param field
* @param value
* @param num
*/
public void searchByPrefix(String field,String value,int num) {
try {
IndexSearcher searcher = getSearcher();
Query query = new PrefixQuery(new Term(field,value));
TopDocs tds = searcher.search(query, num);
System.out.println("一共查到:"+tds.totalHits);
for(ScoreDoc scoreDoc:tds.scoreDocs){
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach"));
}
} catch (Exception e) {
e.printStackTrace();
}
}

Lucene的Query类介绍

4、数字搜索

/**
* 数字搜索
* @param field
* @param start
* @param end
* @param num
*/
public void searchByNums(String field,int start,int end,int num){
try {
IndexSearcher searcher = getSearcher();
Query query = NumericRangeQuery.newIntRange(field, start, end, true, true);
TopDocs tds = searcher.search(query, num);
System.out.println("一共查到:"+tds.totalHits);
for(ScoreDoc scoreDoc:tds.scoreDocs){
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach"));
}
} catch (Exception e) {
e.printStackTrace();
}
}

二、模糊匹配

/**
* 通配符
* @param field
* @param value
* @param num
*/
public void searchByWildcard(String field,String value,int num){
try {
IndexSearcher searcher = getSearcher();
WildcardQuery query = new WildcardQuery(new Term(field,value));
TopDocs tds = searcher.search(query, num);
System.out.println("一共查到:"+tds.totalHits);
for(ScoreDoc scoreDoc:tds.scoreDocs){
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach"));
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
/**
* BooleanQuery可以连接多个子查询
* Occur.MUST表示必须出现
* Occur.SHOULD表示可以出现
* Occur.MUSE_NOT表示不能出现
* @param field
* @param value
* @param num
*/
@SuppressWarnings("deprecation")
public void searchByBoolean(String[] field,String[] value,int num){
try {
if(field.length!=value.length){
System.out.println("field的长度需要与value的长度相等!");
System.exit(0);
}
IndexSearcher searcher = getSearcher();
BooleanQuery query = null;
TopDocs tds = null;
for(int i = 0;i<field.length;i++){
query = new BooleanQuery();
query.add(new TermQuery(new Term(field[i],value[i])),Occur.SHOULD);
tds = searcher.search(query, num);
}
System.out.println("一共查询:"+tds.totalHits);
for(ScoreDoc doc:tds.scoreDocs){
Document document = searcher.doc(doc.doc);
System.out.println(document.get("id")+"---->"+
document.get("name")+"["+document.get("email")+"]-->"+document.get("id")+","+
document.get("attach"));
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
public void searchByPhrase(int num){
try {
IndexSearcher searcher = getSearcher();
PhraseQuery query = new PhraseQuery();
query.setSlop(3);
query.add(new Term("content","like"));
// //第一个Term
query.add(new Term("content","football"));
TopDocs tds = searcher.search(query, num);
System.out.println("一共查询了:"+tds.totalHits);
for(ScoreDoc sd:tds.scoreDocs) {
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach"));
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
/**
* 相似度匹配查询
* @param num
*/
public void searchByFuzzy(int num) {
try {
IndexSearcher searcher = getSearcher();
FuzzyQuery query = new FuzzyQuery(new Term("name","jake"));
TopDocs tds = searcher.search(query, num);
System.out.println("一共查询了:"+tds.totalHits);
for(ScoreDoc sd:tds.scoreDocs) {
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public void searchByQueryParse(Query query,int num) {
try {
IndexSearcher searcher = getSearcher();
TopDocs tds = searcher.search(query, num);
System.out.println("一共查询了:"+tds.totalHits);
for(ScoreDoc sd:tds.scoreDocs) {
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date")+"=="+sd.score);
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}