Lucene.Net+盘古分词->开发自己的搜索引擎

时间:2021-11-18 07:58:30

//封装类

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.Reflection;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
namespace SearchTest
{
    /// <summary>
    /// 盘古分词在lucene.net中的使用帮助类
    /// 调用PanGuLuceneHelper.instance
    /// </summary>
    public class PanGuLuceneHelper
    {
        private PanGuLuceneHelper() { }

#region 单一实例
        private static PanGuLuceneHelper _instance = null;
        /// <summary>
        /// 单一实例
        /// </summary>
        public static PanGuLuceneHelper instance
        {
            get
            {
                if (_instance == null) _instance = new PanGuLuceneHelper();
                return _instance;
            }
        }
        #endregion

#region 分词测试
        /// <summary>
        /// 分词测试
        /// </summary>
        /// <param name="keyword"></param>
        /// <returns></returns>
        public string Token(string keyword)
        {
            string ret = "";
            System.IO.StringReader reader = new System.IO.StringReader(keyword);
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
            bool hasNext = ts.IncrementToken();
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;
            while (hasNext)
            {
                ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                ret += ita.Term + "|";
                hasNext = ts.IncrementToken();
            }
            ts.CloneAttributes();
            reader.Close();
            analyzer.Close();
            return ret;
        }
        #endregion

#region 创建索引
        /// <summary>
        /// 创建索引
        /// </summary>
        /// <param name="datalist"></param>
        /// <returns></returns>
        public bool CreateIndex(List<MySearchUnit> datalist)
        {
            IndexWriter writer = null;
            try
            {
                writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);//false表示追加(true表示删除之前的重新写入)
            }
            catch
            {
                writer = new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);//false表示追加(true表示删除之前的重新写入)
            }
            foreach (MySearchUnit data in datalist)
            {
                CreateIndex(writer, data);
            }
            writer.Optimize();
            writer.Dispose();
            return true;
        }

public bool CreateIndex(IndexWriter writer, MySearchUnit data)
        {
            try
            {

if (data == null) return false;
                Document doc = new Document();
                Type type = data.GetType();//assembly.GetType("Reflect_test.PurchaseOrderHeadManageModel", true, true); //命名空间名称 + 类名

//创建类的实例    
                //object obj = Activator.CreateInstance(type, true);  
                //获取公共属性    
                PropertyInfo[] Propertys = type.GetProperties();
                for (int i = 0; i < Propertys.Length; i++)
                {
                    //Propertys[i].SetValue(Propertys[i], i, null); //设置值
                    PropertyInfo pi = Propertys[i];
                    string name=pi.Name;
                    object objval = pi.GetValue(data, null);
                    string value = objval == null ? "" : objval.ToString(); //值
                    if (name == "id" || name=="flag" )//id在写入索引时必是不分词,否则是模糊搜索和删除,会出现混乱
                    {
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));//id不分词
                    }
                    else
                    {
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
                    }
                }
                writer.AddDocument(doc);
            }
            catch (System.IO.FileNotFoundException fnfe)
            {
                throw fnfe;
            }
            return true;
        }
        #endregion

#region 在title和content字段中查询数据
        /// <summary>
        /// 在title和content字段中查询数据
        /// </summary>
        /// <param name="keyword"></param>
        /// <returns></returns>
        public List<MySearchUnit> Search(string keyword)
        {

string[] fileds = { "title", "content" };//查询字段
            //Stopwatch st = new Stopwatch();
            //st.Start();
            QueryParser parser = null;// new QueryParser(Lucene.Net.Util.Version.LUCENE_30, field, analyzer);//一个字段查询
            parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询
            Query query = parser.Parse(keyword);
            int n = 1000;
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读
            TopDocs docs = searcher.Search(query, (Filter)null, n);
            if (docs == null || docs.TotalHits == 0)
            {
                return null;
            }
            else
            {
                List<MySearchUnit> list = new List<MySearchUnit>();
                int counter = 1;
                foreach (ScoreDoc sd in docs.ScoreDocs)//遍历搜索到的结果
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

string createdate = doc.Get("createdate");
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                        highlighter.FragmentSize = 50;
                        content = highlighter.GetBestFragment(keyword, content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (titlehighlight != "") title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                    }
                    counter++;
                }
                return list;
            }
            //st.Stop();
            //Response.Write("查询时间:" + st.ElapsedMilliseconds + " 毫秒<br/>");

}
        #endregion

#region 在不同的分类下再根据title和content字段中查询数据(分页)
        /// <summary>
        /// 在不同的类型下再根据title和content字段中查询数据(分页)
        /// </summary>
        /// <param name="_flag">分类,传空值查询全部</param>
        /// <param name="keyword"></param>
        /// <param name="PageIndex"></param>
        /// <param name="PageSize"></param>
        /// <param name="TotalCount"></param>
        /// <returns></returns>
        public List<MySearchUnit> Search(string _flag,string keyword, int PageIndex, int PageSize, out int TotalCount)
        {
            if (PageIndex < 1) PageIndex = 1;
            //Stopwatch st = new Stopwatch();
            //st.Start();
            BooleanQuery bq = new BooleanQuery();
            if (_flag != "")
            {
                QueryParser qpflag = new QueryParser(version, "flag", analyzer);
                Query qflag = qpflag.Parse(_flag);
                bq.Add(qflag, Occur.MUST);//与运算
            }
            if (keyword != "")
            {
                string[] fileds = { "title", "content" };//查询字段
                QueryParser parser = null;// new QueryParser(version, field, analyzer);//一个字段查询
                parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询
                Query queryKeyword = parser.Parse(keyword);
                bq.Add(queryKeyword, Occur.MUST);//与运算
            }
            
            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读
            searcher.Search(bq, collector);
            if (collector == null || collector.TotalHits == 0)
            {
                TotalCount = 0;
                return null;
            }
            else
            {
                int start = PageSize * (PageIndex - 1);
                //结束数
                int limit = PageSize;
                ScoreDoc[] hits = collector.TopDocs(start, limit).ScoreDocs;
                List<MySearchUnit> list = new List<MySearchUnit>();
                int counter = 1;
                TotalCount = collector.TotalHits;
                foreach (ScoreDoc sd in hits)//遍历搜索到的结果
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                        highlighter.FragmentSize = 50;
                        content = highlighter.GetBestFragment(keyword, content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (titlehighlight != "") title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                    }
                    counter++;
                }
                return list;
            }
            //st.Stop();
            //Response.Write("查询时间:" + st.ElapsedMilliseconds + " 毫秒<br/>");

}
        #endregion

#region 删除索引数据(根据id)
        /// <summary>
        /// 删除索引数据(根据id)
        /// </summary>
        /// <param name="id"></param>
        /// <returns></returns>
        public bool Delete(string id)
        {
            bool IsSuccess = false;
            Term term = new Term("id", id);
            //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            //Version version = new Version();
            //MultiFieldQueryParser parser = new MultiFieldQueryParser(version, new string[] { "name", "job" }, analyzer);//多个字段查询
            //Query query = parser.Parse("小王");

//IndexReader reader = IndexReader.Open(directory_luce, false);
            //reader.DeleteDocuments(term);
            //Response.Write("删除记录结果: " + reader.HasDeletions + "<br/>");
            //reader.Dispose();

IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            writer.DeleteDocuments(term); // writer.DeleteDocuments(term)或者writer.DeleteDocuments(query);
            ////writer.DeleteAll();
            writer.Commit();
            //writer.Optimize();//
            IsSuccess = writer.HasDeletions();
            writer.Dispose();
            return IsSuccess;
        }
        #endregion

#region 删除全部索引数据
        /// <summary>
        /// 删除全部索引数据
        /// </summary>
        /// <returns></returns>
        public bool DeleteAll()
        {
            bool IsSuccess = true;
            try
            {
                IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
                writer.DeleteAll();
                writer.Commit();
                //writer.Optimize();//
                IsSuccess = writer.HasDeletions();
                writer.Dispose();
            }
            catch
            {
                IsSuccess = false;
            }
            return IsSuccess;
        }
        #endregion

#region directory_luce
        private Lucene.Net.Store.Directory _directory_luce = null;
        /// <summary>
        /// Lucene.Net的目录-参数
        /// </summary>
        public Lucene.Net.Store.Directory directory_luce
        {
            get
            {
                if (_directory_luce == null) _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
                return _directory_luce;
            }
        }
        #endregion

#region directory
        private System.IO.DirectoryInfo _directory = null;
        /// <summary>
        /// 索引在硬盘上的目录
        /// </summary>
        public System.IO.DirectoryInfo directory
        {
            get
            {
                if (_directory == null)
                {
                    string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";
                    if (System.IO.Directory.Exists(dirPath) == false) _directory = System.IO.Directory.CreateDirectory(dirPath);
                    else _directory = new System.IO.DirectoryInfo(dirPath);
                }
                return _directory;
            }
        }
        #endregion

#region analyzer
        private Analyzer _analyzer = null;
        /// <summary>
        /// 分析器
        /// </summary>
        public Analyzer analyzer
        {
            get
            {
                //if (_analyzer == null)
                {
                    _analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();//盘古分词分析器
                    //_analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);//标准分析器
                }
                return _analyzer;
            }
        }
        #endregion

#region version
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;
        /// <summary>
        /// 版本号枚举类
        /// </summary>
        public Lucene.Net.Util.Version version
        {
            get
            {
                return _version;
            }
        }
        #endregion
    }

#region 索引的一个行单元,相当于数据库中的一行数据
    /// <summary>
    /// 索引的一个行单元,相当于数据库中的一行数据
    /// </summary>
    public class MySearchUnit
    {
        public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
        {
            this.id = _id;
            this.title = _title;
            this.content = _content;
            this.flag = _flag;
            this.imageurl = _imageurl;
            this.updatetime = _updatetime;
        }
        /// <summary>
        /// 唯一的id号
        /// </summary>
        public string id { get; set; }
        /// <summary>
        /// 标题
        /// </summary>
        public string title { get; set; }
        /// <summary>
        /// 内容
        /// </summary>
        public string content { get; set; }
        /// <summary>
        /// 其他信息
        /// </summary>
        public string flag { get; set; }
        /// <summary>
        /// 图片路径
        /// </summary>
        public string imageurl { get; set; }
        /// <summary>
        /// 时间
        /// </summary>
        public string updatetime { get; set; }
    }
    #endregion
}

原文地址:http://blog.csdn.net/pukuimin1226/article/details/17558247

Lucene.Net+盘古分词->开发自己的搜索引擎的更多相关文章

  1. 【盘古分词】Lucene&period;Net 盘古分词 实现公众号智能自动回复

    盘古分词是一个基于 .net framework 的中英文分词组件.主要功能 中文未登录词识别 盘古分词可以对一些不在字典中的未登录词自动识别 词频优先 盘古分词可以根据词频来解决分词的歧义问题 多元 ...

  2. 【原创】Lucene&period;Net&plus;盘古分词器&lpar;详细介绍&rpar;

    本章阅读概要 1.Lucenne.Net简介 2.介绍盘古分词器 3.Lucene.Net实例分析 4.结束语(Demo下载) Lucene.Net简介 Lucene.net是Lucene的.net移 ...

  3. Lucene&period;Net&plus;盘古分词器&lpar;详细介绍&rpar;&lpar;转&rpar;

    出处:http://www.cnblogs.com/magicchaiy/archive/2013/06/07/LuceneNet%E7%9B%98%E5%8F%A4%E5%88%86%E8%AF%8 ...

  4. Lucene&period;Net&plus;盘古分词器&lpar;详细介绍&rpar;

    本章阅读概要1.Lucenne.Net简介2.介绍盘古分词器3.Lucene.Net实例分析4.结束语(Demo下载)Lucene.Net简介 Lucene.net是Lucene的.net移植版本,是 ...

  5. Lucene&period;Net&plus;盘古分词

    前言 各位朋友,谢谢大家的支持,由于文件过大,有考虑到版权的问题,故没有提供下载,本人已建立一个搜索技术交流群:77570783,源代码已上传至群共享,需要的朋友,请自行下载! 首先自问自答几个问题, ...

  6. 完整的站内搜索Demo&lpar;Lucene&period;Net&plus;盘古分词&rpar;

    前言 首先自问自答几个问题,以让各位看官了解写此文的目的 什么是站内搜索?与一般搜索的区别? 很多网站都有搜索功能,很多都是用SQL语句的Like实现的,但是Like无法做到模糊匹配(例如我搜索“.n ...

  7. 使用Lucene&period;net&plus;盘古分词实现搜索查询

    这里我的的Demo的逻辑是这样的:首先我基本的数据是储存在Sql数据库中,然后我把我的必需的数据推送到MongoDB中,这样再去利用Lucene.net+盘古创建索引:其中为什么要这样把数据推送到Mo ...

  8. 完整的站内搜索实战应用&lpar;Lucene&period;Net&plus;盘古分词&rpar;

    首先自问自答几个问题,以让各位看官了解写此文的目的 什么是站内搜索?与一般搜索的区别? 多网站都有搜索功能,很多都是用SQL语句的Like实现的,但是Like无法做到模糊匹配(例如我搜索". ...

  9. Lucene&period;net 全文检索 盘古分词

    lucene.net + 盘古分词 引用: 1.Lucene.Net.dll 2.PanGu.Lucene.Analyzer.dll 3.PanGu.HighLight.dll 4.PanGu.dll ...

随机推荐

  1. PHP面向对象讲解

    面向对象   类<------>对象 面向对象例题 理解:  减少 变量的重新定义    比如  变量前的  var   $    思路更加明确 class Yuan ----后面不加() ...

  2. hibernate复合主键

    需要用到实体类Role的主键和Menu的主键结合起来作为实体类RoleMenu的主键,那么通过Hibernate具体实现如下: RoleMenu实体类:(注意该实体类需要实现Serializable接 ...

  3. php涉及数据库操作时响应很慢。

    症状描述: 网站是php开发的,大部分页面响应很慢. 本地开发时响应速度很快,但是部署到生产环境后大部分响应很慢. 通过谷歌浏览调试发现PHP页面加载很慢,有个别的php请求的响应时间甚至超过10秒, ...

  4. MyBatis Mapper 文件例子

    转载:http://blog.csdn.net/ppby2002/article/details/20611737 <?xml version="1.0" encoding= ...

  5. c&num; Chart设置样式

    一.chart属性熟悉 能够对C# chart控件属性熟悉起帮助的code, 现在分享给大家,Chart Operate Demo Code Download,如有帮助,别忘点个赞. Chart    ...

  6. OC的单例模式的实现

    下面是在ARC,GCD下的单例模式实现: 头文件里申明类方法getInstance: #import <Foundation/Foundation.h> @interface Single ...

  7. 【转】用串口登录Beaglebone Black、用usb共享电脑网络、内核模块的本地编译

    原文网址:http://bbs.eeworld.com.cn/thread-431507-1-1.html 串口连接BBB使用usb线可以连接BBB和电脑,用ssh就可以登录BBB来进行操作.但有时候 ...

  8. Oracle左连接&comma;右连接

    Oracle左连接,右连接 数据表的连接有: 1.内连接(自然连接): 只有两个表相匹配的行才能在结果集中出现 2.外连接: 包括 (1)左外连接(左边的表不加限制) (2)右外连接(右边的表不加限制 ...

  9. Python——Flask框架——Web表单

    一.框架Flask-WTF 安装: pip install flask-wtf 需要程序设置一个密钥 app = Flask(__name__) app.config['SECRET_KEY'] = ...

  10. Azure VMSS &lpar;3&rpar; 修改VM Template并创建VMSS

    <Windows Azure Platform 系列文章目录> 在开始本章内容之前,我们需要准备好Azure VM的镜像,具体可以参考:Azure VMSS (2) 对VM执行Genera ...