当前位置:编程学习 > C#/ASP.NET >>

求教关于lucene.net盘古分词器的问题

我在搜索中加入了盘古分词器,用 PanGuAnalyzer 替代了 StandardAnalyzer。但是重新建立索引之后,搜索似乎每匹配到一个关键词就返回一次结果,所以搜索结果大量重复,求教怎么解决。
这是搜索方法的代码,求大神解救:
 private void search()
            {
                DateTime start = DateTime.Now;

                // create the searcher
                // index is placed in "index" subdirectory
                string indexDirectory = Server.MapPath("~/App_Data/index");
                IndexSearcher searcher = new IndexSearcher(indexDirectory);

                // parse the query, "text" is the default field to search
                QueryParser parser = new QueryParser(Version.LUCENE_29, "text",new PanGuAnalyzer());
                //MessageBox.Show(this.Query);
                Query query = parser.Parse(this.Query);

                // create the result DataTable
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("sample", typeof(string));
                this.Results.Columns.Add("path", typeof(string));
                this.Results.Columns.Add("url", typeof(string));
                this.Results.Columns.Add("score", typeof(string));

                // search
                TopDocs hits = searcher.Search(query, 500);
                this.total = hits.totalHits;


                // create highlighter
                PanGu.HighLight.Formatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color:red;\">", "</span>");
                
                PanGu.Segment scorer = new Segment();
                // QueryScorer scorer = new QueryScorer(query);
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter,scorer);
                highlighter.FragmentSize = 200;
               
                // initialize startAt
                this.startAt = InitStartAt();

                // how many items we should show - less than defined at the end of the results
                int resultsCount = Math.Min(total, this.maxResults + this.startAt);
                
                
                int i = startAt;
                while(i<resultsCount)
                {
                    // get the document from inde\\\
                //fuck:
                    Document doc = searcher.Doc((int)hits.scoreDocs[i].score);

                    TokenStream stream = new PanGuAnalyzer().TokenStream("text", new StringReader(doc.Get("text")));

                     
                    String sample_temp= highlighter.GetBestFragment(Query,doc.Get("text"));
                    //MessageBox.Show(doc.Get("text"));
                    //String sample = doc.Get("text");
                    char[] ctext = new char[5000];
                    bool flag_text = false;
                   
                    i++;
                    //MessageBox.Show(doc.Get("path"));
                    string sample = " "; 
                    try
                    {
                        for (int j = sample_temp.Length-1; j > 0; j--)
                        {
                            if (flag_text == false)
                            {
                                if (sample_temp[j] == '.')
                                {
                                    ctext[j] = '.';
                                    flag_text = true;
                                }
                            }
                            else ctext[j] = sample_temp[j];

                        }
                    }
                    catch (Exception e)
                    {
                        
                        sample = sample_temp.Length +" : "+ e.StackTrace;
                    }
                    sample = "\n "
                        +"  "+sample_temp.Length+" : "+ new String(ctext);
                    
                     
                    String path = doc.Get("path");
                    // System.Console.WriteLine(doc.Get("title") + " score boost for: " + hits.ScoreDocs.ToString());
                    // create a new row with the result data
                    DataRow row = this.Results.NewRow();

                    string originaltitle = doc.Get("title");
                    //string title = originaltitle.Substring(0,originaltitle.Length - 21);
                    char[] ctitle = new char[100];
                    for (int j = 0; j < originaltitle.Length; j++)
                    {
                        if (originaltitle[j] == '(') break;
                        ctitle[j] = originaltitle[j];
                    }
                    string title = new String(ctitle);
                    ScoreDoc[] sd = new ScoreDoc[10000];
                    sd[i] = hits.scoreDocs[i];
                    row["title"] = title;

                    row["path"] = "api/" + path;
                    row["url"] = "http:\\\\docs.oracle.com\\javase\\6\\docs\\api\\" + path;
                    row["sample"] = sample;
                    row["score"] = doc.Get("title") + " score boost for: " + sd[i].score;

                    this.Results.Rows.Add(row);
                }
                searcher.Close();

                // result information
                this.duration = DateTime.Now - start;
                this.fromItem = startAt + 1;
                this.toItem = Math.Min(startAt + maxResults, total);
            }
lucene.net 盘古 搜索
补充:.NET技术 ,  C#
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部份技术文章来自网络,