求教关于lucene.net盘古分词器的问题
我在搜索中加入了盘古分词器之后,用panguanalyzer替代了standardanalyzer,但是建立新的索引之后的搜索貌似是搜索到一个关键词就返回一次结果,所以搜索结果大量重复,求怎么解决....z这是搜索方法的代码,求大神解救.....
private void search()lucene.net 盘古 搜索
{
DateTime start = DateTime.Now;
// create the searcher
// index is placed in "index" subdirectory
string indexDirectory = Server.MapPath("~/App_Data/index");
IndexSearcher searcher = new IndexSearcher(indexDirectory);
// parse the query, "text" is the default field to search
QueryParser parser = new QueryParser(Version.LUCENE_29, "text",new PanGuAnalyzer());
//MessageBox.Show(this.Query);
Query query = parser.Parse(this.Query);
// create the result DataTable
this.Results.Columns.Add("title", typeof(string));
this.Results.Columns.Add("sample", typeof(string));
this.Results.Columns.Add("path", typeof(string));
this.Results.Columns.Add("url", typeof(string));
this.Results.Columns.Add("score", typeof(string));
// search
TopDocs hits = searcher.Search(query, 500);
this.total = hits.totalHits;
// create highlighter
PanGu.HighLight.Formatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color:red;\">", "</span>");
PanGu.Segment scorer = new Segment();
// QueryScorer scorer = new QueryScorer(query);
PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter,scorer);
highlighter.FragmentSize = 200;
// initialize startAt
this.startAt = InitStartAt();
// how many items we should show - less than defined at the end of the results
int resultsCount = Math.Min(total, this.maxResults + this.startAt);
int i = startAt;
while(i<resultsCount)
{
// get the document from inde\\\
//fuck:
Document doc = searcher.Doc((int)hits.scoreDocs[i].score);
TokenStream stream = new PanGuAnalyzer().TokenStream("text", new StringReader(doc.Get("text")));
String sample_temp= highlighter.GetBestFragment(Query,doc.Get("text"));
//MessageBox.Show(doc.Get("text"));
//String sample = doc.Get("text");
char[] ctext = new char[5000];
bool flag_text = false;
i++;
//MessageBox.Show(doc.Get("path"));
string sample = " ";
try
{
for (int j = sample_temp.Length-1; j > 0; j--)
{
if (flag_text == false)
{
if (sample_temp[j] == '.')
{
ctext[j] = '.';
flag_text = true;
}
}
else ctext[j] = sample_temp[j];
}
}
catch (Exception e)
{
sample = sample_temp.Length +" : "+ e.StackTrace;
}
sample = "\n "
+" "+sample_temp.Length+" : "+ new String(ctext);
String path = doc.Get("path");
// System.Console.WriteLine(doc.Get("title") + " score boost for: " + hits.ScoreDocs.ToString());
// create a new row with the result data
DataRow row = this.Results.NewRow();
string originaltitle = doc.Get("title");
//string title = originaltitle.Substring(0,originaltitle.Length - 21);
char[] ctitle = new char[100];
for (int j = 0; j < originaltitle.Length; j++)
{
if (originaltitle[j] == '(') break;
ctitle[j] = originaltitle[j];
}
string title = new String(ctitle);
ScoreDoc[] sd = new ScoreDoc[10000];
sd[i] = hits.scoreDocs[i];
row["title"] = title;
row["path"] = "api/" + path;
row["url"] = "http:\\\\docs.oracle.com\\javase\\6\\docs\\api\\" + path;
row["sample"] = sample;
row["score"] = doc.Get("title") + " score boost for: " + sd[i].score;
this.Results.Rows.Add(row);
}
searcher.Close();
// result information
this.duration = DateTime.Now - start;
this.fromItem = startAt + 1;
this.toItem = Math.Min(startAt + maxResults, total);
}
补充:.NET技术 , C#