Lucene 中文分词
public class UserDao {// 查询所有的用户信息
/**
 * Returns a fixed, in-memory set of four sample users (no database access).
 * Users 1 and 4 deliberately carry the same name and password values.
 *
 * @return a new list containing the sample {@code User} objects in id order 1..4
 */
public List findAll() {
    List users = new ArrayList();

    User first = new User();
    first.setUserId(1);
    first.setUserName("张三");
    first.setUserPass("zhangsan");
    users.add(first);

    User second = new User();
    second.setUserId(2);
    second.setUserName("李四");
    second.setUserPass("lisi");
    users.add(second);

    User third = new User();
    third.setUserId(3);
    third.setUserName("王五");
    third.setUserPass("wangwu");
    users.add(third);

    User fourth = new User();
    fourth.setUserId(4);
    fourth.setUserName("张三");
    fourth.setUserPass("zhangsan");
    users.add(fourth);

    return users;
}
/**
 * Appends one hard-coded user (id 5) as a new document to the existing
 * index in the hard-coded index directory, optimizes the index, and prints
 * the elapsed time in milliseconds.
 *
 * <p>All three fields are stored and indexed with
 * {@code Field.Index.UN_TOKENIZED}, i.e. each value is indexed as a single
 * un-analyzed term, so only queries matching the whole value will find it.
 *
 * @throws IOException if the index cannot be opened, written, optimized or closed
 */
public void addUser() throws IOException {
    // Directory where the index is stored.
    File indexFilePath = new File("c:\\d");
    // Analyzer handed to the writer.
    Analyzer ana = new StandardAnalyzer();
    // create == false: open the existing index instead of overwriting it.
    IndexWriter indexwr = new IndexWriter(indexFilePath, ana, false);
    // Timing starts once the writer is open.
    long startTime = System.currentTimeMillis();
    try {
        // The new user to be indexed.
        User user = new User();
        user.setUserId(5);
        user.setUserName("赵二");
        user.setUserPass("zhaoer");

        Document document = new Document();
        document.add(new Field("userId", user.getUserId() + "",
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        document.add(new Field("userName", user.getUserName(),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        document.add(new Field("userPass", user.getUserPass(),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        indexwr.addDocument(document);

        indexwr.optimize();
    } finally {
        // Always close the writer, even when adding or optimizing fails;
        // otherwise the writer (and the index lock it holds) is leaked.
        indexwr.close();
    }
    // Report how long indexing took.
    long endTime = System.currentTimeMillis();
    System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
}
/**
 * Builds the index in the hard-coded index directory from scratch: one
 * document per user returned by {@link #findAll()}, then optimizes the
 * index and prints the elapsed time in milliseconds.
 *
 * <p>All three fields are stored and indexed with
 * {@code Field.Index.UN_TOKENIZED}, i.e. each value is indexed as a single
 * un-analyzed term, so only queries matching the whole value will find it.
 *
 * @throws IOException if the index cannot be created, written, optimized or closed
 */
public void createIndex() throws IOException {
    // Directory where the index is stored.
    File indexFilePath = new File("c:\\d");
    // Analyzer handed to the writer.
    Analyzer ana = new StandardAnalyzer();
    // create == true: create a new index, replacing any existing one.
    IndexWriter indexwr = new IndexWriter(indexFilePath, ana, true);
    // Timing starts once the writer is open.
    long startTime = System.currentTimeMillis();
    try {
        // Source data: the sample user table.
        List list = this.findAll();
        if (list != null) {
            // An empty list simply yields zero iterations.
            for (int i = 0; i < list.size(); i++) {
                User user = (User) list.get(i);
                Document document = new Document();
                document.add(new Field("userId", user.getUserId() + "",
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                document.add(new Field("userName", user.getUserName(),
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                document.add(new Field("userPass", user.getUserPass(),
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                indexwr.addDocument(document);
            }
        }
        indexwr.optimize();
    } finally {
        // Always close the writer, even when adding or optimizing fails;
        // otherwise the writer (and the index lock it holds) is leaked.
        indexwr.close();
    }
    // Report how long indexing took.
    long endTime = System.currentTimeMillis();
    System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
}
/**
 * Parses {@code queryString} with a {@code QueryParser} whose default field
 * is {@code "userPass"} and runs the resulting query against the index in
 * the hard-coded index directory.
 *
 * <p>NOTE(review): the {@code IndexSearcher} opened here is never closed.
 * The returned {@code Hits} presumably loads documents lazily through that
 * searcher, so closing it before returning would likely break callers;
 * confirm before changing.
 *
 * @param queryString the query text handed to the query parser
 * @return the hits produced by the search
 * @throws IOException if the index cannot be opened or searched
 * @throws ParseException if {@code queryString} cannot be parsed
 */
public Hits seacher(String queryString) throws IOException, ParseException {
    // Open the index for reading.
    IndexSearcher searcher = new IndexSearcher("c:\\d");
    Analyzer analyzer = new StandardAnalyzer();

    // Query terms without an explicit field are matched against "userPass".
    QueryParser parser = new QueryParser("userPass", analyzer);
    System.out.println(parser);
    Query parsed = parser.parse(queryString);

    // An exact-term lookup on "userName" (TermQuery) was tried here earlier
    // and is currently disabled.
    return searcher.search(parsed);
}
/**
 * Demo driver: rebuilds the index from the sample users, then searches it
 * for {@code "zhang"} and prints the hit count followed by the stored
 * userName, userId and userPass of every hit.
 *
 * @param args unused
 */
public static void main(String[] args) {
    UserDao userDao = new UserDao();

    // Step 1: (re)build the index. A failure is only reported; the search
    // below is still attempted.
    try {
        userDao.createIndex();
    } catch (IOException indexFailure) {
        indexFailure.printStackTrace();
    }

    // Appending the extra user via userDao.addUser() is currently disabled.

    // Step 2: run the search and dump every hit.
    try {
        Hits results = userDao.seacher("zhang");
        System.out.println(results.length());
        for (int pos = 0; pos < results.length(); pos++) {
            Document found = results.doc(pos);
            System.out.println(found.get("userName"));
            System.out.println(found.get("userId"));
            System.out.println(found.get("userPass"));
        }
    } catch (ParseException badQuery) {
        badQuery.printStackTrace();
    } catch (IOException searchFailure) {
        searchFailure.printStackTrace();
    }
}
这样只能对整个字段做精确匹配检索,类似 ='字段内容',现在想通过关键字查询却不行!哪位高手知道如何对中文分词?我已经把词分出来了,但不知道该用在哪里!有的话请贴出代码看看!在线急等! --------------------编程问答-------------------- 这个爱莫能助啊,这东西我看了一段时间,太深奥了。关于分词,这也是个难点:有基于词库的分词,还有那种叫什么二分法的吧。加油! --------------------编程问答-------------------- 这个在网上有很多的,你搜一下试试,帮LZ顶! --------------------编程问答-------------------- 每天坚持回复
--------------------编程问答-------------------- java分词我不会啊 有哪位大侠帮帮我啊 万分感谢
补充:Java , Java EE