运用lucene+庖丁解牛分词器，集成ssh，对数据库全文搜索

NealCai

浏览: 143042 次
性别:
来自: 上海

最近访客更多访客>>

wxd2228437723

feng_1086

chuhouqi

elephant_xiang

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

lucene PaodingAnalyzer java jdbc

使用 lucene + 庖丁解牛分词器，主要是为了对数据库中某张表的某些字段进行全文搜索。lucene 自带的分析器对中文支持不好，例如 StandardAnalyzer 基本上是逐字切分、逐字匹配；而庖丁解牛基于 dic 词典进行分词，效率很高。
首先我封装了一个 lucene 类；为了简单起见，数据库操作直接使用 jdbc。

package lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import com.model.searchResult;

public class lucene {

private String searchString;
private List<searchResult> list = new ArrayList<searchResult>();;

public List<searchResult> getList() {
return list;
}

public void setList(List<searchResult> list) {
this.list = list;
}

public lucene() {
}

public lucene(String searchString) {
this.setSearchString(searchString);
}

public void setSearchString(String searchString) {
this.searchString = searchString;
}

public String getSearchString() {
return searchString;
}

// 对于name属性进行全文搜索，返回hits值
public Hits seacher(String queryString) {
Hits hits = null;

try {
File indexFile = new File("d:/index/");
IndexReader reader = IndexReader.open(indexFile);
Analyzer analyzer = new PaodingAnalyzer();
QueryParser parser = new QueryParser("name", analyzer);
IndexSearcher searcher = new IndexSearcher(reader);
Query query = parser.parse(queryString);
hits = searcher.search(query);

} catch (Exception e) {
System.out.print(e);
}
return hits;
}

//创建索引
public void indexCreateUtil() {
String driver = "com.mysql.jdbc.Driver";
String url = "jdbc:mysql://localhost:3306/rent";
String user = "root";
String password = "111111";
//查询所有房屋信息放在index索引中
String query = "select a.house_id,a.house_name,b.house_type_name,house_rent_type_name,a.single_day_price from t_house a,t_house_type b,t_house_rent_type c where a.house_type_id=b.house_type_id and a.house_rent_type_id=c.house_rent_type_id";
try {
Class.forName(driver);
Connection conn = DriverManager.getConnection(url, user, password);

if (!conn.isClosed())
System.out.println("数据库连接成功！");
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(query);
Analyzer analyzer = new PaodingAnalyzer();
try {
// 使用索引文件夹，庖丁解牛分词器创建IndexWriter
IndexWriter indexWriter = new IndexWriter("d:/index/",
analyzer, true);

while (rs.next()) {
Document doc = new Document();
doc.add(new Field("id", rs.getString("house_id"),
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field("name", rs.getString("house_name"),
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field("type_name", rs
.getString("house_type_name"), Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field("rent_name", rs
.getString("house_rent_type_name"),
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field("price",
rs.getString("single_day_price"), Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.NO));

indexWriter.addDocument(doc);
}
//优化
indexWriter.optimize();
indexWriter.close();
} catch (IOException e) {

e.printStackTrace();
}
conn.close();
} catch (ClassNotFoundException e) {
System.out.println("找不到驱动程序");
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
}

//用索引搜索,并加入list
public List<searchResult> indexSearchUtil(String search) {
Hits hits = seacher(search);

for (int i = 0; i < hits.length(); i++) {
try {
Document doc = hits.doc(i);
int id = Integer.parseInt(doc.get("id"));
String name = doc.get("name");
String type_name = doc.get("type_name");
String rent_name = doc.get("rent_name");
int price = Integer.parseInt(doc.get("price"));
// System.out.println(id + "" + name + "" + type_name + ""
// + rent_name + "" + price);

// List<searchResult> list = new ArrayList<searchResult>();
searchResult sr = new searchResult();
sr.setHouse_id(id);
sr.setHouse_name(name);
sr.setHouse_rent_name(rent_name);
sr.setHouse_type_name(type_name);
sr.setSingle_day_price(price);
list.add(sr);
} catch (IOException e) {

e.printStackTrace();
}
}
return list;
}
}

然后在luceneSearchAction.java中调用
package action;

import java.util.List;

import com.opensymphony.xwork2.ActionContext;

import lucene.lucene;

/**
 * Action that runs a full-text house search for the submitted
 * {@code searchContent} and publishes the matches as {@code house_list}.
 */
public class luceneSearchAction implements action {
    /** Search text bound from the request. */
    private String searchContent;
    /** Matches of the last {@link #execute()} call; {@code null} before it has run. */
    private List house_list;

    public String getSearchContent() {
        return searchContent;
    }

    public void setSearchContent(String searchContent) {
        this.searchContent = searchContent;
    }

    public List getHouse_list() {
        return house_list;
    }

    public void setHouse_list(List house_list) {
        this.house_list = house_list;
    }

    /**
     * Rebuilds the index, searches it for {@code searchContent}, and exposes
     * the result list both through {@link #getHouse_list()} and as the
     * {@code house_list} session attribute.
     *
     * @return {@code SUCCESS}
     * @throws Exception if indexing or searching fails with an unchecked error
     */
    public String execute() throws Exception {
        lucene index = new lucene();
        // NOTE(review): this recreates the whole index on every request;
        // consider building it once, outside the request path.
        index.indexCreateUtil();
        index.indexSearchUtil(searchContent);

        // Populate the action property too; it used to stay null even though
        // the class exposes a getter for it.
        house_list = index.getList();
        ActionContext.getContext().getSession()
                .put("house_list", house_list);
        return SUCCESS;
    }
}

最后在页面上调用
<%-- Iterate over the house_list attribute; each element is exposed as orderItem
     and its house_id and house_name properties are printed. --%>
<c:forEach items="${house_list}" var="orderItem">
${orderItem.house_id}
${orderItem.house_name}
...
...
</c:forEach>

分享到：