Please wait. This can take a few minutes ...
Downloading a project requires many resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
lodsve.search.engine.LuceneSearchEngine Maven / Gradle / Ivy
/*
* Copyright (C) 2018 Sun.Hao
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package lodsve.search.engine;
import lodsve.core.utils.StringUtils;
import lodsve.search.bean.BaseSearchBean;
import lodsve.search.exception.LuceneException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.Pageable;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 基于lucene实现的索引引擎.
*
* @author sunhao(sunhao.java @ gmail.com)
* @version V1.0
* @createTime 13-5-5 上午10:38
*/
public class LuceneSearchEngine extends AbstractSearchEngine {
private static final Logger logger = LoggerFactory.getLogger(LuceneSearchEngine.class);
/**
* 索引存放路径
*/
private String indexPath;
/**
* 分词器
*/
private Analyzer analyzer;
public LuceneSearchEngine(String indexPath, Analyzer analyzer, String htmlPrefix, String htmlSuffix) {
super.setHtmlPrefix(htmlPrefix);
super.setHtmlSuffix(htmlSuffix);
this.indexPath = indexPath;
this.analyzer = analyzer == null ? new StandardAnalyzer() : analyzer;
}
@Override
public synchronized void doIndex(List baseSearchBeans) throws Exception {
this.createOrUpdateIndex(baseSearchBeans, true);
}
@Override
public synchronized void deleteIndex(BaseSearchBean bean) throws Exception {
if (bean == null) {
logger.warn("Get search bean is empty!");
return;
}
String id = bean.getId();
if (StringUtils.isEmpty(id)) {
logger.warn("get id and id value from bean is empty!");
return;
}
String indexType = getIndexType(bean);
Directory indexDir = this.getIndexDir(indexType);
IndexWriter writer = this.getWriter(indexDir);
writer.deleteDocuments(new Term("pkId", id));
writer.commit();
this.destroy(writer);
}
@Override
public synchronized void deleteIndexs(List beans) throws Exception {
if (beans == null) {
logger.warn("Get beans is empty!");
return;
}
for (BaseSearchBean bean : beans) {
this.deleteIndex(bean);
}
}
@Override
public Page doSearch(List beans, boolean isHighlighter, Pageable pageable) throws Exception {
beans = mergerBaseSearchBean(beans);
if (beans == null || beans.isEmpty()) {
logger.debug("given search beans is empty!");
return new PageImpl<>(Collections.emptyList(), null, 0);
}
IndexReader[] readers = new IndexReader[beans.size()];
for (int i = 0; i < beans.size(); i++) {
BaseSearchBean bean = beans.get(i);
String indexType = getIndexType(bean);
readers[i] = DirectoryReader.open(getIndexDir(indexType));
}
//使用MultiSearcher进行多域搜索
MultiReader multiReader = new MultiReader(readers);
//查询的字段名
List fieldNames = new ArrayList<>();
//待查询字段的值
List queryValue = new ArrayList<>();
List flags = new ArrayList<>();
for (BaseSearchBean bean : beans) {
//要进行检索的字段
String[] doSearchFields = bean.getDoSearchFields();
if (doSearchFields == null || doSearchFields.length == 0) {
return new PageImpl<>(Collections.emptyList(), null, 0);
}
//默认字段
if (StringUtils.isNotEmpty(bean.getKeyword())) {
for (String field : doSearchFields) {
fieldNames.add(field);
queryValue.add(bean.getKeyword());
flags.add(BooleanClause.Occur.SHOULD);
}
}
}
Query query = MultiFieldQueryParser.parse(
queryValue.toArray(new String[queryValue.size()]),
fieldNames.toArray(new String[fieldNames.size()]),
flags.toArray(new BooleanClause.Occur[flags.size()]), analyzer);
logger.debug("make query string is '{}'!", query.toString());
IndexSearcher indexSearcher = new IndexSearcher(multiReader);
ScoreDoc[] scoreDocs = indexSearcher.search(query, Integer.MAX_VALUE).scoreDocs;
//查询起始记录位置
int begin = pageable.getPageNumber() * (pageable.getPageSize());
//查询终止记录位置
int end = Math.min((pageable.getPageNumber() + 1) * pageable.getPageSize(), scoreDocs.length);
//高亮处理
Highlighter highlighter = null;
if (isHighlighter) {
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(this.getHtmlPrefix(), this.getHtmlSuffix());
highlighter = new Highlighter(formatter, new QueryScorer(query));
}
List queryResults = new ArrayList<>();
for (int i = begin; i < end; i++) {
int docID = scoreDocs[i].doc;
Document hitDoc = indexSearcher.doc(docID);
String indexType = hitDoc.get("indexType");
BaseSearchBean result = super.getBaseSearchBean(indexType, beans);
if (result == null) {
continue;
}
result.setId(hitDoc.get("pkId"));
result.setLink(hitDoc.get("link"));
result.setOwerId(hitDoc.get("owerId"));
result.setOwerName(hitDoc.get("owerName"));
result.setCreateDate(hitDoc.get("createDate"));
result.setIndexType(indexType);
String keyword = StringUtils.EMPTY;
if (isHighlighter) {
keyword = highlighter.getBestFragment(analyzer, "keyword", hitDoc.get("keyword"));
}
if (StringUtils.isEmpty(keyword)) {
keyword = hitDoc.get("keyword");
}
result.setKeyword(keyword);
//要进行检索的字段
String[] doSearchFields = result.getDoSearchFields();
if (doSearchFields == null || doSearchFields.length == 0) {
continue;
}
Map extendValues = new HashMap<>(doSearchFields.length);
for (String field : doSearchFields) {
String value = hitDoc.get(field);
if (isHighlighter) {
value = highlighter.getBestFragment(analyzer, field, hitDoc.get(field));
}
if (StringUtils.isEmpty(value)) {
value = hitDoc.get(field);
}
extendValues.put(field, value);
}
result.setSearchValues(extendValues);
queryResults.add(result);
}
//关闭链接
for (IndexReader reader : readers) {
if (reader != null) {
reader.close();
}
}
return new PageImpl<>(queryResults, pageable, scoreDocs.length);
}
@Override
public synchronized void deleteIndexsByIndexType(Class extends BaseSearchBean> clazz) throws Exception {
String indexType = getIndexType(BeanUtils.instantiate(clazz));
this.deleteIndexsByIndexType(indexType);
}
@Override
public synchronized void deleteIndexsByIndexType(String indexType) throws Exception {
//传入readOnly的参数,默认是只读的
IndexWriterConfig config = new IndexWriterConfig(analyzer);
IndexWriter indexWriter = new IndexWriter(getIndexDir(indexType), config);
long result = indexWriter.deleteDocuments(new Term("indexType", indexType));
indexWriter.close();
logger.debug("the rows of delete index is '{}'! index type is '{}'!", result, indexType);
}
@Override
public synchronized void deleteAllIndexs() throws Exception {
File indexFolder = new File(this.indexPath);
if (!indexFolder.isDirectory()) {
//不存在或者不是文件夹
logger.debug("indexPath is not a folder! indexPath: '{}'!", indexPath);
return;
}
File[] children = indexFolder.listFiles();
if (children == null || children.length == 0) {
return;
}
for (File child : children) {
if (child == null || !child.isDirectory()) {
continue;
}
String indexType = child.getName();
logger.debug("Get indexType is '{}'!", indexType);
this.deleteIndexsByIndexType(indexType);
}
}
@Override
public void updateIndex(BaseSearchBean searchBean) throws Exception {
this.updateIndexs(Collections.singletonList(searchBean));
}
@Override
public void updateIndexs(List searchBean) throws Exception {
this.createOrUpdateIndex(searchBean, false);
}
/**
* 创建或者更新索引
*
* @param searchBean 需要创建或者更新的对象
* @param isCreate 是否是创建索引;true创建索引,false更新索引
* @throws Exception
*/
private synchronized void createOrUpdateIndex(List searchBean, boolean isCreate) throws Exception {
if (searchBean == null || searchBean.isEmpty()) {
logger.debug("do no index!");
return;
}
Directory indexDir = null;
IndexWriter writer = null;
for (BaseSearchBean sb : searchBean) {
String indexType = getIndexType(sb);
if (sb == null) {
logger.debug("give BaseSearchBean is null!");
return;
}
boolean anotherBaseSearchBean = indexDir != null && !indexType.equals(((FSDirectory) indexDir).getDirectory().toString());
if (indexDir == null || anotherBaseSearchBean) {
indexDir = this.getIndexDir(indexType);
}
if (writer == null || anotherBaseSearchBean) {
this.destroy(writer);
writer = this.getWriter(indexDir);
}
Document doc = new Document();
//初始化一些字段
sb.initPublicFields();
String id = sb.getId();
//主键的索引,不作为搜索字段,并且也不进行分词
StringField idField = new StringField("pkId", id, Field.Store.YES);
doc.add(idField);
logger.debug("create id index for '{}', value is '{}'! index is '{}'!", "pkId", id, idField);
String owerId = sb.getOwerId();
if (StringUtils.isEmpty(owerId)) {
throw new LuceneException(104009, "you must give a owerId");
}
StringField owerIdField = new StringField("owerId", owerId, Field.Store.YES);
doc.add(owerIdField);
String owerName = sb.getOwerName();
if (StringUtils.isEmpty(owerName)) {
throw new LuceneException(104010, "you must give a owerName");
}
StringField owerNameField = new StringField("owerName", owerName, Field.Store.YES);
doc.add(owerNameField);
String link = sb.getLink();
if (StringUtils.isEmpty(link)) {
throw new LuceneException(104011, "you must give a link");
}
StringField linkField = new StringField("link", link, Field.Store.YES);
doc.add(linkField);
String keyword = sb.getKeyword();
if (StringUtils.isEmpty(keyword)) {
throw new LuceneException(104012, "you must give a keyword");
}
StringField keywordField = new StringField("keyword", keyword, Field.Store.YES);
doc.add(keywordField);
String createDate = sb.getCreateDate();
if (StringUtils.isEmpty(createDate)) {
throw new LuceneException(104013, "you must give a createDate");
}
StringField createDateField = new StringField("createDate", createDate, Field.Store.YES);
doc.add(createDateField);
//索引类型字段
StringField indexTypeField = new StringField("indexType", indexType, Field.Store.YES);
doc.add(indexTypeField);
//进行索引的字段
String[] doIndexFields = sb.getDoIndexFields();
Map indexFieldValues = sb.getIndexFieldValues();
if (doIndexFields != null && doIndexFields.length > 0) {
for (String field : doIndexFields) {
String fieldValue = indexFieldValues.get(field);
if (StringUtils.isEmpty(fieldValue)) {
continue;
}
TextField extInfoField = new TextField(field, fieldValue, Field.Store.YES);
doc.add(extInfoField);
}
}
if (isCreate) {
writer.addDocument(doc);
} else {
writer.updateDocument(new Term("pkId", sb.getId()), doc);
}
writer.commit();
}
this.destroy(writer);
logger.debug("create or update index success!");
}
private Directory getIndexDir(String suffix) throws Exception {
Path path = Paths.get(indexPath, suffix);
File file = path.toFile();
if (!file.exists()) {
if (!file.mkdirs()) {
throw new LuceneException("create lucene index folder error!");
}
}
return FSDirectory.open(path);
}
private List mergerBaseSearchBean(List beans) {
List beanList = new ArrayList<>();
if (beans == null || beans.isEmpty()) {
return beanList;
}
for (BaseSearchBean bean : beans) {
IndexReader reader = null;
try {
Directory dir = getIndexDir(bean.getIndexType());
reader = getReader(dir);
} catch (Exception ignored) {
}
if (reader != null) {
beanList.add(bean);
}
}
return beanList;
}
private IndexWriter getWriter(Directory indexDir) throws IOException {
IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
return new IndexWriter(indexDir, writerConfig);
}
private void destroy(IndexWriter writer) throws Exception {
if (writer != null) {
writer.close();
}
}
private IndexReader getReader(Directory dir) throws IOException {
return DirectoryReader.open(dir);
}
}