de.spinscale.elasticsearch.service.suggest.ShardSuggestService Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-plugin-suggest Show documentation
Show all versions of elasticsearch-plugin-suggest Show documentation
Lucene FSTSuggester implementation for ElasticSearch
The newest version!
package de.spinscale.elasticsearch.service.suggest;
import de.spinscale.elasticsearch.action.suggest.refresh.ShardSuggestRefreshRequest;
import de.spinscale.elasticsearch.action.suggest.refresh.ShardSuggestRefreshResponse;
import de.spinscale.elasticsearch.action.suggest.statistics.FstStats;
import de.spinscale.elasticsearch.action.suggest.statistics.ShardSuggestStatisticsResponse;
import de.spinscale.elasticsearch.action.suggest.suggest.ShardSuggestRequest;
import de.spinscale.elasticsearch.action.suggest.suggest.ShardSuggestResponse;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.base.Function;
import org.elasticsearch.common.base.Joiner;
import org.elasticsearch.common.base.Objects;
import org.elasticsearch.common.cache.CacheBuilder;
import org.elasticsearch.common.cache.CacheLoader;
import org.elasticsearch.common.cache.LoadingCache;
import org.elasticsearch.common.collect.Collections2;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.internal.ToStringBuilder;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
import org.elasticsearch.index.shard.IndexShardState;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.service.IndexShard;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.concurrent.locks.ReentrantLock;
/**
 * Shard-level component that answers suggest requests from Lucene suggestion
 * structures which are built lazily per field and kept in in-memory caches.
 *
 * <p>Each cache is keyed either by field name or by {@link FieldType} (field plus
 * analyzer settings). All caches derive their data from {@link #dictCache}, which
 * in turn reads terms from the shard's current {@link IndexReader}.
 */
public class ShardSuggestService extends AbstractIndexShardComponent {

    /** Threshold passed to every {@link HighFrequencyDictionary} built by this service. */
    private static final float DICTIONARY_THRESHOLD = 0.00001f;

    private final IndexShard indexShard;

    /** Guards the lazy initialisation of {@link #indexReader}. */
    private final ReentrantLock lock = new ReentrantLock();

    /** Reader the dictionaries are built from; volatile because it is read outside {@link #lock}. */
    private volatile IndexReader indexReader;

    private final LoadingCache<String, FSTCompletionLookup> lookupCache;
    private final LoadingCache<FieldType, AnalyzingSuggester> analyzingSuggesterCache;
    private final LoadingCache<FieldType, FuzzySuggester> fuzzySuggesterCache;
    private final LoadingCache<String, HighFrequencyDictionary> dictCache;
    private final LoadingCache<String, SpellChecker> spellCheckerCache;
    private final LoadingCache<String, RAMDirectory> ramDirectoryCache;

    /**
     * Wires up the per-field caches. Nothing is loaded here; every structure is
     * built on first access by its cache loader.
     *
     * @param shardId         shard this service belongs to
     * @param indexSettings   settings of the owning index
     * @param indexShard      shard used to acquire searchers/readers
     * @param analysisService handed to the analyzing/fuzzy suggester loaders
     * @param mapperService   handed to the analyzing/fuzzy suggester loaders
     */
    @Inject
    public ShardSuggestService(ShardId shardId, @IndexSettings Settings indexSettings, IndexShard indexShard,
                               final AnalysisService analysisService, final MapperService mapperService) {
        super(shardId, indexSettings);
        this.indexShard = indexShard;

        // Assignment order matters: each loader below only refers to caches
        // that have already been assigned above it.
        ramDirectoryCache = CacheBuilder.newBuilder().build(
                new CacheLoader<String, RAMDirectory>() {
                    @Override
                    public RAMDirectory load(String field) throws Exception {
                        return new RAMDirectory();
                    }
                }
        );

        dictCache = CacheBuilder.newBuilder().build(
                new CacheLoader<String, HighFrequencyDictionary>() {
                    @Override
                    public HighFrequencyDictionary load(String field) throws Exception {
                        return new HighFrequencyDictionary(createOrGetIndexReader(), field, DICTIONARY_THRESHOLD);
                    }
                }
        );

        spellCheckerCache = CacheBuilder.newBuilder().build(
                new CacheLoader<String, SpellChecker>() {
                    @Override
                    public SpellChecker load(String field) throws Exception {
                        SpellChecker spellChecker = new SpellChecker(ramDirectoryCache.get(field));
                        IndexWriterConfig indexWriterConfig =
                                new IndexWriterConfig(Version.LUCENE_44, new WhitespaceAnalyzer(Version.LUCENE_44));
                        spellChecker.indexDictionary(dictCache.getUnchecked(field), indexWriterConfig, false);
                        return spellChecker;
                    }
                }
        );

        lookupCache = CacheBuilder.newBuilder().build(
                new CacheLoader<String, FSTCompletionLookup>() {
                    @Override
                    public FSTCompletionLookup load(String field) throws Exception {
                        FSTCompletionLookup lookup = new FSTCompletionLookup();
                        lookup.build(dictCache.getUnchecked(field));
                        return lookup;
                    }
                }
        );

        analyzingSuggesterCache = CacheBuilder.newBuilder().build(
                new AbstractCacheLoaderSuggester.CacheLoaderAnalyzingSuggester(mapperService, analysisService, dictCache));

        fuzzySuggesterCache = CacheBuilder.newBuilder().build(
                new AbstractCacheLoaderSuggester.CacheLoaderFuzzySuggester(mapperService, analysisService, dictCache));
    }

    /**
     * Rebuilds cached suggestion data. Without a field in the request everything
     * is refreshed via {@link #update()}; otherwise only the entries that belong
     * to the requested field and are already cached are refreshed.
     *
     * @param shardSuggestRefreshRequest request optionally naming the field to refresh
     * @return a response carrying this shard's index name and shard id
     */
    public ShardSuggestRefreshResponse refresh(ShardSuggestRefreshRequest shardSuggestRefreshRequest) {
        String field = shardSuggestRefreshRequest.field();
        if (!Strings.hasLength(field)) {
            update();
        } else {
            refreshField(field);
        }
        return new ShardSuggestRefreshResponse(shardId.index().name(), shardId.id());
    }

    /** Refreshes every already-cached structure that was built for {@code field}. */
    private void refreshField(String field) {
        resetIndexReader();

        if (dictCache.getIfPresent(field) != null) {
            dictCache.refresh(field);
        }

        // Drop the old directory first so the refreshed spell checker is built on a fresh one.
        RAMDirectory ramDirectory = ramDirectoryCache.getIfPresent(field);
        if (ramDirectory != null) {
            ramDirectory.close();
            ramDirectoryCache.invalidate(field);
        }

        SpellChecker spellChecker = spellCheckerCache.getIfPresent(field);
        if (spellChecker != null) {
            spellCheckerCache.refresh(field);
            try {
                spellChecker.close();
            } catch (IOException e) {
                logger.error("Could not close spellchecker in indexshard [{}] for field [{}]", e, indexShard, field);
            }
        }

        if (lookupCache.getIfPresent(field) != null) {
            lookupCache.refresh(field);
        }

        refreshForField(analyzingSuggesterCache, field);
        refreshForField(fuzzySuggesterCache, field);
    }

    /** Refreshes all entries of {@code cache} whose key refers to {@code field}. */
    private static void refreshForField(LoadingCache<FieldType, ?> cache, String field) {
        for (FieldType fieldType : cache.asMap().keySet()) {
            // field is known to be non-empty here; a key's field may be null
            if (field.equals(fieldType.field())) {
                cache.refresh(fieldType);
            }
        }
    }

    /** Refreshes every entry currently held by {@code cache}. */
    private static <K> void refreshAll(LoadingCache<K, ?> cache) {
        for (K key : cache.asMap().keySet()) {
            cache.refresh(key);
        }
    }

    /**
     * Releases the index reader, closes all cached spell checkers together with
     * their directories and empties every cache.
     */
    public void shutDown() {
        resetIndexReader();
        dictCache.invalidateAll();

        for (Map.Entry<String, SpellChecker> entry : spellCheckerCache.asMap().entrySet()) {
            try {
                // getIfPresent: do not create a directory just to close it again
                RAMDirectory ramDirectory = ramDirectoryCache.getIfPresent(entry.getKey());
                if (ramDirectory != null) {
                    ramDirectory.close();
                }
                entry.getValue().close();
            } catch (IOException e) {
                logger.error("Could not close spellchecker in indexshard [{}] for field [{}]", e, indexShard, entry.getKey());
            }
        }

        spellCheckerCache.invalidateAll();
        ramDirectoryCache.invalidateAll();
        lookupCache.invalidateAll();
        analyzingSuggesterCache.invalidateAll();
        fuzzySuggesterCache.invalidateAll();
    }

    /**
     * Refreshes every cached structure for every field. Also invoked when the
     * index reader the dictionaries were built from gets closed.
     */
    public void update() {
        resetIndexReader();
        refreshAll(dictCache);

        for (String field : spellCheckerCache.asMap().keySet()) {
            // try/catch per field so a failure to close one spell checker does not skip the others
            try {
                SpellChecker oldSpellchecker = spellCheckerCache.getUnchecked(field);
                RAMDirectory oldRamDirectory = ramDirectoryCache.getUnchecked(field);
                ramDirectoryCache.refresh(field);
                spellCheckerCache.refresh(field);
                oldRamDirectory.close();
                oldSpellchecker.close();
            } catch (IOException e) {
                logger.error("Error refreshing spell checker cache [{}]", e, shardId);
            }
        }

        refreshAll(lookupCache);
        refreshAll(analyzingSuggesterCache);
        refreshAll(fuzzySuggesterCache);
    }

    /**
     * Computes the suggestions for a request on this shard.
     *
     * @param shardSuggestRequest the shard-level suggest request
     * @return response with this shard's index name, shard id and the suggestions
     * @throws ElasticsearchException if the underlying lookup fails with an {@link IOException}
     */
    public ShardSuggestResponse suggest(ShardSuggestRequest shardSuggestRequest) {
        List<String> suggestions;
        try {
            suggestions = Lists.newArrayList(getSuggestions(shardSuggestRequest));
        } catch (IOException e) {
            throw new ElasticsearchException("Error getting suggestions", e);
        }
        return new ShardSuggestResponse(shardId.index().name(), shardId.id(), suggestions);
    }

    /**
     * Asks the field's spell checker for terms similar to the requested term.
     * An {@link IOException} is logged and yields an empty result.
     */
    private Collection<String> getSimilarSuggestions(ShardSuggestRequest shardSuggestRequest) {
        String field = shardSuggestRequest.field();
        String term = shardSuggestRequest.term();
        int limit = shardSuggestRequest.size();
        float similarity = shardSuggestRequest.similarity();

        try {
            String[] suggestSimilar = spellCheckerCache.getUnchecked(field).suggestSimilar(term, limit, similarity);
            return Arrays.asList(suggestSimilar);
        } catch (IOException e) {
            logger.error("Error getting spellchecker suggestions for shard [{}] field [{}] term [{}] limit [{}] similarity [{}]", e, shardId, field, term, limit, similarity);
        }
        return Collections.emptyList();
    }

    /**
     * Dispatches on the request's suggest type: {@code "full"} uses the analyzing
     * suggester, {@code "fuzzy"} the fuzzy suggester, anything else the FST
     * completion lookup, topped up with spell-checker results when the requested
     * similarity is below 1.0 and fewer than {@code size} suggestions were found.
     */
    private Collection<String> getSuggestions(ShardSuggestRequest shardSuggestRequest) throws IOException {
        final String suggestType = shardSuggestRequest.suggestType();

        if ("full".equals(suggestType)) {
            AnalyzingSuggester analyzingSuggester = analyzingSuggesterCache.getUnchecked(new FieldType(shardSuggestRequest));
            return toKeys(analyzingSuggester.lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size()));
        }

        if ("fuzzy".equals(suggestType)) {
            FuzzySuggester fuzzySuggester = fuzzySuggesterCache.getUnchecked(new FieldType(shardSuggestRequest));
            return toKeys(fuzzySuggester.lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size()));
        }

        List<LookupResult> lookupResults = Lists.newArrayList();
        lookupResults.addAll(lookupCache.getUnchecked(shardSuggestRequest.field())
                .lookup(shardSuggestRequest.term(), true, shardSuggestRequest.size() + 1));
        Collection<String> suggestions = toKeys(lookupResults);

        float similarity = shardSuggestRequest.similarity();
        if (similarity < 1.0f && suggestions.size() < shardSuggestRequest.size()) {
            // the transformed collection is a view; copy before appending
            List<String> combined = Lists.newArrayList(suggestions);
            combined.addAll(getSimilarSuggestions(shardSuggestRequest));
            return combined;
        }
        return suggestions;
    }

    /** Maps lookup results to their keys as strings. */
    private static Collection<String> toKeys(List<LookupResult> lookupResults) {
        return Collections2.transform(lookupResults, new LookupResultToStringFunction());
    }

    /** Converts a {@link LookupResult} to the string form of its key. */
    private static final class LookupResultToStringFunction implements Function<LookupResult, String> {
        @Override
        public String apply(LookupResult result) {
            return result.key.toString();
        }
    }

    /**
     * Forgets the cached index reader so that the next dictionary load acquires
     * a current one. If the cached reader still has references and is not the
     * reader of the shard's current searcher, its reference count is decreased.
     */
    public void resetIndexReader() {
        IndexReader currentIndexReader = null;
        if (indexShard.state() == IndexShardState.STARTED) {
            try (Engine.Searcher currentIndexSearcher = indexShard.acquireSearcher("suggest")) {
                currentIndexReader = currentIndexSearcher.reader();
            }
        }

        // Work on a snapshot: the field may be changed by another thread in between the checks.
        final IndexReader previousReader = indexReader;

        // if this index reader is not used in the current index searcher, we need to decrease the old refcount
        // NOTE(review): no matching incRef() is visible in this class - confirm the ref-count ownership.
        if (previousReader != null && previousReader.getRefCount() > 0 && !previousReader.equals(currentIndexReader)) {
            try {
                previousReader.decRef();
            } catch (IOException e) {
                logger.error("Error decreasing indexreader ref count [{}] of shard [{}]", e, previousReader.getRefCount(), shardId);
            }
        }
        indexReader = null;
    }

    /**
     * Collects the memory usage of every cached analyzing and fuzzy suggester.
     *
     * @return statistics response with one entry per cached suggester
     */
    public ShardSuggestStatisticsResponse getStatistics() {
        ShardSuggestStatisticsResponse shardSuggestStatisticsResponse = new ShardSuggestStatisticsResponse(shardId());

        // Iterate entries rather than keys so an entry invalidated concurrently cannot produce a null value.
        for (Map.Entry<FieldType, AnalyzingSuggester> entry : analyzingSuggesterCache.asMap().entrySet()) {
            long sizeInBytes = entry.getValue().ramBytesUsed();
            FstStats.FstIndexShardStats fstIndexShardStats =
                    new FstStats.FstIndexShardStats(shardId, "analyzingsuggester", entry.getKey(), sizeInBytes);
            shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats);
        }

        for (Map.Entry<FieldType, FuzzySuggester> entry : fuzzySuggesterCache.asMap().entrySet()) {
            long sizeInBytes = entry.getValue().ramBytesUsed();
            FstStats.FstIndexShardStats fstIndexShardStats =
                    new FstStats.FstIndexShardStats(shardId, "fuzzysuggester", entry.getKey(), sizeInBytes);
            shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats);
        }

        return shardSuggestStatisticsResponse;
    }

    /**
     * Returns the cached index reader, acquiring it from the shard on first use
     * and registering a listener that triggers {@link #update()} once that
     * reader is closed.
     */
    private IndexReader createOrGetIndexReader() {
        IndexReader reader = indexReader;
        if (reader != null) {
            return reader;
        }

        lock.lock();
        try {
            reader = indexReader;
            if (reader == null) {
                try (Engine.Searcher indexSearcher = indexShard.acquireSearcher("suggest")) {
                    reader = indexSearcher.reader();
                }
                // If an indexreader closes, we have to refresh all our data structures!
                reader.addReaderClosedListener(new IndexReader.ReaderClosedListener() {
                    @Override
                    public void onClose(IndexReader closedReader) {
                        update();
                    }
                });
                indexReader = reader;
            }
            // Return the local reference: the field may be reset concurrently.
            return reader;
        } finally {
            // Always paired with the lock() above. ReentrantLock.isLocked() reports
            // any holder, so it must not be used to decide whether to unlock.
            lock.unlock();
        }
    }

    /**
     * Cache key describing what a suggester was built for: the field, the
     * document types, the query/index analyzers and whether position increments
     * are preserved.
     */
    public static class FieldType implements Streamable, Serializable, ToXContent {

        private String field;
        private List<String> types = Lists.newArrayList();
        private String queryAnalyzer;
        private String indexAnalyzer;
        private boolean preservePositionIncrements = true;

        /** Creates an empty instance, to be populated through {@link #readFrom(StreamInput)}. */
        public FieldType() {}

        /** Copies field, types, analyzers and position-increment flag from the request. */
        public FieldType(ShardSuggestRequest shardSuggestRequest) {
            this.field = shardSuggestRequest.field();
            this.types = Arrays.asList(shardSuggestRequest.types());
            this.queryAnalyzer = shardSuggestRequest.queryAnalyzer();
            this.indexAnalyzer = shardSuggestRequest.indexAnalyzer();
            this.preservePositionIncrements = shardSuggestRequest.preservePositionIncrements();
        }

        public String field() {
            return field;
        }

        /** @return a fresh array holding the configured types */
        public String[] types() {
            return types.toArray(new String[types.size()]);
        }

        public String queryAnalyzer() {
            return queryAnalyzer;
        }

        public String indexAnalyzer() {
            return indexAnalyzer;
        }

        public boolean preservePositionIncrements() {
            return preservePositionIncrements;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }

            final FieldType other = (FieldType) obj;
            return Objects.equal(this.field, other.field)
                    && Objects.equal(this.queryAnalyzer, other.queryAnalyzer)
                    && Objects.equal(this.indexAnalyzer, other.indexAnalyzer)
                    && Objects.equal(this.types, other.types)
                    && this.preservePositionIncrements == other.preservePositionIncrements;
        }

        /** Null-safe hash over exactly the members compared in {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return Objects.hashCode(field, types, queryAnalyzer, indexAnalyzer, preservePositionIncrements);
        }

        @Override
        public String toString() {
            ToStringBuilder toStringBuilder = new ToStringBuilder(this.getClass())
                    .add("field", this.field());

            toStringBuilder.add("preservePositionIncrements", this.preservePositionIncrements);

            // A single "analyzer" entry is printed when both analyzers are the same.
            if (queryAnalyzer != null && queryAnalyzer.equals(indexAnalyzer)) {
                toStringBuilder.add("analyzer", this.queryAnalyzer);
            } else {
                if (queryAnalyzer != null) {
                    toStringBuilder.add("queryAnalyzer", queryAnalyzer);
                }
                if (indexAnalyzer != null) {
                    toStringBuilder.add("indexAnalyzer", indexAnalyzer);
                }
            }

            if (types.size() > 0) {
                toStringBuilder.add("types", Joiner.on("-").join(types));
            }

            return toStringBuilder.toString();
        }

        /** Reads the members in the order in which {@link #writeTo(StreamOutput)} wrote them. */
        @Override
        public void readFrom(StreamInput in) throws IOException {
            field = in.readString();
            queryAnalyzer = in.readOptionalString();
            indexAnalyzer = in.readOptionalString();
            @SuppressWarnings("unchecked") // writeTo() writes the list of type names
            List<String> readTypes = (List<String>) in.readGenericValue();
            types = readTypes;
            preservePositionIncrements = in.readBoolean();
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeString(field);
            out.writeOptionalString(queryAnalyzer);
            out.writeOptionalString(indexAnalyzer);
            out.writeGenericValue(types);
            out.writeBoolean(preservePositionIncrements);
        }

        /**
         * Writes the members as fields of the object currently open on
         * {@code builder}; this method does not start or end an object itself.
         */
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.field("field", field);

            if (queryAnalyzer != null && queryAnalyzer.equals(indexAnalyzer)) {
                builder.field("analyzer", this.queryAnalyzer);
            } else {
                if (queryAnalyzer != null) {
                    builder.field("queryAnalyzer", queryAnalyzer);
                }
                if (indexAnalyzer != null) {
                    builder.field("indexAnalyzer", indexAnalyzer);
                }
            }

            // only emitted when it differs from the default (true)
            if (!preservePositionIncrements) {
                builder.field("preservePositionIncrements", preservePositionIncrements);
            }
            if (types.size() > 0) {
                builder.field("types", types());
            }

            return builder;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy