// org.apache.lucene.analysis.PrefixAnalyzer
// Part of the Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
/**
* This {@link Analyzer} wraps another analyzer and adds a set of prefixes to the
* underlying TokenStream. While these prefixes are iterated the position attribute
* will not be incremented. Also each prefix will be separated from the other tokens
* by a separator character.
* NOTE: The sequence of prefixes needs to be not empty
*/
public class PrefixAnalyzer extends Analyzer {
private final char separator;
private final Iterable extends CharSequence> prefix;
private final Analyzer analyzer;
/**
* Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR.
*
* @param analyzer {@link Analyzer} to wrap
* @param prefix Single prefix
*/
public PrefixAnalyzer(Analyzer analyzer, char separator, CharSequence prefix) {
this(analyzer, separator, Collections.singleton(prefix));
}
/**
* Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR.
*
* @param analyzer {@link Analyzer} to wrap
* @param prefix {@link Iterable} of {@link CharSequence} which keeps all prefixes
*/
public PrefixAnalyzer(Analyzer analyzer, char separator, Iterable extends CharSequence> prefix) {
super();
this.analyzer = analyzer;
this.prefix = prefix;
this.separator = separator;
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
TokenStreamComponents createComponents = analyzer.createComponents(fieldName);
TokenStream stream = new PrefixTokenFilter(createComponents.getTokenStream(), separator, prefix);
TokenStreamComponents tsc = new TokenStreamComponents(createComponents.getTokenizer(), stream);
return tsc;
}
/**
* The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set
* prefixes ahead. The position attribute will not be incremented for the prefixes.
*/
public static final class PrefixTokenFilter extends TokenFilter {
private final char separator;
private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
private final Iterable extends CharSequence> prefixes;
private Iterator extends CharSequence> currentPrefix;
/**
* Create a new {@link PrefixTokenFilter}. The separator will be set to the DEFAULT_SEPARATOR.
*
* @param input {@link TokenStream} to wrap
* @param separator Character used separate prefixes from other tokens
* @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes
*/
public PrefixTokenFilter(TokenStream input, char separator, Iterable extends CharSequence> prefixes) {
super(input);
this.prefixes = prefixes;
this.currentPrefix = null;
this.separator = separator;
if (prefixes == null || !prefixes.iterator().hasNext()) {
throw new IllegalArgumentException("one or more prefixes needed");
}
}
@Override
public boolean incrementToken() throws IOException {
if (currentPrefix != null) {
if (!currentPrefix.hasNext()) {
return input.incrementToken();
} else {
posAttr.setPositionIncrement(0);
}
} else {
currentPrefix = prefixes.iterator();
termAttr.setEmpty();
posAttr.setPositionIncrement(1);
assert (currentPrefix.hasNext()) : "one or more prefixes needed";
}
termAttr.setEmpty();
termAttr.append(currentPrefix.next());
termAttr.append(separator);
return true;
}
@Override
public void reset() throws IOException {
super.reset();
currentPrefix = null;
}
}
}