org.apache.solr.analysis.ReversedWildcardFilterFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
/**
* Factory for {@link ReversedWildcardFilter}-s. When this factory is added to an analysis chain, it
* will be used both for filtering the tokens during indexing, and to determine the query processing
* of this field during search.
*
* <p>This class supports the following init arguments:
*
* <ul>
*   <li><code>withOriginal</code> - if true, then produce both original and reversed tokens at the
*       same positions. If false, then produce only reversed tokens.
*   <li><code>maxPosAsterisk</code> - maximum position (1-based) of the asterisk wildcard ('*')
*       that triggers the reversal of query term. Asterisk that occurs at positions higher than
*       this value will not cause the reversal of query term. Defaults to 2, meaning that asterisks
*       on positions 1 and 2 will cause a reversal.
*   <li><code>maxPosQuestion</code> - maximum position (1-based) of the question mark wildcard
*       ('?') that triggers the reversal of query term. Defaults to 1. Set this to 0, and
*       <code>maxPosAsterisk</code> to 1 to reverse only pure suffix queries (i.e. ones with a
*       single leading asterisk).
*   <li><code>maxFractionAsterisk</code> - additional parameter that triggers the reversal if
*       asterisk ('*') position is less than this fraction of the query token length. Defaults to
*       0.0f (disabled).
*   <li><code>minTrailing</code> - minimum number of trailing characters in query token after the
*       last wildcard character. For good performance this should be set to a value larger than 1.
*       Defaults to 2.
* </ul>
*
* Note 1: This filter always reverses input tokens during indexing. Note 2: Query tokens without
* wildcard characters will never be reversed.
*
*
* <pre class="prettyprint">
* &lt;fieldType name="text_rvswc" class="solr.TextField" positionIncrementGap="100"&gt;
*   &lt;analyzer type="index"&gt;
*     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
*     &lt;filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
*       maxPosAsterisk="2" maxPosQuestion="1" minTrailing="2" maxFractionAsterisk="0"/&gt;
*   &lt;/analyzer&gt;
*   &lt;analyzer type="query"&gt;
*     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
*   &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
*
* @since 3.1
* @lucene.spi {@value #NAME}
*/
public class ReversedWildcardFilterFactory extends TokenFilterFactory {

  /** SPI name */
  public static final String NAME = "reversedWildcard";

  /** Marker character passed to every {@link ReversedWildcardFilter} created by this factory. */
  private final char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER;

  /** Value of the {@code withOriginal} init argument; forwarded to the created filter. */
  private final boolean withOriginal;

  /** Value of {@code maxPosAsterisk}; upper bound checked against the first {@code '*'} index. */
  private final int maxPosAsterisk;

  /** Value of {@code maxPosQuestion}; upper bound checked against the first {@code '?'} index. */
  private final int maxPosQuestion;

  /** Value of {@code minTrailing}; lower bound used by the trailing-length check. */
  private final int minTrailing;

  /** Value of {@code maxFractionAsterisk}; the fraction rule is skipped unless this is positive. */
  private final float maxFractionAsterisk;

  /**
   * Creates a new ReversedWildcardFilterFactory.
   *
   * <p>Recognised arguments and the defaults applied when they are absent: {@code withOriginal}
   * ({@code true}), {@code maxPosAsterisk} ({@code 2}), {@code maxPosQuestion} ({@code 1}), {@code
   * minTrailing} ({@code 2}) and {@code maxFractionAsterisk} ({@code 0.0f}).
   *
   * @param args init arguments for this factory
   * @throws IllegalArgumentException if {@code args} is not empty after the arguments above have
   *     been read
   */
  public ReversedWildcardFilterFactory(Map<String, String> args) {
    super(args);
    withOriginal = getBoolean(args, "withOriginal", true);
    maxPosAsterisk = getInt(args, "maxPosAsterisk", 2);
    maxPosQuestion = getInt(args, "maxPosQuestion", 1);
    minTrailing = getInt(args, "minTrailing", 2);
    maxFractionAsterisk = getFloat(args, "maxFractionAsterisk", 0.0f);
    // Anything still present at this point was not recognised above (presumably the inherited
    // get* helpers remove the keys they read -- confirm against the base class).
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  /** Default ctor for compatibility with SPI */
  public ReversedWildcardFilterFactory() {
    throw defaultCtorException();
  }

  /**
   * Wraps the given stream in a {@link ReversedWildcardFilter} configured with this factory's
   * {@code withOriginal} setting and marker character.
   *
   * @param input token stream to wrap
   * @return the wrapping filter
   */
  @Override
  public TokenStream create(TokenStream input) {
    return new ReversedWildcardFilter(input, withOriginal, markerChar);
  }

  /**
   * This method encapsulates the logic that determines whether a query token should be reversed in
   * order to use the reversed terms in the index.
   *
   * <p>The checks are applied in this order:
   *
   * <ol>
   *   <li>a token containing neither {@code '?'} nor {@code '*'} is never reversed;
   *   <li>a token for which {@code length - (index of last wildcard)} is smaller than {@code
   *       minTrailing} is never reversed;
   *   <li>a token whose first {@code '?'} has a 0-based index below {@code maxPosQuestion}, or
   *       whose first {@code '*'} has a 0-based index below {@code maxPosAsterisk}, is reversed;
   *   <li>if {@code maxFractionAsterisk} is positive, a token whose first wildcard (of either kind)
   *       has an index below {@code length * maxFractionAsterisk} is reversed.
   * </ol>
   *
   * @param token input token.
   * @return true if input token should be reversed, false otherwise.
   */
  public boolean shouldReverse(String token) {
    final int firstQuestion = token.indexOf('?');
    final int firstAsterisk = token.indexOf('*');
    if (firstQuestion == -1 && firstAsterisk == -1) { // not a wildcard query
      return false;
    }

    final int length = token.length();
    final int lastWildcard = Math.max(token.lastIndexOf('?'), token.lastIndexOf('*'));
    // NOTE(review): length - lastWildcard includes the wildcard itself, so this rejects tokens
    // with fewer than (minTrailing - 1) characters after the last wildcard. Kept as-is to
    // preserve existing query behavior.
    if (length - lastWildcard < minTrailing) { // too few trailing chars
      return false;
    }

    if (firstQuestion != -1 && firstQuestion < maxPosQuestion) { // leading '?'
      return true;
    }
    if (firstAsterisk != -1 && firstAsterisk < maxPosAsterisk) { // leading '*'
      return true;
    }

    if (maxFractionAsterisk > 0.0f) {
      // Earliest wildcard of either kind; at least one of the two indexes is valid here.
      final int firstWildcard;
      if (firstQuestion == -1) {
        firstWildcard = firstAsterisk;
      } else if (firstAsterisk == -1) {
        firstWildcard = firstQuestion;
      } else {
        firstWildcard = Math.min(firstQuestion, firstAsterisk);
      }
      // wildcard in the leading part of the token
      return firstWildcard < (float) length * maxFractionAsterisk;
    }
    return false;
  }

  /**
   * Returns the marker character this factory passes to the filters it creates.
   *
   * @return the marker character
   */
  public char getMarkerChar() {
    return markerChar;
  }
}