All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.analysis.ReversedWildcardFilterFactory Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.analysis;

import java.util.Map;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;

/**
 * Factory for {@link ReversedWildcardFilter}-s. When this factory is added to an analysis chain, it
 * will be used both for filtering the tokens during indexing, and to determine the query processing
 * of this field during search.
 *
 * 

This class supports the following init arguments: * *

    *
  • withOriginal - if true, then produce both original and reversed tokens at the * same positions. If false, then produce only reversed tokens. *
  • maxPosAsterisk - maximum position (1-based) of the asterisk wildcard ('*') * that triggers the reversal of query term. Asterisk that occurs at positions higher than * this value will not cause the reversal of query term. Defaults to 2, meaning that asterisks * on positions 1 and 2 will cause a reversal. *
  • maxPosQuestion - maximum position (1-based) of the question mark wildcard * ('?') that triggers the reversal of query term. Defaults to 1. Set this to 0, and * maxPosAsterisk to 1 to reverse only pure suffix queries (i.e. ones with a single * leading asterisk). *
  • maxFractionAsterisk - additional parameter that triggers the reversal if * asterisk ('*') position is less than this fraction of the query token length. Defaults to * 0.0f (disabled). *
  • minTrailing - minimum number of trailing characters in query token after the * last wildcard character. For good performance this should be set to a value larger than 1. * Defaults to 2. *
* * Note 1: This filter always reverses input tokens during indexing. Note 2: Query tokens without * wildcard characters will never be reversed. * *
 * <fieldType name="text_rvswc" class="solr.TextField" positionIncrementGap="100">
 *   <analyzer type="index">
 *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
 *     <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
 *             maxPosAsterisk="2" maxPosQuestion="1" minTrailing="2" maxFractionAsterisk="0"/>
 *   </analyzer>
 *   <analyzer type="query">
 *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
 *   </analyzer>
 * </fieldType>
* * @since 3.1 * @lucene.spi {@value #NAME} */ public class ReversedWildcardFilterFactory extends TokenFilterFactory { /** SPI name */ public static final String NAME = "reversedWildcard"; private char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER; private boolean withOriginal; private int maxPosAsterisk; private int maxPosQuestion; private int minTrailing; private float maxFractionAsterisk; /** Creates a new ReversedWildcardFilterFactory */ public ReversedWildcardFilterFactory(Map args) { super(args); withOriginal = getBoolean(args, "withOriginal", true); maxPosAsterisk = getInt(args, "maxPosAsterisk", 2); maxPosQuestion = getInt(args, "maxPosQuestion", 1); minTrailing = getInt(args, "minTrailing", 2); maxFractionAsterisk = getFloat(args, "maxFractionAsterisk", 0.0f); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } } /** Default ctor for compatibility with SPI */ public ReversedWildcardFilterFactory() { throw defaultCtorException(); } @Override public TokenStream create(TokenStream input) { return new ReversedWildcardFilter(input, withOriginal, markerChar); } /** * This method encapsulates the logic that determines whether a query token should be reversed in * order to use the reversed terms in the index. * * @param token input token. * @return true if input token should be reversed, false otherwise. */ public boolean shouldReverse(String token) { int posQ = token.indexOf('?'); int posA = token.indexOf('*'); if (posQ == -1 && posA == -1) { // not a wildcard query return false; } int pos; int lastPos; int len = token.length(); lastPos = token.lastIndexOf('?'); pos = token.lastIndexOf('*'); if (pos > lastPos) lastPos = pos; if (posQ != -1) { pos = posQ; if (posA != -1) { pos = Math.min(posQ, posA); } } else { pos = posA; } if (len - lastPos < minTrailing) { // too few trailing chars return false; } if (posQ != -1 && posQ < maxPosQuestion) { // leading '?' return true; } if (posA != -1 && posA < maxPosAsterisk) { // leading '*' return true; } // '*' in the leading part if (maxFractionAsterisk > 0.0f && pos < (float) token.length() * maxFractionAsterisk) { return true; } return false; } public char getMarkerChar() { return markerChar; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy