All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.solr.analysis.ReversedWildcardFilterFactory Maven / Gradle / Ivy

The newest version!
package org.apache.solr.analysis;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;

/**
 * Factory for {@link ReversedWildcardFilter}-s. When this factory is
 * added to an analysis chain, it will be used both for filtering the
 * tokens during indexing, and to determine the query processing of
 * this field during search.
 * 

This class supports the following init arguments: *

    *
  • withOriginal - if true, then produce both original and reversed tokens at * the same positions. If false, then produce only reversed tokens.
  • *
  • maxPosAsterisk - maximum position (1-based) of the asterisk wildcard * ('*') that triggers the reversal of query term. Asterisk that occurs at * positions higher than this value will not cause the reversal of query term. * Defaults to 2, meaning that asterisks on positions 1 and 2 will cause * a reversal.
  • *
  • maxPosQuestion - maximum position (1-based) of the question * mark wildcard ('?') that triggers the reversal of query term. Defaults to 1. * Set this to 0, and maxPosAsterisk to 1 to reverse only * pure suffix queries (i.e. ones with a single leading asterisk).
  • *
  • maxFractionAsterisk - additional parameter that * triggers the reversal if asterisk ('*') position is less than this * fraction of the query token length. Defaults to 0.0f (disabled).
  • *
  • minTrailing - minimum number of trailing characters in query * token after the last wildcard character. For good performance this should be * set to a value larger than 1. Defaults to 2. *
* Note 1: This filter always reverses input tokens during indexing. * Note 2: Query tokens without wildcard characters will never be reversed. */ public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory { private char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER; private boolean withOriginal; private int maxPosAsterisk; private int maxPosQuestion; private int minTrailing; private float maxFractionAsterisk; @Override public void init(Map args) { super.init(args); withOriginal = getBoolean("withOriginal", true); maxPosAsterisk = getInt("maxPosAsterisk", 2); maxPosQuestion = getInt("maxPosQuestion", 1); minTrailing = getInt("minTrailing", 2); maxFractionAsterisk = getFloat("maxFractionAsterisk", 0.0f); } public TokenStream create(TokenStream input) { return new ReversedWildcardFilter(input, withOriginal, markerChar); } /** * This method encapsulates the logic that determines whether * a query token should be reversed in order to use the * reversed terms in the index. * @param token input token. * @return true if input token should be reversed, false otherwise. */ public boolean shouldReverse(String token) { int posQ = token.indexOf('?'); int posA = token.indexOf('*'); if (posQ == -1 && posA == -1) { // not a wildcard query return false; } int pos; int lastPos; int len = token.length(); lastPos = token.lastIndexOf('?'); pos = token.lastIndexOf('*'); if (pos > lastPos) lastPos = pos; if (posQ != -1) { pos = posQ; if (posA != -1) { pos = Math.min(posQ, posA); } } else { pos = posA; } if (len - lastPos < minTrailing) { // too few trailing chars return false; } if (posQ != -1 && posQ < maxPosQuestion) { // leading '?' 
return true; } if (posA != -1 && posA < maxPosAsterisk) { // leading '*' return true; } // '*' in the leading part if (maxFractionAsterisk > 0.0f && pos < (float)token.length() * maxFractionAsterisk) { return true; } return false; } public char getMarkerChar() { return markerChar; } protected float getFloat(String name, float defValue) { String val = args.get(name); if (val == null) { return defValue; } else { return Float.parseFloat(val); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy