All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.search.similarities.SweetSpotSimilarityFactory Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.similarities;

import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;

import org.apache.lucene.misc.SweetSpotSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;

/**
 * Factory for {@link SweetSpotSimilarity}.
 *
 * 

SweetSpotSimilarity is an extension of {@link ClassicSimilarity} that provides * additional tuning options for specifying the "sweetspot" of optimal tf and * lengthNorm values in the source data. * *

In addition to the discountOverlaps init param supported by {@link * ClassicSimilarityFactory} The following sets of init params are supported by this factory: * *

    *
  • Length Norm Settings: *
      *
    • lengthNormMin (int) *
    • lengthNormMax (int) *
    • lengthNormSteepness (float) *
    *
  • Baseline TF Settings: *
      *
    • baselineTfBase (float) *
    • baselineTfMin (float) *
    *
  • Hyperbolic TF Settings: *
      *
    • hyperbolicTfMin (float) *
    • hyperbolicTfMax (float) *
    • hyperbolicTfBase (double) *
    • hyperbolicTfOffset (float) *
    *
* *

Note: * *

    *
  • If any individual settings from one of the above mentioned sets are specified, then all * settings from that set must be specified. *
  • If Baseline TF settings are specified, then Hyperbolic TF settings are not permitted, and * vice versa. (The settings specified will determine whether {@link * SweetSpotSimilarity#baselineTf} or {@link SweetSpotSimilarity#hyperbolicTf} will be used. *
* *

Example usage... * *

 * <!-- using baseline TF -->
 * <fieldType name="text_baseline" class="solr.TextField"
 *            indexed="true" stored="false">
 *   <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
 *   <similarity class="solr.SweetSpotSimilarityFactory">
 *     <!-- TF -->
 *     <float name="baselineTfMin">6.0</float>
 *     <float name="baselineTfBase">1.5</float>
 *     <!-- plateau norm -->
 *     <int name="lengthNormMin">3</int>
 *     <int name="lengthNormMax">5</int>
 *     <float name="lengthNormSteepness">0.5</float>
 *   </similarity>
 * </fieldType>
 *
 * <!-- using hyperbolic TF -->
 * <fieldType name="text_hyperbolic" class="solr.TextField"
 *            indexed="true" stored="false" >
 *   <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
 *   <similarity class="solr.SweetSpotSimilarityFactory">
 *     <float name="hyperbolicTfMin">3.3</float>
 *     <float name="hyperbolicTfMax">7.7</float>
 *     <double name="hyperbolicTfBase">2.718281828459045</double> <!-- e -->
 *     <float name="hyperbolicTfOffset">5.0</float>
 *     <!-- plateau norm, shallower slope -->
 *     <int name="lengthNormMin">1</int>
 *     <int name="lengthNormMax">5</int>
 *     <float name="lengthNormSteepness">0.2</float>
 *   </similarity>
 * </fieldType>
 * 
* * @see SweetSpotSimilarity The javadocs for the individual methods in SweetSpotSimilarity * for SVG diagrams showing how the each function behaves with various settings/inputs. */ public class SweetSpotSimilarityFactory extends ClassicSimilarityFactory { private SweetSpotSimilarity sim = null; @Override public void init(SolrParams params) { super.init(params); Integer ln_min = params.getInt("lengthNormMin"); Integer ln_max = params.getInt("lengthNormMax"); Float ln_steep = params.getFloat("lengthNormSteepness"); if (!allOrNoneNull(ln_min, ln_max, ln_steep)) { throw new SolrException( SERVER_ERROR, "Overriding default lengthNorm settings requires all to be specified: lengthNormMin, lengthNormMax, lengthNormSteepness"); } Float hyper_min = params.getFloat("hyperbolicTfMin"); Float hyper_max = params.getFloat("hyperbolicTfMax"); Double hyper_base = params.getDouble("hyperbolicTfBase"); Float hyper_offset = params.getFloat("hyperbolicTfOffset"); if (!allOrNoneNull(hyper_min, hyper_max, hyper_base, hyper_offset)) { throw new SolrException( SERVER_ERROR, "Overriding default hyperbolicTf settings requires all to be specified: hyperbolicTfMin, hyperbolicTfMax, hyperbolicTfBase, hyperbolicTfOffset"); } Float baseline_base = params.getFloat("baselineTfBase"); Float baseline_min = params.getFloat("baselineTfMin"); if (!allOrNoneNull(baseline_min, baseline_base)) { throw new SolrException( SERVER_ERROR, "Overriding default baselineTf settings requires all to be specified: baselineTfBase, baselineTfMin"); } // sanity check that they aren't trying to use two diff tf impls if ((null != hyper_min) && (null != baseline_min)) { throw new SolrException( SERVER_ERROR, "Can not mix hyperbolicTf settings with baselineTf settings"); } // pick Similarity impl based on whether hyper tf settings are set sim = (null != hyper_min) ? new HyperbolicSweetSpotSimilarity() : new SweetSpotSimilarity(); if (null != ln_min) { // overlaps already handled by super factory sim.setLengthNormFactors(ln_min, ln_max, ln_steep, this.discountOverlaps); } if (null != hyper_min) { sim.setHyperbolicTfFactors(hyper_min, hyper_max, hyper_base, hyper_offset); } if (null != baseline_min) { sim.setBaselineTfFactors(baseline_base, baseline_min); } } @Override public Similarity getSimilarity() { assert sim != null : "SweetSpotSimilarityFactory was not initialized"; return sim; } /** * Returns true if either: all of the specified arguments are null; or none of the specified * arguments are null */ private static boolean allOrNoneNull(Object... args) { int nulls = 0; int objs = 0; for (Object o : args) { objs++; if (null == o) nulls++; } return (0 == nulls || nulls == objs); } private static final class HyperbolicSweetSpotSimilarity extends SweetSpotSimilarity { @Override public float tf(float freq) { return hyperbolicTf(freq); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy