org.apache.solr.search.similarities.SweetSpotSimilarityFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.similarities;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import org.apache.lucene.misc.SweetSpotSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
/**
* Factory for {@link SweetSpotSimilarity}.
*
* <p><code>SweetSpotSimilarity</code> is an extension of {@link ClassicSimilarity} that provides
* additional tuning options for specifying the "sweetspot" of optimal <code>tf</code> and
* <code>lengthNorm</code> values in the source data.
*
*
* <p>In addition to the <code>discountOverlaps</code> init param supported by {@link
* ClassicSimilarityFactory}, the following sets of init params are supported by this factory:
*
*
* - Length Norm Settings:
*
* lengthNormMin
(int)
* lengthNormMax
(int)
* lengthNormSteepness
(float)
*
* - Baseline TF Settings:
*
* baselineTfBase
(float)
* baselineTfMin
(float)
*
* - Hyperbolic TF Settings:
*
* hyperbolicTfMin
(float)
* hyperbolicTfMax
(float)
* hyperbolicTfBase
(double)
* hyperbolicTfOffset
(float)
*
*
*
* Note:
*
*
* - If any individual settings from one of the above mentioned sets are specified, then all
* settings from that set must be specified.
*
* - If Baseline TF settings are specified, then Hyperbolic TF settings are not permitted, and
* vice versa. (The settings specified will determine whether {@link
* SweetSpotSimilarity#baselineTf} or {@link SweetSpotSimilarity#hyperbolicTf} will be used.)
*
*
* Example usage...
*
*
* <!-- using baseline TF -->
* <fieldType name="text_baseline" class="solr.TextField"
* indexed="true" stored="false">
* <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
* <similarity class="solr.SweetSpotSimilarityFactory">
* <!-- TF -->
* <float name="baselineTfMin">6.0</float>
* <float name="baselineTfBase">1.5</float>
* <!-- plateau norm -->
* <int name="lengthNormMin">3</int>
* <int name="lengthNormMax">5</int>
* <float name="lengthNormSteepness">0.5</float>
* </similarity>
* </fieldType>
*
* <!-- using hyperbolic TF -->
* <fieldType name="text_hyperbolic" class="solr.TextField"
* indexed="true" stored="false" >
* <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
* <similarity class="solr.SweetSpotSimilarityFactory">
* <float name="hyperbolicTfMin">3.3</float>
* <float name="hyperbolicTfMax">7.7</float>
* <double name="hyperbolicTfBase">2.718281828459045</double> <!-- e -->
* <float name="hyperbolicTfOffset">5.0</float>
* <!-- plateau norm, shallower slope -->
* <int name="lengthNormMin">1</int>
* <int name="lengthNormMax">5</int>
* <float name="lengthNormSteepness">0.2</float>
* </similarity>
* </fieldType>
*
*
* @see SweetSpotSimilarity The javadocs for the individual methods in
* <code>SweetSpotSimilarity</code> for SVG diagrams showing how each function behaves with
* various settings/inputs.
*/
public class SweetSpotSimilarityFactory extends ClassicSimilarityFactory {

  /** Similarity instance configured by {@link #init}; null until {@code init} assigns it. */
  private SweetSpotSimilarity sim = null;

  /**
   * Reads the three optional groups of init params (length norm, hyperbolic TF, baseline TF),
   * validates them, and builds the similarity returned by {@link #getSimilarity()}.
   *
   * <p>Each group must be given either completely or not at all, and the hyperbolic TF and
   * baseline TF groups may not both be present.
   *
   * @param params init params for this factory; also passed to the super factory
   * @throws SolrException (SERVER_ERROR) if a group is only partially specified, or if both TF
   *     groups are specified
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);

    // Length norm group.
    final Integer lengthNormMin = params.getInt("lengthNormMin");
    final Integer lengthNormMax = params.getInt("lengthNormMax");
    final Float lengthNormSteepness = params.getFloat("lengthNormSteepness");
    requireAllOrNone(
        "Overriding default lengthNorm settings requires all to be specified: lengthNormMin, lengthNormMax, lengthNormSteepness",
        lengthNormMin,
        lengthNormMax,
        lengthNormSteepness);

    // Hyperbolic TF group.
    final Float hyperbolicMin = params.getFloat("hyperbolicTfMin");
    final Float hyperbolicMax = params.getFloat("hyperbolicTfMax");
    final Double hyperbolicBase = params.getDouble("hyperbolicTfBase");
    final Float hyperbolicOffset = params.getFloat("hyperbolicTfOffset");
    requireAllOrNone(
        "Overriding default hyperbolicTf settings requires all to be specified: hyperbolicTfMin, hyperbolicTfMax, hyperbolicTfBase, hyperbolicTfOffset",
        hyperbolicMin,
        hyperbolicMax,
        hyperbolicBase,
        hyperbolicOffset);

    // Baseline TF group.
    final Float baselineBase = params.getFloat("baselineTfBase");
    final Float baselineMin = params.getFloat("baselineTfMin");
    requireAllOrNone(
        "Overriding default baselineTf settings requires all to be specified: baselineTfBase, baselineTfMin",
        baselineMin,
        baselineBase);

    // Every group has passed the all-or-none check, so a single member of a group is enough to
    // tell whether that whole group was supplied.
    final boolean lengthNormGiven = lengthNormMin != null;
    final boolean hyperbolicGiven = hyperbolicMin != null;
    final boolean baselineGiven = baselineMin != null;

    // The two TF implementations are mutually exclusive.
    if (hyperbolicGiven && baselineGiven) {
      throw new SolrException(
          SERVER_ERROR, "Can not mix hyperbolicTf settings with baselineTf settings");
    }

    // Hyperbolic settings select the subclass whose tf() delegates to hyperbolicTf().
    if (hyperbolicGiven) {
      sim = new HyperbolicSweetSpotSimilarity();
    } else {
      sim = new SweetSpotSimilarity();
    }

    if (lengthNormGiven) {
      // discountOverlaps was already resolved by the super factory's init
      sim.setLengthNormFactors(
          lengthNormMin, lengthNormMax, lengthNormSteepness, this.discountOverlaps);
    }
    if (hyperbolicGiven) {
      sim.setHyperbolicTfFactors(hyperbolicMin, hyperbolicMax, hyperbolicBase, hyperbolicOffset);
    }
    if (baselineGiven) {
      sim.setBaselineTfFactors(baselineBase, baselineMin);
    }
  }

  /**
   * Returns the similarity built by {@link #init}.
   *
   * <p>Initialization is only checked with an {@code assert}; when assertions are disabled and
   * {@code init} has not run, this returns null.
   */
  @Override
  public Similarity getSimilarity() {
    final SweetSpotSimilarity configured = sim;
    assert null != configured : "SweetSpotSimilarityFactory was not initialized";
    return configured;
  }

  /**
   * Throws a SERVER_ERROR {@link SolrException} carrying {@code message} unless the given values
   * are either all null or all non-null.
   */
  private static void requireAllOrNone(String message, Object... values) {
    if (allOrNoneNull(values)) {
      return;
    }
    throw new SolrException(SERVER_ERROR, message);
  }

  /**
   * Returns true if either: all of the specified arguments are null; or none of the specified
   * arguments are null
   */
  private static boolean allOrNoneNull(Object... args) {
    int nullCount = 0;
    for (Object arg : args) {
      if (arg == null) {
        nullCount++;
      }
    }
    return nullCount == 0 || nullCount == args.length;
  }

  /** {@link SweetSpotSimilarity} variant whose {@code tf} is computed by {@code hyperbolicTf}. */
  private static final class HyperbolicSweetSpotSimilarity extends SweetSpotSimilarity {
    @Override
    public float tf(float freq) {
      return hyperbolicTf(freq);
    }
  }
}