
// org.elasticsearch.index.similarity.SimilarityProviders
// Elasticsearch - Open Source, Distributed, RESTful Search Engine
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelBE;
import org.apache.lucene.search.similarities.BasicModelD;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Distribution;
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.Independence;
import org.apache.lucene.search.similarities.IndependenceChiSquared;
import org.apache.lucene.search.similarities.IndependenceSaturated;
import org.apache.lucene.search.similarities.IndependenceStandardized;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.Lambda;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static java.util.Collections.unmodifiableMap;
/**
 * Static factory methods that build Lucene similarity instances from
 * index-level {@link Settings}.
 *
 * <p>Named components (basic models, after effects, independence measures,
 * distributions and lambdas) are resolved through immutable lookup tables
 * keyed by the setting value.
 */
final class SimilarityProviders {

    private SimilarityProviders() {} // no instantiation

    private static final DeprecationLogger DEPRECATION_LOGGER =
            new DeprecationLogger(LogManager.getLogger(SimilarityProviders.class));

    /** Name of the setting that toggles discounting of overlap tokens. */
    static final String DISCOUNT_OVERLAPS = "discount_overlaps";

    /** Supported values of the {@code basic_model} setting. */
    private static final Map<String, BasicModel> BASIC_MODELS;
    /** Supported values of the {@code after_effect} setting. */
    private static final Map<String, AfterEffect> AFTER_EFFECTS;

    static {
        Map<String, BasicModel> models = new HashMap<>();
        models.put("be", new BasicModelBE());
        models.put("d", new BasicModelD());
        models.put("g", new BasicModelG());
        models.put("if", new BasicModelIF());
        models.put("in", new BasicModelIn());
        models.put("ine", new BasicModelIne());
        models.put("p", new BasicModelP());
        BASIC_MODELS = unmodifiableMap(models);

        Map<String, AfterEffect> effects = new HashMap<>();
        effects.put("no", new AfterEffect.NoAfterEffect());
        effects.put("b", new AfterEffectB());
        effects.put("l", new AfterEffectL());
        AFTER_EFFECTS = unmodifiableMap(effects);
    }

    /** Supported values of the {@code independence_measure} setting. */
    private static final Map<String, Independence> INDEPENDENCE_MEASURES;

    static {
        Map<String, Independence> measures = new HashMap<>();
        measures.put("standardized", new IndependenceStandardized());
        measures.put("saturated", new IndependenceSaturated());
        measures.put("chisquared", new IndependenceChiSquared());
        INDEPENDENCE_MEASURES = unmodifiableMap(measures);
    }

    /** Supported values of the {@code distribution} setting. */
    private static final Map<String, Distribution> DISTRIBUTIONS;
    /** Supported values of the {@code lambda} setting (IB similarity). */
    private static final Map<String, Lambda> LAMBDAS;

    static {
        Map<String, Distribution> distributions = new HashMap<>();
        distributions.put("ll", new DistributionLL());
        distributions.put("spl", new DistributionSPL());
        DISTRIBUTIONS = unmodifiableMap(distributions);

        Map<String, Lambda> lambdas = new HashMap<>();
        lambdas.put("df", new LambdaDF());
        lambdas.put("ttf", new LambdaTTF());
        LAMBDAS = unmodifiableMap(lambdas);
    }

    /**
     * Parses the given Settings and creates the appropriate {@link BasicModel}
     *
     * @param settings Settings to parse
     * @return {@link BasicModel} referred to in the Settings
     * @throws IllegalArgumentException if {@code basic_model} is missing or not a supported name
     */
    private static BasicModel parseBasicModel(Settings settings) {
        String basicModel = settings.get("basic_model");
        BasicModel model = BASIC_MODELS.get(basicModel);
        if (model == null) {
            throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "], expected one of " + BASIC_MODELS.keySet());
        }
        return model;
    }

    /**
     * Parses the given Settings and creates the appropriate {@link AfterEffect}
     *
     * @param settings Settings to parse
     * @return {@link AfterEffect} referred to in the Settings
     * @throws IllegalArgumentException if {@code after_effect} is missing or not a supported name
     */
    private static AfterEffect parseAfterEffect(Settings settings) {
        String afterEffect = settings.get("after_effect");
        AfterEffect effect = AFTER_EFFECTS.get(afterEffect);
        if (effect == null) {
            throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "], expected one of " + AFTER_EFFECTS.keySet());
        }
        return effect;
    }

    /**
     * Parses the given Settings and creates the appropriate {@link Normalization}
     *
     * <p>Each parameterised normalization reads its own sub-setting and falls
     * back to a default when it is absent: {@code normalization.h1.c} (1),
     * {@code normalization.h2.c} (1), {@code normalization.h3.c} (800) and
     * {@code normalization.z.z} (0.30).
     *
     * @param settings Settings to parse
     * @return {@link Normalization} referred to in the Settings
     * @throws IllegalArgumentException if {@code normalization} is missing or not a supported name
     */
    private static Normalization parseNormalization(Settings settings) {
        String normalization = settings.get("normalization");
        if ("no".equals(normalization)) {
            return new Normalization.NoNormalization();
        } else if ("h1".equals(normalization)) {
            float c = settings.getAsFloat("normalization.h1.c", 1f);
            return new NormalizationH1(c);
        } else if ("h2".equals(normalization)) {
            float c = settings.getAsFloat("normalization.h2.c", 1f);
            return new NormalizationH2(c);
        } else if ("h3".equals(normalization)) {
            float c = settings.getAsFloat("normalization.h3.c", 800f);
            return new NormalizationH3(c);
        } else if ("z".equals(normalization)) {
            float z = settings.getAsFloat("normalization.z.z", 0.30f);
            return new NormalizationZ(z);
        } else {
            throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]");
        }
    }

    /**
     * Parses the given Settings and creates the appropriate {@link Independence}
     *
     * @param settings Settings to parse
     * @return {@link Independence} referred to in the Settings
     * @throws IllegalArgumentException if {@code independence_measure} is missing or not a supported name
     */
    private static Independence parseIndependence(Settings settings) {
        String name = settings.get("independence_measure");
        Independence measure = INDEPENDENCE_MEASURES.get(name);
        if (measure == null) {
            throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "], expected one of "
                    + INDEPENDENCE_MEASURES.keySet());
        }
        return measure;
    }

    /**
     * Parses the given Settings and creates the appropriate {@link Distribution}
     *
     * @param settings Settings to parse
     * @return {@link Distribution} referred to in the Settings
     * @throws IllegalArgumentException if {@code distribution} is missing or not a supported name
     */
    private static Distribution parseDistribution(Settings settings) {
        String rawDistribution = settings.get("distribution");
        Distribution distribution = DISTRIBUTIONS.get(rawDistribution);
        if (distribution == null) {
            throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]");
        }
        return distribution;
    }

    /**
     * Parses the given Settings and creates the appropriate {@link Lambda}
     *
     * @param settings Settings to parse
     * @return {@link Lambda} referred to in the Settings
     * @throws IllegalArgumentException if {@code lambda} is missing or not a supported name
     */
    private static Lambda parseLambda(Settings settings) {
        String rawLambda = settings.get("lambda");
        Lambda lambda = LAMBDAS.get(rawLambda);
        if (lambda == null) {
            throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]");
        }
        return lambda;
    }

    /**
     * Checks that {@code settings} only contains keys a similarity type understands.
     *
     * <p>Despite the name, nothing is thrown: any key that is neither listed in
     * {@code supportedSettings} nor equal to {@code "type"} is reported through
     * the deprecation logger.
     *
     * @param type              similarity type name, used only in the log message
     * @param version           index creation version; currently not consulted by this method
     * @param settings          the similarity settings to check
     * @param supportedSettings the setting keys accepted by this similarity type
     */
    static void assertSettingsIsSubsetOf(String type, Version version, Settings settings, String... supportedSettings) {
        Set<String> unknownSettings = new HashSet<>(settings.keySet());
        unknownSettings.removeAll(Arrays.asList(supportedSettings));
        unknownSettings.remove("type"); // used to figure out which sim this is
        if (unknownSettings.isEmpty() == false) {
            DEPRECATION_LOGGER.deprecated("Unknown settings for similarity of type [" + type + "]: " + unknownSettings);
        }
    }

    /**
     * Creates a {@link BM25Similarity} from {@code k1} (default 1.2), {@code b}
     * (default 0.75) and {@code discount_overlaps} (default {@code true}).
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     */
    public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);

        float k1 = settings.getAsFloat("k1", 1.2f);
        float b = settings.getAsFloat("b", 0.75f);
        boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

        BM25Similarity similarity = new BM25Similarity(k1, b);
        similarity.setDiscountOverlaps(discountOverlaps);
        return similarity;
    }

    /**
     * Creates a {@link BooleanSimilarity}; this type accepts no settings of its own.
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return a new similarity instance
     */
    public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings);
        return new BooleanSimilarity();
    }

    /**
     * Creates a {@link ClassicSimilarity} honouring {@code discount_overlaps}
     * (default {@code true}).
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     */
    public static ClassicSimilarity createClassicSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("classic", indexCreatedVersion, settings, DISCOUNT_OVERLAPS);

        boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

        ClassicSimilarity similarity = new ClassicSimilarity();
        similarity.setDiscountOverlaps(discountOverlaps);
        return similarity;
    }

    /**
     * Creates a {@link DFRSimilarity} from the {@code basic_model},
     * {@code after_effect} and {@code normalization} settings.
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     * @throws IllegalArgumentException if any of the three components is missing or unsupported
     */
    public static DFRSimilarity createDfrSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("DFR", indexCreatedVersion, settings,
                "basic_model", "after_effect", "normalization",
                "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");

        return new DFRSimilarity(
                parseBasicModel(settings),
                parseAfterEffect(settings),
                parseNormalization(settings));
    }

    /**
     * Creates a {@link DFISimilarity} from the {@code independence_measure} setting.
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     * @throws IllegalArgumentException if the independence measure is missing or unsupported
     */
    public static DFISimilarity createDfiSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("DFI", indexCreatedVersion, settings, "independence_measure");
        return new DFISimilarity(parseIndependence(settings));
    }

    /**
     * Creates an {@link IBSimilarity} from the {@code distribution},
     * {@code lambda} and {@code normalization} settings.
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     * @throws IllegalArgumentException if any of the three components is missing or unsupported
     */
    public static IBSimilarity createIBSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("IB", indexCreatedVersion, settings, "distribution", "lambda", "normalization",
                "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");

        return new IBSimilarity(
                parseDistribution(settings),
                parseLambda(settings),
                parseNormalization(settings));
    }

    /**
     * Creates an {@link LMDirichletSimilarity} using the {@code mu} setting (default 2000).
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     */
    public static LMDirichletSimilarity createLMDirichletSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("LMDirichlet", indexCreatedVersion, settings, "mu");

        float mu = settings.getAsFloat("mu", 2000f);
        return new LMDirichletSimilarity(mu);
    }

    /**
     * Creates an {@link LMJelinekMercerSimilarity} using the {@code lambda}
     * setting (default 0.1). Here {@code lambda} is a float, unlike the named
     * {@code lambda} component of the IB similarity.
     *
     * @param settings            the similarity settings
     * @param indexCreatedVersion version the index was created with
     * @return the configured similarity
     */
    public static LMJelinekMercerSimilarity createLMJelinekMercerSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("LMJelinekMercer", indexCreatedVersion, settings, "lambda");

        float lambda = settings.getAsFloat("lambda", 0.1f);
        return new LMJelinekMercerSimilarity(lambda);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy