All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.search.similarities.RandomSimilarity Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.similarities;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * Similarity implementation that randomizes Similarity implementations
 * per-field.
 * 

* The choices are 'sticky', so the selected algorithm is always used * for the same field. */ public class RandomSimilarity extends PerFieldSimilarityWrapper { final ClassicSimilarity defaultSim = new ClassicSimilarity(); final List knownSims; Map previousMappings = new HashMap<>(); final int perFieldSeed; final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord final boolean shouldQueryNorm; public RandomSimilarity(Random random) { perFieldSeed = random.nextInt(); coordType = random.nextInt(3); shouldQueryNorm = random.nextBoolean(); knownSims = new ArrayList<>(allSims); Collections.shuffle(knownSims, random); } @Override public float coord(int overlap, int maxOverlap) { if (coordType == 0) { return 1.0f; } else if (coordType == 1) { return defaultSim.coord(overlap, maxOverlap); } else { return overlap / ((float)maxOverlap + 1); } } @Override public float queryNorm(float sumOfSquaredWeights) { if (shouldQueryNorm) { return defaultSim.queryNorm(sumOfSquaredWeights); } else { return 1.0f; } } @Override public synchronized Similarity get(String field) { assert field != null; Similarity sim = previousMappings.get(field); if (sim == null) { sim = knownSims.get(Math.max(0, Math.abs(perFieldSeed ^ field.hashCode())) % knownSims.size()); previousMappings.put(field, sim); } return sim; } // all the similarities that we rotate through /** The DFR basic models to test. */ static BasicModel[] BASIC_MODELS = { /* TODO: enable new BasicModelBE(), */ /* TODO: enable new BasicModelD(), */ new BasicModelG(), new BasicModelIF(), new BasicModelIn(), new BasicModelIne(), /* TODO: enable new BasicModelP() */ }; /** The DFR aftereffects to test. */ static AfterEffect[] AFTER_EFFECTS = { new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect() }; /** The DFR normalizations to test. 
*/ static Normalization[] NORMALIZATIONS = { new NormalizationH1(), new NormalizationH2(), new NormalizationH3(), new NormalizationZ() // TODO: if we enable NoNormalization, we have to deal with // a couple tests (e.g. TestDocBoost, TestSort) that expect length normalization // new Normalization.NoNormalization() }; /** The distributions for IB. */ static Distribution[] DISTRIBUTIONS = { new DistributionLL(), new DistributionSPL() }; /** Lambdas for IB. */ static Lambda[] LAMBDAS = { new LambdaDF(), new LambdaTTF() }; /** Independence measures for DFI */ static Independence[] INDEPENDENCE_MEASURES = { new IndependenceStandardized(), new IndependenceSaturated(), new IndependenceChiSquared() }; static List allSims; static { allSims = new ArrayList<>(); allSims.add(new ClassicSimilarity()); allSims.add(new BM25Similarity()); for (BasicModel basicModel : BASIC_MODELS) { for (AfterEffect afterEffect : AFTER_EFFECTS) { for (Normalization normalization : NORMALIZATIONS) { allSims.add(new DFRSimilarity(basicModel, afterEffect, normalization)); } } } for (Distribution distribution : DISTRIBUTIONS) { for (Lambda lambda : LAMBDAS) { for (Normalization normalization : NORMALIZATIONS) { allSims.add(new IBSimilarity(distribution, lambda, normalization)); } } } /* TODO: enable Dirichlet allSims.add(new LMDirichletSimilarity()); */ allSims.add(new LMJelinekMercerSimilarity(0.1f)); allSims.add(new LMJelinekMercerSimilarity(0.7f)); for (Independence independence : INDEPENDENCE_MEASURES) { allSims.add(new DFISimilarity(independence)); } } @Override public synchronized String toString() { final String coordMethod; if (coordType == 0) { coordMethod = "no"; } else if (coordType == 1) { coordMethod = "yes"; } else { coordMethod = "crazy"; } return "RandomSimilarity(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy