All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.search.similarities.DFRSimilarityFactory Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.similarities;

import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Normalization.NoNormalization;
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;

/**
 * Factory for {@link DFRSimilarity}
 *
 * 

You must specify the implementations for all three components of DFR (strings). In general the * models are parameter-free, but two of the normalizations take floating point parameters (see * below): * *

    *
  1. {@link BasicModel basicModel}: Basic model of information content: *
      *
    • {@link BasicModelG G}: Geometric approximation of Bose-Einstein *
    • {@link BasicModelIn I(n)}: Inverse document frequency *
    • {@link BasicModelIne I(ne)}: Inverse expected document frequency [mixture of Poisson * and IDF] *
    • {@link BasicModelIF I(F)}: Inverse term frequency [approximation of I(ne)] *
    *
  2. {@link AfterEffect afterEffect}: First normalization of information gain: *
      *
    • {@link AfterEffectL L}: Laplace's law of succession *
    • {@link AfterEffectB B}: Ratio of two Bernoulli processes *
    *
  3. {@link Normalization normalization}: Second (length) normalization: *
      *
    • {@link NormalizationH1 H1}: Uniform distribution of term frequency *
        *
      • parameter c (float): hyper-parameter that controls the term frequency * normalization with respect to the document length. The default is 1 * *
      *
    • {@link NormalizationH2 H2}: term frequency density inversely related to length *
        *
      • parameter c (float): hyper-parameter that controls the term frequency * normalization with respect to the document length. The default is 1 * *
      *
    • {@link NormalizationH3 H3}: term frequency normalization provided by Dirichlet prior *
        *
      • parameter mu (float): smoothing parameter μ. The default is 800 *
      *
    • {@link NormalizationZ Z}: term frequency normalization provided by a Zipfian relation *
        *
      • parameter z (float): represents A/(A+1) where A measures the * specificity of the language. The default is 0.3 *
      *
    • {@link NoNormalization none}: no second normalization *
    *
* *

Optional settings: * *

    *
  • discountOverlaps (bool): Sets {@link DFRSimilarity#setDiscountOverlaps(boolean)} *
* * @lucene.experimental */ public class DFRSimilarityFactory extends SimilarityFactory { private boolean discountOverlaps; private BasicModel basicModel; private AfterEffect afterEffect; private Normalization normalization; @Override public void init(SolrParams params) { super.init(params); discountOverlaps = params.getBool("discountOverlaps", true); basicModel = parseBasicModel(params.get("basicModel")); afterEffect = parseAfterEffect(params.get("afterEffect")); normalization = parseNormalization( params.get("normalization"), params.get("c"), params.get("mu"), params.get("z")); } private BasicModel parseBasicModel(String expr) { if ("G".equals(expr)) { return new BasicModelG(); } else if ("I(F)".equals(expr)) { return new BasicModelIF(); } else if ("I(n)".equals(expr)) { return new BasicModelIn(); } else if ("I(ne)".equals(expr)) { return new BasicModelIne(); } else { throw new RuntimeException("Invalid basicModel: " + expr); } } private AfterEffect parseAfterEffect(String expr) { if ("B".equals(expr)) { return new AfterEffectB(); } else if ("L".equals(expr)) { return new AfterEffectL(); } else { throw new RuntimeException("Invalid afterEffect: " + expr); } } // also used by IBSimilarityFactory static Normalization parseNormalization(String expr, String c, String mu, String z) { if (mu != null && !"H3".equals(expr)) { throw new RuntimeException("parameter mu only makes sense for normalization H3"); } if (z != null && !"Z".equals(expr)) { throw new RuntimeException("parameter z only makes sense for normalization Z"); } if (c != null && !("H1".equals(expr) || "H2".equals(expr))) { throw new RuntimeException("parameter c only makese sense for normalizations H1 and H2"); } if ("H1".equals(expr)) { return (c != null) ? new NormalizationH1(Float.parseFloat(c)) : new NormalizationH1(); } else if ("H2".equals(expr)) { return (c != null) ? new NormalizationH2(Float.parseFloat(c)) : new NormalizationH2(); } else if ("H3".equals(expr)) { return (mu != null) ? new NormalizationH3(Float.parseFloat(mu)) : new NormalizationH3(); } else if ("Z".equals(expr)) { return (z != null) ? new NormalizationZ(Float.parseFloat(z)) : new NormalizationZ(); } else if ("none".equals(expr)) { return new Normalization.NoNormalization(); } else { throw new RuntimeException("Invalid normalization: " + expr); } } @Override public Similarity getSimilarity() { DFRSimilarity sim = new DFRSimilarity(basicModel, afterEffect, normalization); sim.setDiscountOverlaps(discountOverlaps); return sim; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy