All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.facet.search.sampling.SamplingAccumulator Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
package org.apache.lucene.facet.search.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FloatArrayAllocator;
import org.apache.lucene.facet.search.IntArrayAllocator;
import org.apache.lucene.facet.search.SamplingWrapper;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Facets accumulation with sampling.
*

* Note two major differences between this class and {@link SamplingWrapper}: *

    *
  1. Latter can wrap any other {@link FacetsAccumulator} while this class * directly extends {@link StandardFacetsAccumulator}.
  2. *
  3. This class can effectively apply sampling on the complement set of * matching document, thereby working efficiently with the complement * optimization - see {@link FacetsAccumulator#getComplementThreshold()}.
  4. *
*

* Note: Sampling accumulation (Accumulation over a sampled-set of the results), * does not guarantee accurate values for * {@link FacetResult#getNumValidDescendants()} & * {@link FacetResultNode#getResidue()}. * * @see Sampler * @lucene.experimental */ public class SamplingAccumulator extends StandardFacetsAccumulator { private double samplingRatio = -1d; private final Sampler sampler; /** * Constructor... */ public SamplingAccumulator( Sampler sampler, FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, IntArrayAllocator intArrayAllocator, FloatArrayAllocator floatArrayAllocator) { super(searchParams, indexReader, taxonomyReader, intArrayAllocator, floatArrayAllocator); this.sampler = sampler; } /** * Constructor... */ public SamplingAccumulator( Sampler sampler, FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { super(searchParams, indexReader, taxonomyReader); this.sampler = sampler; } @Override public List accumulate(ScoredDocIDs docids) throws IOException { // first let delegee accumulate without labeling at all (though // currently it doesn't matter because we have to label all returned anyhow) boolean origAllowLabeling = isAllowLabeling(); setAllowLabeling(false); // Replacing the original searchParams with the over-sampled FacetSearchParams original = searchParams; searchParams = sampler.overSampledSearchParams(original); List sampleRes = super.accumulate(docids); setAllowLabeling(origAllowLabeling); List fixedRes = new ArrayList(); for (FacetResult fres : sampleRes) { // for sure fres is not null because this is guaranteed by the delegee. FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler( taxonomyReader); // fix the result of current request sampler.getSampleFixer(indexReader, taxonomyReader, searchParams) .fixResult(docids, fres); fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any // Using the sampler to trim the extra (over-sampled) results fres = sampler.trimResult(fres); // arranging it needs to // final labeling if allowed (because labeling is a costly operation) if (isAllowLabeling()) { frh.labelResult(fres); } fixedRes.add(fres); // add to final results } searchParams = original; // Back to original params return fixedRes; } @Override protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException { SampleResult sampleRes = sampler.getSampleSet(docids); samplingRatio = sampleRes.actualSampleRatio; return sampleRes.docids; } @Override protected double getTotalCountsFactor() { if (samplingRatio<0) { throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked"); } return samplingRatio; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy