All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.request.DocValuesStats Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.request;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.solr.handler.component.StatsField;
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * Computes term stats for docvalues field (single or multivalued).
 * 

* Instead of working on a top-level reader view (binary-search per docid), * it collects per-segment, but maps ordinals to global ordinal space using * MultiDocValues' OrdinalMap. */ public class DocValuesStats { private DocValuesStats() {} public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException { final SchemaField schemaField = statsField.getSchemaField(); assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField"; final String fieldName = schemaField.getName(); final FieldType ft = schemaField.getType(); final StatsValues res = StatsValuesFactory.createStatsValues(statsField); //Initialize facetstats, if facets have been passed in final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length]; int upto = 0; for (String facetField : facet) { SchemaField fsf = searcher.getSchema().getField(facetField); if ( fsf.multiValued()) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField ); } SchemaField facetSchemaField = searcher.getSchema().getField(facetField); facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField); } // TODO: remove multiValuedFieldCache(), check dv type / uninversion type? final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache(); SortedSetDocValues si; // for term lookups only OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones if (multiValued) { si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName); if (si instanceof MultiSortedSetDocValues) { ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping; } } else { SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName); si = single == null ? 
null : DocValues.singleton(single); if (single instanceof MultiDocValues.MultiSortedDocValues) { ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping; } } if (si == null) { si = DocValues.emptySortedSet(); } if (si.getValueCount() >= Integer.MAX_VALUE) { throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms"); } int missingDocCountTotal = 0; final int nTerms = (int) si.getValueCount(); // count collection array only needs to be as big as the number of terms we are // going to collect counts for. final int[] counts = new int[nTerms]; Filter filter = docs.getTopFilter(); List leaves = searcher.getTopReaderContext().leaves(); for (int subIndex = 0; subIndex < leaves.size(); subIndex++) { LeafReaderContext leaf = leaves.get(subIndex); DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs DocIdSetIterator disi = null; if (dis != null) { disi = dis.iterator(); } if (disi != null) { int docBase = leaf.docBase; if (multiValued) { SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName); if (sub == null) { sub = DocValues.emptySortedSet(); } SortedDocValues singleton = DocValues.unwrapSingleton(sub); if (singleton != null) { // some codecs may optimize SORTED_SET storage for single-valued fields missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap); } else { missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } else { SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName); if (sub == null) { sub = DocValues.emptySorted(); } missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } } // add results in index order for (int ord = 0; ord < counts.length; ord++) { int count = counts[ord]; if (count > 0) { final BytesRef value = si.lookupOrd(ord); res.accumulate(value, count); for 
(FieldFacetStats f : facetStats) { f.accumulateTermNum(ord, value); } } } res.addMissing(missingDocCountTotal); if (facetStats.length > 0) { for (FieldFacetStats f : facetStats) { Map facetStatsValues = f.facetStatsValues; f.accumulateMissing(); res.addFacet(f.name, facetStatsValues); } } return res; } /** accumulates per-segment single-valued stats */ static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); int missingDocCount = 0; int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (doc > si.docID()) { si.advance(doc); } if (doc == si.docID()) { int term = si.ordValue(); if (map != null) { term = (int) ordMap.get(term); } counts[term]++; for (FieldFacetStats f : facetStats) { f.facetTermNum(docBase + doc, term); } }else{ for (FieldFacetStats f : facetStats) { f.facetMissingNum(docBase + doc); } missingDocCount++; } } return missingDocCount; } /** accumulates per-segment multi-valued stats */ static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); int missingDocCount = 0; int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (doc > si.docID()) { si.advance(doc); } if (doc == si.docID()) { long ord; while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { int term = (int) ord; if (map != null) { term = (int) ordMap.get(term); } counts[term]++; for (FieldFacetStats f : facetStats) { f.facetTermNum(docBase + doc, term); } } } else { for (FieldFacetStats f : facetStats) { f.facetMissingNum(docBase + doc); } missingDocCount++; } } return missingDocCount; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy