/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.text.ParseException;
import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.facet.SlotAcc.SlotContext;

/**
 * Facets numbers into a hash table. The number is either a raw numeric DocValues value, or a term
 * global ordinal integer. Limitations:
 *
 * <ul>
 *   <li>doesn't handle prefix, but could easily be added
 *   <li>doesn't handle mincount==0 -- you're better off with an array alg
 * </ul>
 */
class FacetFieldProcessorByHashDV extends FacetFieldProcessor {

  // must be a power of two, non-final to support setting by tests
  static int MAXIMUM_STARTING_TABLE_SIZE = 1024;

  /** a hash table with long keys (what we're counting) and integer values (counts) */
  private static class LongCounts {

    static final float LOAD_FACTOR = 0.7f;

    long[] vals;
    // maintain the counts here since we need them to tell if there was actually a value anyway
    long[] counts;
    long[] oldToNewMapping;

    int cardinality;
    int threshold;

    /** sz must be a power of two */
    LongCounts(int sz) {
      vals = new long[sz];
      counts = new long[sz];
      threshold = (int) (sz * LOAD_FACTOR);
    }

    /** Current number of slots in the hash table */
    int numSlots() {
      return vals.length;
    }

    private int hash(long val) {
      // For floats: exponent bits start at bit 23 for single precision, and bit 52 for double
      // precision. Many values will only have significant bits just to the right of that, and the
      // leftmost bits will all be zero.
      // For now, lets just settle to get first 8 significant mantissa bits of double or float in
      // the lowest bits of our hash. The upper bits of our hash will be irrelevant.
      int h = (int) (val + (val >>> 44) + (val >>> 15));
      return h;
    }

    /** returns the slot */
    int add(long val) {
      if (cardinality >= threshold) {
        rehash();
      }

      int h = hash(val);
      for (int slot = h & (vals.length - 1); ; slot = (slot + ((h >> 7) | 1)) & (vals.length - 1)) {
        long count = counts[slot];
        if (count == 0) {
          counts[slot] = 1;
          vals[slot] = val;
          cardinality++;
          return slot;
        } else if (vals[slot] == val) {
          // val is already in the set
          counts[slot] = count + 1;
          return slot;
        }
      }
    }

    protected void rehash() {
      long[] oldVals = vals;
      // after retrieving the count, this array is reused as a mapping to new array
      long[] oldCounts = counts;
      int newCapacity = vals.length << 1;
      vals = new long[newCapacity];
      counts = new long[newCapacity];
      threshold = (int) (newCapacity * LOAD_FACTOR);

      for (int i = 0; i < oldVals.length; i++) {
        long count = oldCounts[i];
        if (count == 0) {
          oldCounts[i] = -1;
          continue;
        }

        long val = oldVals[i];

        int h = hash(val);
        int slot = h & (vals.length - 1);
        while (counts[slot] != 0) {
          slot = (slot + ((h >> 7) | 1)) & (vals.length - 1);
        }
        counts[slot] = count;
        vals[slot] = val;
        oldCounts[i] = slot;
      }

      oldToNewMapping = oldCounts;
    }

    int cardinality() {
      return cardinality;
    }
  }
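
  // For illustration: LongCounts is an open-addressing table. Because the capacity is always a
  // power of two and the probe step ((h >> 7) | 1) is always odd, the step is coprime with the
  // capacity, so a collision chain eventually visits every slot. A probe looks roughly like:
  //
  //   int mask = vals.length - 1;   // capacity is a power of two
  //   int slot = h & mask;          // initial bucket from the low hash bits
  //   int step = (h >> 7) | 1;      // odd increment derived from higher hash bits
  //   slot = (slot + step) & mask;  // next bucket after a collision, wrapping around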

  /** A hack instance of Calc for Term ordinals in DocValues. */
  // TODO consider making FacetRangeProcessor.Calc facet top level; then less of a hack?
  private class TermOrdCalc extends FacetRangeProcessor.Calc {

    IntFunction<BytesRef> lookupOrdFunction; // set in collectDocs()!

    TermOrdCalc() throws IOException {
      super(sf);
    }

    @Override
    public long bitsToSortableBits(long globalOrd) {
      return globalOrd;
    }

    /** To be returned in "buckets"/"val" */
    @Override
    @SuppressWarnings({"rawtypes"})
    public Comparable bitsToValue(long globalOrd) {
      BytesRef bytesRef = lookupOrdFunction.apply((int) globalOrd);
      // note FacetFieldProcessorByArray.findTopSlots also calls SchemaFieldType.toObject
      return sf.getType().toObject(sf, bytesRef).toString();
    }

    @Override
    @SuppressWarnings({"rawtypes"})
    public String formatValue(Comparable val) {
      return (String) val;
    }

    @Override
    @SuppressWarnings({"rawtypes"})
    protected Comparable parseStr(String rawval) throws ParseException {
      throw new UnsupportedOperationException();
    }

    @Override
    @SuppressWarnings({"rawtypes"})
    protected Comparable parseAndAddGap(Comparable value, String gap) throws ParseException {
      throw new UnsupportedOperationException();
    }
  }

  FacetRangeProcessor.Calc calc;
  LongCounts table;
  int allBucketsSlot = -1;

  FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
    super(fcontext, freq, sf);
    if (freq.mincount == 0) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, getClass() + " doesn't support mincount=0");
    }
    if (freq.prefix != null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          getClass() + " doesn't support prefix"); // yet, but it could
    }
    FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
    if (fieldInfo != null
        && fieldInfo.getDocValuesType() != DocValuesType.NUMERIC
        && fieldInfo.getDocValuesType() != DocValuesType.SORTED
        && fieldInfo.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          getClass() + " only support single valued number/string with docValues");
    }
  }

  @Override
  public void process() throws IOException {
    super.process();
    response = calcFacets();
    table = null; // gc
  }

  private SimpleOrderedMap<Object> calcFacets() throws IOException {
    if (sf.getType().getNumberType() != null) {
      calc = FacetRangeProcessor.getNumericCalc(sf);
    } else {
      calc = new TermOrdCalc(); // kind of a hack
    }

    // TODO: Use the number of indexed terms, if present, as an estimate!
    // Even for NumericDocValues, we could check for a terms index for an estimate.
    // Our estimation should aim high to avoid expensive rehashes.
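    // For example (illustrative numbers): with a base DocSet of 1000 documents,
    // 1000 * (1 / 0.7) + 1 is roughly 1429, the next power of two is 2048, and the result is
    // then capped at MAXIMUM_STARTING_TABLE_SIZE (1024 by default).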
    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize =
        BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    table =
        new LongCounts(currHashSize) {
          @Override
          protected void rehash() {
            super.rehash();
            doRehash(this);
            oldToNewMapping = null; // allow for gc
          }
        };

    // note: these methods/phases align with FacetFieldProcessorByArray's
    createCollectAcc();

    collectDocs();

    return super.findTopSlots(
        table.numSlots(),
        table.cardinality(),
        slotNum -> calc.bitsToValue(table.vals[slotNum]), // getBucketValFromSlotNum
        val -> calc.formatValue(val)); // getFieldQueryVal
  }

  private void createCollectAcc() throws IOException {
    int numSlots = table.numSlots();

    if (freq.allBuckets) {
      allBucketsSlot = numSlots++;
    }

    indexOrderAcc =
        new SlotAcc(fcontext) {
          @Override
          public void collect(int doc, int slot, IntFunction<SlotContext> slotContext)
              throws IOException {}

          @Override
          public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
          }

          @Override
          public Object getValue(int slotNum) throws IOException {
            return null;
          }

          @Override
          public void reset() {}

          @Override
          public void resize(Resizer resizer) {}
        };

    countAcc =
        new SlotAcc.CountSlotAcc(fcontext) {
          @Override
          public void incrementCount(int slot, long count) {
            throw new UnsupportedOperationException();
          }

          @Override
          public long getCount(int slot) {
            return table.counts[slot];
          }

          @Override
          public Object getValue(int slotNum) {
            return getCount(slotNum);
          }

          @Override
          public void reset() {
            throw new UnsupportedOperationException();
          }

          @Override
          public void collect(int doc, int slot, IntFunction<SlotContext> slotContext)
              throws IOException {
            throw new UnsupportedOperationException();
          }

          @Override
          public int compare(int slotA, int slotB) {
            return Long.compare(table.counts[slotA], table.counts[slotB]);
          }

          @Override
          public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
          }
        };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    super.createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
      allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }
  }
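
  // Background note: for string fields the hash keys are global ordinals. When the index has
  // multiple segments, the per-segment ordinal of each collected document is translated to a
  // global ordinal through the MultiSortedDocValues ordinal map before being added to the
  // table, roughly:
  //
  //   long globalOrd = toGlobal.get(docValues.ordValue());
  //   collectValFirstPhase(segDoc, globalOrd);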

  private void collectDocs() throws IOException {
    if (calc instanceof TermOrdCalc) { // Strings

      // TODO support SortedSetDocValues
      SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
      ((TermOrdCalc) calc).lookupOrdFunction =
          ord -> {
            try {
              return globalDocValues.lookupOrd(ord);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          };

      DocSetUtil.collectSortedDocSet(
          fcontext.base,
          fcontext.searcher.getIndexReader(),
          new SimpleCollector() {
            SortedDocValues docValues = globalDocValues; // this segment/leaf. NN
            LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN

            @Override
            public ScoreMode scoreMode() {
              return ScoreMode.COMPLETE_NO_SCORES;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
              setNextReaderFirstPhase(ctx);
              if (globalDocValues instanceof MultiDocValues.MultiSortedDocValues) {
                MultiDocValues.MultiSortedDocValues multiDocValues =
                    (MultiDocValues.MultiSortedDocValues) globalDocValues;
                docValues = multiDocValues.values[ctx.ord];
                toGlobal = multiDocValues.mapping.getGlobalOrds(ctx.ord);
              }
            }

            @Override
            public void collect(int segDoc) throws IOException {
              if (docValues.advanceExact(segDoc)) {
                long val = toGlobal.get(docValues.ordValue());
                collectValFirstPhase(segDoc, val);
              }
            }
          });

    } else { // Numeric:

      if (sf.multiValued()) {
        DocSetUtil.collectSortedDocSet(
            fcontext.base,
            fcontext.searcher.getIndexReader(),
            new SimpleCollector() {
              SortedNumericDocValues values = null; // NN

              @Override
              public ScoreMode scoreMode() {
                return ScoreMode.COMPLETE_NO_SCORES;
              }

              @Override
              protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
                setNextReaderFirstPhase(ctx);
                values = DocValues.getSortedNumeric(ctx.reader(), sf.getName());
              }

              @Override
              public void collect(int segDoc) throws IOException {
                if (values.advanceExact(segDoc)) {
                  long l = values.nextValue(); // This document must have at least one value
                  collectValFirstPhase(segDoc, l);
                  for (int i = 1, count = values.docValueCount(); i < count; i++) {
                    long lnew = values.nextValue();
                    // Skip the value if it's equal to the last one, we don't want to double-count
                    // it
                    if (lnew != l) {
                      collectValFirstPhase(segDoc, lnew);
                    }
                    l = lnew;
                  }
                }
              }
            });
      } else {
        DocSetUtil.collectSortedDocSet(
            fcontext.base,
            fcontext.searcher.getIndexReader(),
            new SimpleCollector() {
              NumericDocValues values = null; // NN

              @Override
              public ScoreMode scoreMode() {
                return ScoreMode.COMPLETE_NO_SCORES;
              }

              @Override
              protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
                setNextReaderFirstPhase(ctx);
                values = DocValues.getNumeric(ctx.reader(), sf.getName());
              }

              @Override
              public void collect(int segDoc) throws IOException {
                if (values.advanceExact(segDoc)) {
                  collectValFirstPhase(segDoc, values.longValue());
                }
              }
            });
      }
    }
  }
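
  // Note on the multi-valued numeric case above: SortedNumericDocValues returns a document's
  // values in non-decreasing order, so comparing each value only with the previous one is enough
  // to skip duplicates, e.g. the per-document sequence 3, 3, 7 contributes the keys 3 and 7
  // exactly once each.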

  private void collectValFirstPhase(int segDoc, long val) throws IOException {
    int slot = table.add(val); // this can trigger a rehash

    // Our countAcc is virtual, so this is not needed:
    // countAcc.incrementCount(slot, 1);

    super.collectFirstPhase(segDoc, slot, slotContext);
  }

  /**
   * SlotContext to use during all {@link SlotAcc} collection.
   *
   * <p>This avoids a memory allocation for each invocation of collectValFirstPhase.
   */
  private IntFunction<SlotContext> slotContext =
      (slotNum) -> {
        long val = table.vals[slotNum];
        @SuppressWarnings({"rawtypes"})
        Comparable value = calc.bitsToValue(val);
        return new SlotContext(sf.getType().getFieldTermQuery(null, sf, calc.formatValue(value)));
      };

  private void doRehash(LongCounts table) {
    if (collectAcc == null && allBucketsAcc == null) return;

    // Our "count" acc is backed by the hash table and will already be rehashed
    // otherAccs don't need to be rehashed

    int newTableSize = table.numSlots();
    int numSlots = newTableSize;
    final int oldAllBucketsSlot = allBucketsSlot;
    if (oldAllBucketsSlot >= 0) {
      allBucketsSlot = numSlots++;
    }

    final int finalNumSlots = numSlots;
    final long[] mapping = table.oldToNewMapping;

    SlotAcc.Resizer resizer =
        new SlotAcc.Resizer() {
          @Override
          public int getNewSize() {
            return finalNumSlots;
          }

          @Override
          public int getNewSlot(int oldSlot) {
            if (oldSlot < mapping.length) {
              return (int) mapping[oldSlot];
            }
            if (oldSlot == oldAllBucketsSlot) {
              return allBucketsSlot;
            }
            return -1;
          }
        };

    // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new
    // slot directly
    if (collectAcc != null) {
      collectAcc.resize(resizer);
    }
    if (allBucketsAcc != null) {
      allBucketsAcc.resize(resizer);
    }
  }
}
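
// Usage sketch (illustrative, not part of the class above): this processor is reached through
// the JSON Facet API when a terms facet on a docValues field asks for the hash-based method,
// e.g. a request body along the lines of the following, where "price_d" is a hypothetical
// single-valued numeric docValues field:
//
//   {
//     "query": "*:*",
//     "facet": {
//       "prices": { "type": "terms", "field": "price_d", "method": "dvhash" }
//     }
//   }
//
// As the constructor above enforces, requests with mincount=0 or a prefix are rejected and fall
// back to other processors.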