All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.search.facet.UniqueSlotAcc Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.util.Hash;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.hll.HLL;

abstract class UniqueSlotAcc extends SlotAcc {
  HLLAgg.HLLFactory factory;
  SchemaField field;
  FixedBitSet[] arr;
  int[] counts; // populated with the cardinality once
  int nTerms;

  public UniqueSlotAcc(
      FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory)
      throws IOException {
    super(fcontext);
    this.factory = factory;
    arr = new FixedBitSet[numSlots];
    this.field = field;
  }

  @Override
  public void reset() throws IOException {
    counts = null;
    for (FixedBitSet bits : arr) {
      if (bits == null) continue;
      bits.clear(0, bits.length());
    }
  }

  @Override
  public Object getValue(int slot) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slot);
    }
    return getNonShardValue(slot);
  }

  /** Returns the current slot value as long This is used to get non-sharded value */
  public long getNonShardValue(int slot) {
    long res;
    if (counts != null) { // will only be pre-populated if this was used for sorting.
      res = counts[slot];
    } else {
      FixedBitSet bs = arr[slot];
      res = bs == null ? 0 : bs.cardinality();
    }
    return res;
  }

  private Object getShardHLL(int slot) throws IOException {
    FixedBitSet ords = arr[slot];
    if (ords == null) return HLLAgg.NO_VALUES;

    HLL hll = factory.getHLL();
    long maxOrd = ords.length();
    Hash.LongPair hashResult = new Hash.LongPair();
    for (int ord = -1; ++ord < maxOrd; ) {
      ord = ords.nextSetBit(ord);
      if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
      BytesRef val = lookupOrd(ord);
      // way to avoid recomputing hash across slots?  Prob not worth space
      Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult);
      // idea: if the set is small enough, just send the hashes?  We can add at the top
      // level or even just do a hash table at the top level.
      hll.addRaw(hashResult.val1);
    }

    SimpleOrderedMap map = new SimpleOrderedMap<>();
    map.add("hll", hll.toBytes());
    return map;
  }

  private Object getShardValue(int slot) throws IOException {
    if (factory != null) return getShardHLL(slot);
    FixedBitSet ords = arr[slot];
    int unique;
    if (counts != null) {
      unique = counts[slot];
    } else {
      unique = ords == null ? 0 : ords.cardinality();
    }

    SimpleOrderedMap map = new SimpleOrderedMap<>();
    map.add("unique", unique);
    map.add("nTerms", nTerms);

    int maxExplicit = 100;
    // TODO: make configurable
    // TODO: share values across buckets
    if (unique > 0) {

      List lst = new ArrayList<>(Math.min(unique, maxExplicit));

      int maxOrd = ords.length();
      if (maxOrd > 0) {
        for (int ord = 0; lst.size() < maxExplicit; ) {
          ord = ords.nextSetBit(ord);
          if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
          BytesRef val = lookupOrd(ord);
          Object o = field.getType().toObject(field, val);
          lst.add(o);
          if (++ord >= maxOrd) break;
        }
      }

      map.add("vals", lst);
    }

    return map;
  }

  protected abstract BytesRef lookupOrd(int ord) throws IOException;

  // we only calculate all the counts when sorting by count
  public void calcCounts() {
    counts = new int[arr.length];
    for (int i = 0; i < arr.length; i++) {
      FixedBitSet bs = arr[i];
      counts[i] = bs == null ? 0 : bs.cardinality();
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    if (counts == null) { // TODO: a more efficient way to do this?  prepareSort?
      calcCounts();
    }
    return counts[slotA] - counts[slotB];
  }

  @Override
  public void resize(Resizer resizer) {
    arr = resizer.resize(arr, null);
    if (counts != null) {
      counts = resizer.resize(counts, 0);
    }
  }
}