All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.search.facet.UniqueSlotAcc Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.util.Hash;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.hll.HLL;

abstract class UniqueSlotAcc extends SlotAcc {
  HLLAgg.HLLFactory factory;
  SchemaField field;
  FixedBitSet[] arr;
  int[] counts;  // populated with the cardinality once
  int nTerms;

  public UniqueSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
    super(fcontext);
    this.factory = factory;
    arr = new FixedBitSet[numSlots];
    this.field = field;
  }

  @Override
  public void reset() throws IOException {
    counts = null;
    for (FixedBitSet bits : arr) {
      if (bits == null) continue;
      bits.clear(0, bits.length());
    }
  }

  @Override
  public Object getValue(int slot) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slot);
    }
    if (counts != null) {  // will only be pre-populated if this was used for sorting.
      return counts[slot];
    }

    FixedBitSet bs = arr[slot];
    return bs==null ? 0 : bs.cardinality();
  }

  private Object getShardHLL(int slot) throws IOException {
    FixedBitSet ords = arr[slot];
    if (ords == null) return HLLAgg.NO_VALUES;

    HLL hll = factory.getHLL();
    long maxOrd = ords.length();
    Hash.LongPair hashResult = new Hash.LongPair();
    for(int ord=-1; ++ord < maxOrd;) {
      ord = ords.nextSetBit(ord);
      if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
      BytesRef val = lookupOrd(ord);
      // way to avoid recomputing hash across slots?  Prob not worth space
      Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult);
      // idea: if the set is small enough, just send the hashes?  We can add at the top
      // level or even just do a hash table at the top level.
      hll.addRaw(hashResult.val1);
    }

    SimpleOrderedMap map = new SimpleOrderedMap();
    map.add("hll", hll.toBytes());
    return map;
  }

  private Object getShardValue(int slot) throws IOException {
    if (factory != null) return getShardHLL(slot);
    FixedBitSet ords = arr[slot];
    int unique;
    if (counts != null) {
      unique = counts[slot];
    } else {
      unique = ords==null ? 0 : ords.cardinality();
    }

    SimpleOrderedMap map = new SimpleOrderedMap();
    map.add("unique", unique);
    map.add("nTerms", nTerms);

    int maxExplicit=100;
    // TODO: make configurable
    // TODO: share values across buckets
    if (unique > 0) {

      List lst = new ArrayList( Math.min(unique, maxExplicit) );

      long maxOrd = ords.length();
      if (ords != null && ords.length() > 0) {
        for (int ord=0; lst.size() < maxExplicit;) {
          ord = ords.nextSetBit(ord);
          if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
          BytesRef val = lookupOrd(ord);
          Object o = field.getType().toObject(field, val);
          lst.add(o);
          if (++ord >= maxOrd) break;
        }
      }

      map.add("vals", lst);
    }

    return map;
  }

  protected abstract BytesRef lookupOrd(int ord) throws IOException;

  // we only calculate all the counts when sorting by count
  public void calcCounts() {
    counts = new int[arr.length];
    for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy