All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.BitDocIdSet Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;


import java.io.IOException;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

/**
 * Implementation of the {@link DocIdSet} interface on top of a {@link BitSet}.
 * @lucene.internal
 */
public class BitDocIdSet extends DocIdSet {

  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BitDocIdSet.class);

  private final BitSet set;
  private final long cost;

  /**
   * Wrap the given {@link BitSet} as a {@link DocIdSet}. The provided
   * {@link BitSet} must not be modified afterwards.
   */
  public BitDocIdSet(BitSet set, long cost) {
    this.set = set;
    this.cost = cost;
  }

  /**
   * Same as {@link #BitDocIdSet(BitSet, long)} but uses the set's
   * {@link BitSet#approximateCardinality() approximate cardinality} as a cost.
   */
  public BitDocIdSet(BitSet set) {
    this(set, set.approximateCardinality());
  }

  @Override
  public DocIdSetIterator iterator() {
    return new BitSetIterator(set, cost);
  }

  @Override
  public BitSet bits() {
    return set;
  }

  /** This DocIdSet implementation is cacheable. */
  @Override
  public boolean isCacheable() {
    return true;
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + set.ramBytesUsed();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(set=" + set + ",cost=" + cost + ")";
  }

  /**
   * A builder of {@link DocIdSet}s that supports random access. If you don't
   * need random access, you should rather use {@link DocIdSetBuilder}.
   * @lucene.internal
   */
  public static final class Builder {

    private final int maxDoc;
    private final int threshold;
    private SparseFixedBitSet sparseSet;
    private FixedBitSet denseSet;

    // we cache an upper bound of the cost of this builder so that we don't have
    // to re-compute approximateCardinality on the sparse set every time 
    private long costUpperBound;

    /** Create a new instance that can hold maxDoc documents and is optionally full. */
    public Builder(int maxDoc, boolean full) {
      this.maxDoc = maxDoc;
      threshold = maxDoc >>> 10;
      if (full) {
        denseSet = new FixedBitSet(maxDoc);
        denseSet.set(0, maxDoc);
      }
    }

    /** Create a new empty instance. */
    public Builder(int maxDoc) {
      this(maxDoc, false);
    }

    // pkg-private for testing
    boolean dense() {
      return denseSet != null;
    }

    /**
     * Is this builder definitely empty?  If so, {@link #build()} will return null.  This is usually the same as
     * simply being empty but if this builder was constructed with the {@code full} option or if an iterator was passed
     * that iterated over no documents, then we're not sure.
     */
    public boolean isDefinitelyEmpty() {
      return sparseSet == null && denseSet == null;
    }

    /**
     * Add the content of the provided {@link DocIdSetIterator} to this builder.
     */
    public void or(DocIdSetIterator it) throws IOException {
      if (denseSet != null) {
        // already upgraded
        denseSet.or(it);
        return;
      }

      final long itCost = it.cost();
      costUpperBound += itCost;
      if (costUpperBound >= threshold) {
        costUpperBound = (sparseSet == null ? 0 : sparseSet.approximateCardinality()) + itCost;

        if (costUpperBound >= threshold) {
          // upgrade
          denseSet = new FixedBitSet(maxDoc);
          denseSet.or(it);
          if (sparseSet != null) {
            denseSet.or(new BitSetIterator(sparseSet, 0L));
          }
          return;
        }
      }

      // we are still sparse
      if (sparseSet == null) {
        sparseSet = new SparseFixedBitSet(maxDoc);
      }
      sparseSet.or(it);
    }

    /**
     * Removes from this builder documents that are not contained in it.
     */
    @Deprecated
    public void and(DocIdSetIterator it) throws IOException {
      if (denseSet != null) {
        denseSet.and(it);
      } else if (sparseSet != null) {
        sparseSet.and(it);
      }
    }

    /**
     * Removes from this builder documents that are contained in it.
     */
    @Deprecated
    public void andNot(DocIdSetIterator it) throws IOException {
      if (denseSet != null) {
        denseSet.andNot(it);
      } else if (sparseSet != null) {
        sparseSet.andNot(it);
      }
    }

    /**
     * Build a {@link DocIdSet} that contains all doc ids that have been added.
     * This method may return null if no documents were addded to this
     * builder.
     * NOTE: this is a destructive operation, the builder should not be used
     * anymore after this method has been called.
     */
    public BitDocIdSet build() {
      final BitDocIdSet result;
      if (denseSet != null) {
        result = new BitDocIdSet(denseSet);
      } else if (sparseSet != null) {
        result = new BitDocIdSet(sparseSet);
      } else {
        result = null;
      }
      denseSet = null;
      sparseSet = null;
      costUpperBound = 0;
      return result;
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy