All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.index.DocValuesFieldUpdates Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedMutable;

/**
 * Holds updates of a single DocValues field, for a set of documents within one segment.
 *
 * @lucene.experimental
 */
abstract class DocValuesFieldUpdates implements Accountable {

  protected static final int PAGE_SIZE = 1024;
  private static final long HAS_VALUE_MASK = 1;
  private static final long HAS_NO_VALUE_MASK = 0;
  // we use the first bit of each value to mark if the doc has a value or not
  private static final int SHIFT = 1;

  /**
   * An iterator over documents and their updated values. Only documents with updates are returned
   * by this iterator, and the documents are returned in increasing order.
   */
  abstract static class Iterator extends DocValuesIterator {

    @Override
    public final boolean advanceExact(int target) {
      throw new UnsupportedOperationException();
    }

    @Override
    public final int advance(int target) {
      throw new UnsupportedOperationException();
    }

    @Override
    public final long cost() {
      throw new UnsupportedOperationException();
    }

    @Override
    public abstract int nextDoc(); // no IOException

    /** Returns a long value for the current document if this iterator is a long iterator. */
    abstract long longValue();

    /**
     * Returns a binary value for the current document if this iterator is a binary value iterator.
     */
    abstract BytesRef binaryValue();

    /** Returns delGen for this packet. */
    abstract long delGen();

    /** Returns true if this doc has a value */
    abstract boolean hasValue();

    /** Wraps the given iterator as a BinaryDocValues instance. */
    static BinaryDocValues asBinaryDocValues(Iterator iterator) {
      return new BinaryDocValues() {
        @Override
        public int docID() {
          return iterator.docID();
        }

        @Override
        public BytesRef binaryValue() {
          return iterator.binaryValue();
        }

        @Override
        public boolean advanceExact(int target) {
          return iterator.advanceExact(target);
        }

        @Override
        public int nextDoc() {
          return iterator.nextDoc();
        }

        @Override
        public int advance(int target) {
          return iterator.advance(target);
        }

        @Override
        public long cost() {
          return iterator.cost();
        }
      };
    }

    /** Wraps the given iterator as a NumericDocValues instance. */
    static NumericDocValues asNumericDocValues(Iterator iterator) {
      return new NumericDocValues() {
        @Override
        public long longValue() {
          return iterator.longValue();
        }

        @Override
        public boolean advanceExact(int target) {
          throw new UnsupportedOperationException();
        }

        @Override
        public int docID() {
          return iterator.docID();
        }

        @Override
        public int nextDoc() {
          return iterator.nextDoc();
        }

        @Override
        public int advance(int target) {
          return iterator.advance(target);
        }

        @Override
        public long cost() {
          return iterator.cost();
        }
      };
    }
  }

  /**
   * Merge-sorts multiple iterators, one per delGen, favoring the largest delGen that has updates
   * for a given docID.
   */
  public static Iterator mergedIterator(Iterator[] subs) {

    if (subs.length == 1) {
      return subs[0];
    }

    PriorityQueue queue =
        new PriorityQueue(subs.length) {
          @Override
          protected boolean lessThan(Iterator a, Iterator b) {
            // sort by smaller docID
            int cmp = Integer.compare(a.docID(), b.docID());
            if (cmp == 0) {
              // then by larger delGen
              cmp = Long.compare(b.delGen(), a.delGen());

              // delGens are unique across our subs:
              assert cmp != 0;
            }

            return cmp < 0;
          }
        };

    for (Iterator sub : subs) {
      if (sub.nextDoc() != NO_MORE_DOCS) {
        queue.add(sub);
      }
    }

    if (queue.size() == 0) {
      return null;
    }

    return new Iterator() {
      private int doc = -1;

      @Override
      public int nextDoc() {
        // Advance all sub iterators past current doc
        while (true) {
          if (queue.size() == 0) {
            doc = NO_MORE_DOCS;
            break;
          }
          int newDoc = queue.top().docID();
          if (newDoc != doc) {
            assert newDoc > doc : "doc=" + doc + " newDoc=" + newDoc;
            doc = newDoc;
            break;
          }
          if (queue.top().nextDoc() == NO_MORE_DOCS) {
            queue.pop();
          } else {
            queue.updateTop();
          }
        }
        return doc;
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      long longValue() {
        return queue.top().longValue();
      }

      @Override
      BytesRef binaryValue() {
        return queue.top().binaryValue();
      }

      @Override
      public long delGen() {
        throw new UnsupportedOperationException();
      }

      @Override
      boolean hasValue() {
        return queue.top().hasValue();
      }
    };
  }

  final String field;
  final DocValuesType type;
  final long delGen;
  private final int bitsPerValue;
  private boolean finished;
  protected final int maxDoc;
  protected PagedMutable docs;
  protected int size;

  protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
    this.maxDoc = maxDoc;
    this.delGen = delGen;
    this.field = field;
    if (type == null) {
      throw new NullPointerException("DocValuesType must not be null");
    }
    this.type = type;
    bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
    docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
  }

  final boolean getFinished() {
    return finished;
  }

  abstract void add(int doc, long value);

  abstract void add(int doc, BytesRef value);

  /**
   * Adds the value for the given docID. This method prevents conditional calls to {@link
   * Iterator#longValue()} or {@link Iterator#binaryValue()} since the implementation knows if it's
   * a long value iterator or binary value
   */
  abstract void add(int docId, Iterator iterator);

  /** Returns an {@link Iterator} over the updated documents and their values. */
  // TODO: also use this for merging, instead of having to write through to disk first
  abstract Iterator iterator();

  /** Freezes internal data structures and sorts updates by docID for efficient iteration. */
  final synchronized void finish() {
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    finished = true;
    // shrink wrap
    if (size < docs.size()) {
      resize(size);
    }
    if (size > 0) {
      // We need a stable sort but InPlaceMergeSorter performs lots of swaps
      // which hurts performance due to all the packed ints we are using.
      // Another option would be TimSorter, but it needs additional API (copy to
      // temp storage, compare with item in temp storage, etc.) so we instead
      // use quicksort and record ords of each update to guarantee stability.
      final PackedInts.Mutable ords =
          PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT);
      for (int i = 0; i < size; ++i) {
        ords.set(i, i);
      }
      new IntroSorter() {
        @Override
        protected void swap(int i, int j) {
          final long tmpOrd = ords.get(i);
          ords.set(i, ords.get(j));
          ords.set(j, tmpOrd);

          DocValuesFieldUpdates.this.swap(i, j);
        }

        @Override
        protected int compare(int i, int j) {
          // increasing docID order:
          // NOTE: we can have ties here, when the same docID was updated in the same segment, in
          // which case we rely on sort being
          // stable and preserving original order so the last update to that docID wins
          int cmp = Long.compare(docs.get(i) >>> 1, docs.get(j) >>> 1);
          if (cmp == 0) {
            cmp = (int) (ords.get(i) - ords.get(j));
          }
          return cmp;
        }

        long pivotDoc;
        int pivotOrd;

        @Override
        protected void setPivot(int i) {
          pivotDoc = docs.get(i) >>> 1;
          pivotOrd = (int) ords.get(i);
        }

        @Override
        protected int comparePivot(int j) {
          int cmp = Long.compare(pivotDoc, docs.get(j) >>> 1);
          if (cmp == 0) {
            cmp = pivotOrd - (int) ords.get(j);
          }
          return cmp;
        }
      }.sort(0, size);
    }
  }

  /** Returns true if this instance contains any updates. */
  synchronized boolean any() {
    return size > 0;
  }

  final synchronized int size() {
    return size;
  }

  /**
   * Adds an update that resets the documents value.
   *
   * @param doc the doc to update
   */
  synchronized void reset(int doc) {
    addInternal(doc, HAS_NO_VALUE_MASK);
  }

  final synchronized int add(int doc) {
    return addInternal(doc, HAS_VALUE_MASK);
  }

  private synchronized int addInternal(int doc, long hasValueMask) {
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    assert doc < maxDoc;

    // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
    if (size == Integer.MAX_VALUE) {
      throw new IllegalStateException(
          "cannot support more than Integer.MAX_VALUE doc/value entries");
    }
    // grow the structures to have room for more elements
    if (docs.size() == size) {
      grow(size + 1);
    }
    docs.set(size, (((long) doc) << SHIFT) | hasValueMask);
    ++size;
    return size - 1;
  }

  protected void swap(int i, int j) {
    long tmpDoc = docs.get(j);
    docs.set(j, docs.get(i));
    docs.set(i, tmpDoc);
  }

  protected void grow(int size) {
    docs = docs.grow(size);
  }

  protected void resize(int size) {
    docs = docs.resize(size);
  }

  protected final void ensureFinished() {
    if (finished == false) {
      throw new IllegalStateException("call finish first");
    }
  }

  @Override
  public long ramBytesUsed() {
    return docs.ramBytesUsed()
        + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
        + 2 * Integer.BYTES
        + 2
        + Long.BYTES
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF;
  }

  // TODO: can't this just be NumericDocValues now?  avoid boxing the long value...
  protected abstract static class AbstractIterator extends DocValuesFieldUpdates.Iterator {
    private final int size;
    private final PagedMutable docs;
    private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE
    private int doc = -1;
    private final long delGen;
    private boolean hasValue;

    AbstractIterator(int size, PagedMutable docs, long delGen) {
      this.size = size;
      this.docs = docs;
      this.delGen = delGen;
    }

    @Override
    public final int nextDoc() {
      if (idx >= size) {
        return doc = DocIdSetIterator.NO_MORE_DOCS;
      }
      long longDoc = docs.get(idx);
      ++idx;
      for (; idx < size; idx++) {
        // scan forward to last update to this doc
        final long nextLongDoc = docs.get(idx);
        if ((longDoc >>> 1) != (nextLongDoc >>> 1)) {
          break;
        }
        longDoc = nextLongDoc;
      }
      hasValue = (longDoc & HAS_VALUE_MASK) > 0;
      if (hasValue) {
        set(idx - 1);
      }
      doc = (int) (longDoc >> SHIFT);
      return doc;
    }

    /**
     * Called when the iterator moved to the next document
     *
     * @param idx the internal index to set the value to
     */
    protected abstract void set(long idx);

    @Override
    public final int docID() {
      return doc;
    }

    @Override
    final long delGen() {
      return delGen;
    }

    @Override
    final boolean hasValue() {
      return hasValue;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy