All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.DocIDMerger Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.search.DocIdSetIterator; // javadocs
import org.apache.lucene.util.PriorityQueue;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

/**
 * Utility class to help merging documents from sub-readers according to either simple
 * concatenated (unsorted) order, or by a specified index-time sort, skipping
 * deleted documents and remapping non-deleted documents.
 *
 * <p>Instances are obtained through the {@code of(...)} factories; the constructor is private so
 * only the nested implementations can extend this class.
 *
 * @param <T> the concrete {@link Sub} type returned by {@link #next()}
 */
public abstract class DocIDMerger<T extends DocIDMerger.Sub> {

  /** Represents one sub-reader being merged */
  public abstract static class Sub {
    /** Mapped doc ID; set by the merger before a sub is returned from {@link DocIDMerger#next()} */
    public int mappedDocID;

    /** Maps this sub's doc IDs to merged doc IDs; {@code -1} marks a deleted document. */
    final MergeState.DocMap docMap;

    /**
     * Sole constructor
     *
     * @param docMap map from this sub's doc IDs to merged doc IDs
     */
    public Sub(MergeState.DocMap docMap) {
      this.docMap = docMap;
    }

    /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
    public abstract int nextDoc() throws IOException;
  }

  /**
   * Construct this from the provided subs, specifying the maximum sub count
   *
   * @param subs the sub-readers to merge
   * @param maxCount capacity of the priority queue used when merging in sorted order
   * @param indexIsSorted whether the documents must be merged by mapped doc ID rather than
   *     concatenated sub after sub
   * @return a sorted merger when {@code indexIsSorted} is true and {@code maxCount > 1},
   *     otherwise a sequential merger
   * @throws IOException if advancing a sub fails while the merger is being primed
   */
  public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) throws IOException {
    if (indexIsSorted && maxCount > 1) {
      return new SortedDocIDMerger<>(subs, maxCount);
    } else {
      // With at most one sub there is nothing to interleave, so plain concatenation suffices.
      return new SequentialDocIDMerger<>(subs);
    }
  }

  /**
   * Construct this from the provided subs
   *
   * @param subs the sub-readers to merge; its size is used as the maximum sub count
   * @param indexIsSorted whether the documents must be merged by mapped doc ID
   * @return the merger
   * @throws IOException if advancing a sub fails while the merger is being primed
   */
  public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) throws IOException {
    return of(subs, subs.size(), indexIsSorted);
  }

  /** Reuse API, currently only used by postings during merge */
  public abstract void reset() throws IOException;

  /** Returns null when done.
   *  NOTE: after the iterator has exhausted you should not call this
   *  method, as it may result in unpredicted behavior. */
  public abstract T next() throws IOException;

  private DocIDMerger() {}

  /** Visits the subs one after another, in list order. */
  private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {

    private final List<T> subs;
    /** Sub currently being drained; null when there are no subs or all are exhausted. */
    private T current;
    /** Index into {@link #subs} of the sub to switch to once {@link #current} is exhausted. */
    private int nextIndex;

    private SequentialDocIDMerger(List<T> subs) throws IOException {
      this.subs = subs;
      reset();
    }

    @Override
    public void reset() throws IOException {
      if (subs.isEmpty()) {
        current = null;
        nextIndex = 0;
      } else {
        current = subs.get(0);
        nextIndex = 1;
      }
    }

    @Override
    public T next() throws IOException {
      // No subs at all, or already exhausted: report "done" instead of dereferencing null.
      if (current == null) {
        return null;
      }
      while (true) {
        int docID = current.nextDoc();
        if (docID == NO_MORE_DOCS) {
          if (nextIndex == subs.size()) {
            current = null;
            return null;
          }
          // Current sub is drained; move on to the following one.
          current = subs.get(nextIndex);
          nextIndex++;
          continue;
        }

        int mappedDocID = current.docMap.get(docID);
        if (mappedDocID != -1) {
          current.mappedDocID = mappedDocID;
          return current;
        }
        // mappedDocID == -1: doc was deleted, keep scanning.
      }
    }

  }

  /** Interleaves the subs by ascending mapped doc ID using a priority queue. */
  private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {

    private final List<T> subs;
    /** Subs that still have documents, ordered by their current {@code mappedDocID}. */
    private final PriorityQueue<T> queue;

    private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
      this.subs = subs;
      queue = new PriorityQueue<T>(maxCount) {
        @Override
        protected boolean lessThan(T a, T b) {
          // Two subs must never be positioned on the same merged doc ID.
          assert a.mappedDocID != b.mappedDocID;
          return a.mappedDocID < b.mappedDocID;
        }
      };
      reset();
    }

    @Override
    public void reset() throws IOException {
      // caller may not have fully consumed the queue:
      queue.clear();
      boolean first = true;
      for (T sub : subs) {
        if (first) {
          // by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue
          // so the first call to next() will advance it
          sub.mappedDocID = -1;
          first = false;
        } else {
          // Position every other sub on its first non-deleted document.
          int mappedDocID;
          while (true) {
            int docID = sub.nextDoc();
            if (docID == NO_MORE_DOCS) {
              mappedDocID = NO_MORE_DOCS;
              break;
            }
            mappedDocID = sub.docMap.get(docID);
            if (mappedDocID != -1) {
              break;
            }
          }
          if (mappedDocID == NO_MORE_DOCS) {
            // all docs in this sub were deleted; do not add it to the queue!
            continue;
          }
          sub.mappedDocID = mappedDocID;
        }
        queue.add(sub);
      }
    }

    @Override
    public T next() throws IOException {
      T top = queue.top();
      // Empty queue (no subs, or everything already consumed): report "done".
      if (top == null) {
        return null;
      }

      while (true) {
        int docID = top.nextDoc();
        if (docID == NO_MORE_DOCS) {
          // This sub is drained: drop it and hand out the next smallest sub (null if none left).
          queue.pop();
          top = queue.top();
          break;
        }
        int mappedDocID = top.docMap.get(docID);
        if (mappedDocID == -1) {
          // doc was deleted
          continue;
        } else {
          // Re-sift the advanced sub and hand out whichever sub is now smallest.
          top.mappedDocID = mappedDocID;
          top = queue.updateTop();
          break;
        }
      }

      return top;
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy