org.apache.lucene.index.MergeState Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of lucene-core Show documentation
Apache Lucene Java Core
There is a newer version: 9.11.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

/** Holds common state used during segment merging.
 *
 * @lucene.experimental */
public class MergeState {

  /** Maps document IDs from old segments to document IDs in the new segment */
  public final DocMap[] docMaps;

  // Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running:
  final DocMap[] leafDocMaps;

  /** {@link SegmentInfo} of the newly merged segment. */
  public final SegmentInfo segmentInfo;

  /** {@link FieldInfos} of the newly merged segment. */
  public FieldInfos mergeFieldInfos;

  /** Stored field producers being merged */
  public final StoredFieldsReader[] storedFieldsReaders;

  /** Term vector producers being merged */
  public final TermVectorsReader[] termVectorsReaders;

  /** Norms producers being merged */
  public final NormsProducer[] normsProducers;

  /** DocValues producers being merged */
  public final DocValuesProducer[] docValuesProducers;

  /** FieldInfos being merged */
  public final FieldInfos[] fieldInfos;

  /** Live docs for each reader */
  public final Bits[] liveDocs;

  /** Postings to merge */
  public final FieldsProducer[] fieldsProducers;

  /** Point readers to merge */
  public final PointsReader[] pointsReaders;

  /** Max docs per reader */
  public final int[] maxDocs;

  /** InfoStream for debugging messages. */
  public final InfoStream infoStream;

  /** Indicates if the index needs to be sorted **/
  public boolean needsIndexSort;

  /** Sole constructor. */
  MergeState(List originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {

    this.infoStream = infoStream;

    final Sort indexSort = segmentInfo.getIndexSort();
    int numReaders = originalReaders.size();
    leafDocMaps = new DocMap[numReaders];
    List readers = maybeSortReaders(originalReaders, segmentInfo);

    maxDocs = new int[numReaders];
    fieldsProducers = new FieldsProducer[numReaders];
    normsProducers = new NormsProducer[numReaders];
    storedFieldsReaders = new StoredFieldsReader[numReaders];
    termVectorsReaders = new TermVectorsReader[numReaders];
    docValuesProducers = new DocValuesProducer[numReaders];
    pointsReaders = new PointsReader[numReaders];
    fieldInfos = new FieldInfos[numReaders];
    liveDocs = new Bits[numReaders];

    int numDocs = 0;
    for(int i=0;i readers) {

    int totalDocs = 0;
    int numReaders = readers.size();
    DocMap[] docMaps = new DocMap[numReaders];

    for (int i = 0; i < numReaders; i++) {
      LeafReader reader = readers.get(i);
      Bits liveDocs = reader.getLiveDocs();

      final PackedLongValues delDocMap;
      if (liveDocs != null) {
        delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
      } else {
        delDocMap = null;
      }

      final int docBase = totalDocs;
      docMaps[i] = new DocMap() {
        @Override
        public int get(int docID) {
          if (liveDocs == null) {
            return docBase + docID;
          } else if (liveDocs.get(docID)) {
            return docBase + (int) delDocMap.get(docID);
          } else {
            return -1;
          }
        }
      };
      totalDocs += reader.numDocs();
    }

    return docMaps;
  }

  private DocMap[] buildDocMaps(List readers, Sort indexSort) throws IOException {

    if (indexSort == null) {
      // no index sort ... we only must map around deletions, and rebase to the merged segment's docID space
      return buildDeletionDocMaps(readers);
    } else {
      // do a merge sort of the incoming leaves:
      long t0 = System.nanoTime();
      DocMap[] result = MultiSorter.sort(indexSort, readers);
      if (result == null) {
        // already sorted so we can switch back to map around deletions
        return buildDeletionDocMaps(readers);
      } else {
        needsIndexSort = true;
      }
      long t1 = System.nanoTime();
      if (infoStream.isEnabled("SM")) {
        infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
      }
      return result;
    }
  }

  private List maybeSortReaders(List originalReaders, SegmentInfo segmentInfo) throws IOException {

    // Default to identity:
    for(int i=0;i readers = new ArrayList<>(originalReaders.size());

    for (CodecReader leaf : originalReaders) {
      Sort segmentSort = leaf.getIndexSort();

      if (segmentSort == null) {
        // This segment was written by flush, so documents are not yet sorted, so we sort them now:
        long t0 = System.nanoTime();
        Sorter.DocMap sortDocMap = sorter.sort(leaf);
        long t1 = System.nanoTime();
        double msec = (t1-t0)/1000000.0;
        
        if (sortDocMap != null) {
          if (infoStream.isEnabled("SM")) {
            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
          }
          needsIndexSort = true;
          leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
          leafDocMaps[readers.size()] = new DocMap() {
              @Override
              public int get(int docID) {
                return sortDocMap.oldToNew(docID);
              }
            };
        } else {
          if (infoStream.isEnabled("SM")) {
            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted, but is already accidentally in sort %s order (%.2f msec to sort)", leaf, indexSort, msec));
          }
        }

      } else {
        if (segmentSort.equals(indexSort) == false) {
          throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + segmentSort);
        }
        if (infoStream.isEnabled("SM")) {
          infoStream.message("SM", "segment " + leaf + " already sorted");
        }
      }

      readers.add(leaf);
    }

    return readers;
  }

  /** A map of doc IDs. */
  public static abstract class DocMap {
    /** Sole constructor */
    public DocMap() {
    }

    /** Return the mapped docID or -1 if the given doc is not mapped. */
    public abstract int get(int docID);
  }

  static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
    final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    int del = 0;
    for (int i = 0; i < maxDoc; ++i) {
      docMapBuilder.add(i - del);
      if (liveDocs.get(i) == false) {
        ++del;
      }
    }
    return docMapBuilder.build();
  }
}