
// org.apache.lucene.index.MergeState Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import static org.apache.lucene.index.IndexWriter.isCongruentSort;
/** Holds common state used during segment merging.
*
* @lucene.experimental */
public class MergeState {
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps;
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos mergeFieldInfos;
/** Stored field producers being merged */
public final StoredFieldsReader[] storedFieldsReaders;
/** Term vector producers being merged */
public final TermVectorsReader[] termVectorsReaders;
/** Norms producers being merged */
public final NormsProducer[] normsProducers;
/** DocValues producers being merged */
public final DocValuesProducer[] docValuesProducers;
/** FieldInfos being merged */
public final FieldInfos[] fieldInfos;
/** Live docs for each reader */
public final Bits[] liveDocs;
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
/** Point readers to merge */
public final PointsReader[] pointsReaders;
/** Max docs per reader */
public final int[] maxDocs;
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
/** True when the merge must reorder documents to honor the index sort, i.e. the incoming segments were not already in sorted order. */
public boolean needsIndexSort;
/**
 * Sole constructor.
 *
 * <p>The visible part verifies the index sort of {@code readers} against {@code segmentInfo},
 * stores {@code infoStream}, and allocates every per-reader array with one slot per reader.
 *
 * <p>NOTE(review): this listing is damaged. The line starting {@code for(int i=0;i} has lost
 * everything between the loop condition and the parameter list of a later method, so the rest of
 * this constructor (and the header of the method that follows) is not shown here. Restore the
 * missing text from the upstream source before changing this code.
 */
MergeState(List readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
// Reject incompatible index sorts before any per-reader state is allocated.
verifyIndexSort(readers, segmentInfo);
this.infoStream = infoStream;
int numReaders = readers.size();
// Parallel arrays: index i in each array refers to the i-th reader.
maxDocs = new int[numReaders];
fieldsProducers = new FieldsProducer[numReaders];
normsProducers = new NormsProducer[numReaders];
storedFieldsReaders = new StoredFieldsReader[numReaders];
termVectorsReaders = new TermVectorsReader[numReaders];
docValuesProducers = new DocValuesProducer[numReaders];
pointsReaders = new PointsReader[numReaders];
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
int numDocs = 0;
// NOTE(review): garbled line. The loop header is cut off after "i" and is fused with the tail
// of another method's signature ("... readers) {"). The constructor's loop body, its remaining
// statements, its closing brace, and the following method's modifiers/name are all missing.
for(int i=0;i readers) {
// NOTE(review): from here to the closing brace this is presumably the body of
// buildDeletionDocMaps(readers), the one-argument helper that buildDocMaps calls and whose
// result it returns as DocMap[] -- TODO confirm against upstream.
// Running count of live documents in the readers processed so far.
int totalDocs = 0;
int numReaders = readers.size();
DocMap[] docMaps = new DocMap[numReaders];
for (int i = 0; i < numReaders; i++) {
LeafReader reader = readers.get(i);
Bits liveDocs = reader.getLiveDocs();
// Only built when the reader exposes a live-docs bitset; otherwise left null.
final PackedLongValues delDocMap;
if (liveDocs != null) {
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
} else {
delDocMap = null;
}
// Offset of this reader's first document in the merged ID space.
final int docBase = totalDocs;
docMaps[i] = new DocMap() {
@Override
public int get(int docID) {
if (liveDocs == null) {
// No live-docs bitset: nothing to skip, so IDs are only shifted by docBase.
return docBase + docID;
} else if (liveDocs.get(docID)) {
// Live document: use its deletion-compacted position, then shift by docBase.
return docBase + (int) delDocMap.get(docID);
} else {
// Document is not live, so it has no ID in the merged segment.
return -1;
}
}
};
// Advance by numDocs() (not maxDoc()) so the next reader's base excludes this reader's deleted docs.
totalDocs += reader.numDocs();
}
return docMaps;
}
/**
 * Builds one {@link DocMap} per incoming reader.
 *
 * <p>Without an index sort, or when {@code MultiSorter.sort} returns {@code null} (the leaves are
 * already in sorted order), the maps only skip deletions and rebase IDs into the merged segment.
 * Otherwise the maps produced by the merge sort are returned and {@code needsIndexSort} is set.
 *
 * @param readers the leaves being merged (declared as a raw {@code List} in this listing;
 *     NOTE(review): elements are presumably {@code CodecReader} -- confirm)
 * @param indexSort the sort of the merged segment, or {@code null} if it is unsorted
 * @return the doc maps, indexed like {@code readers}
 * @throws IOException declared by this method; not thrown directly in its body
 */
private DocMap[] buildDocMaps(List readers, Sort indexSort) throws IOException {
  if (indexSort == null) {
    // Unsorted index: only deletions and docID rebasing have to be handled.
    return buildDeletionDocMaps(readers);
  }

  // Merge-sort the incoming leaves, timing the work for the info stream.
  final long startNanos = System.nanoTime();
  final DocMap[] sortedMaps = MultiSorter.sort(indexSort, readers);
  if (sortedMaps == null) {
    // Leaves are already in order, so the cheaper deletion-only maps are enough.
    return buildDeletionDocMaps(readers);
  }
  needsIndexSort = true;

  final long elapsedNanos = System.nanoTime() - startNanos;
  if (infoStream.isEnabled("SM")) {
    final double elapsedMsec = elapsedNanos / 1000000.0;
    infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", elapsedMsec));
  }
  return sortedMaps;
}
/**
 * Checks that every segment about to be merged is sorted compatibly with the merged segment.
 *
 * <p>Nothing is checked when the merged segment has no index sort.
 *
 * @param readers the segments being merged
 * @param segmentInfo the merged segment, whose index sort (if any) is the reference
 * @throws IllegalArgumentException if a reader has no sort, or has a sort that
 *     {@code isCongruentSort} rejects against the merged segment's sort
 */
private static void verifyIndexSort(List<CodecReader> readers, SegmentInfo segmentInfo) {
  Sort indexSort = segmentInfo.getIndexSort();
  if (indexSort == null) {
    return;
  }
  // The element type must be declared: iterating a raw List as CodecReader does not compile.
  for (CodecReader leaf : readers) {
    Sort segmentSort = leaf.getMetaData().getSort();
    if (segmentSort == null || isCongruentSort(indexSort, segmentSort) == false) {
      // String concatenation already renders a null sort as "null".
      throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort +
          " but to-be-merged segment has sort=" + segmentSort);
    }
  }
}
/**
 * A mapping over document IDs.
 *
 * <p>Implementations decide how each ID is mapped; an ID without a mapping yields {@code -1}.
 */
public abstract static class DocMap {

  /** Sole constructor; takes no arguments. */
  public DocMap() {
  }

  /**
   * Looks up the mapping for a single document.
   *
   * @param docID the document ID to look up
   * @return the mapped document ID, or {@code -1} if {@code docID} is not mapped
   */
  public abstract int get(int docID);
}
/**
 * Builds, for every doc ID in {@code [0, maxDoc)}, the number of live documents that precede it.
 *
 * <p>For a live document that number is its position once deleted documents are squeezed out.
 * Entries are recorded for deleted documents too.
 *
 * @param maxDoc number of doc IDs to cover
 * @param liveDocs bitset queried with {@code get(docID)}; {@code false} marks a deleted document
 * @return the packed per-document values, in doc ID order
 */
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
  final PackedLongValues.Builder compacted = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  // Count of live documents seen before the current doc ID (equals docID minus deletions so far).
  int liveBefore = 0;
  for (int docID = 0; docID < maxDoc; docID++) {
    compacted.add(liveBefore);
    if (liveDocs.get(docID)) {
      liveBefore++;
    }
  }
  return compacted.build();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy