/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.common.lucene;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.grouping.CollapseTopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.opensearch.ExceptionsHelper;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.Nullable;
import org.opensearch.common.SuppressForbidden;
import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
import org.opensearch.common.util.iterable.Iterables;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.index.analysis.AnalyzerScope;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.search.sort.SortedWiderNumericSortField;
import java.io.IOException;
import java.math.BigInteger;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* Main Lucene class.
*
* @opensearch.internal
*/
public class Lucene {
public static final String LATEST_CODEC = "Lucene95";
public static final String SOFT_DELETES_FIELD = "__soft_deletes";
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());
public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer(
"_whitespace",
AnalyzerScope.GLOBAL,
new WhitespaceAnalyzer()
);
public static final ScoreDoc[] EMPTY_SCORE_DOCS = new ScoreDoc[0];
public static final TopDocs EMPTY_TOP_DOCS = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), EMPTY_SCORE_DOCS);
private Lucene() {}
public static Version parseVersion(@Nullable String version, Version defaultVersion, Logger logger) {
if (version == null) {
return defaultVersion;
}
try {
return Version.parse(version);
} catch (ParseException e) {
logger.warn(() -> new ParameterizedMessage("no version match {}, default to {}", version, defaultVersion), e);
return defaultVersion;
}
}
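/**
* Illustrative sketch (not part of the upstream class): parses a Lucene version stored in commit
* user data, falling back to {@link Version#LATEST} when the entry is missing or unparsable.
* The {@code "lucene_version"} key used here is a hypothetical example key.
*/
public static Version parseVersionFromUserData(Map<String, String> userData, Logger logger) {
return parseVersion(userData.get("lucene_version"), Version.LATEST, logger);
}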
/**
* Reads the segments infos, failing if it fails to load
*/
public static SegmentInfos readSegmentInfos(Directory directory) throws IOException {
return SegmentInfos.readLatestCommit(directory);
}
/**
* A variant of {@link #readSegmentInfos(Directory)} that supports reading indices written by
* older major versions of Lucene. This leverages Lucene's "expert" readLatestCommit API. The
* {@link org.opensearch.Version} parameter determines the minimum supported Lucene major version.
*/
public static SegmentInfos readSegmentInfos(Directory directory, org.opensearch.Version minimumVersion) throws IOException {
final int minSupportedLuceneMajor = minimumVersion.minimumIndexCompatibilityVersion().luceneVersion.major;
return SegmentInfos.readLatestCommit(directory, minSupportedLuceneMajor);
}
/**
* Returns an iterable that allows iterating over all files referenced by this segments info
*/
public static Iterable<String> files(SegmentInfos infos) throws IOException {
final List<Collection<String>> list = new ArrayList<>();
list.add(Collections.singleton(infos.getSegmentsFileName()));
for (SegmentCommitInfo info : infos) {
list.add(info.files());
}
return Iterables.flatten(list);
}
/**
* Returns the number of documents in the index referenced by this {@link SegmentInfos}
*/
public static int getNumDocs(SegmentInfos info) {
int numDocs = 0;
for (SegmentCommitInfo si : info) {
numDocs += si.info.maxDoc() - si.getDelCount() - si.getSoftDelCount();
}
return numDocs;
}
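/**
* Illustrative sketch (not part of the upstream class): combines {@link #readSegmentInfos(Directory)}
* and {@link #getNumDocs(SegmentInfos)} to report the live document count of the latest commit in
* the given directory.
*/
public static int readNumDocsOfLatestCommit(Directory directory) throws IOException {
final SegmentInfos infos = readSegmentInfos(directory);
return getNumDocs(infos);
}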
/**
* Reads the segments infos from the given commit, failing if it fails to load
*/
public static SegmentInfos readSegmentInfos(IndexCommit commit) throws IOException {
// Using commit.getSegmentsFileName() does NOT work here, have to
// manually create the segment filename
String filename = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", commit.getGeneration());
return SegmentInfos.readCommit(commit.getDirectory(), filename);
}
/**
* Reads the segments infos from the given segments file name, failing if it fails to load
*/
private static SegmentInfos readSegmentInfos(String segmentsFileName, Directory directory) throws IOException {
return SegmentInfos.readCommit(directory, segmentsFileName);
}
/**
* This method removes all files from the given directory that are not referenced by the given segments file.
* This method will open an IndexWriter and relies on index file deleter to remove all unreferenced files. Segment files
* that are newer than the given segments file are removed forcefully to prevent problems with IndexWriter opening a potentially
* broken commit point / leftover.
* Note: this method will fail if there is another IndexWriter open on the given directory. This method will also acquire
* a write lock from the directory while pruning unused files. This method expects an existing index in the given directory that has
* the given segments file.
*/
public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Directory directory) throws IOException {
final SegmentInfos si = readSegmentInfos(segmentsFileName, directory);
try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
int foundSegmentFiles = 0;
for (final String file : directory.listAll()) {
/*
* we could also use a deletion policy here but in the case of snapshot and restore
* sometimes we restore an index and overwrite files that were referenced by a "future"
* commit. If such a commit is opened by the IW it would likely throw a corrupted index exception
* since checksums don't match anymore. That's why we prune the file name here directly.
* We also want the caller to know if we were not able to remove a segments_N file.
*/
if (file.startsWith(IndexFileNames.SEGMENTS)) {
foundSegmentFiles++;
if (file.equals(si.getSegmentsFileName()) == false) {
directory.deleteFile(file); // remove all segments_N files except the one we want to keep
}
}
}
assert SegmentInfos.getLastCommitSegmentsFileName(directory).equals(segmentsFileName);
if (foundSegmentFiles == 0) {
throw new IllegalStateException("no commit found in the directory");
}
}
final IndexCommit cp = getIndexCommit(si, directory);
try (
IndexWriter writer = new IndexWriter(
directory,
new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setIndexCommit(cp)
.setCommitOnClose(false)
.setMergePolicy(NoMergePolicy.INSTANCE)
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
)
) {
// do nothing; closing the writer kicks off IndexFileDeleter, which removes all pending files
}
return si;
}
/**
* Returns an index commit for the given {@link SegmentInfos} in the given directory.
*/
public static IndexCommit getIndexCommit(SegmentInfos si, Directory directory) throws IOException {
return new CommitPoint(si, directory);
}
/**
* This method removes all lucene files from the given directory. It will first try to delete all commit points / segments
* files to ensure broken commits or corrupted indices will not be opened in the future. If any of the segment files can't be deleted
* this operation fails.
*/
public static void cleanLuceneIndex(Directory directory) throws IOException {
try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
for (final String file : directory.listAll()) {
if (file.startsWith(IndexFileNames.SEGMENTS)) {
directory.deleteFile(file); // remove all segments_N files
}
}
}
try (
IndexWriter writer = new IndexWriter(
directory,
new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setMergePolicy(NoMergePolicy.INSTANCE) // no merges
.setCommitOnClose(false) // no commits
.setOpenMode(IndexWriterConfig.OpenMode.CREATE) // force creation - don't append...
)
) {
// do nothing; closing the writer kicks off IndexFileDeleter, which removes all pending files
}
}
public static void checkSegmentInfoIntegrity(final Directory directory) throws IOException {
new SegmentInfos.FindSegmentsFile(directory) {
@Override
protected Object doBody(String segmentFileName) throws IOException {
try (IndexInput input = directory.openInput(segmentFileName, IOContext.READ)) {
CodecUtil.checksumEntireFile(input);
}
return null;
}
}.run();
}
/**
* Check whether there is one or more documents matching the provided query.
*/
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
// the scorer API should be more efficient at stopping after the first
// match than the bulk scorer API
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
final Scorer scorer = weight.scorer(context);
if (scorer == null) {
continue;
}
final Bits liveDocs = context.reader().getLiveDocs();
final DocIdSetIterator iterator = scorer.iterator();
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
if (liveDocs == null || liveDocs.get(doc)) {
return true;
}
}
}
return false;
}
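/**
* Illustrative sketch (not part of the upstream class): uses {@link #exists(IndexSearcher, Query)}
* with a match-all query to check whether the searched index contains at least one live document,
* without collecting any hits.
*/
public static boolean hasLiveDocs(IndexSearcher searcher) throws IOException {
return exists(searcher, new org.apache.lucene.search.MatchAllDocsQuery());
}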
public static TotalHits readTotalHits(StreamInput in) throws IOException {
long totalHits = in.readVLong();
TotalHits.Relation totalHitsRelation = TotalHits.Relation.EQUAL_TO;
if (in.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
totalHitsRelation = in.readEnum(TotalHits.Relation.class);
}
return new TotalHits(totalHits, totalHitsRelation);
}
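// Note on the wire format used by readTopDocs/writeTopDocs below: a leading type byte selects the
// TopDocs shape: 0 = plain TopDocs (ScoreDoc entries), 1 = TopFieldDocs (sort fields followed by
// FieldDoc entries), 2 = CollapseTopFieldDocs (collapse field, sort fields, then FieldDoc and
// collapse-value pairs).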
public static TopDocsAndMaxScore readTopDocs(StreamInput in) throws IOException {
byte type = in.readByte();
if (type == 0) {
TotalHits totalHits = readTotalHits(in);
float maxScore = in.readFloat();
final int scoreDocCount = in.readVInt();
final ScoreDoc[] scoreDocs;
if (scoreDocCount == 0) {
scoreDocs = EMPTY_SCORE_DOCS;
} else {
scoreDocs = new ScoreDoc[scoreDocCount];
for (int i = 0; i < scoreDocs.length; i++) {
scoreDocs[i] = new ScoreDoc(in.readVInt(), in.readFloat());
}
}
return new TopDocsAndMaxScore(new TopDocs(totalHits, scoreDocs), maxScore);
} else if (type == 1) {
TotalHits totalHits = readTotalHits(in);
float maxScore = in.readFloat();
SortField[] fields = in.readArray(Lucene::readSortField, SortField[]::new);
FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
for (int i = 0; i < fieldDocs.length; i++) {
fieldDocs[i] = readFieldDoc(in);
}
return new TopDocsAndMaxScore(new TopFieldDocs(totalHits, fieldDocs, fields), maxScore);
} else if (type == 2) {
TotalHits totalHits = readTotalHits(in);
float maxScore = in.readFloat();
String field = in.readString();
SortField[] fields = in.readArray(Lucene::readSortField, SortField[]::new);
int size = in.readVInt();
Object[] collapseValues = new Object[size];
FieldDoc[] fieldDocs = new FieldDoc[size];
for (int i = 0; i < fieldDocs.length; i++) {
fieldDocs[i] = readFieldDoc(in);
collapseValues[i] = readSortValue(in);
}
return new TopDocsAndMaxScore(new CollapseTopFieldDocs(field, totalHits, fieldDocs, fields, collapseValues), maxScore);
} else {
throw new IllegalStateException("Unknown type " + type);
}
}
public static FieldDoc readFieldDoc(StreamInput in) throws IOException {
Comparable[] cFields = new Comparable[in.readVInt()];
for (int j = 0; j < cFields.length; j++) {
byte type = in.readByte();
if (type == 0) {
cFields[j] = null;
} else if (type == 1) {
cFields[j] = in.readString();
} else if (type == 2) {
cFields[j] = in.readInt();
} else if (type == 3) {
cFields[j] = in.readLong();
} else if (type == 4) {
cFields[j] = in.readFloat();
} else if (type == 5) {
cFields[j] = in.readDouble();
} else if (type == 6) {
cFields[j] = in.readByte();
} else if (type == 7) {
cFields[j] = in.readShort();
} else if (type == 8) {
cFields[j] = in.readBoolean();
} else if (type == 9) {
cFields[j] = in.readBytesRef();
} else if (type == 10) {
cFields[j] = new BigInteger(in.readString());
} else {
throw new IOException("Can't match type [" + type + "]");
}
}
return new FieldDoc(in.readVInt(), in.readFloat(), cFields);
}
public static Comparable readSortValue(StreamInput in) throws IOException {
byte type = in.readByte();
if (type == 0) {
return null;
} else if (type == 1) {
return in.readString();
} else if (type == 2) {
return in.readInt();
} else if (type == 3) {
return in.readLong();
} else if (type == 4) {
return in.readFloat();
} else if (type == 5) {
return in.readDouble();
} else if (type == 6) {
return in.readByte();
} else if (type == 7) {
return in.readShort();
} else if (type == 8) {
return in.readBoolean();
} else if (type == 9) {
return in.readBytesRef();
} else if (type == 10) {
return new BigInteger(in.readString());
} else {
throw new IOException("Can't match type [" + type + "]");
}
}
public static ScoreDoc readScoreDoc(StreamInput in) throws IOException {
return new ScoreDoc(in.readVInt(), in.readFloat());
}
private static final Class<?> GEO_DISTANCE_SORT_TYPE_CLASS = LatLonDocValuesField.newDistanceSort("some_geo_field", 0, 0).getClass();
public static void writeTotalHits(StreamOutput out, TotalHits totalHits) throws IOException {
out.writeVLong(totalHits.value);
if (out.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
out.writeEnum(totalHits.relation);
} else if (totalHits.value > 0 && totalHits.relation != TotalHits.Relation.EQUAL_TO) {
throw new IllegalArgumentException("Cannot serialize approximate total hit counts to nodes that are on a version < 7.0.0");
}
}
public static void writeTopDocs(StreamOutput out, TopDocsAndMaxScore topDocs) throws IOException {
if (topDocs.topDocs instanceof CollapseTopFieldDocs) {
out.writeByte((byte) 2);
CollapseTopFieldDocs collapseDocs = (CollapseTopFieldDocs) topDocs.topDocs;
writeTotalHits(out, topDocs.topDocs.totalHits);
out.writeFloat(topDocs.maxScore);
out.writeString(collapseDocs.field);
out.writeArray(Lucene::writeSortField, collapseDocs.fields);
out.writeVInt(topDocs.topDocs.scoreDocs.length);
for (int i = 0; i < topDocs.topDocs.scoreDocs.length; i++) {
ScoreDoc doc = collapseDocs.scoreDocs[i];
writeFieldDoc(out, (FieldDoc) doc);
writeSortValue(out, collapseDocs.collapseValues[i]);
}
} else if (topDocs.topDocs instanceof TopFieldDocs) {
out.writeByte((byte) 1);
TopFieldDocs topFieldDocs = (TopFieldDocs) topDocs.topDocs;
writeTotalHits(out, topDocs.topDocs.totalHits);
out.writeFloat(topDocs.maxScore);
out.writeArray(Lucene::writeSortField, topFieldDocs.fields);
out.writeVInt(topDocs.topDocs.scoreDocs.length);
for (ScoreDoc doc : topFieldDocs.scoreDocs) {
writeFieldDoc(out, (FieldDoc) doc);
}
} else {
out.writeByte((byte) 0);
writeTotalHits(out, topDocs.topDocs.totalHits);
out.writeFloat(topDocs.maxScore);
out.writeVInt(topDocs.topDocs.scoreDocs.length);
for (ScoreDoc doc : topDocs.topDocs.scoreDocs) {
writeScoreDoc(out, doc);
}
}
}
private static void writeMissingValue(StreamOutput out, Object missingValue) throws IOException {
if (missingValue == SortField.STRING_FIRST) {
out.writeByte((byte) 1);
} else if (missingValue == SortField.STRING_LAST) {
out.writeByte((byte) 2);
} else {
out.writeByte((byte) 0);
out.writeGenericValue(missingValue);
}
}
private static Object readMissingValue(StreamInput in) throws IOException {
final byte id = in.readByte();
switch (id) {
case 0:
return in.readGenericValue();
case 1:
return SortField.STRING_FIRST;
case 2:
return SortField.STRING_LAST;
default:
throw new IOException("Unknown missing value id: " + id);
}
}
public static void writeSortValue(StreamOutput out, Object field) throws IOException {
if (field == null) {
out.writeByte((byte) 0);
} else {
Class<?> type = field.getClass();
if (type == String.class) {
out.writeByte((byte) 1);
out.writeString((String) field);
} else if (type == Integer.class) {
out.writeByte((byte) 2);
out.writeInt((Integer) field);
} else if (type == Long.class) {
out.writeByte((byte) 3);
out.writeLong((Long) field);
} else if (type == Float.class) {
out.writeByte((byte) 4);
out.writeFloat((Float) field);
} else if (type == Double.class) {
out.writeByte((byte) 5);
out.writeDouble((Double) field);
} else if (type == Byte.class) {
out.writeByte((byte) 6);
out.writeByte((Byte) field);
} else if (type == Short.class) {
out.writeByte((byte) 7);
out.writeShort((Short) field);
} else if (type == Boolean.class) {
out.writeByte((byte) 8);
out.writeBoolean((Boolean) field);
} else if (type == BytesRef.class) {
out.writeByte((byte) 9);
out.writeBytesRef((BytesRef) field);
} else if (type == BigInteger.class) {
// TODO: improve serialization of BigInteger
out.writeByte((byte) 10);
out.writeString(field.toString());
} else {
throw new IOException("Can't handle sort field value of type [" + type + "]");
}
}
}
public static void writeFieldDoc(StreamOutput out, FieldDoc fieldDoc) throws IOException {
out.writeVInt(fieldDoc.fields.length);
for (Object field : fieldDoc.fields) {
writeSortValue(out, field);
}
out.writeVInt(fieldDoc.doc);
out.writeFloat(fieldDoc.score);
}
public static void writeScoreDoc(StreamOutput out, ScoreDoc scoreDoc) throws IOException {
if (!scoreDoc.getClass().equals(ScoreDoc.class)) {
throw new IllegalArgumentException("This method can only be used to serialize a ScoreDoc, not a " + scoreDoc.getClass());
}
out.writeVInt(scoreDoc.doc);
out.writeFloat(scoreDoc.score);
}
// LUCENE 4 UPGRADE: We might want to maintain our own ordinal, instead of Lucene's ordinal
public static SortField.Type readSortType(StreamInput in) throws IOException {
return SortField.Type.values()[in.readVInt()];
}
public static SortField readSortField(StreamInput in) throws IOException {
String field = null;
if (in.readBoolean()) {
field = in.readString();
}
SortField.Type sortType = readSortType(in);
Object missingValue = readMissingValue(in);
boolean reverse = in.readBoolean();
SortField sortField = new SortField(field, sortType, reverse);
if (missingValue != null) {
sortField.setMissingValue(missingValue);
}
return sortField;
}
public static void writeSortType(StreamOutput out, SortField.Type sortType) throws IOException {
out.writeVInt(sortType.ordinal());
}
public static void writeSortField(StreamOutput out, SortField sortField) throws IOException {
if (sortField.getClass() == GEO_DISTANCE_SORT_TYPE_CLASS) {
// for geo sorting, we replace the SortField with a SortField that assumes a double field.
// this works since the SortField is only used for merging top docs
SortField newSortField = new SortField(sortField.getField(), SortField.Type.DOUBLE);
newSortField.setMissingValue(sortField.getMissingValue());
sortField = newSortField;
} else if (sortField.getClass() == SortedSetSortField.class) {
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
// It works because the sort field is only used to merge results from different shards.
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
newSortField.setMissingValue(sortField.getMissingValue());
sortField = newSortField;
} else if (sortField.getClass() == SortedNumericSortField.class || sortField.getClass() == SortedWiderNumericSortField.class) {
// for multi-valued sort field, we replace the SortedNumericSortField/SortedWiderNumericSortField with a simple SortField.
// It works because the sort field is only used to merge results from different shards.
SortField newSortField = new SortField(
sortField.getField(),
((SortedNumericSortField) sortField).getNumericType(),
sortField.getReverse()
);
newSortField.setMissingValue(sortField.getMissingValue());
sortField = newSortField;
}
if (sortField.getClass() != SortField.class) {
throw new IllegalArgumentException("Cannot serialize SortField impl [" + sortField + "]");
}
if (sortField.getField() == null) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeString(sortField.getField());
}
if (sortField.getComparatorSource() != null) {
IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField
.getComparatorSource();
writeSortType(out, comparatorSource.reducedType());
writeMissingValue(out, comparatorSource.missingValue(sortField.getReverse()));
} else {
writeSortType(out, sortField.getType());
writeMissingValue(out, sortField.getMissingValue());
}
out.writeBoolean(sortField.getReverse());
}
private static Number readExplanationValue(StreamInput in) throws IOException {
if (in.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
final int numberType = in.readByte();
switch (numberType) {
case 0:
return in.readFloat();
case 1:
return in.readDouble();
case 2:
return in.readZLong();
default:
throw new IOException("Unexpected number type: " + numberType);
}
} else {
return in.readFloat();
}
}
public static Explanation readExplanation(StreamInput in) throws IOException {
boolean match = in.readBoolean();
String description = in.readString();
final Explanation[] subExplanations = new Explanation[in.readVInt()];
for (int i = 0; i < subExplanations.length; ++i) {
subExplanations[i] = readExplanation(in);
}
if (match) {
return Explanation.match(readExplanationValue(in), description, subExplanations);
} else {
return Explanation.noMatch(description, subExplanations);
}
}
private static void writeExplanationValue(StreamOutput out, Number value) throws IOException {
if (out.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
if (value instanceof Float) {
out.writeByte((byte) 0);
out.writeFloat(value.floatValue());
} else if (value instanceof Double) {
out.writeByte((byte) 1);
out.writeDouble(value.doubleValue());
} else {
out.writeByte((byte) 2);
out.writeZLong(value.longValue());
}
} else {
out.writeFloat(value.floatValue());
}
}
public static void writeExplanation(StreamOutput out, Explanation explanation) throws IOException {
out.writeBoolean(explanation.isMatch());
out.writeString(explanation.getDescription());
Explanation[] subExplanations = explanation.getDetails();
out.writeVInt(subExplanations.length);
for (Explanation subExp : subExplanations) {
writeExplanation(out, subExp);
}
if (explanation.isMatch()) {
writeExplanationValue(out, explanation.getValue());
}
}
public static boolean indexExists(final Directory directory) throws IOException {
return DirectoryReader.indexExists(directory);
}
/**
* Wait for an index to exist for up to {@code timeLimitMillis}. Returns
* true if the index eventually exists, false if not.
*
* Checks the directory once per second until {@code timeLimitMillis} has elapsed.
*/
public static boolean waitForIndex(final Directory directory, final long timeLimitMillis) throws IOException {
final long DELAY = 1000;
long waited = 0;
try {
while (true) {
if (waited >= timeLimitMillis) {
break;
}
if (indexExists(directory)) {
return true;
}
Thread.sleep(DELAY);
waited += DELAY;
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return false;
}
// one more try after all retries
return indexExists(directory);
}
/**
* Returns {@code true} iff the given exception or
* one of its causes is an instance of {@link CorruptIndexException},
* {@link IndexFormatTooOldException}, or {@link IndexFormatTooNewException}; otherwise {@code false}.
*/
public static boolean isCorruptionException(Throwable t) {
return ExceptionsHelper.unwrapCorruption(t) != null;
}
/**
* Parses the version string leniently and returns the default value if the given string is null or empty
*/
public static Version parseVersionLenient(String toParse, Version defaultValue) {
return LenientParser.parse(toParse, defaultValue);
}
/**
* Tries to extract a segment reader from the given index reader.
* If no SegmentReader can be extracted an {@link IllegalStateException} is thrown.
*/
public static SegmentReader segmentReader(LeafReader reader) {
if (reader instanceof SegmentReader) {
return (SegmentReader) reader;
} else if (reader instanceof FilterLeafReader) {
final FilterLeafReader fReader = (FilterLeafReader) reader;
return segmentReader(FilterLeafReader.unwrap(fReader));
} else if (reader instanceof FilterCodecReader) {
final FilterCodecReader fReader = (FilterCodecReader) reader;
return segmentReader(FilterCodecReader.unwrap(fReader));
}
// hard fail - we can't get a SegmentReader
throw new IllegalStateException("Can not extract segment reader from given index reader [" + reader + "]");
}
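/**
* Illustrative sketch (not part of the upstream class): unwraps a (possibly filtered) leaf reader
* down to its {@link SegmentReader} and returns the name of the backing segment. Throws
* {@link IllegalStateException} if the leaf is not backed by a SegmentReader.
*/
public static String segmentName(LeafReader reader) {
return segmentReader(reader).getSegmentName();
}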
@SuppressForbidden(reason = "Version#parseLeniently() used in a central place")
private static final class LenientParser {
public static Version parse(String toParse, Version defaultValue) {
if (Strings.hasLength(toParse)) {
try {
return Version.parseLeniently(toParse);
} catch (ParseException e) {
// pass to default
}
}
return defaultValue;
}
}
private static final class CommitPoint extends IndexCommit {
private String segmentsFileName;
private final Collection<String> files;
private final Directory dir;
private final long generation;
private final Map<String, String> userData;
private final int segmentCount;
private CommitPoint(SegmentInfos infos, Directory dir) throws IOException {
segmentsFileName = infos.getSegmentsFileName();
this.dir = dir;
userData = infos.getUserData();
files = Collections.unmodifiableCollection(infos.files(true));
generation = infos.getGeneration();
segmentCount = infos.size();
}
@Override
public String toString() {
return "DirectoryReader.ReaderCommit(" + segmentsFileName + ")";
}
@Override
public int getSegmentCount() {
return segmentCount;
}
@Override
public String getSegmentsFileName() {
return segmentsFileName;
}
@Override
public Collection<String> getFileNames() {
return files;
}
@Override
public Directory getDirectory() {
return dir;
}
@Override
public long getGeneration() {
return generation;
}
@Override
public boolean isDeleted() {
return false;
}
@Override
public Map<String, String> getUserData() {
return userData;
}
@Override
public void delete() {
throw new UnsupportedOperationException("This IndexCommit does not support deletions");
}
}
/**
* Return a {@link Bits} view of the provided scorer.
* NOTE: the returned {@link Bits} instance MUST be consumed in order.
* @see #asSequentialAccessBits(int, ScorerSupplier, long)
*/
public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier) throws IOException {
return asSequentialAccessBits(maxDoc, scorerSupplier, 0L);
}
/**
* Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match
* all documents contained in the set.
* NOTE: the returned {@link Bits} instance MUST be consumed in order.
* @param estimatedGetCount an estimation of the number of times that {@link Bits#get} will get called
*/
public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier, long estimatedGetCount)
throws IOException {
if (scorerSupplier == null) {
return new Bits.MatchNoBits(maxDoc);
}
// Since we want bits, we need random-access
final Scorer scorer = scorerSupplier.get(estimatedGetCount); // this never returns null
final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
final DocIdSetIterator iterator;
if (twoPhase == null) {
iterator = scorer.iterator();
} else {
iterator = twoPhase.approximation();
}
return new Bits() {
int previous = -1;
boolean previousMatched = false;
@Override
public boolean get(int index) {
if (index < 0 || index >= maxDoc) {
throw new IndexOutOfBoundsException(index + " is out of bounds: [" + 0 + "-" + maxDoc + "[");
}
if (index < previous) {
throw new IllegalArgumentException(
"This Bits instance can only be consumed in order. "
+ "Got called on ["
+ index
+ "] while previously called on ["
+ previous
+ "]"
);
}
if (index == previous) {
// we cache whether it matched because it is illegal to call
// twoPhase.matches() twice
return previousMatched;
}
previous = index;
int doc = iterator.docID();
if (doc < index) {
try {
doc = iterator.advance(index);
} catch (IOException e) {
throw new IllegalStateException("Cannot advance iterator", e);
}
}
if (index == doc) {
try {
return previousMatched = twoPhase == null || twoPhase.matches();
} catch (IOException e) {
throw new IllegalStateException("Cannot validate match", e);
}
}
return previousMatched = false;
}
@Override
public int length() {
return maxDoc;
}
};
}
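/**
* Illustrative sketch (not part of the upstream class): builds a sequential-access {@link Bits}
* view of the documents matching {@code query} in a single leaf. As noted above, the caller must
* look up doc IDs in increasing order.
*/
public static Bits matchingBits(IndexSearcher searcher, Query query, LeafReaderContext context) throws IOException {
final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
final ScorerSupplier scorerSupplier = weight.scorerSupplier(context);
return asSequentialAccessBits(context.reader().maxDoc(), scorerSupplier);
}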
/**
* Whether a query sorted by {@code searchSort} can be early-terminated if the index is sorted by {@code indexSort}.
*/
public static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
final SortField[] fields1 = searchSort.getSort();
final SortField[] fields2 = indexSort.getSort();
// early termination is possible if fields1 is a prefix of fields2
if (fields1.length > fields2.length) {
return false;
}
return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
}
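/**
* Illustrative sketch (not part of the upstream class): a search sorted by a prefix of the index
* sort can terminate early; {@code "timestamp"} is a hypothetical field name.
*/
public static boolean canEarlyTerminateExample() {
final Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG), SortField.FIELD_DOC);
final Sort searchSort = new Sort(new SortField("timestamp", SortField.Type.LONG));
return canEarlyTerminate(searchSort, indexSort); // true: searchSort is a prefix of indexSort
}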
/**
* Wraps a directory reader to make all documents live except those that were rolled back
* or hard-deleted due to non-aborting exceptions during indexing.
* The wrapped reader can be used to query all documents.
*
* @param in the input directory reader
* @return the wrapped reader
*/
public static DirectoryReader wrapAllDocsLive(DirectoryReader in) throws IOException {
return new DirectoryReaderWithAllLiveDocs(in);
}
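/**
* Illustrative sketch (not part of the upstream class): opens the latest commit of {@code directory}
* with all soft-deleted documents exposed as live and returns its document count, i.e. every
* document that has not been hard-deleted. The reader is closed before returning.
*/
public static int countDocsIncludingSoftDeletes(Directory directory) throws IOException {
try (DirectoryReader reader = wrapAllDocsLive(DirectoryReader.open(directory))) {
return reader.numDocs();
}
}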
private static final class DirectoryReaderWithAllLiveDocs extends FilterDirectoryReader {
static final class LeafReaderWithLiveDocs extends FilterLeafReader {
final Bits liveDocs;
final int numDocs;
LeafReaderWithLiveDocs(LeafReader in, Bits liveDocs, int numDocs) {
super(in);
this.liveDocs = liveDocs;
this.numDocs = numDocs;
}
@Override
public Bits getLiveDocs() {
return liveDocs;
}
@Override
public int numDocs() {
return numDocs;
}
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return null; // Modifying liveDocs
}
}
DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
super(in, new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader leaf) {
final SegmentReader segmentReader = segmentReader(leaf);
final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
if (hardLiveDocs == null) {
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
}
// Once soft-deletes are enabled, we no longer hard-update or hard-delete documents directly.
// There are two scenarios in which hard-deletes still occur: (1) old segments written while
// soft-deletes were disabled, and (2) when IndexWriter hits non-aborting exceptions. In both
// cases, IndexWriter flushes SegmentInfos before exposing the hard-deletes, so we can use the
// hard-delete count of SegmentInfos here.
final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount();
assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs);
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
}
});
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return wrapAllDocsLive(in);
}
@Override
public CacheHelper getReaderCacheHelper() {
return null; // Modifying liveDocs
}
}
private static int popCount(Bits bits) {
assert bits != null;
int onBits = 0;
for (int i = 0; i < bits.length(); i++) {
if (bits.get(i)) {
onBits++;
}
}
return onBits;
}
/**
* Returns a numeric doc values field which can be used to soft-delete documents.
*/
public static NumericDocValuesField newSoftDeletesField() {
return new NumericDocValuesField(SOFT_DELETES_FIELD, 1);
}
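/**
* Illustrative sketch (not part of the upstream class): marks the documents matching {@code term}
* as soft-deleted by soft-updating them with a document that carries only the soft-deletes marker.
* Assumes {@code writer} was configured with {@link IndexWriterConfig#setSoftDeletesField(String)}
* for {@link #SOFT_DELETES_FIELD}; a real engine would additionally index tombstone metadata.
*/
public static void softDelete(IndexWriter writer, org.apache.lucene.index.Term term) throws IOException {
writer.softUpdateDocument(term, Collections.<org.apache.lucene.index.IndexableField>emptyList(), newSoftDeletesField());
}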
/**
* Returns an empty leaf reader with the given max docs. The reader will be fully deleted.
*/
public static LeafReader emptyReader(final int maxDoc) {
return new LeafReader() {
final Bits liveDocs = new Bits.MatchNoBits(maxDoc);
public Terms terms(String field) {
return null;
}
public NumericDocValues getNumericDocValues(String field) {
return null;
}
public BinaryDocValues getBinaryDocValues(String field) {
return null;
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
public SortedNumericDocValues getSortedNumericDocValues(String field) {
return null;
}
public SortedSetDocValues getSortedSetDocValues(String field) {
return null;
}
public NumericDocValues getNormValues(String field) {
return null;
}
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
}
public Bits getLiveDocs() {
return this.liveDocs;
}
public PointValues getPointValues(String fieldName) {
return null;
}
public void checkIntegrity() {}
public Fields getTermVectors(int docID) {
return null;
}
@Override
public TermVectors termVectors() throws IOException {
return null;
}
public int numDocs() {
return 0;
}
public int maxDoc() {
return maxDoc;
}
public void document(int docID, StoredFieldVisitor visitor) {}
@Override
public StoredFields storedFields() throws IOException {
return null;
}
protected void doClose() {}
public LeafMetaData getMetaData() {
return new LeafMetaData(Version.LATEST.major, Version.LATEST, null);
}
public CacheHelper getCoreCacheHelper() {
return null;
}
public CacheHelper getReaderCacheHelper() {
return null;
}
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
return null;
}
@Override
public ByteVectorValues getByteVectorValues(String field) throws IOException {
return null;
}
@Override
public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
return null;
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
return null;
}
};
}
}