// org.elasticsearch.index.mapper.DocumentLeafReader (Maven / Gradle / Ivy)
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
/**
* A {@link LeafReader} over a lucene document that exposes doc values and stored fields.
* Note that unlike lucene's {@link MemoryIndex} implementation, this holds no state and
* does not attempt to do any analysis on text fields. It also supports stored
* fields where MemoryIndex does not. It is used to back index-time scripts that
* reference field data and stored fields from a document that has not yet been
* indexed.
*/
class DocumentLeafReader extends LeafReader {
private final LuceneDocument document;
private final Map> calculatedFields;
private final Set fieldPath = new LinkedHashSet<>();
DocumentLeafReader(LuceneDocument document, Map> calculatedFields) {
this.document = document;
this.calculatedFields = calculatedFields;
}
private void checkField(String field) {
if (calculatedFields.containsKey(field)) {
// this means that a mapper script is referring to another calculated field;
// in which case we need to execute that field first. We also check for loops here
if (fieldPath.add(field) == false) {
throw new IllegalArgumentException("Loop in field resolution detected: " + String.join("->", fieldPath) + "->" + field);
}
calculatedFields.get(field).accept(this.getContext());
fieldPath.remove(field);
}
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
checkField(field);
List values = document.getFields()
.stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.NUMERIC)
.map(IndexableField::numericValue)
.sorted()
.collect(Collectors.toList());
return numericDocValues(values);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
checkField(field);
List values = document.getFields()
.stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.BINARY)
.map(IndexableField::binaryValue)
.sorted()
.collect(Collectors.toList());
return binaryDocValues(values);
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
checkField(field);
List values = document.getFields()
.stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED)
.map(IndexableField::binaryValue)
.sorted()
.collect(Collectors.toList());
return sortedDocValues(values);
}
@Override
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
checkField(field);
List values = document.getFields()
.stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED_NUMERIC)
.map(IndexableField::numericValue)
.sorted()
.collect(Collectors.toList());
return sortedNumericDocValues(values);
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
checkField(field);
List values = document.getFields()
.stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED_SET)
.map(IndexableField::binaryValue)
.sorted()
.collect(Collectors.toList());
return sortedSetDocValues(values);
}
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
List fields = document.getFields().stream().filter(f -> f.fieldType().stored()).collect(Collectors.toList());
for (IndexableField field : fields) {
FieldInfo fieldInfo = fieldInfo(field.name());
if (visitor.needsField(fieldInfo) != StoredFieldVisitor.Status.YES) {
continue;
}
if (field.numericValue() != null) {
Number v = field.numericValue();
if (v instanceof Integer) {
visitor.intField(fieldInfo, v.intValue());
} else if (v instanceof Long) {
visitor.longField(fieldInfo, v.longValue());
} else if (v instanceof Float) {
visitor.floatField(fieldInfo, v.floatValue());
} else if (v instanceof Double) {
visitor.doubleField(fieldInfo, v.doubleValue());
}
} else if (field.stringValue() != null) {
visitor.stringField(fieldInfo, field.stringValue());
} else if (field.binaryValue() != null) {
// We can't just pass field.binaryValue().bytes here as there may be offset/length
// considerations
byte[] data = new byte[field.binaryValue().length];
System.arraycopy(field.binaryValue().bytes, field.binaryValue().offset, data, 0, data.length);
visitor.binaryField(fieldInfo, data);
}
}
}
@Override
public CacheHelper getCoreCacheHelper() {
throw new UnsupportedOperationException();
}
@Override
public Terms terms(String field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public VectorValues getVectorValues(String field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Bits getLiveDocs() {
throw new UnsupportedOperationException();
}
@Override
public PointValues getPointValues(String field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void checkIntegrity() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public LeafMetaData getMetaData() {
throw new UnsupportedOperationException();
}
@Override
public Fields getTermVectors(int docID) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int numDocs() {
throw new UnsupportedOperationException();
}
@Override
public int maxDoc() {
throw new UnsupportedOperationException();
}
@Override
protected void doClose() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public CacheHelper getReaderCacheHelper() {
throw new UnsupportedOperationException();
}
// Our StoredFieldsVisitor implementations only check the name of the passed-in
// FieldInfo, so that's the only value we need to set here.
private static FieldInfo fieldInfo(String name) {
return new FieldInfo(
name,
0,
false,
false,
false,
IndexOptions.NONE,
DocValuesType.NONE,
-1,
Collections.emptyMap(),
0,
0,
0,
0,
VectorSimilarityFunction.EUCLIDEAN,
false
);
}
private static NumericDocValues numericDocValues(List values) {
if (values.size() == 0) {
return null;
}
DocIdSetIterator disi = DocIdSetIterator.all(1);
return new NumericDocValues() {
@Override
public long longValue() {
return values.get(0).longValue();
}
@Override
public boolean advanceExact(int target) throws IOException {
return disi.advance(target) == target;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
private static SortedNumericDocValues sortedNumericDocValues(List values) {
if (values.size() == 0) {
return null;
}
DocIdSetIterator disi = DocIdSetIterator.all(1);
return new SortedNumericDocValues() {
int i = -1;
@Override
public long nextValue() {
i++;
return values.get(i).longValue();
}
@Override
public int docValueCount() {
return values.size();
}
@Override
public boolean advanceExact(int target) throws IOException {
i = -1;
return disi.advance(target) == target;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
i = -1;
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
i = -1;
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
private static BinaryDocValues binaryDocValues(List values) {
if (values.size() == 0) {
return null;
}
DocIdSetIterator disi = DocIdSetIterator.all(1);
return new BinaryDocValues() {
@Override
public BytesRef binaryValue() {
return values.get(0);
}
@Override
public boolean advanceExact(int target) throws IOException {
return disi.advance(target) == target;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
private static SortedDocValues sortedDocValues(List values) {
if (values.size() == 0) {
return null;
}
DocIdSetIterator disi = DocIdSetIterator.all(1);
return new SortedDocValues() {
@Override
public int ordValue() {
return 0;
}
@Override
public BytesRef lookupOrd(int ord) {
return values.get(0);
}
@Override
public int getValueCount() {
return values.size();
}
@Override
public boolean advanceExact(int target) throws IOException {
return disi.advance(target) == target;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
private static SortedSetDocValues sortedSetDocValues(List values) {
if (values.size() == 0) {
return null;
}
DocIdSetIterator disi = DocIdSetIterator.all(1);
return new SortedSetDocValues() {
int i = -1;
@Override
public long nextOrd() {
i++;
if (i >= values.size()) {
return NO_MORE_ORDS;
}
return i;
}
@Override
public BytesRef lookupOrd(long ord) {
return values.get((int) ord);
}
@Override
public long getValueCount() {
return values.size();
}
@Override
public boolean advanceExact(int target) throws IOException {
i = -1;
return disi.advance(target) == target;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
i = -1;
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
i = -1;
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy