/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.compute.lucene;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.DocBlock;
import org.elasticsearch.compute.data.DocVector;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.IntVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.data.SingletonOrdinalsBuilder;
import org.elasticsearch.compute.operator.AbstractPageMappingOperator;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.Operator;
import org.elasticsearch.core.Assertions;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.BlockLoaderStoredFieldsFromLeafLoader;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.IntFunction;
import java.util.function.Supplier;
/**
* Operator that extracts doc_values from a Lucene index for the documents in pages produced by
* {@link LuceneSourceOperator} and appends them to the page as new columns.
*/
public class ValuesSourceReaderOperator extends AbstractPageMappingOperator {
/**
* Minimum number of documents for which it is more efficient to use a
* sequential stored field reader when reading stored fields.
*
* The sequential stored field reader decompresses a whole block of docs
* at a time so for very short lists it won't be faster to use it. We use
* {@code 10} documents as the boundary for "very short" because it's what
* search does, not because we've done extensive testing on the number.
*
*/
static final int SEQUENTIAL_BOUNDARY = 10;
/**
* Creates a factory for {@link ValuesSourceReaderOperator}.
* @param fields fields to load
* @param shardContexts per-shard loading information
* @param docChannel the channel containing the shard, leaf/segment and doc id
*/
public record Factory(List<FieldInfo> fields, List<ShardContext> shardContexts, int docChannel) implements OperatorFactory {
@Override
public Operator get(DriverContext driverContext) {
return new ValuesSourceReaderOperator(driverContext.blockFactory(), fields, shardContexts, docChannel);
}
@Override
public String describe() {
StringBuilder sb = new StringBuilder();
sb.append("ValuesSourceReaderOperator[fields = [");
if (fields.size() < 10) {
boolean first = true;
for (FieldInfo f : fields) {
if (first) {
first = false;
} else {
sb.append(", ");
}
sb.append(f.name);
}
} else {
sb.append(fields.size()).append(" fields");
}
return sb.append("]]").toString();
}
}
/**
* Configuration for a field to load.
*
* {@code blockLoader} maps shard index to the {@link BlockLoader}s
* which load the actual blocks.
*/
public record FieldInfo(String name, ElementType type, IntFunction<BlockLoader> blockLoader) {}
public record ShardContext(IndexReader reader, Supplier<SourceLoader> newSourceLoader) {}
private final FieldWork[] fields;
private final List<ShardContext> shardContexts;
private final int docChannel;
private final BlockFactory blockFactory;
private final Map<String, Integer> readersBuilt = new TreeMap<>();
int lastShard = -1;
int lastSegment = -1;
/**
* Creates a new extractor.
* @param blockFactory factory used to build the loaded blocks
* @param fields fields to load
* @param shardContexts per-shard loading information
* @param docChannel the channel containing the shard, leaf/segment and doc id
*/
public ValuesSourceReaderOperator(BlockFactory blockFactory, List<FieldInfo> fields, List<ShardContext> shardContexts, int docChannel) {
this.fields = fields.stream().map(f -> new FieldWork(f)).toArray(FieldWork[]::new);
this.shardContexts = shardContexts;
this.docChannel = docChannel;
this.blockFactory = blockFactory;
}
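/**
 * Append one block per configured field to {@code page}, loading values for the documents
 * referenced by the {@link DocVector} in {@code docChannel}. Chooses between a single-leaf
 * sorted path, a single-leaf unsorted path, and a multi-leaf path.
 */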
@Override
protected Page process(Page page) {
DocVector docVector = page.<DocBlock>getBlock(docChannel).asVector();
Block[] blocks = new Block[fields.length];
boolean success = false;
try {
if (docVector.singleSegmentNonDecreasing()) {
IntVector docs = docVector.docs();
int shard = docVector.shards().getInt(0);
int segment = docVector.segments().getInt(0);
loadFromSingleLeaf(blocks, shard, segment, new BlockLoader.Docs() {
@Override
public int count() {
return docs.getPositionCount();
}
@Override
public int get(int i) {
return docs.getInt(i);
}
});
} else if (docVector.singleSegment()) {
loadFromSingleLeafUnsorted(blocks, docVector);
} else {
try (LoadFromMany many = new LoadFromMany(blocks, docVector)) {
many.run();
}
}
if (Assertions.ENABLED) {
for (int f = 0; f < fields.length; f++) {
assert blocks[f].elementType() == ElementType.NULL || blocks[f].elementType() == fields[f].info.type
: blocks[f].elementType() + " NOT IN (NULL, " + fields[f].info.type + ")";
}
}
success = true;
return page.appendBlocks(blocks);
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
if (success == false) {
Releasables.closeExpectNoException(blocks);
}
}
}
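/**
 * Point the per-field readers at {@code shard}/{@code segment}, reusing any readers that are
 * still valid for {@code firstDoc} and discarding the ones that are not.
 */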
private void positionFieldWork(int shard, int segment, int firstDoc) {
if (lastShard == shard) {
if (lastSegment == segment) {
for (FieldWork w : fields) {
w.sameSegment(firstDoc);
}
return;
}
lastSegment = segment;
for (FieldWork w : fields) {
w.sameShardNewSegment();
}
return;
}
lastShard = shard;
lastSegment = segment;
for (FieldWork w : fields) {
w.newShard(shard);
}
}
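/**
 * Like {@link #positionFieldWork} but assumes doc ids within a segment only ever ascend,
 * so existing readers never need to be re-checked against the first doc.
 * @return true if the shard or segment changed and the per-field readers were reset
 */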
private boolean positionFieldWorkDocGuaranteedAscending(int shard, int segment) {
if (lastShard == shard) {
if (lastSegment == segment) {
return false;
}
lastSegment = segment;
for (FieldWork w : fields) {
w.sameShardNewSegment();
}
return true;
}
lastShard = shard;
lastSegment = segment;
for (FieldWork w : fields) {
w.newShard(shard);
}
return true;
}
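/**
 * Load all fields for docs that come from a single leaf, using column-at-a-time readers where
 * available and falling back to row-stride readers backed by stored fields otherwise.
 */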
private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoader.Docs docs) throws IOException {
int firstDoc = docs.get(0);
positionFieldWork(shard, segment, firstDoc);
StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS;
List<RowStrideReaderWork> rowStrideReaders = new ArrayList<>(fields.length);
ComputeBlockLoaderFactory loaderBlockFactory = new ComputeBlockLoaderFactory(blockFactory, docs.count());
LeafReaderContext ctx = ctx(shard, segment);
try {
for (int f = 0; f < fields.length; f++) {
FieldWork field = fields[f];
BlockLoader.ColumnAtATimeReader columnAtATime = field.columnAtATime(ctx);
if (columnAtATime != null) {
blocks[f] = (Block) columnAtATime.read(loaderBlockFactory, docs);
} else {
rowStrideReaders.add(
new RowStrideReaderWork(
field.rowStride(ctx),
(Block.Builder) field.loader.builder(loaderBlockFactory, docs.count()),
field.loader,
f
)
);
storedFieldsSpec = storedFieldsSpec.merge(field.loader.rowStrideStoredFieldSpec());
}
}
if (rowStrideReaders.isEmpty()) {
return;
}
if (storedFieldsSpec.equals(StoredFieldsSpec.NO_REQUIREMENTS)) {
throw new IllegalStateException(
"found row stride readers [" + rowStrideReaders + "] without stored fields [" + storedFieldsSpec + "]"
);
}
StoredFieldLoader storedFieldLoader;
if (useSequentialStoredFieldsReader(docs)) {
storedFieldLoader = StoredFieldLoader.fromSpecSequential(storedFieldsSpec);
trackStoredFields(storedFieldsSpec, true);
} else {
storedFieldLoader = StoredFieldLoader.fromSpec(storedFieldsSpec);
trackStoredFields(storedFieldsSpec, false);
}
BlockLoaderStoredFieldsFromLeafLoader storedFields = new BlockLoaderStoredFieldsFromLeafLoader(
storedFieldLoader.getLoader(ctx, null),
storedFieldsSpec.requiresSource() ? shardContexts.get(shard).newSourceLoader.get().leaf(ctx.reader(), null) : null
);
for (int p = 0; p < docs.count(); p++) {
int doc = docs.get(p);
storedFields.advanceTo(doc);
for (RowStrideReaderWork work : rowStrideReaders) {
work.read(doc, storedFields);
}
}
for (RowStrideReaderWork work : rowStrideReaders) {
blocks[work.offset] = work.build();
}
} finally {
Releasables.close(rowStrideReaders);
}
}
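/**
 * Load from a single leaf whose docs are not in ascending order: read in sorted order via the
 * forwards map, then restore the original page order with the backwards map.
 */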
private void loadFromSingleLeafUnsorted(Block[] blocks, DocVector docVector) throws IOException {
IntVector docs = docVector.docs();
int[] forwards = docVector.shardSegmentDocMapForwards();
int shard = docVector.shards().getInt(0);
int segment = docVector.segments().getInt(0);
loadFromSingleLeaf(blocks, shard, segment, new BlockLoader.Docs() {
@Override
public int count() {
return docs.getPositionCount();
}
@Override
public int get(int i) {
return docs.getInt(forwards[i]);
}
});
final int[] backwards = docVector.shardSegmentDocMapBackwards();
for (int i = 0; i < blocks.length; i++) {
Block in = blocks[i];
blocks[i] = in.filter(backwards);
in.close();
}
}
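/**
 * Loads values row by row for pages whose docs span multiple shards or segments. Documents are
 * visited in shard/segment/doc order and the finished blocks are mapped back to page order.
 */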
private class LoadFromMany implements Releasable {
private final Block[] target;
private final IntVector shards;
private final IntVector segments;
private final IntVector docs;
private final int[] forwards;
private final int[] backwards;
private final Block.Builder[][] builders;
private final BlockLoader[][] converters;
private final Block.Builder[] fieldTypeBuilders;
private final BlockLoader.RowStrideReader[] rowStride;
BlockLoaderStoredFieldsFromLeafLoader storedFields;
LoadFromMany(Block[] target, DocVector docVector) {
this.target = target;
shards = docVector.shards();
segments = docVector.segments();
docs = docVector.docs();
forwards = docVector.shardSegmentDocMapForwards();
backwards = docVector.shardSegmentDocMapBackwards();
fieldTypeBuilders = new Block.Builder[target.length];
builders = new Block.Builder[target.length][shardContexts.size()];
converters = new BlockLoader[target.length][shardContexts.size()];
rowStride = new BlockLoader.RowStrideReader[target.length];
}
void run() throws IOException {
for (int f = 0; f < fields.length; f++) {
/*
* Important note: each field has a desired type, which might not match the mapped type (in the case of union-types).
* We create the final block builders using the desired type, one for each field, but then also use inner builders
* (one for each field and shard), and converters (again one for each field and shard) to actually perform the field
* loading in a way that is correct for the mapped field type, and then convert between that type and the desired type.
*/
fieldTypeBuilders[f] = fields[f].info.type.newBlockBuilder(docs.getPositionCount(), blockFactory);
builders[f] = new Block.Builder[shardContexts.size()];
converters[f] = new BlockLoader[shardContexts.size()];
}
ComputeBlockLoaderFactory loaderBlockFactory = new ComputeBlockLoaderFactory(blockFactory, docs.getPositionCount());
int p = forwards[0];
int shard = shards.getInt(p);
int segment = segments.getInt(p);
int firstDoc = docs.getInt(p);
positionFieldWork(shard, segment, firstDoc);
LeafReaderContext ctx = ctx(shard, segment);
fieldsMoved(ctx, shard);
verifyBuilders(loaderBlockFactory, shard);
read(firstDoc, shard);
for (int i = 1; i < forwards.length; i++) {
p = forwards[i];
shard = shards.getInt(p);
segment = segments.getInt(p);
boolean changedSegment = positionFieldWorkDocGuaranteedAscending(shard, segment);
if (changedSegment) {
ctx = ctx(shard, segment);
fieldsMoved(ctx, shard);
}
verifyBuilders(loaderBlockFactory, shard);
read(docs.getInt(p), shard);
}
for (int f = 0; f < target.length; f++) {
for (int s = 0; s < shardContexts.size(); s++) {
if (builders[f][s] != null) {
try (Block orig = (Block) converters[f][s].convert(builders[f][s].build())) {
fieldTypeBuilders[f].copyFrom(orig, 0, orig.getPositionCount());
}
}
}
try (Block targetBlock = fieldTypeBuilders[f].build()) {
target[f] = targetBlock.filter(backwards);
}
}
}
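/**
 * Rebuild the row-stride readers and the stored fields loader after moving to a new leaf.
 */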
private void fieldsMoved(LeafReaderContext ctx, int shard) throws IOException {
StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS;
for (int f = 0; f < fields.length; f++) {
FieldWork field = fields[f];
rowStride[f] = field.rowStride(ctx);
storedFieldsSpec = storedFieldsSpec.merge(field.loader.rowStrideStoredFieldSpec());
storedFields = new BlockLoaderStoredFieldsFromLeafLoader(
StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(ctx, null),
storedFieldsSpec.requiresSource() ? shardContexts.get(shard).newSourceLoader.get().leaf(ctx.reader(), null) : null
);
if (false == storedFieldsSpec.equals(StoredFieldsSpec.NO_REQUIREMENTS)) {
trackStoredFields(storedFieldsSpec, false);
}
}
}
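/**
 * Make sure a builder and converter exist for every field for {@code shard}, creating them on first use.
 */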
private void verifyBuilders(ComputeBlockLoaderFactory loaderBlockFactory, int shard) {
for (int f = 0; f < fields.length; f++) {
if (builders[f][shard] == null) {
// Note that this relies on field.newShard() to set the loader and converter correctly for the current shard
builders[f][shard] = (Block.Builder) fields[f].loader.builder(loaderBlockFactory, docs.getPositionCount());
converters[f][shard] = fields[f].loader;
}
}
}
private void read(int doc, int shard) throws IOException {
storedFields.advanceTo(doc);
for (int f = 0; f < builders.length; f++) {
rowStride[f].read(doc, storedFields, builders[f][shard]);
}
}
@Override
public void close() {
Releasables.closeExpectNoException(fieldTypeBuilders);
for (int f = 0; f < fields.length; f++) {
Releasables.closeExpectNoException(builders[f]);
}
}
}
/**
* Is it more efficient to use a sequential stored field reader
* when reading stored fields for the documents contained in {@code docs}?
*/
private boolean useSequentialStoredFieldsReader(BlockLoader.Docs docs) {
int count = docs.count();
return count >= SEQUENTIAL_BOUNDARY && docs.get(count - 1) - docs.get(0) == count - 1;
}
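/**
 * Record, for {@link Status}, which kind of stored field reader was built and whether it was sequential.
 */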
private void trackStoredFields(StoredFieldsSpec spec, boolean sequential) {
readersBuilt.merge(
"stored_fields["
+ "requires_source:"
+ spec.requiresSource()
+ ", fields:"
+ spec.requiredStoredFields().size()
+ ", sequential: "
+ sequential
+ "]",
1,
(prev, one) -> prev + one
);
}
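/**
 * Per-field loading state: the resolved {@link BlockLoader} for the current shard plus the
 * column-at-a-time and row-stride readers for the current segment, created lazily and reused
 * while they remain valid.
 */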
private class FieldWork {
final FieldInfo info;
BlockLoader loader;
BlockLoader.ColumnAtATimeReader columnAtATime;
BlockLoader.RowStrideReader rowStride;
FieldWork(FieldInfo info) {
this.info = info;
}
void sameSegment(int firstDoc) {
if (columnAtATime != null && columnAtATime.canReuse(firstDoc) == false) {
columnAtATime = null;
}
if (rowStride != null && rowStride.canReuse(firstDoc) == false) {
rowStride = null;
}
}
void sameShardNewSegment() {
columnAtATime = null;
rowStride = null;
}
void newShard(int shard) {
loader = info.blockLoader.apply(shard);
columnAtATime = null;
rowStride = null;
}
BlockLoader.ColumnAtATimeReader columnAtATime(LeafReaderContext ctx) throws IOException {
if (columnAtATime == null) {
columnAtATime = loader.columnAtATimeReader(ctx);
trackReader("column_at_a_time", this.columnAtATime);
}
return columnAtATime;
}
BlockLoader.RowStrideReader rowStride(LeafReaderContext ctx) throws IOException {
if (rowStride == null) {
rowStride = loader.rowStrideReader(ctx);
trackReader("row_stride", this.rowStride);
}
return rowStride;
}
private void trackReader(String type, BlockLoader.Reader reader) {
readersBuilt.merge(info.name + ":" + type + ":" + reader, 1, (prev, one) -> prev + one);
}
}
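/**
 * Pairs a row-stride reader with the builder it writes into, the loader used to convert the
 * finished block, and the index in the output block array where the result is stored.
 */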
private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, BlockLoader loader, int offset)
implements
Releasable {
void read(int doc, BlockLoaderStoredFieldsFromLeafLoader storedFields) throws IOException {
reader.read(doc, storedFields, builder);
}
Block build() {
return (Block) loader.convert(builder.build());
}
@Override
public void close() {
builder.close();
}
}
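/**
 * Resolve the {@link LeafReaderContext} for a shard/segment pair.
 */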
private LeafReaderContext ctx(int shard, int segment) {
return shardContexts.get(shard).reader.leaves().get(segment);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("ValuesSourceReaderOperator[fields = [");
if (fields.length < 10) {
boolean first = true;
for (FieldWork f : fields) {
if (first) {
first = false;
} else {
sb.append(", ");
}
sb.append(f.info.name);
}
} else {
sb.append(fields.length).append(" fields");
}
return sb.append("]]").toString();
}
@Override
protected Status status(long processNanos, int pagesProcessed) {
return new Status(new TreeMap<>(readersBuilt), processNanos, pagesProcessed);
}
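/**
 * Status reported by this operator: the readers it built, keyed by description, plus the timing
 * and page counts tracked by {@link AbstractPageMappingOperator.Status}.
 */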
public static class Status extends AbstractPageMappingOperator.Status {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Operator.Status.class,
"values_source_reader",
Status::new
);
private final Map<String, Integer> readersBuilt;
Status(Map<String, Integer> readersBuilt, long processNanos, int pagesProcessed) {
super(processNanos, pagesProcessed);
this.readersBuilt = readersBuilt;
}
Status(StreamInput in) throws IOException {
super(in);
readersBuilt = in.readOrderedMap(StreamInput::readString, StreamInput::readVInt);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeMap(readersBuilt, StreamOutput::writeVInt);
}
@Override
public String getWriteableName() {
return ENTRY.name;
}
public Map<String, Integer> readersBuilt() {
return readersBuilt;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.startObject("readers_built");
for (Map.Entry<String, Integer> e : readersBuilt.entrySet()) {
builder.field(e.getKey(), e.getValue());
}
builder.endObject();
innerToXContent(builder);
return builder.endObject();
}
@Override
public boolean equals(Object o) {
if (super.equals(o) == false) return false;
Status status = (Status) o;
return readersBuilt.equals(status.readersBuilt);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), readersBuilt);
}
@Override
public String toString() {
return Strings.toString(this);
}
}
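/**
 * Adapts the compute engine's {@link BlockFactory} to the {@link BlockLoader.BlockFactory}
 * interface that {@link BlockLoader}s use to build blocks, sharing a single constant-null
 * block per page.
 */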
private static class ComputeBlockLoaderFactory implements BlockLoader.BlockFactory {
private final BlockFactory factory;
private final int pageSize;
private Block nullBlock;
private ComputeBlockLoaderFactory(BlockFactory factory, int pageSize) {
this.factory = factory;
this.pageSize = pageSize;
}
@Override
public BlockLoader.BooleanBuilder booleansFromDocValues(int expectedCount) {
return factory.newBooleanBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING);
}
@Override
public BlockLoader.BooleanBuilder booleans(int expectedCount) {
return factory.newBooleanBlockBuilder(expectedCount);
}
@Override
public BlockLoader.BytesRefBuilder bytesRefsFromDocValues(int expectedCount) {
return factory.newBytesRefBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING);
}
@Override
public BlockLoader.BytesRefBuilder bytesRefs(int expectedCount) {
return factory.newBytesRefBlockBuilder(expectedCount);
}
@Override
public BlockLoader.DoubleBuilder doublesFromDocValues(int expectedCount) {
return factory.newDoubleBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING);
}
@Override
public BlockLoader.DoubleBuilder doubles(int expectedCount) {
return factory.newDoubleBlockBuilder(expectedCount);
}
@Override
public BlockLoader.IntBuilder intsFromDocValues(int expectedCount) {
return factory.newIntBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING);
}
@Override
public BlockLoader.IntBuilder ints(int expectedCount) {
return factory.newIntBlockBuilder(expectedCount);
}
@Override
public BlockLoader.LongBuilder longsFromDocValues(int expectedCount) {
return factory.newLongBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING);
}
@Override
public BlockLoader.LongBuilder longs(int expectedCount) {
return factory.newLongBlockBuilder(expectedCount);
}
@Override
public BlockLoader.Builder nulls(int expectedCount) {
return ElementType.NULL.newBlockBuilder(expectedCount, factory);
}
@Override
public Block constantNulls() {
if (nullBlock == null) {
nullBlock = factory.newConstantNullBlock(pageSize);
} else {
nullBlock.incRef();
}
return nullBlock;
}
@Override
public BytesRefBlock constantBytes(BytesRef value) {
return factory.newConstantBytesRefBlockWith(value, pageSize);
}
@Override
public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) {
return new SingletonOrdinalsBuilder(factory, ordinals, count);
}
}
// TODO tests that mix source loaded fields and doc values in the same block
}