All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yelp.nrtsearch.server.luceneserver.doc.LoadedDocValues Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta.1
Show newest version
/*
 * Copyright 2020 Yelp Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yelp.nrtsearch.server.luceneserver.doc;

import com.google.gson.Gson;
import com.google.protobuf.ListValue;
import com.google.protobuf.Struct;
import com.google.protobuf.util.JsonFormat;
import com.google.type.LatLng;
import com.yelp.nrtsearch.server.grpc.SearchResponse;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.FieldValue;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.FieldValue.Vector;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.FieldValue.Vector.Builder;
import com.yelp.nrtsearch.server.luceneserver.geo.GeoPoint;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.time.Instant;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.function.LongFunction;
import java.util.stream.Collectors;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

/**
 * Container class for loading and holding doc values for a field. Abstracts loaded values as a
 * list. The various implementations of this class use the lucene segment doc values accessors to
 * load the data for that field.
 *
 * 

These are used during field data retrieval when building a search response, and provided to * scripts during execution. * *

All implementations must define setDocId to advance the doc values accessor to the provided * segment document. All implementations must also define toFieldValue, which provides a {@link * com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.FieldValue} containing the doc value data for a * given index in the list. * *

All implementations throw an IllegalStateException when trying to get a value when there are * no values for the field. All implementations throw an IndexOutOfBoundsException when trying to * access an invalid index. * * @param the loaded doc values type. This could be a simple boxed primitive, or something more * complex like a {@link GeoPoint}. */ public abstract class LoadedDocValues extends AbstractList { // long decoders private static final LongFunction BOOL_DECODER = (longValue) -> longValue == 1; private static final LongFunction INT_DECODER = (longValue) -> (int) longValue; private static final LongFunction LONG_DECODER = (longValue) -> longValue; private static final LongFunction FLOAT_DECODER = (longValue) -> Float.intBitsToFloat((int) longValue); private static final LongFunction SORTED_FLOAT_DECODER = (longValue) -> NumericUtils.sortableIntToFloat((int) longValue); private static final LongFunction DOUBLE_DECODER = Double::longBitsToDouble; private static final LongFunction SORTED_DOUBLE_DECODER = NumericUtils::sortableLongToDouble; private static final LongFunction DATE_DECODER = Instant::ofEpochMilli; private static final LongFunction GEO_POINT_DECODER = (longValue) -> new GeoPoint( GeoEncodingUtils.decodeLatitude((int) (longValue >> 32)), GeoEncodingUtils.decodeLongitude((int) longValue)); // BytesRef decoders // copy the target buffer, as the original BytesRef buffer will be reused private static final Function BYTES_REF_DECODER = BytesRef::deepCopyOf; private static final Function STRING_DECODER = BytesRef::utf8ToString; // Gson decoder to deserilize string to objects private static final Gson gson = new Gson(); public abstract void setDocId(int docID) throws IOException; public abstract SearchResponse.Hit.FieldValue toFieldValue(int index); public abstract static class SingleNumericValue extends LoadedDocValues { private final NumericDocValues docValues; private final LongFunction decoder; private T value; SingleNumericValue(NumericDocValues docValues, LongFunction decoder) { this.docValues = docValues; this.decoder = decoder; } @Override public void setDocId(int docID) throws IOException { if (docValues.advanceExact(docID)) { value = decoder.apply(docValues.longValue()); } else { value = null; } } @Override public T get(int index) { if (value == null) { throw new IllegalStateException("No doc values for document"); } else if (index != 0) { throw new IndexOutOfBoundsException("No doc value for index: " + index); } return value; } @Override public int size() { return value == null ? 0 : 1; } public T getValue() { return get(0); } } public static final class SingleBoolean extends SingleNumericValue { public SingleBoolean(NumericDocValues docValues) { super(docValues, BOOL_DECODER); } public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setBooleanValue(get(index)).build(); } } public static final class SingleInteger extends SingleNumericValue { public SingleInteger(NumericDocValues docValues) { super(docValues, INT_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setIntValue(get(index)).build(); } } public static final class SingleLong extends SingleNumericValue { public SingleLong(NumericDocValues docValues) { super(docValues, LONG_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setLongValue(get(index)).build(); } } public static final class SingleFloat extends SingleNumericValue { public SingleFloat(NumericDocValues docValues) { super(docValues, FLOAT_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setFloatValue(get(index)).build(); } } public static final class SingleDouble extends SingleNumericValue { public SingleDouble(NumericDocValues docValues) { super(docValues, DOUBLE_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setDoubleValue(get(index)).build(); } } public static final class SingleDateTime extends SingleNumericValue { public SingleDateTime(NumericDocValues docValues) { super(docValues, DATE_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { long epochMs = get(index).toEpochMilli(); return SearchResponse.Hit.FieldValue.newBuilder().setLongValue(epochMs).build(); } } public abstract static class SortedNumericValues extends LoadedDocValues { private final SortedNumericDocValues docValues; private final LongFunction decoder; private final ArrayList values = new ArrayList<>(); SortedNumericValues(SortedNumericDocValues docValues, LongFunction decoder) { this.docValues = docValues; this.decoder = decoder; } @Override public void setDocId(int docID) throws IOException { values.clear(); if (docValues.advanceExact(docID)) { int count = docValues.docValueCount(); values.ensureCapacity(count); for (int i = 0; i < count; ++i) { values.add(decoder.apply(docValues.nextValue())); } } values.trimToSize(); } @Override public T get(int index) { if (values.isEmpty()) { throw new IllegalStateException("No doc values for document"); } else if (index < 0 || index >= values.size()) { throw new IndexOutOfBoundsException("No doc value for index: " + index); } return values.get(index); } @Override public int size() { return values.size(); } } public static final class SortedBooleans extends SortedNumericValues { public SortedBooleans(SortedNumericDocValues docValues) { super(docValues, BOOL_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setBooleanValue(get(index)).build(); } } public static final class SortedIntegers extends SortedNumericValues { public SortedIntegers(SortedNumericDocValues docValues) { super(docValues, INT_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setIntValue(get(index)).build(); } } public static final class SortedLongs extends SortedNumericValues { public SortedLongs(SortedNumericDocValues docValues) { super(docValues, LONG_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setLongValue(get(index)).build(); } } public static final class SortedFloats extends SortedNumericValues { public SortedFloats(SortedNumericDocValues docValues) { super(docValues, SORTED_FLOAT_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setFloatValue(get(index)).build(); } } public static final class SortedDoubles extends SortedNumericValues { public SortedDoubles(SortedNumericDocValues docValues) { super(docValues, SORTED_DOUBLE_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setDoubleValue(get(index)).build(); } } // Even single points use SortedNumericDocValues, since they are LatLonDocValuesFields public static final class SingleLocation extends SortedNumericValues { public SingleLocation(SortedNumericDocValues docValues) { super(docValues, GEO_POINT_DECODER); } public GeoPoint getValue() { return get(0); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { GeoPoint point = get(index); LatLng latLon = LatLng.newBuilder().setLatitude(point.getLat()).setLongitude(point.getLon()).build(); return SearchResponse.Hit.FieldValue.newBuilder().setLatLngValue(latLon).build(); } public double arcDistance(double lat, double lon) { return getValue().arcDistance(lat, lon); } } public static final class Locations extends SortedNumericValues { public Locations(SortedNumericDocValues docValues) { super(docValues, GEO_POINT_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { GeoPoint point = get(index); LatLng latLon = LatLng.newBuilder().setLatitude(point.getLat()).setLongitude(point.getLon()).build(); return SearchResponse.Hit.FieldValue.newBuilder().setLatLngValue(latLon).build(); } public double arcDistance(double lat, double lon) { // backward compatible with ES // assume we actually only have 1 location while declared as multivalued return get(0).arcDistance(lat, lon); } } public static final class SortedDateTimes extends SortedNumericValues { public SortedDateTimes(SortedNumericDocValues docValues) { super(docValues, DATE_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { long epochMs = get(index).toEpochMilli(); return SearchResponse.Hit.FieldValue.newBuilder().setLongValue(epochMs).build(); } } public static final class ObjectJsonDocValues extends LoadedDocValues { private final BinaryDocValues docValues; private List value; public ObjectJsonDocValues(BinaryDocValues docValues) { this.docValues = docValues; } @Override public void setDocId(int docID) throws IOException { if (docValues.advanceExact(docID)) { String jsonString = STRING_DECODER.apply(docValues.binaryValue()); ListValue.Builder builder = ListValue.newBuilder(); JsonFormat.parser().merge(jsonString, builder); value = builder.getValuesList().stream() .map(e -> e.getStructValue()) .collect(Collectors.toList()); } else { value = null; } } @Override public Struct get(int index) { if (value == null) { throw new IllegalStateException("No doc values for document"); } try { return value.get(index); } catch (IndexOutOfBoundsException e) { throw new RuntimeException(e); } } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { Struct struct = get(index); return SearchResponse.Hit.FieldValue.newBuilder().setStructValue(struct).build(); } @Override public int size() { return value == null ? 0 : value.size(); } } public abstract static class SingleBinaryBase extends LoadedDocValues { private final BinaryDocValues docValues; private final Function decoder; private T value; public SingleBinaryBase(BinaryDocValues docValues, Function decoder) { this.docValues = docValues; this.decoder = decoder; } @Override public void setDocId(int docID) throws IOException { if (docValues.advanceExact(docID)) { value = decoder.apply(docValues.binaryValue()); } else { value = null; } } @Override public T get(int index) { if (value == null) { throw new IllegalStateException("No doc values for document"); } else if (index != 0) { throw new IndexOutOfBoundsException("No doc value for index: " + index); } return value; } @Override public int size() { return value == null ? 0 : 1; } public T getValue() { return get(0); } } public static final class SingleBinary extends SingleBinaryBase { public SingleBinary(BinaryDocValues docValues) { super(docValues, BYTES_REF_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder() .setTextValue(get(index).utf8ToString()) .build(); } } public static final class SingleString extends SingleBinaryBase { public SingleString(BinaryDocValues docValues) { super(docValues, STRING_DECODER); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setTextValue(get(index)).build(); } } public static final class SortedStrings extends LoadedDocValues { private final SortedSetDocValues docValues; private final ArrayList values = new ArrayList<>(); public SortedStrings(SortedSetDocValues docValues) { this.docValues = docValues; } @Override public void setDocId(int docID) throws IOException { values.clear(); if (docValues.advanceExact(docID)) { long ord = docValues.nextOrd(); while (ord != SortedSetDocValues.NO_MORE_ORDS) { values.add(docValues.lookupOrd(ord).utf8ToString()); ord = docValues.nextOrd(); } } values.trimToSize(); } @Override public SearchResponse.Hit.FieldValue toFieldValue(int index) { return SearchResponse.Hit.FieldValue.newBuilder().setTextValue(get(index)).build(); } @Override public String get(int index) { if (values.isEmpty()) { throw new IllegalStateException("No doc values for document"); } else if (index < 0 || index >= values.size()) { throw new IndexOutOfBoundsException("No doc value for index: " + index); } return values.get(index); } @Override public int size() { return values.size(); } } public static final class SingleVector extends LoadedDocValues { private final BinaryDocValues docValues; private VectorType value; public SingleVector(BinaryDocValues docValues) { this.docValues = docValues; } /** * Set method to set the lucene level doc id to lookup value from index and initialize the * loaded doc value index by loading vector data */ public void setDocId(int docID) throws IOException { if (docValues.advanceExact(docID)) { value = decodeBytesRefToVectorType(docValues.binaryValue()); } else { value = null; } } /** Decodes binary doc value to float array and wraps it into a VectorType */ private static VectorType decodeBytesRefToVectorType(BytesRef bytesRef) { float[] floats = new float[bytesRef.length / Float.BYTES]; FloatBuffer fb = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length).asFloatBuffer(); fb.get(floats); return new VectorType(floats); } /** Provide field value containing the doc value data for a given index */ @Override public FieldValue toFieldValue(int index) { VectorType vector = get(index); Builder vectorBuilder = Vector.newBuilder(); for (float value : vector.getVectorData()) { vectorBuilder.addValue(value); } return SearchResponse.Hit.FieldValue.newBuilder() .setVectorValue(vectorBuilder.build()) .build(); } @Override public VectorType get(int index) { if (value == null) { throw new IllegalStateException("No doc values for document"); } else if (index != 0) { throw new IndexOutOfBoundsException("No doc value for index: " + index); } return value; } @Override public int size() { return value == null ? 0 : 1; } public VectorType getValue() { return get(0); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy