org.elasticsearch.search.DocValueFormat Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateMathParser;
import org.elasticsearch.geometry.utils.Geohash;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils;
import java.io.IOException;
import java.math.BigInteger;
import java.net.InetAddress;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.ParseException;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.Base64;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.LongSupplier;
/** A formatter for values as returned by the fielddata/doc-values APIs. */
public interface DocValueFormat extends NamedWriteable {
long MASK_2_63 = 0x8000000000000000L;
BigInteger BIGINTEGER_2_64_MINUS_ONE = BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE); // 2^64 -1
/** Format a long value. This is used by terms and histogram aggregations
* to format keys for fields that use longs as a doc value representation
* such as the {@code long} and {@code date} fields. */
default Object format(long value) {
throw new UnsupportedOperationException();
}
/** Format a double value. This is used by terms and stats aggregations
* to format keys for fields that use numbers as a doc value representation
* such as the {@code long}, {@code double} or {@code date} fields. */
default Object format(double value) {
throw new UnsupportedOperationException();
}
/** Format a binary value. This is used by terms aggregations to format
* keys for fields that use binary doc value representations such as the
* {@code keyword} and {@code ip} fields. */
default Object format(BytesRef value) {
throw new UnsupportedOperationException();
}
/** Parse a value that was formatted with {@link #format(long)} back to the
* original long value. */
default long parseLong(String value, boolean roundUp, LongSupplier now) {
throw new UnsupportedOperationException();
}
/** Parse a value that was formatted with {@link #format(double)} back to
* the original double value. */
default double parseDouble(String value, boolean roundUp, LongSupplier now) {
throw new UnsupportedOperationException();
}
/** Parse a value that was formatted with {@link #format(BytesRef)} back
* to the original BytesRef. */
default BytesRef parseBytesRef(Object value) {
throw new UnsupportedOperationException();
}
/**
* Formats a value of a sort field in a search response. This is used by {@link SearchSortValues}
* to avoid sending the internal representation of a value of a sort field in a search response.
* The default implementation formats {@link BytesRef} but leave other types as-is.
*/
default Object formatSortValue(Object value) {
if (value instanceof BytesRef) {
return format((BytesRef) value);
}
return value;
}
DocValueFormat RAW = RawDocValueFormat.INSTANCE;
/**
* Singleton, stateless formatter for "Raw" values, generally taken to mean keywords and other strings.
*/
class RawDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new RawDocValueFormat();
private RawDocValueFormat() {}
@Override
public String getWriteableName() {
return "raw";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public Long format(long value) {
return value;
}
@Override
public Double format(double value) {
return value;
}
@Override
public String format(BytesRef value) {
try {
return value.utf8ToString();
} catch (Exception | AssertionError e) {
throw new IllegalArgumentException("Failed trying to format bytes as UTF8. Possibly caused by a mapping mismatch", e);
}
}
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
try {
// Prefer parsing as a long to avoid losing precision
return Long.parseLong(value);
} catch (NumberFormatException e) {
// retry as a double
}
double d = Double.parseDouble(value);
if (roundUp) {
d = Math.ceil(d);
} else {
d = Math.floor(d);
}
return Math.round(d);
}
@Override
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
return Double.parseDouble(value);
}
@Override
public BytesRef parseBytesRef(Object value) {
return new BytesRef(value.toString());
}
@Override
public String toString() {
return "raw";
}
};
DocValueFormat BINARY = BinaryDocValueFormat.INSTANCE;
/**
* Singleton, stateless formatter, for representing bytes as base64 strings
*/
class BinaryDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new BinaryDocValueFormat();
private BinaryDocValueFormat() {}
@Override
public String getWriteableName() {
return "binary";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String format(BytesRef value) {
return Base64.getEncoder().encodeToString(Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length));
}
@Override
public BytesRef parseBytesRef(Object value) {
return new BytesRef(Base64.getDecoder().decode(value.toString()));
}
};
static DocValueFormat withNanosecondResolution(final DocValueFormat format) {
if (format instanceof DateTime dateTime) {
return new DateTime(dateTime.formatter, dateTime.timeZone, DateFieldMapper.Resolution.NANOSECONDS, dateTime.formatSortValues);
} else {
throw new IllegalArgumentException("trying to convert a known date time formatter to a nanosecond one, wrong field used?");
}
}
static DocValueFormat enableFormatSortValues(DocValueFormat format) {
if (format instanceof DateTime dateTime) {
return new DateTime(dateTime.formatter, dateTime.timeZone, dateTime.resolution, true);
}
throw new IllegalArgumentException("require a date_time formatter; got [" + format.getWriteableName() + "]");
}
final class DateTime implements DocValueFormat {
public static final String NAME = "date_time";
final DateFormatter formatter;
final ZoneId timeZone;
private final DateMathParser parser;
final DateFieldMapper.Resolution resolution;
final boolean formatSortValues;
public DateTime(DateFormatter formatter, ZoneId timeZone, DateFieldMapper.Resolution resolution) {
this(formatter, timeZone, resolution, false);
}
private DateTime(DateFormatter formatter, ZoneId timeZone, DateFieldMapper.Resolution resolution, boolean formatSortValues) {
this.timeZone = Objects.requireNonNull(timeZone);
this.formatter = formatter.withZone(timeZone);
this.parser = this.formatter.toDateMathParser();
this.resolution = resolution;
this.formatSortValues = formatSortValues;
}
public DateTime(StreamInput in) throws IOException {
String formatterPattern = in.readString();
String zoneId = in.readString();
this.timeZone = ZoneId.of(zoneId);
this.formatter = DateFormatter.forPattern(formatterPattern).withZone(this.timeZone);
this.parser = formatter.toDateMathParser();
this.resolution = DateFieldMapper.Resolution.ofOrdinal(in.readVInt());
if (in.getTransportVersion().between(TransportVersions.V_7_7_0, TransportVersions.V_8_0_0)) {
/* when deserialising from 7.7+ nodes expect a flag indicating if a pattern is of joda style
This is only used to support joda style indices in 7.x, in 8 we no longer support this.
All indices in 8 should use java style pattern. Hence we can ignore this flag.
*/
in.readBoolean();
}
this.formatSortValues = in.readBoolean();
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(formatter.pattern());
out.writeString(timeZone.getId());
out.writeVInt(resolution.ordinal());
if (out.getTransportVersion().between(TransportVersions.V_7_7_0, TransportVersions.V_8_0_0)) {
/* when serializing to 7.7+ send out a flag indicating if a pattern is of joda style
This is only used to support joda style indices in 7.x, in 8 we no longer support this.
All indices in 8 should use java style pattern. Hence this flag is always false.
*/
out.writeBoolean(false);
}
out.writeBoolean(formatSortValues);
}
public DateMathParser getDateMathParser() {
return parser;
}
@Override
public String format(long value) {
return formatter.format(resolution.toInstant(value).atZone(timeZone));
}
@Override
public String format(double value) {
return format((long) value);
}
@Override
public Object formatSortValue(Object value) {
if (formatSortValues) {
if (value instanceof Long) {
return format((Long) value);
}
}
return value;
}
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
return resolution.convert(parser.parse(value, now, roundUp, timeZone));
}
@Override
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
return parseLong(value, roundUp, now);
}
@Override
public String toString() {
return "DocValueFormat.DateTime(" + formatter + ", " + timeZone + ", " + resolution + ")";
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
DateTime that = (DateTime) o;
return formatter.equals(that.formatter)
&& timeZone.equals(that.timeZone)
&& resolution == that.resolution
&& formatSortValues == that.formatSortValues;
}
@Override
public int hashCode() {
return Objects.hash(formatter, timeZone, resolution, formatSortValues);
}
}
DocValueFormat GEOHASH = GeoHashDocValueFormat.INSTANCE;
/**
* Singleton, stateless formatter for geo hash values
*/
class GeoHashDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new GeoHashDocValueFormat();
private GeoHashDocValueFormat() {}
@Override
public String getWriteableName() {
return "geo_hash";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String format(long value) {
return Geohash.stringEncode(value);
}
@Override
public String format(double value) {
return format((long) value);
}
};
DocValueFormat GEOTILE = GeoTileDocValueFormat.INSTANCE;
class GeoTileDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new GeoTileDocValueFormat();
private GeoTileDocValueFormat() {}
@Override
public String getWriteableName() {
return "geo_tile";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String format(long value) {
return GeoTileUtils.stringEncode(value);
}
@Override
public String format(double value) {
return format((long) value);
}
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
return GeoTileUtils.longEncode(value);
}
};
DocValueFormat BOOLEAN = BooleanDocValueFormat.INSTANCE;
/**
* Stateless, Singleton formatter for boolean values. Parses the strings "true" and "false" as inputs.
*/
class BooleanDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new BooleanDocValueFormat();
private BooleanDocValueFormat() {}
@Override
public String getWriteableName() {
return "bool";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public Boolean format(long value) {
return value != 0;
}
@Override
public Boolean format(double value) {
return value != 0;
}
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
switch (value) {
case "false":
return 0;
case "true":
return 1;
}
throw new IllegalArgumentException("Cannot parse boolean [" + value + "], expected either [true] or [false]");
}
@Override
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
return parseLong(value, roundUp, now);
}
};
IpDocValueFormat IP = IpDocValueFormat.INSTANCE;
/**
* Stateless, singleton formatter for IP address data
*/
class IpDocValueFormat implements DocValueFormat {
public static final IpDocValueFormat INSTANCE = new IpDocValueFormat();
private IpDocValueFormat() {}
@Override
public String getWriteableName() {
return "ip";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String format(BytesRef value) {
try {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
InetAddress inet = InetAddressPoint.decode(bytes);
return NetworkAddress.format(inet);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Failed trying to format bytes as IP address. Possibly caused by a mapping mismatch",
e
);
}
}
@Override
public BytesRef parseBytesRef(Object value) {
return new BytesRef(InetAddressPoint.encode(InetAddresses.forString(value.toString())));
}
@Override
public String toString() {
return "ip";
}
};
final class Decimal implements DocValueFormat {
public static final String NAME = "decimal";
private static final DecimalFormatSymbols SYMBOLS = new DecimalFormatSymbols(Locale.ROOT);
final String pattern;
private final NumberFormat format;
public Decimal(String pattern) {
this.pattern = pattern;
this.format = new DecimalFormat(pattern, SYMBOLS);
}
public Decimal(StreamInput in) throws IOException {
this(in.readString());
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(pattern);
}
@Override
public String format(long value) {
return format.format(value);
}
@Override
public String format(double value) {
/*
* Explicitly check for NaN, since it formats to "�" or "NaN" depending on JDK version.
*
* Decimal formatter uses the JRE's default symbol list (via Locale.ROOT above). In JDK8,
* this translates into using {@link sun.util.locale.provider.JRELocaleProviderAdapter}, which loads
* {@link sun.text.resources.FormatData} for symbols. There, `NaN` is defined as `\ufffd` (�)
*
* In JDK9+, {@link sun.util.cldr.CLDRLocaleProviderAdapter} is used instead, which loads
* {@link sun.text.resources.cldr.FormatData}. There, `NaN` is defined as `"NaN"`
*
* Since the character � isn't very useful, and makes the output change depending on JDK version,
* we manually check to see if the value is NaN and return the string directly.
*/
if (Double.isNaN(value)) {
return String.valueOf(Double.NaN);
}
return format.format(value);
}
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
Number n;
try {
n = format.parse(value);
} catch (ParseException e) {
throw new RuntimeException("Cannot parse the value [" + value + "] using the pattern [" + pattern + "]", e);
}
if (format.isParseIntegerOnly()) {
return n.longValue();
} else {
double d = n.doubleValue();
if (roundUp) {
d = Math.ceil(d);
} else {
d = Math.floor(d);
}
return Math.round(d);
}
}
@Override
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
Number n;
try {
n = format.parse(value);
} catch (ParseException e) {
throw new RuntimeException("Cannot parse the value [" + value + "] using the pattern [" + pattern + "]", e);
}
return n.doubleValue();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
Decimal that = (Decimal) o;
return Objects.equals(pattern, that.pattern);
}
@Override
public int hashCode() {
return Objects.hash(pattern);
}
@Override
public String toString() {
return pattern;
}
};
DocValueFormat UNSIGNED_LONG_SHIFTED = UnsignedLongShiftedDocValueFormat.INSTANCE;
/**
* DocValues format for unsigned 64 bit long values,
* that are stored as shifted signed 64 bit long values.
*/
class UnsignedLongShiftedDocValueFormat implements DocValueFormat {
public static final DocValueFormat INSTANCE = new UnsignedLongShiftedDocValueFormat();
private UnsignedLongShiftedDocValueFormat() {}
@Override
public String getWriteableName() {
return "unsigned_long_shifted";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String toString() {
return "unsigned_long_shifted";
}
/**
* Formats the unsigned long to the shifted long format
*/
@Override
public long parseLong(String value, boolean roundUp, LongSupplier now) {
long parsedValue = Long.parseUnsignedLong(value);
// subtract 2^63 or 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
// equivalent to flipping the first bit
return parsedValue ^ MASK_2_63;
}
/**
* Formats a raw docValue that is stored in the shifted long format to the unsigned long representation.
*/
@Override
public Object format(long value) {
// add 2^63 or 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000,
// equivalent to flipping the first bit
long formattedValue = value ^ MASK_2_63;
if (formattedValue >= 0) {
return formattedValue;
} else {
return BigInteger.valueOf(formattedValue).and(BIGINTEGER_2_64_MINUS_ONE);
}
}
@Override
public Object formatSortValue(Object value) {
if (value instanceof Long) {
return format((Long) value);
}
return value;
}
/**
* Double docValues of the unsigned_long field type are already in the formatted representation,
* so we don't need to do anything here
*/
@Override
public Double format(double value) {
return value;
}
@Override
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
return Double.parseDouble(value);
}
};
DocValueFormat TIME_SERIES_ID = new TimeSeriesIdDocValueFormat();
/**
* DocValues format for time series id.
*/
class TimeSeriesIdDocValueFormat implements DocValueFormat {
private static final Base64.Decoder BASE64_DECODER = Base64.getUrlDecoder();
private TimeSeriesIdDocValueFormat() {}
@Override
public String getWriteableName() {
return "tsid";
}
@Override
public void writeTo(StreamOutput out) {}
@Override
public String toString() {
return "tsid";
}
/**
* @param value The TSID as a {@link BytesRef}
* @return the Base 64 encoded TSID
*/
@Override
public Object format(BytesRef value) {
try {
// NOTE: if the tsid is a map of dimension key/value pairs (as it was before introducing
// tsid hashing) we just decode the map and return it.
return TimeSeriesIdFieldMapper.decodeTsidAsMap(value);
} catch (Exception e) {
// NOTE: otherwise the _tsid field is just a hash and we can't decode it
return TimeSeriesIdFieldMapper.encodeTsid(value);
}
}
@Override
public BytesRef parseBytesRef(Object value) {
if (value instanceof BytesRef valueAsBytesRef) {
return valueAsBytesRef;
}
if (value instanceof String valueAsString) {
return new BytesRef(BASE64_DECODER.decode(valueAsString));
}
return parseBytesRefMap(value);
}
/**
* After introducing tsid hashing this tsid parsing logic is deprecated.
* Tsid hashing does not allow us to parse the tsid extracting dimension fields key/values pairs.
* @param value The Map encoding tsid dimension fields key/value pairs.
*
* @return a {@link BytesRef} representing a map of key/value pairs
*/
private BytesRef parseBytesRefMap(Object value) {
if (value instanceof Map == false) {
throw new IllegalArgumentException("Cannot parse tsid object [" + value + "]");
}
Map m = (Map) value;
TimeSeriesIdBuilder builder = new TimeSeriesIdBuilder(null);
for (Map.Entry entry : m.entrySet()) {
String f = entry.getKey().toString();
Object v = entry.getValue();
if (v instanceof String s) {
builder.addString(f, s);
} else if (v instanceof Long l) {
builder.addLong(f, l);
} else if (v instanceof Integer i) {
builder.addLong(f, i.longValue());
} else if (v instanceof BigInteger ul) {
long ll = UNSIGNED_LONG_SHIFTED.parseLong(ul.toString(), false, () -> 0L);
builder.addUnsignedLong(f, ll);
} else {
throw new IllegalArgumentException("Unexpected value in tsid object [" + v + "]");
}
}
try {
// NOTE: we can decode the tsid only if it is not hashed (represented as a map)
return builder.buildLegacyTsid().toBytesRef();
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
}
};
}