All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.lucene.document.HalfFloatPoint Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;

import java.util.Arrays;
import java.util.Collection;

import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

/**
 * An indexed {@code half-float} field for fast range filters. If you also
 * need to store the value, you should add a separate {@link StoredField} instance.
 * If you need doc values, you can store them in a {@link NumericDocValuesField}
 * and use {@link #halfFloatToSortableShort} and
 * {@link #sortableShortToHalfFloat} for encoding/decoding.
 * 

* The API takes floats, but they will be encoded to half-floats before being * indexed. In case the provided floats cannot be represented accurately as a * half float, they will be rounded to the closest value that can be * represented as a half float. In case of tie, values will be rounded to the * value that has a zero as its least significant bit. *

* Finding all documents within an N-dimensional at search time is * efficient. Multiple values for the same field in one document * is allowed. *

* This field defines static factory methods for creating common queries: *

    *
  • {@link #newExactQuery(String, float)} for matching an exact 1D point. *
  • {@link #newSetQuery(String, float...)} for matching a set of 1D values. *
  • {@link #newRangeQuery(String, float, float)} for matching a 1D range. *
  • {@link #newRangeQuery(String, float[], float[])} for matching points/ranges in n-dimensional space. *
* @see PointValues */ public final class HalfFloatPoint extends Field { /** The number of bytes used to represent a half-float value. */ public static final int BYTES = 2; /** * Return the first half float which is immediately greater than {@code v}. * If the argument is {@link Float#NaN} then the return value is * {@link Float#NaN}. If the argument is {@link Float#POSITIVE_INFINITY} * then the return value is {@link Float#POSITIVE_INFINITY}. */ public static float nextUp(float v) { if (Float.isNaN(v) || v == Float.POSITIVE_INFINITY) { return v; } short s = halfFloatToSortableShort(v); // if the float does not represent a half float accurately then just // converting back might give us the value we are looking for float r = sortableShortToHalfFloat(s); if (r <= v) { r = sortableShortToHalfFloat((short) (s + 1)); } return r; } /** * Return the first half float which is immediately smaller than {@code v}. * If the argument is {@link Float#NaN} then the return value is * {@link Float#NaN}. If the argument is {@link Float#NEGATIVE_INFINITY} * then the return value is {@link Float#NEGATIVE_INFINITY}. */ public static float nextDown(float v) { if (Float.isNaN(v) || v == Float.NEGATIVE_INFINITY) { return v; } short s = halfFloatToSortableShort(v); // if the float does not represent a half float accurately then just // converting back might give us the value we are looking for float r = sortableShortToHalfFloat(s); if (r >= v) { r = sortableShortToHalfFloat((short) (s - 1)); } return r; } /** Convert a half-float to a short value that maintains ordering. */ public static short halfFloatToSortableShort(float v) { return sortableShortBits(halfFloatToShortBits(v)); } /** Convert short bits to a half-float value that maintains ordering. 
*/ public static float sortableShortToHalfFloat(short bits) { return shortBitsToHalfFloat(sortableShortBits(bits)); } private static short sortableShortBits(short s) { return (short) (s ^ (s >> 15) & 0x7fff); } static short halfFloatToShortBits(float v) { int floatBits = Float.floatToIntBits(v); int sign = floatBits >>> 31; int exp = (floatBits >>> 23) & 0xff; int mantissa = floatBits & 0x7fffff; if (exp == 0xff) { // preserve NaN and Infinity exp = 0x1f; mantissa >>>= (23 - 10); } else if (exp == 0x00) { // denormal float rounded to zero since even the largest denormal float // cannot be represented as a half float mantissa = 0; } else { exp = exp - 127 + 15; if (exp >= 0x1f) { // too large, make it infinity exp = 0x1f; mantissa = 0; } else if (exp <= 0) { // we need to convert to a denormal representation int shift = 23 - 10 - exp + 1; if (shift >= 32) { // need a special case since shifts are mod 32... exp = 0; mantissa = 0; } else { // add the implicit bit mantissa |= 0x800000; mantissa = roundShift(mantissa, shift); exp = mantissa >>> 10; mantissa &= 0x3ff; } } else { mantissa = roundShift((exp << 23) | mantissa, 23 - 10); exp = mantissa >>> 10; mantissa &= 0x3ff; } } return (short) ((sign << 15) | (exp << 10) | mantissa); } // divide by 2^shift and round to the closest int // round to even in case of tie static int roundShift(int i, int shift) { assert shift > 0; i += 1 << (shift - 1); // add 2^(shift-1) so that we round rather than truncate i -= (i >>> shift) & 1; // and subtract the shift-th bit so that we round to even in case of tie return i >>> shift; } static float shortBitsToHalfFloat(short s) { int sign = s >>> 15; int exp = (s >>> 10) & 0x1f; int mantissa = s & 0x3ff; if (exp == 0x1f) { // NaN or infinities exp = 0xff; mantissa <<= (23 - 10); } else if (mantissa == 0 && exp == 0) { // zero } else { if (exp == 0) { // denormal half float becomes a normal float int shift = Integer.numberOfLeadingZeros(mantissa) - (32 - 11); mantissa = (mantissa << 
shift) & 0x3ff; // clear the implicit bit exp = exp - shift + 1; } exp = exp + 127 - 15; mantissa <<= (23 - 10); } return Float.intBitsToFloat((sign << 31) | (exp << 23) | mantissa); } static void shortToSortableBytes(short value, byte[] result, int offset) { // Flip the sign bit, so negative shorts sort before positive shorts correctly: value ^= 0x8000; result[offset] = (byte) (value >> 8); result[offset+1] = (byte) value; } static short sortableBytesToShort(byte[] encoded, int offset) { short x = (short) (((encoded[offset] & 0xFF) << 8) | (encoded[offset+1] & 0xFF)); // Re-flip the sign bit to restore the original value: return (short) (x ^ 0x8000); } private static FieldType getType(int numDims) { FieldType type = new FieldType(); type.setDimensions(numDims, BYTES); type.freeze(); return type; } @Override public void setFloatValue(float value) { setFloatValues(value); } /** Change the values of this field */ public void setFloatValues(float... point) { if (type.pointDimensionCount() != point.length) { throw new IllegalArgumentException("this field (name=" + name + ") uses " + type.pointDimensionCount() + " dimensions; cannot change to (incoming) " + point.length + " dimensions"); } fieldsData = pack(point); } @Override public void setBytesValue(BytesRef bytes) { throw new IllegalArgumentException("cannot change value type from float to BytesRef"); } @Override public Number numericValue() { if (type.pointDimensionCount() != 1) { throw new IllegalStateException("this field (name=" + name + ") uses " + type.pointDimensionCount() + " dimensions; cannot convert to a single numeric value"); } BytesRef bytes = (BytesRef) fieldsData; assert bytes.length == BYTES; return decodeDimension(bytes.bytes, bytes.offset); } private static BytesRef pack(float... 
point) { if (point == null) { throw new IllegalArgumentException("point must not be null"); } if (point.length == 0) { throw new IllegalArgumentException("point must not be 0 dimensions"); } byte[] packed = new byte[point.length * BYTES]; for (int dim = 0; dim < point.length; dim++) { encodeDimension(point[dim], packed, dim * BYTES); } return new BytesRef(packed); } /** Creates a new FloatPoint, indexing the * provided N-dimensional float point. * * @param name field name * @param point float[] value * @throws IllegalArgumentException if the field name or value is null. */ public HalfFloatPoint(String name, float... point) { super(name, pack(point), getType(point.length)); } @Override public String toString() { StringBuilder result = new StringBuilder(); result.append(getClass().getSimpleName()); result.append(" <"); result.append(name); result.append(':'); BytesRef bytes = (BytesRef) fieldsData; for (int dim = 0; dim < type.pointDimensionCount(); dim++) { if (dim > 0) { result.append(','); } result.append(decodeDimension(bytes.bytes, bytes.offset + dim * BYTES)); } result.append('>'); return result.toString(); } // public helper methods (e.g. for queries) /** Encode single float dimension */ public static void encodeDimension(float value, byte dest[], int offset) { shortToSortableBytes(halfFloatToSortableShort(value), dest, offset); } /** Decode single float dimension */ public static float decodeDimension(byte value[], int offset) { return sortableShortToHalfFloat(sortableBytesToShort(value, offset)); } // static methods for generating queries /** * Create a query for matching an exact half-float value. It will be rounded * to the closest half-float if {@code value} cannot be represented accurately * as a half-float. *

* This is for simple one-dimension points, for multidimensional points use * {@link #newRangeQuery(String, float[], float[])} instead. * * @param field field name. must not be {@code null}. * @param value half-float value * @throws IllegalArgumentException if {@code field} is null. * @return a query matching documents with this exact value */ public static Query newExactQuery(String field, float value) { return newRangeQuery(field, value, value); } /** * Create a range query for half-float values. Bounds will be rounded to the * closest half-float if they cannot be represented accurately as a * half-float. *

* This is for simple one-dimension ranges, for multidimensional ranges use * {@link #newRangeQuery(String, float[], float[])} instead. *

* You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting {@code lowerValue = Float.NEGATIVE_INFINITY} or {@code upperValue = Float.POSITIVE_INFINITY}. *

Ranges are inclusive. For exclusive ranges, pass {@code nextUp(lowerValue)} * or {@code nextDown(upperValue)}. *

* Range comparisons are consistent with {@link Float#compareTo(Float)}. * * @param field field name. must not be {@code null}. * @param lowerValue lower portion of the range (inclusive). * @param upperValue upper portion of the range (inclusive). * @throws IllegalArgumentException if {@code field} is null. * @return a query matching documents within this range. */ public static Query newRangeQuery(String field, float lowerValue, float upperValue) { return newRangeQuery(field, new float[] { lowerValue }, new float[] { upperValue }); } /** * Create a range query for n-dimensional half-float values. Bounds will be * rounded to the closest half-float if they cannot be represented accurately * as a half-float. *

* You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting {@code lowerValue[i] = Float.NEGATIVE_INFINITY} or {@code upperValue[i] = Float.POSITIVE_INFINITY}. *

Ranges are inclusive. For exclusive ranges, pass {@code nextUp(lowerValue[i])} * or {@code nextDown(upperValue[i])}. *

* Range comparisons are consistent with {@link Float#compareTo(Float)}. * * @param field field name. must not be {@code null}. * @param lowerValue lower portion of the range (inclusive). must not be {@code null}. * @param upperValue upper portion of the range (inclusive). must not be {@code null}. * @throws IllegalArgumentException if {@code field} is null, if {@code lowerValue} is null, if {@code upperValue} is null, * or if {@code lowerValue.length != upperValue.length} * @return a query matching documents within this range. */ public static Query newRangeQuery(String field, float[] lowerValue, float[] upperValue) { PointRangeQuery.checkArgs(field, lowerValue, upperValue); return new PointRangeQuery(field, pack(lowerValue).bytes, pack(upperValue).bytes, lowerValue.length) { @Override protected String toString(int dimension, byte[] value) { return Float.toString(decodeDimension(value, 0)); } }; } /** * Create a query matching any of the specified 1D values. * This is the points equivalent of {@code TermsQuery}. * Values will be rounded to the closest half-float if they * cannot be represented accurately as a half-float. * * @param field field name. must not be {@code null}. * @param values all values to match */ public static Query newSetQuery(String field, float... values) { // Don't unexpectedly change the user's incoming values array: float[] sortedValues = values.clone(); Arrays.sort(sortedValues); final BytesRef encoded = new BytesRef(new byte[BYTES]); return new PointInSetQuery(field, 1, BYTES, new PointInSetQuery.Stream() { int upto; @Override public BytesRef next() { if (upto == sortedValues.length) { return null; } else { encodeDimension(sortedValues[upto], encoded.bytes, 0); upto++; return encoded; } } }) { @Override protected String toString(byte[] value) { assert value.length == BYTES; return Float.toString(decodeDimension(value, 0)); } }; } /** * Create a query matching any of the specified 1D values. 
This is the points equivalent of {@code TermsQuery}. * * @param field field name. must not be {@code null}. * @param values all values to match */ public static Query newSetQuery(String field, Collection values) { Float[] boxed = values.toArray(new Float[0]); float[] unboxed = new float[boxed.length]; for (int i = 0; i < boxed.length; i++) { unboxed[i] = boxed[i]; } return newSetQuery(field, unboxed); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy