All Downloads are FREE. The search and download functionality uses the official Maven repository.

hivemall.utils.lang.HalfFloat Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.utils.lang;

/**
 * A utility class to deal with half-precision (16-bit) floating-point values that are carried
 * around in a {@code short}. Conversions in both directions are done purely with table lookups,
 * masks and shifts, so the conversion itself contains no conditional branch instruction.
 *
 * <p>
 * Layout of the 32-bit single-precision value the conversions work on:
 *
 * <pre>
 * |sign|       exponent          |                   mantissa                                 |
 * | 31 | 30 29 28 27 26 25 24 23 | 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 |
 * </pre>
 *
 * <p>
 * When narrowing a float, the mantissa bits that do not fit are dropped by a right shift (they
 * are truncated, not rounded).
 *
 * @see <a href="http://en.wikipedia.org/wiki/Half-precision_floating-point_format">Half-precision
 *      floating-point format</a>
 * @see <a href="http://en.wikipedia.org/wiki/Single_precision_floating-point_format">Single
 *      precision floating-point format</a>
 * @see <a href="ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf">Fast Half Float
 *      Conversions</a>
 */
public final class HalfFloat {

    /** Half-float bit pattern of positive zero. */
    public static final short ZERO = 0;

    /** Half-float bit pattern of {@code 1.0}; computed once the lookup tables are populated. */
    public static final short ONE;

    /** Integers equal to or above 65520 are rounded to "infinity". */
    public static final float MAX_FLOAT_INTEGER = 65520f;

    /** (2-2^-10) * 2^15 */
    public static final float MAX_FLOAT = 65504f;

    /** Smallest positive e for which HalfFloat (1.0 + e) != HalfFloat (1.0) */
    public static final float EPSILON = 0.00097656f;

    // half -> float tables
    private static final int[] mantissatable;
    private static final int[] exponenttable;
    private static final short[] offsettable;

    // float -> half tables, indexed by the 9-bit (sign | exponent) field of the float
    private static final short[] basetable;
    private static final byte[] shifttable;

    static {// lookup tables are 10 KB in total
        mantissatable = new int[2048]; // 8192 bytes
        exponenttable = new int[64]; // 256 bytes
        offsettable = new short[64]; // 128 bytes
        basetable = new short[512]; // 1024 bytes
        shifttable = new byte[512]; // 512 bytes
        populateTableEntries();
        // Must run after the tables are filled because the conversion reads them.
        ONE = floatToHalfFloat(1f);
    }

    private HalfFloat() {}

    /**
     * Converts a half-float bit pattern to a {@code float}.
     *
     * @param f16 the 16 bits of the half-precision value
     * @return the float whose bit pattern is {@link #halfFloatToFloatBits(short)} of {@code f16}
     */
    public static float halfFloatToFloat(final short f16) {
        return Float.intBitsToFloat(halfFloatToFloatBits(f16));
    }

    /**
     * Converts a {@code float} to a half-float bit pattern.
     *
     * @param f32 the value to narrow
     * @return the 16 bits of the half-precision value
     */
    public static short floatToHalfFloat(final float f32) {
        return floatBitsToHalfFloat(Float.floatToRawIntBits(f32));
    }

    /**
     * Converts a half-float bit pattern to the raw bit pattern of the corresponding
     * {@code float}.
     *
     * @param f16 the 16 bits of the half-precision value (any value, including those with the
     *        sign bit set)
     * @return the 32-bit single-precision bit pattern
     */
    public static int halfFloatToFloatBits(final short f16) {
        // Mask to 16 bits BEFORE shifting: a short with the sign bit set is sign-extended when
        // promoted to int, and a plain ">> 10" would yield a negative table index.
        final int signAndExponent = (f16 & 0xFFFF) >>> 10; // 0..63
        final int mantissaIndex = offsettable[signAndExponent] + (f16 & 0x3FF); // 0..2047
        return mantissatable[mantissaIndex] + exponenttable[signAndExponent];
    }

    /**
     * Converts the raw bit pattern of a {@code float} to a half-float bit pattern.
     *
     * @param f32b the 32-bit single-precision bit pattern
     * @return the 16 bits of the half-precision value
     */
    public static short floatBitsToHalfFloat(final int f32b) {
        // The 0x1FF mask discards the sign extension of ">>", leaving sign + 8 exponent bits.
        final int signAndExponent = (f32b >> 23) & 0x1FF;
        final int mantissa = f32b & 0x007FFFFF;
        return (short) (basetable[signAndExponent] + (mantissa >> shifttable[signAndExponent]));
    }

    /**
     * Fills all five lookup tables. For the float -> half tables, index {@code i} is the entry
     * for positive values with biased exponent {@code i}, and {@code i | 0x100} is the entry for
     * the same exponent with the sign bit set.
     */
    private static void populateTableEntries() {
        populateMantissaTable(mantissatable);
        populateExponentTable(exponenttable);
        populateOffsetTable(offsettable);

        for (int i = 0; i < 256; i++) {
            final int e = i - 127; // unbiased single-precision exponent
            if (e < -24) { // Very small numbers map to zero
                // basetable[i | 0x000] keeps its default value 0x0000
                basetable[i | 0x100] = (short) 0x8000;
                shifttable[i | 0x000] = 24;
                shifttable[i | 0x100] = 24;
            } else if (e < -14) { // Small numbers map to denorms
                basetable[i | 0x000] = (short) (0x0400 >> (-e - 14));
                basetable[i | 0x100] = (short) ((0x0400 >> (-e - 14)) | 0x8000);
                shifttable[i | 0x000] = (byte) (-e - 1);
                shifttable[i | 0x100] = (byte) (-e - 1);
            } else if (e <= 15) { // Normal numbers just lose precision
                basetable[i | 0x000] = (short) ((e + 15) << 10);
                basetable[i | 0x100] = (short) (((e + 15) << 10) | 0x8000);
                shifttable[i | 0x000] = 13;
                shifttable[i | 0x100] = 13;
            } else if (e < 128) { // Large numbers map to Infinity
                basetable[i | 0x000] = (short) 0x7C00;
                basetable[i | 0x100] = (short) 0xFC00;
                shifttable[i | 0x000] = 24;
                shifttable[i | 0x100] = 24;
            } else { // Infinity and NaN's stay Infinity and NaN's
                basetable[i | 0x000] = (short) 0x7C00;
                basetable[i | 0x100] = (short) 0xFC00;
                shifttable[i | 0x000] = 13;
                shifttable[i | 0x100] = 13;
            }
        }
    }

    /**
     * Fills the mantissa table: entries 1..1023 hold the normalized form of a denormal half
     * mantissa, entries 1024..2047 hold the mantissa of a normal half shifted into float
     * position plus the bias adjustment {@code 0x38000000}.
     */
    private static void populateMantissaTable(final int[] mantissatable) {
        mantissatable[0] = 0;
        for (int i = 1; i < 1024; i++) {
            mantissatable[i] = convertMantissa(i);
        }
        for (int i = 1024; i < 2048; i++) {
            mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
        }
    }

    /**
     * Normalizes a non-zero denormal half mantissa into float exponent and mantissa bits.
     *
     * @param i the 10-bit mantissa, must be in 1..1023 (the loop would not terminate for 0)
     * @return the combined float exponent and mantissa bits
     */
    private static int convertMantissa(final int i) {
        int m = i << 13; // Zero pad mantissa bits
        int e = 0; // Zero exponent
        while ((m & 0x00800000) == 0) {// While not normalized
            e -= 0x00800000; // Decrement exponent (1<<23)
            m <<= 1; // Shift mantissa
        }
        m &= ~0x00800000; // Clear leading 1 bit
        e += 0x38800000; // Adjust bias ((127-14)<<23)
        return m | e; // Return combined number
    }

    /**
     * Fills the exponent table, indexed by the 6-bit (sign | exponent) field of a half.
     * Entries 32..63 are the sign-bit-set counterparts of entries 0..31.
     */
    private static void populateExponentTable(final int[] exponenttable) {
        exponenttable[0] = 0;
        for (int i = 1; i < 31; i++) {
            exponenttable[i] = i << 23;
        }
        exponenttable[31] = 0x47800000;
        exponenttable[32] = 0x80000000;
        for (int i = 33; i < 63; i++) {
            exponenttable[i] = 0x80000000 + ((i - 32) << 23);
        }
        exponenttable[63] = 0xC7800000;
    }

    /**
     * Fills the offset table: 0 for the two zero-exponent entries (indices 0 and 32), which
     * selects the denormal half of the mantissa table, and 1024 for every other entry.
     */
    private static void populateOffsetTable(final short[] offsettable) {
        offsettable[0] = 0;
        for (int i = 1; i < 64; i++) {
            offsettable[i] = 1024;
        }
        offsettable[32] = 0;
    }

    /**
     * Tells whether the magnitude of {@code f} is at most {@link #MAX_FLOAT_INTEGER}.
     *
     * @param f the value to test
     * @return {@code true} if {@code |f| <= MAX_FLOAT_INTEGER}; {@code false} otherwise,
     *         including for NaN (every comparison with NaN is false)
     */
    public static boolean isRepresentable(final float f) {
        return Math.abs(f) <= HalfFloat.MAX_FLOAT_INTEGER;
    }

    /**
     * Tells whether the magnitude of {@code f} is within the half-float range.
     *
     * @param f the value to test
     * @param strict if {@code true} compare against {@link #MAX_FLOAT}, otherwise against the
     *        looser {@link #MAX_FLOAT_INTEGER}
     * @return {@code true} if {@code |f|} does not exceed the selected bound
     */
    public static boolean isRepresentable(final float f, final boolean strict) {
        if (strict) {
            return Math.abs(f) <= HalfFloat.MAX_FLOAT;
        } else {
            return Math.abs(f) <= HalfFloat.MAX_FLOAT_INTEGER;
        }
    }

    /**
     * Rejects values whose magnitude exceeds {@link #MAX_FLOAT}. NaN is not rejected because
     * the comparison with NaN is false.
     *
     * @param f the value to check
     * @throws IllegalArgumentException if {@code |f| > MAX_FLOAT}
     */
    public static void checkRange(final float f) {
        if (Math.abs(f) > HalfFloat.MAX_FLOAT) {
            throw new IllegalArgumentException(
                "Acceptable maximum weight is " + HalfFloat.MAX_FLOAT + ": " + f);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy