All downloads are free. The search and download functionality uses the official Maven repository.

net.openhft.chronicle.algo.hashing.LongHashFunction Maven / Gradle / Ivy

There is a newer version: 2.27ea0
Show newest version
/*
 * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.openhft.chronicle.algo.hashing;

import net.openhft.chronicle.algo.bytes.Accessor;
import net.openhft.chronicle.algo.bytes.ReadAccess;
import org.jetbrains.annotations.NotNull;

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import static java.nio.ByteOrder.LITTLE_ENDIAN;
import static java.nio.ByteOrder.nativeOrder;
import static net.openhft.chronicle.algo.bytes.Access.nativeAccess;
import static net.openhft.chronicle.algo.bytes.Accessor.*;

/**
 * Hash function producing {@code long}-valued result from byte sequences of any length and
 * a plenty of different sources which "feels like byte sequences". Except {@link
 * #hashBytes(byte[])}, {@link #hashBytes(ByteBuffer)} (with their "sliced" versions) and
 * {@link #hashMemory(long, long)} methods, which actually accept byte sequences, notion of byte
 * sequence is defined as follows:
 * 
    *
  • For methods accepting arrays of Java primitives, {@code String}s and * {@code StringBuilder}s, byte sequence is how the input's bytes are actually lay in memory. *
  • *
  • For methods accepting single primitive values, byte sequence is how this primitive * would be put into memory with {@link ByteOrder#nativeOrder() native} byte order, or * equivalently, {@code hashXxx(primitive)} has always the same result as {@code * hashXxxs(new xxx[] {primitive})}, where "xxx" is any Java primitive type name.
  • *
  • For {@link #hash(Object, ReadAccess, long, long)} method byte sequence abstraction * is defined by the given {@link ReadAccess} strategy to the given object.
  • *
* *

Hash function implementation could either produce equal results for equal input on platforms * with different {@link ByteOrder}, favoring one byte order in terms of performance, or different * results, but performing equally good. This choice should be explicitly documented for all * {@code LongHashFunction} implementations. * *

Subclassing

* To implement a specific hash function algorithm, this class should be subclassed. Only methods * that accept single primitives, {@link #hashVoid()} and {@link #hash(Object, ReadAccess, long, * long)} should be implemented; other have default implementations which in the end delegate to * {@link #hash(Object, ReadAccess, long, long)} abstract method. * *

Notes about how exactly methods with default implementations are implemented in doc comments * are given for information and could be changed at any moment. However, it could hardly cause * any issues with subclassing, except probably little performance degradation. Methods documented * as "shortcuts" could either delegate to the referenced method or delegate directly to the method * to which the referenced method delegates. * *

{@code LongHashFunction} implementations shouldn't assume that {@code ReadAccess} strategies * do defensive checks, and access only bytes within the requested range. */ public abstract class LongHashFunction implements Serializable { static final boolean NATIVE_LITTLE_ENDIAN = nativeOrder() == LITTLE_ENDIAN; private static final long serialVersionUID = 0L; /** * Constructor for use in subclasses. */ protected LongHashFunction() { } /** * Returns a hash function implementing * * CityHash64 algorithm, version 1.1 without seed values. This implementation produce * equal results for equal input on platforms with different {@link ByteOrder}, but is slower * on big-endian platforms than on little-endian. * * @see #city_1_1(long) * @see #city_1_1(long, long) */ public static LongHashFunction city_1_1() { return CityHash_1_1.asLongHashFunctionWithoutSeed(); } /** * Returns a hash function implementing * * CityHash64 algorithm, version 1.1 using the given seed value. This implementation produce * equal results for equal input on platforms with different {@link ByteOrder}, but is slower * on big-endian platforms than on little-endian. * * @see #city_1_1() * @see #city_1_1(long, long) */ public static LongHashFunction city_1_1(long seed) { return CityHash_1_1.asLongHashFunctionWithSeed(seed); } /** * Returns a hash function implementing * * CityHash64 algorithm, version 1.1 using the two given seed values. This implementation * produce equal results for equal input on platforms with different {@link ByteOrder}, but * is slower on big-endian platforms than on little-endian. * * @see #city_1_1() * @see #city_1_1(long) */ public static LongHashFunction city_1_1(long seed0, long seed1) { return CityHash_1_1.asLongHashFunctionWithTwoSeeds(seed0, seed1); } /** * Returns a hash function implementing * xxHash * algorithm, release 39 without seed value (0 is used as default seed value). 
* This implementation produce equal results for equal * input on platforms with different {@link ByteOrder}, but is slower on big-endian platforms * than on little-endian. * *

Note: implementation is fully compatible with r40 * * @see #xx_r39(long) */ public static LongHashFunction xx_r39() { return XxHash_r39.asLongHashFunctionWithoutSeed(); } /** * Returns a hash function implementing * xxHash * algorithm, release 39 with the given seed value. * This implementation produce equal results for equal * input on platforms with different {@link ByteOrder}, but is slower on big-endian platforms * than on little-endian. * *

Note: implementation is fully compatible with r40 * * @see #xx_r39() */ public static LongHashFunction xx_r39(long seed) { return XxHash_r39.asLongHashFunctionWithSeed(seed); } /** * Returns a hash function implementing * MurmurHash3 * algorithm without seed values. This implementation produce equal results for equal input * on platforms with different {@link ByteOrder}, but is slower on big-endian platforms than on * little-endian. * * @see #murmur_3(long) */ public static LongHashFunction murmur_3() { return MurmurHash_3.asLongHashFunctionWithoutSeed(); } /** * Returns a hash function implementing * MurmurHash3 * algorithm with the given seed value. This implementation produce equal results for equal * input on platforms with different {@link ByteOrder}, but is slower on big-endian platforms * than on little-endian. * * @see #murmur_3() */ public static LongHashFunction murmur_3(long seed) { return MurmurHash_3.asLongHashFunctionWithSeed(seed); } private static void checkArrayOffs(int arrayLength, int off, int len) { if (len < 0 || off < 0 || off + len > arrayLength || off + len < 0) throw new IndexOutOfBoundsException(); } /** * Returns the hash code for the given {@code long} value; this method is consistent with * {@code LongHashFunction} methods that accept sequences of bytes, assuming the {@code input} * value is interpreted in {@linkplain ByteOrder#nativeOrder() native} byte order. For example, * the result of {@code hashLong(v)} call is identical to the result of * {@code hashLongs(new long[] {v})} call for any {@code long} value. */ public abstract long hashLong(long input); /** * Returns the hash code for the given {@code int} value; this method is consistent with * {@code LongHashFunction} methods that accept sequences of bytes, assuming the {@code input} * value is interpreted in {@linkplain ByteOrder#nativeOrder() native} byte order. 
For example, * the result of {@code hashInt(v)} call is identical to the result of * {@code hashInts(new int[] {v})} call for any {@code int} value. */ public abstract long hashInt(int input); /** * Returns the hash code for the given {@code short} value; this method is consistent with * {@code LongHashFunction} methods that accept sequences of bytes, assuming the {@code input} * value is interpreted in {@linkplain ByteOrder#nativeOrder() native} byte order. For example, * the result of {@code hashShort(v)} call is identical to the result of * {@code hashShorts(new short[] {v})} call for any {@code short} value. * As a consequence, {@code hashShort(v)} call produce always the same result as {@code * hashChar((char) v)}. */ public abstract long hashShort(short input); /** * Returns the hash code for the given {@code char} value; this method is consistent with * {@code LongHashFunction} methods that accept sequences of bytes, assuming the {@code input} * value is interpreted in {@linkplain ByteOrder#nativeOrder() native} byte order. For example, * the result of {@code hashChar(v)} call is identical to the result of * {@code hashChars(new char[] {v})} call for any {@code char} value. * As a consequence, {@code hashChar(v)} call produce always the same result as {@code * hashShort((short) v)}. */ public abstract long hashChar(char input); /** * Returns the hash code for the given {@code byte} value. This method is consistent with * {@code LongHashFunction} methods that accept sequences of bytes. For example, the result of * {@code hashByte(v)} call is identical to the result of * {@code hashBytes(new byte[] {v})} call for any {@code byte} value. */ public abstract long hashByte(byte input); /** * Returns the hash code for the empty (zero-length) bytes sequence, * for example {@code hashBytes(new byte[0])}. */ public abstract long hashVoid(); /** * Returns the hash code for {@code len} continuous bytes of the given {@code input} object, * starting from the given offset. 
The abstraction of input as ordered byte sequence and * "offset within the input" is defined by the given {@code access} strategy. * *

This method doesn't promise to throw a {@code RuntimeException} if {@code * [off, off + len - 1]} subsequence exceeds the bounds of the bytes sequence, defined by {@code * access} strategy for the given {@code input}, so use this method with caution. * * @param input the object to read bytes from * @param access access which defines the abstraction of the given input * as ordered byte sequence * @param off offset to the first byte of the subsequence to hash * @param len length of the subsequence to hash * @param the type of the input * @return hash code for the specified bytes subsequence */ public abstract long hash(T input, ReadAccess access, long off, long len); public > long hash( Accessor accessor, S source, long off, long len) { return hash(accessor.handle(source), accessor.access(), accessor.offset(source, off), accessor.size(len)); } private long unsafeHash(Object input, long off, long len) { return hash(input, nativeAccess(), off, len); } /** * Shortcut for {@link #hashBooleans(boolean[]) hashBooleans(new boolean[] {input})}. */ public long hashBoolean(boolean input) { return hashByte((byte) (input ? 1 : 0)); } /** * Shortcut for {@link #hashBooleans(boolean[], int, int) hashBooleans(input, 0, input.length)}. */ public long hashBooleans(@NotNull boolean[] input) { return hashBooleans(input, 0, input.length); } /** * Returns the hash code for the specified subsequence of the given {@code boolean} array. 
* * @param input the array to read data from * @param off index of the first {@code boolean} in the subsequence to hash * @param len length of the subsequence to hash * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashBooleans(@NotNull boolean[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(booleanArrayAccessor(), input, off, len); } /** * Shortcut for {@link #hashBytes(byte[], int, int) hashBytes(input, 0, input.length)}. */ public long hashBytes(@NotNull byte[] input) { return hashBytes(input, 0, input.length); } /** * Returns the hash code for the specified subsequence of the given {@code byte} array. * * @param input the array to read bytes from * @param off index of the first {@code byte} in the subsequence to hash * @param len length of the subsequence to hash * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashBytes(@NotNull byte[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(byteArrayAccessor(), input, off, len); } /** * Shortcut for {@link #hashBytes(ByteBuffer, int, int) * hashBytes(input, input.position(), input.remaining())}. */ public long hashBytes(ByteBuffer input) { return hashBytes(input, input.position(), input.remaining()); } /** * Returns the hash code for the specified subsequence of the given {@code ByteBuffer}. * *

This method doesn't alter the state (mark, position, limit or order) of the given * {@code ByteBuffer}. * * @param input the buffer to read bytes from * @param off index of the first {@code byte} in the subsequence to hash * @param len length of the subsequence to hash * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.capacity()} * or {@code len < 0} */ public long hashBytes(@NotNull ByteBuffer input, int off, int len) { checkArrayOffs(input.capacity(), off, len); return hash(uncheckedByteBufferAccessor(input), input, off, len); } /** * Returns the hash code of bytes of the wild memory from the given address. Use with caution. * * @param address the address of the first byte to hash * @param len length of the byte sequence to hash * @return hash code for the specified byte sequence */ public long hashMemory(long address, long len) { return unsafeHash(null, address, len); } /** * Shortcut for {@link #hashChars(char[], int, int) hashChars(input, 0, input.length)}. */ public long hashChars(@NotNull char[] input) { return hashChars(input, 0, input.length); } /** * Returns the hash code for bytes, as they lay in memory, of the specified subsequence * of the given {@code char} array. * * @param input the array to read data from * @param off index of the first {@code char} in the subsequence to hash * @param len length of the subsequence to hash, in chars (i. e. the length of the bytes * sequence to hash is {@code len * 2L}) * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashChars(@NotNull char[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(charArrayAccessor(), input, off, len); } /** * Shortcut for {@link #hashChars(String, int, int) hashChars(input, 0, input.length())}. 
*/ public long hashChars(@NotNull String input) { return hashChars(input, 0, input.length()); } /** * Returns the hash code for bytes of the specified subsequence of the given {@code String}'s * underlying {@code char} array. * * @param input the string which bytes to hash * @param off index of the first {@code char} in the subsequence to hash * @param len length of the subsequence to hash, in chars (i. e. the length of the bytes * sequence to hash is {@code len * 2L}) * @return the hash code of the given {@code String}'s bytes * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length()} * or {@code len < 0} */ public long hashChars(@NotNull String input, int off, int len) { checkArrayOffs(input.length(), off, len); return hash(stringAccessor(), input, off, len); } /** * Shortcut for {@link #hashChars(StringBuilder, int, int) hashChars(input, 0, input.length())}. */ public long hashChars(@NotNull StringBuilder input) { return hashNativeChars(input); } /** * Returns the hash code for bytes of the specified subsequence of the given * {@code StringBuilder}'s underlying {@code char} array. * * @param input the string builder which bytes to hash * @param off index of the first {@code char} in the subsequence to hash * @param len length of the subsequence to hash, in chars (i. e. 
the length of the bytes * sequence to hash is {@code len * 2L}) * @return the hash code of the given {@code String}'s bytes * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length()} * or {@code len < 0} */ public long hashChars(@NotNull StringBuilder input, int off, int len) { return hashNativeChars(input, off, len); } long hashNativeChars(CharSequence input) { return hashNativeChars(input, 0, input.length()); } long hashNativeChars(CharSequence input, int off, int len) { checkArrayOffs(input.length(), off, len); return hash(checkedNativeCharSequenceAccessor(), input, off, len); } /** * Shortcut for {@link #hashShorts(short[], int, int) hashShorts(input, 0, input.length)}. */ public long hashShorts(@NotNull short[] input) { return hashShorts(input, 0, input.length); } /** * Returns the hash code for bytes, as they lay in memory, of the specified subsequence * of the given {@code short} array. * * @param input the array to read data from * @param off index of the first {@code short} in the subsequence to hash * @param len length of the subsequence to hash, in shorts (i. e. the length of the bytes * sequence to hash is {@code len * 2L}) * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashShorts(@NotNull short[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(shortArrayAccessor(), input, off, len); } /** * Shortcut for {@link #hashInts(int[], int, int) hashInts(input, 0, input.length)}. */ public long hashInts(@NotNull int[] input) { return hashInts(input, 0, input.length); } /** * Returns the hash code for bytes, as they lay in memory, of the specified subsequence * of the given {@code int} array. * * @param input the array to read data from * @param off index of the first {@code int} in the subsequence to hash * @param len length of the subsequence to hash, in ints (i. e. 
the length of the bytes * sequence to hash is {@code len * 4L}) * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashInts(@NotNull int[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(intArrayAccessor(), input, off, len); } /** * Shortcut for {@link #hashLongs(long[], int, int) hashLongs(input, 0, input.length)}. */ public long hashLongs(@NotNull long[] input) { return hashLongs(input, 0, input.length); } /** * Returns the hash code for bytes, as they lay in memory, of the specified subsequence * of the given {@code long} array. * * @param input the array to read data from * @param off index of the first {@code long} in the subsequence to hash * @param len length of the subsequence to hash, in longs (i. e. the length of the bytes * sequence to hash is {@code len * 8L}) * @return hash code for the specified subsequence * @throws IndexOutOfBoundsException if {@code off < 0} or {@code off + len > input.length} * or {@code len < 0} */ public long hashLongs(@NotNull long[] input, int off, int len) { checkArrayOffs(input.length, off, len); return hash(longArrayAccessor(), input, off, len); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy