All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.mahout.math.MurmurHash Maven / Gradle / Ivy

Go to download

High performance scientific and technical computing data structures and methods, mostly based on CERN's Colt Java API

There is a newer version: 0.13.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.math;

import com.google.common.primitives.Ints;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * This is a very fast, non-cryptographic hash suitable for general hash-based
 * lookup.  See http://murmurhash.googlepages.com/ for more details.
 * 

*

The C version of MurmurHash 2.0 found at that site was ported * to Java by Andrzej Bialecki (ab at getopt org).

*/ public final class MurmurHash { private MurmurHash() {} /** * Hashes an int. * @param data The int to hash. * @param seed The seed for the hash. * @return The 32 bit hash of the bytes in question. */ public static int hash(int data, int seed) { return hash(ByteBuffer.wrap(Ints.toByteArray(data)), seed); } /** * Hashes bytes in an array. * @param data The bytes to hash. * @param seed The seed for the hash. * @return The 32 bit hash of the bytes in question. */ public static int hash(byte[] data, int seed) { return hash(ByteBuffer.wrap(data), seed); } /** * Hashes bytes in part of an array. * @param data The data to hash. * @param offset Where to start munging. * @param length How many bytes to process. * @param seed The seed to start with. * @return The 32-bit hash of the data in question. */ public static int hash(byte[] data, int offset, int length, int seed) { return hash(ByteBuffer.wrap(data, offset, length), seed); } /** * Hashes the bytes in a buffer from the current position to the limit. * @param buf The bytes to hash. * @param seed The seed for the hash. * @return The 32 bit murmur hash of the bytes in the buffer. */ public static int hash(ByteBuffer buf, int seed) { // save byte order for later restoration ByteOrder byteOrder = buf.order(); buf.order(ByteOrder.LITTLE_ENDIAN); int m = 0x5bd1e995; int r = 24; int h = seed ^ buf.remaining(); while (buf.remaining() >= 4) { int k = buf.getInt(); k *= m; k ^= k >>> r; k *= m; h *= m; h ^= k; } if (buf.remaining() > 0) { ByteBuffer finish = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); // for big-endian version, use this first: // finish.position(4-buf.remaining()); finish.put(buf).rewind(); h ^= finish.getInt(); h *= m; } h ^= h >>> 13; h *= m; h ^= h >>> 15; buf.order(byteOrder); return h; } public static long hash64A(byte[] data, int seed) { return hash64A(ByteBuffer.wrap(data), seed); } public static long hash64A(byte[] data, int offset, int length, int seed) { return hash64A(ByteBuffer.wrap(data, offset, length), seed); } public static long hash64A(ByteBuffer buf, int seed) { ByteOrder byteOrder = buf.order(); buf.order(ByteOrder.LITTLE_ENDIAN); long m = 0xc6a4a7935bd1e995L; int r = 47; long h = seed ^ (buf.remaining() * m); while (buf.remaining() >= 8) { long k = buf.getLong(); k *= m; k ^= k >>> r; k *= m; h ^= k; h *= m; } if (buf.remaining() > 0) { ByteBuffer finish = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); // for big-endian version, do this first: // finish.position(8-buf.remaining()); finish.put(buf).rewind(); h ^= finish.getLong(); h *= m; } h ^= h >>> r; h *= m; h ^= h >>> r; buf.order(byteOrder); return h; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy