All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sangupta.murmur.Murmur2 Maven / Gradle / Ivy

There is a newer version: 3.2.3
Show newest version
/**
 *
 * murmurhash - Pure Java implementation of the Murmur Hash algorithms.
 * Copyright (c) 2014, Sandeep Gupta
 * 
 * http://sangupta.com/projects/murmur
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * 		http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 */

package com.sangupta.murmur;

/**
 * A pure Java implementation of the Murmur 2 hashing algorithm as presented
 * at Murmur Project
 * 
 * Code is ported from original C++ source at 
 * 
 * MurmurHash2.cpp
 * 
 * and the 64-bit version at
 * 
 * MurmurHash2_64.cpp
 * 
 * @author sangupta
 * @since 1.0
 */
public class Murmur2 implements MurmurConstants {

	/**
	 * Compute the Murmur2 hash as described in the original source code.
	 * 
	 * @param data
	 *            the data that needs to be hashed
	 * 
	 * @param length
	 *            the length of the data that needs to be hashed
	 * 
	 * @param seed
	 *            the seed to use to compute the hash
	 * 
	 * @return the computed hash value
	 */
	public static long hash(final byte[] data, int length, long seed) {
		final long m = 0x5bd1e995l;
		final int r = 24;

		// Initialize the hash to a 'random' value
		long hash = ((seed ^ length) & UINT_MASK);

		// Mix 4 bytes at a time into the hash
		int length4 = length >>> 2;

		for (int i = 0; i < length4; i++) {
			final int i4 = i << 2;
			
			long k = (data[i4] & UNSIGNED_MASK);
			k |= (data[i4 + 1] & UNSIGNED_MASK) << 8;
			k |= (data[i4 + 2] & UNSIGNED_MASK) << 16;
			k |= (data[i4 + 3] & UNSIGNED_MASK) << 24;
			
			k = ((k * m) & UINT_MASK);
			k ^= ((k >>> r) & UINT_MASK);
			k = ((k * m) & UINT_MASK);
			
			hash = ((hash * m) & UINT_MASK);
			hash = ((hash ^ k) & UINT_MASK);
		}

		// Handle the last few bytes of the input array
		int offset = length4 << 2;
		switch (length & 3) {
			case 3:
				hash ^= ((data[offset + 2] << 16) & UINT_MASK);
				
			case 2:
				hash ^= ((data[offset + 1] << 8) & UINT_MASK);
				
			case 1:
				hash ^= (data[offset] & UINT_MASK);
				hash = ((hash * m) & UINT_MASK);
		}

		hash ^= ((hash >>> 13) & UINT_MASK);
		hash = ((hash * m) & UINT_MASK);
		hash ^= hash >>> 15;

		return hash;
	}

	/**
	 * Compute the Murmur2 hash (64-bit version) as described in the original source code.
	 * 
	 * @param data
	 *            the data that needs to be hashed
	 * 
	 * @param length
	 *            the length of the data that needs to be hashed
	 * 
	 * @param seed
	 *            the seed to use to compute the hash
	 * 
	 * @return the computed hash value
	 */
	public static long hash64(final byte[] data, int length, long seed) {
		final long m = 0xc6a4a7935bd1e995L;
        final int r = 47;

        long h = (seed & UINT_MASK) ^ (length * m);

        int length8 = length >> 3;

        for (int i = 0; i < length8; i++) {
            final int i8 = i << 3;
            
            long k =  ((long)data[i8]&0xff) +(((long)data[i8+1]&0xff)<<8)
                    +(((long)data[i8+2]&0xff)<<16) +(((long)data[i8+3]&0xff)<<24)
                    +(((long)data[i8+4]&0xff)<<32) +(((long)data[i8+5]&0xff)<<40)
                    +(((long)data[i8+6]&0xff)<<48) +(((long)data[i8+7]&0xff)<<56);
            
            k *= m;
            k ^= k >>> r;
            k *= m;
            
            h ^= k;
            h *= m; 
        }
        
        switch (length & 7) {
			case 7:
				h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;

			case 6:
				h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
			
			case 5:
				h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
			
			case 4:
				h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
			
			case 3:
				h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
			
			case 2:
				h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
			
			case 1:
				h ^= (long) (data[length & ~7] & 0xff);
				h *= m;
        }
     
        h ^= h >>> r;
        h *= m;
        h ^= h >>> r;

        return h;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy