// de.lmu.ifi.dbs.elki.index.lsh.hashfunctions.MultipleProjectionsLocalitySensitiveHashFunction (Maven / Gradle / Ivy)
/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2019
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.lmu.ifi.dbs.elki.index.lsh.hashfunctions;
import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
 * LSH hash function for vector space data. Depending on the choice of random
 * vectors, it can be appropriate for Manhattan and Euclidean distances.
 * <p>
 * Reference:
 * <p>
 * M. Datar, N. Immorlica, P. Indyk, V. S. Mirrokni<br>
 * Locality-sensitive hashing scheme based on p-stable distributions<br>
 * Proc. 20th Annual Symposium on Computational Geometry
 *
 * @author Erich Schubert
 * @since 0.6.0
 */
@Reference(authors = "M. Datar, N. Immorlica, P. Indyk, V. S. Mirrokni", //
title = "Locality-sensitive hashing scheme based on p-stable distributions", //
booktitle = "Proc. 20th Annual Symposium on Computational Geometry", //
url = "https://doi.org/10.1145/997817.997857", //
bibkey = "DBLP:conf/compgeom/DatarIIM04")
public class MultipleProjectionsLocalitySensitiveHashFunction implements LocalitySensitiveHashFunction {
/** Projection matrix. */
RandomProjectionFamily.Projection projection;
/** Shift offset. */
double[] shift;
/** Scaling factor: inverse of width. */
double iwidth;
/** Random numbers for mixing the hash codes of the individual functions. */
int[] randoms1;
/**
 * Constructor.
 *
 * For every output dimension of the projection, one random shift in the
 * range {@code [0, width)} and one random integer multiplier (used to mix the
 * per-dimension bucket numbers into a single hash code) are drawn from
 * {@code rnd}.
 *
 * @param projection Projection vectors
 * @param width Width of bins
 * @param rnd Random number generator
 */
public MultipleProjectionsLocalitySensitiveHashFunction(RandomProjectionFamily.Projection projection, double width, Random rnd) {
  this.projection = projection;
  // Store the reciprocal, so hashing can multiply instead of divide.
  this.iwidth = 1. / width;
  final int dims = projection.getOutputDimensionality();
  final double[] offsets = new double[dims];
  final int[] multipliers = new int[dims];
  int d = 0;
  while(d < dims) {
    // Random shift of the bin boundaries:
    offsets[d] = rnd.nextDouble() * width;
    // Produce a large random number; although 7FFFFFFF would likely be large
    // enough, we try to stick to the suggested approach (which assumes
    // unsigned integers). The two draws must stay in this order to consume
    // the random generator in the same sequence.
    final int upper = rnd.nextInt(0x10000D) << 16;
    final int lower = rnd.nextInt(0xFFFFD);
    multipliers[d] = upper + lower + 1;
    d++;
  }
  this.shift = offsets;
  this.randoms1 = multipliers;
}
/** Bit mask for signed int to unsigned long conversion. */
private final static long MASK32 = 0xFFFFFFFFL;
public int hashObject(NumberVector vec) {
// Project the vector:
final double[] proj = projection.project(vec);
long t1sum = 0L;
for(int i = 0; i < shift.length; i++) {
int ai = (int) Math.floor((proj[i] + shift[i]) * iwidth);
t1sum += (randoms1[i] & MASK32) * ai; // unsigned math!
return fastModPrime(t1sum);
public int hashObject(NumberVector vec, double[] buf) {
// Project the vector:
projection.project(vec, buf);
long t1sum = 0L;
for(int i = 0; i < shift.length; i++) {
int ai = (int) Math.floor((buf[i] + shift[i]) * iwidth);
t1sum += (randoms1[i] & MASK32) * ai; // unsigned math!
return fastModPrime(t1sum);
* Fast modulo operation for the largest unsigned integer prime.
* @param data Long input
* @return {@code data % (2^32 - 5)}.
public static int fastModPrime(long data) {
// Mix high and low 32 bit:
int high = (int) (data >>> 32);
// Use fast multiplication with 5 for high:
int alpha = ((int) data) + (high << 2 + high);
// Note that in Java, PRIME will be negative.
if(alpha < 0 && alpha > -5) {
alpha = alpha + 5;
return alpha;
public int getNumberOfProjections() {
return this.projection.getOutputDimensionality();