All downloads are free. The search and download features use the official Maven repository.

smile.neighbor.lsh.MultiProbeHash Maven / Gradle / Ivy

There is a newer version: 4.2.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.neighbor.lsh;

import java.io.Serial;
import java.util.Arrays;

/**
 * The hash function for data in Euclidean spaces. Besides hashing, it
 * records, for each of its hash functions, the smallest and largest value
 * produced by the vectors that have been added.
 *
 * @author Haifeng Li
 */
public class MultiProbeHash extends Hash {
    @Serial
    private static final long serialVersionUID = 2L;

    /**
     * For each hash function, the smallest value seen over the added data.
     */
    final double[] umin;
    /**
     * For each hash function, the largest value seen over the added data.
     */
    final double[] umax;


    /**
     * Constructor.
     * @param d the dimensionality of data.
     * @param k the number of random projection hash functions, which is usually
     *          set to log(N) where N is the dataset size.
     * @param w the width of random projections. It should be sufficiently away
     *          from 0. But we should not choose a w value that is too large,
     *          which will increase the query time.
     * @param H the size of universal hash tables.
     */
    public MultiProbeHash(int d, int k, double w, int H) {
        super(d, k, w, H);

        // Begin with an empty range per hash function, so that the first
        // vector added sets both bounds.
        this.umin = new double[k];
        Arrays.fill(this.umin, Double.POSITIVE_INFINITY);

        this.umax = new double[k];
        Arrays.fill(this.umax, Double.NEGATIVE_INFINITY);
    }

    /**
     * Hashes a vector and, as a side effect, widens the recorded
     * {@code umin}/{@code umax} ranges with the vector's hash function values.
     * Because of that side effect, this should only be used for adding data.
     * @param x the vector to be hashed.
     * @return the bucket of hash table for given vector x.
     */
    private int mphash(double[] x) {
        double[] projected = a.mv(x);

        long combined = 0;
        for (int j = 0; j < k; j++) {
            double value = (projected[j] + b[j]) / w;

            // Keep track of the extreme values of the j-th hash function.
            umin[j] = Math.min(umin[j], value);
            umax[j] = Math.max(umax[j], value);

            long cell = (long) Math.floor(value);
            combined += c[j] * cell;
        }

        // The remainder carries the sign of the dividend, so a negative
        // result is moved up by P before it is returned.
        int remainder = (int) (combined % P);
        if (remainder < 0) {
            remainder += P;
        }
        return remainder;
    }

    @Override
    public void add(int index, double[] x) {
        int hashValue = mphash(x);
        int slot = hashValue % H;

        // Create the bucket for this slot on first use; it is labeled with
        // the full hash value rather than the slot number.
        if (table[slot] == null) {
            table[slot] = new Bucket(hashValue);
        }

        table[slot].add(index);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy