All downloads are free. The search and download features use the official Maven repository.

hivemall.knn.distance.HammingDistanceUDF Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.knn.distance;

import static hivemall.utils.hadoop.WritableUtils.val;

import java.math.BigInteger;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;

//@formatter:off
@Description(name = "hamming_distance",
        value = "_FUNC_(integer A, integer B) - Returns Hamming distance between A and B",
        extended = "select \n" + 
                "  hamming_distance(0,3) as c1, \n" + 
                "  hamming_distance(\"0\",\"3\") as c2 -- 0=0x00, 3=0x11\n" + 
                ";\n" + 
                "\n" + 
                "c1      c2\n" + 
                "2       2")
@UDFType(deterministic = true, stateful = false)
//@formatter:on
/**
 * Hive UDF that computes the Hamming distance (the number of differing bits)
 * between two values.
 *
 * <p>Three argument shapes are accepted: a pair of {@code long} values, a pair
 * of decimal integer strings of arbitrary size, and a pair of lists of
 * {@link LongWritable} words.
 */
public class HammingDistanceUDF extends UDF {

    /**
     * Computes the Hamming distance between two 64-bit integers.
     *
     * @param a first value
     * @param b second value
     * @return the number of bit positions (out of 64) in which {@code a} and {@code b} differ
     */
    public IntWritable evaluate(long a, long b) {
        return val(hammingDistance(a, b));
    }

    /**
     * Computes the Hamming distance between two integers given as decimal strings.
     *
     * <p>Arguments are not null-checked here; {@link BigInteger#BigInteger(String)}
     * rejects {@code null} with a {@link NullPointerException}.
     *
     * @param a decimal string representation of the first integer
     * @param b decimal string representation of the second integer
     * @return the bit count of {@code a XOR b}, see {@link #hammingDistance(BigInteger, BigInteger)}
     * @throws NumberFormatException if either argument is not a valid decimal integer
     */
    public IntWritable evaluate(String a, String b) {
        BigInteger ai = new BigInteger(a);
        BigInteger bi = new BigInteger(b);
        return val(hammingDistance(ai, bi));
    }

    /**
     * Computes the Hamming distance between two sequences of 64-bit words.
     *
     * <p>Words are compared position by position. When the lists differ in
     * length, the shorter one is treated as if it were padded with zero words,
     * so every set bit in the surplus words of the longer list adds to the
     * distance.
     *
     * @param a first list of words
     * @param b second list of words
     * @return the sum of the per-word Hamming distances
     */
    public IntWritable evaluate(List<LongWritable> a, List<LongWritable> b) {
        final int alen = a.size();
        final int blen = b.size();

        // Number of positions present in both lists.
        final int common = Math.min(alen, blen);
        // The list whose tail (if any) extends beyond the common prefix.
        final List<LongWritable> longer = (alen < blen) ? b : a;
        final int total = Math.max(alen, blen);

        int result = 0;
        for (int i = 0; i < common; i++) {
            result += hammingDistance(a.get(i).get(), b.get(i).get());
        }
        // Surplus words are compared against an implicit zero word.
        for (int j = common; j < total; j++) {
            result += hammingDistance(0L, longer.get(j).get());
        }
        return val(result);
    }

    /**
     * Returns the number of bit positions in which two 64-bit values differ.
     *
     * @param a first value
     * @param b second value
     * @return the population count of {@code a ^ b}, in the range 0..64
     */
    public static int hammingDistance(final long a, final long b) {
        return Long.bitCount(a ^ b);
    }

    /**
     * Returns the bit count of the XOR of two arbitrary-precision integers.
     *
     * <p>NOTE: {@link BigInteger#bitCount()} counts the bits that differ from
     * the sign bit of the two's-complement representation. When {@code a} and
     * {@code b} have opposite signs the XOR is negative, so the result is the
     * number of zero bits in that XOR rather than a fixed-width distance (for
     * example, {@code -1} versus {@code 0} yields 0 here but 64 in the
     * {@code long} overload).
     *
     * @param a first value
     * @param b second value
     * @return {@code a.xor(b).bitCount()}
     */
    public static int hammingDistance(final BigInteger a, final BigInteger b) {
        BigInteger xor = a.xor(b);
        return xor.bitCount();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy