// hivemall.knn.distance.HammingDistanceUDF Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package hivemall.knn.distance;
import static hivemall.utils.hadoop.WritableUtils.val;
import java.math.BigInteger;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
/**
 * Hive UDF {@code hamming_distance} that counts the bit positions in which two
 * values differ.
 *
 * <p>Three input shapes are supported:
 * <ul>
 * <li>two {@code long} values,</li>
 * <li>two decimal integer strings of arbitrary magnitude (parsed as
 * {@link BigInteger}),</li>
 * <li>two lists of {@link LongWritable}, compared element by element; when the
 * lists differ in length, the missing elements of the shorter list are treated
 * as {@code 0}.</li>
 * </ul>
 */
//@formatter:off
@Description(name = "hamming_distance",
        value = "_FUNC_(integer A, integer B) - Returns Hamming distance between A and B",
        extended = "select \n" +
                " hamming_distance(0,3) as c1, \n" +
                " hamming_distance(\"0\",\"3\") as c2 -- 0=0b00, 3=0b11\n" +
                ";\n" +
                "\n" +
                "c1 c2\n" +
                "2 2")
@UDFType(deterministic = true, stateful = false)
//@formatter:on
public class HammingDistanceUDF extends UDF {

    /**
     * Computes the Hamming distance between two 64-bit integers.
     *
     * @param a first operand
     * @param b second operand
     * @return number of differing bits (0 to 64) wrapped in a writable
     */
    public IntWritable evaluate(long a, long b) {
        return val(hammingDistance(a, b));
    }

    /**
     * Computes the Hamming distance between two integers given as decimal strings.
     *
     * @param a decimal representation of the first operand
     * @param b decimal representation of the second operand
     * @return number of differing bits, or {@code null} when either argument is
     *         {@code null}
     * @throws NumberFormatException if either string is not a valid decimal integer
     */
    public IntWritable evaluate(String a, String b) {
        if (a == null || b == null) {
            // SQL NULL in, SQL NULL out rather than a NullPointerException
            return null;
        }
        final BigInteger ai = new BigInteger(a);
        final BigInteger bi = new BigInteger(b);
        return val(hammingDistance(ai, bi));
    }

    /**
     * Computes the summed Hamming distance between two lists of 64-bit integers.
     *
     * <p>Elements are compared pairwise by index. Elements of the longer list that
     * have no counterpart are compared against {@code 0}, i.e. each contributes
     * its own number of set bits.
     *
     * @param a first list
     * @param b second list
     * @return sum of the per-element distances, or {@code null} when either list
     *         is {@code null}
     */
    public IntWritable evaluate(List<LongWritable> a, List<LongWritable> b) {
        if (a == null || b == null) {
            // SQL NULL in, SQL NULL out rather than a NullPointerException
            return null;
        }

        final int alen = a.size();
        final int blen = b.size();
        final int min = Math.min(alen, blen);
        final int max = Math.max(alen, blen);
        // list that supplies the unpaired tail; on equal length the tail is empty
        final List<LongWritable> longer = (alen < blen) ? b : a;

        int result = 0;
        for (int i = 0; i < min; i++) {
            result += hammingDistance(a.get(i).get(), b.get(i).get());
        }
        for (int j = min; j < max; j++) {
            result += hammingDistance(0L, longer.get(j).get());
        }
        return val(result);
    }

    /**
     * Returns the number of bit positions in which {@code a} and {@code b} differ.
     *
     * @param a first operand
     * @param b second operand
     * @return population count of {@code a ^ b}
     */
    public static int hammingDistance(final long a, final long b) {
        return Long.bitCount(a ^ b);
    }

    /**
     * Returns the bit count of {@code a xor b} for arbitrary-precision integers.
     *
     * <p>Note: {@link BigInteger#bitCount()} counts the bits that differ from the
     * sign bit, so when the operands have opposite signs the result is not the
     * same as the fixed-width {@code long} variant.
     *
     * @param a first operand
     * @param b second operand
     * @return {@code a.xor(b).bitCount()}
     */
    public static int hammingDistance(final BigInteger a, final BigInteger b) {
        return a.xor(b).bitCount();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy