
// com.bigdata.service.ndx.pipeline.DefaultDuplicateRemover Maven / Gradle / Ivy
// Show all versions of bigdata-core Show documentation
package com.bigdata.service.ndx.pipeline;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.KVO;
import com.bigdata.util.BytesUtil;
/**
 * Implementation which retains one instance of each tuple having the same
 * unsigned byte[] key and the same byte[] value. For efficiency, you may
 * specify that the presence of the same non-<code>null</code> object
 * reference may be used to detect duplicates without requiring the comparison
 * of the byte[] values.
 * <p>
 * Each element is compared only against the most recently retained element,
 * so duplicates are detected only when they are adjacent in the input.
 * NOTE(review): callers presumably pass the tuples in key order - confirm.
 * <p>
 * When duplicates are eliminated and the retained tuple is a {@link KVOList},
 * the tuples identified as duplicates are added to that {@link KVOList} so
 * that they remain linked to the retained instance.
 *
 * @author Bryan Thompson
 * @version $Id$
 */
public class DefaultDuplicateRemover implements IDuplicateRemover {

    /**
     * When <code>true</code>, two tuples with the same key and the same
     * non-<code>null</code> object reference are treated as duplicates without
     * comparing their byte[] values.
     */
    final private boolean testRefs;

    /**
     * Instance verifies the same unsigned byte[] key and the same byte[]
     * value.
     */
    public transient static final IDuplicateRemover KEY_VAL = new DefaultDuplicateRemover(
            false/* testRefs */);

    /**
     * Instance verifies the same unsigned byte[] key and will accept the same
     * non-<code>null</code> object reference as indicating the same value. If
     * the object reference is <code>null</code> then it will compare the
     * byte[] values.
     */
    public transient static final IDuplicateRemover KEY_REF_VAL = new DefaultDuplicateRemover(
            // Must be true: with false this constant behaved exactly like
            // KEY_VAL and never used the object reference test.
            true/* testRefs */);

    /**
     * @param testRefs
     *            When <code>true</code>, {@link KVO}s having the same key and
     *            the same non-<code>null</code> object reference will be
     *            filtered without testing the byte[] values for equality.
     */
    public DefaultDuplicateRemover(final boolean testRefs) {

        this.testRefs = testRefs;

    }

    /**
     * Drops each element which is a duplicate of the most recently retained
     * element and returns the retained elements in their original order.
     *
     * @param src
     *            The tuples to be filtered. The array itself is not modified.
     *
     * @return The result of {@link KVO#dense(KVO[], int)} applied to the
     *         retained tuples and their count.
     */
    public KVO[] filter(final KVO[] src) {

        // Worst case: nothing is a duplicate, so every element is retained.
        final KVO[] tmp = new KVO[src.length];

        int ndistinct = 0;

        // The last element which was retained (null until the first one).
        KVO prior = null;

        for (KVO other : src) {

            if (prior != null) {

                if (filterDuplicate(prior, other)) {

                    // Duplicate of the last retained element: drop it.
                    continue;

                }

            }

            // Retain this element; it becomes the basis for later comparisons.
            tmp[ndistinct++] = prior = other;

        }

        // Make the array dense.
        return KVO.dense(tmp, ndistinct);

    }

    /**
     * Return <code>true</code> if the <i>other</i> instance is a duplicate
     * and may be dropped. (This implementation recognizes {@link KVOList} and
     * handles it appropriately.)
     *
     * @param prior
     *            The previous {@link KVO} instance.
     * @param other
     *            Another {@link KVO} instance.
     *
     * @return <code>true</code> if the <i>other</i> is a duplicate.
     */
    protected boolean filterDuplicate(final KVO prior, final KVO other) {

        // same key?
        if (BytesUtil.bytesEqual(prior.key, other.key)) {

            // same reference (if ref testing) or same value?
            if ((testRefs && prior.obj != null && prior.obj == other.obj)
                    || BytesUtil.bytesEqual(prior.val, other.val)) {

                if (prior instanceof KVOList) {

                    // link the duplicates together.
                    ((KVOList) prior).add(other);

                }

                return true;

            }

        }

        return false;

    }

}