com.github.jparkie.deduplicator.impl.BSBFDeDuplicatorSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of DeDuplicator Show documentation
Show all versions of DeDuplicator Show documentation
Advanced Bloom Filter Based Algorithms for Efficient Approximate Data De-Duplication in Streams
The newest version!
package com.github.jparkie.deduplicator.impl;
import com.github.jparkie.deduplicator.BitArray;
import com.github.jparkie.deduplicator.ProbabilisticDeDuplicatorSerializer;
import java.io.*;
/**
 * Binary serializer for {@link BSBFDeDuplicator} instances.
 *
 * <p>Stream layout, in order:
 * <ol>
 *   <li>one byte: the format version returned by {@link #version()};</li>
 *   <li>one long: {@code numBits};</li>
 *   <li>one int: {@code numHashFunctions};</li>
 *   <li>each entry of {@code bloomFilters}, written via {@code BitArray.writeTo}.</li>
 * </ol>
 *
 * <p>This class declares no fields of its own. Neither method closes the stream it is given.
 */
// NOTE(review): the type argument is required for the @Override methods below to match the
// interface; this assumes ProbabilisticDeDuplicatorSerializer declares a single type
// parameter — confirm against the interface declaration.
public class BSBFDeDuplicatorSerializer implements ProbabilisticDeDuplicatorSerializer<BSBFDeDuplicator> {

    /**
     * Returns the format version written as the first byte of every serialized instance.
     *
     * @return the current format version, {@code 1}
     */
    @Override
    public byte version() {
        return 1;
    }

    /**
     * Writes the given de-duplicator to {@code out} in the layout described on this class.
     *
     * @param probabilisticDeDuplicator the instance whose {@code numBits},
     *        {@code numHashFunctions} and {@code bloomFilters} are written
     * @param out the destination stream; it is wrapped in a {@link DataOutputStream} and is
     *        not closed by this method
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void writeTo(BSBFDeDuplicator probabilisticDeDuplicator, OutputStream out) throws IOException {
        final DataOutputStream dos = new DataOutputStream(out);
        dos.writeByte(version());
        dos.writeLong(probabilisticDeDuplicator.numBits);
        dos.writeInt(probabilisticDeDuplicator.numHashFunctions);
        for (BitArray bloomFilter : probabilisticDeDuplicator.bloomFilters) {
            bloomFilter.writeTo(dos);
        }
    }

    /**
     * Reads a de-duplicator previously written by
     * {@link #writeTo(BSBFDeDuplicator, OutputStream)}.
     *
     * @param in the source stream; it is wrapped in a {@link DataInputStream} and is not
     *        closed by this method
     * @return a new {@link BSBFDeDuplicator} built from the values read
     * @throws IllegalArgumentException if the leading version byte differs from
     *         {@link #version()}, or if the stored {@code numHashFunctions} is negative
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public BSBFDeDuplicator readFrom(InputStream in) throws IOException {
        final DataInputStream dis = new DataInputStream(in);
        final byte serializedVersion = dis.readByte();
        if (serializedVersion != version()) {
            final String error = String.format("Unexpected ProbabilisticDeDuplicator version number (%d), expected %d", serializedVersion, version());
            throw new IllegalArgumentException(error);
        }
        final long numBits = dis.readLong();
        final int numHashFunctions = dis.readInt();
        // The count comes straight from the stream and sizes the array below; reject a
        // negative value here instead of failing with NegativeArraySizeException.
        if (numHashFunctions < 0) {
            final String error = String.format("Unexpected ProbabilisticDeDuplicator number of hash functions (%d), expected a non-negative value", numHashFunctions);
            throw new IllegalArgumentException(error);
        }
        final BitArray[] bloomFilters = new BitArray[numHashFunctions];
        for (int index = 0; index < numHashFunctions; index++) {
            bloomFilters[index] = BitArray.readFrom(dis);
        }
        return new BSBFDeDuplicator(numBits, numHashFunctions, bloomFilters);
    }
}