All downloads are free. The search and download features use the official Maven repository.

com.github.jparkie.deduplicator.impl.BSBFSDDeDuplicatorSerializer Maven / Gradle / Ivy

Go to download

Advanced Bloom Filter Based Algorithms for Efficient Approximate Data De-Duplication in Streams

The newest version!
package com.github.jparkie.deduplicator.impl;

import com.github.jparkie.deduplicator.BitArray;
import com.github.jparkie.deduplicator.ProbabilisticDeDuplicatorSerializer;

import java.io.*;

/**
 * Binary serializer for {@link BSBFSDDeDuplicator}.
 *
 * <p>Stream layout, in write order:
 * <ol>
 *   <li>one byte: the format {@link #version()};</li>
 *   <li>one long: {@code numBits};</li>
 *   <li>one int: {@code numHashFunctions};</li>
 *   <li>every entry of {@code bloomFilters}, each written by {@code BitArray.writeTo}.</li>
 * </ol>
 *
 * <p>NOTE(review): the type argument on the implemented interface assumes
 * {@code ProbabilisticDeDuplicatorSerializer} is generic in the de-duplicator type; the
 * {@code @Override} on {@code writeTo(BSBFSDDeDuplicator, OutputStream)} cannot match a raw
 * interface method otherwise — confirm against the interface declaration.
 */
public class BSBFSDDeDuplicatorSerializer implements ProbabilisticDeDuplicatorSerializer<BSBFSDDeDuplicator> {
    /**
     * Returns the version tag written as the first byte of every serialized stream.
     *
     * @return the format version, currently {@code 1}
     */
    @Override
    public byte version() {
        return 1;
    }

    /**
     * Writes the given de-duplicator to {@code out} in the layout described on the class.
     *
     * <p>The stream is neither flushed nor closed here; it stays under the caller's control.
     *
     * @param probabilisticDeDuplicator the de-duplicator whose state is written
     * @param out the destination stream
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void writeTo(BSBFSDDeDuplicator probabilisticDeDuplicator, OutputStream out) throws IOException {
        final DataOutputStream dos = new DataOutputStream(out);
        dos.writeByte(version());
        dos.writeLong(probabilisticDeDuplicator.numBits);
        dos.writeInt(probabilisticDeDuplicator.numHashFunctions);
        for (BitArray bloomFilter : probabilisticDeDuplicator.bloomFilters) {
            bloomFilter.writeTo(dos);
        }
    }

    /**
     * Reads a de-duplicator previously written by
     * {@link #writeTo(BSBFSDDeDuplicator, OutputStream)}.
     *
     * <p>One {@code BitArray} is read per hash function announced in the header. The stream is
     * not closed here; it stays under the caller's control.
     *
     * @param in the source stream, positioned at the version byte
     * @return a new de-duplicator built from the serialized state
     * @throws IOException if reading from {@code in} fails or the stream ends early
     * @throws IllegalArgumentException if the version byte differs from {@link #version()} or
     *         the serialized number of hash functions is negative
     */
    @Override
    public BSBFSDDeDuplicator readFrom(InputStream in) throws IOException {
        final DataInputStream dis = new DataInputStream(in);
        final byte serializedVersion = dis.readByte();
        if (serializedVersion != version()) {
            final String error = String.format("Unexpected ProbabilisticDeDuplicator version number (%d), expected %d", serializedVersion, version());
            throw new IllegalArgumentException(error);
        }
        final long numBits = dis.readLong();
        final int numHashFunctions = dis.readInt();
        // A corrupt header would otherwise surface as an opaque NegativeArraySizeException
        // from the array allocation below; reject it with a descriptive message instead.
        if (numHashFunctions < 0) {
            final String error = String.format("Unexpected ProbabilisticDeDuplicator number of hash functions (%d), expected a non-negative value", numHashFunctions);
            throw new IllegalArgumentException(error);
        }
        final BitArray[] bloomFilters = new BitArray[numHashFunctions];
        for (int index = 0; index < numHashFunctions; index++) {
            bloomFilters[index] = BitArray.readFrom(dis);
        }
        return new BSBFSDDeDuplicator(numBits, numHashFunctions, bloomFilters);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy