All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.aksw.commons.utils.OmitSimilarItems Maven / Gradle / Ivy

There is a newer version: 3.17.0-1
Show newest version
package org.aksw.commons.utils;

import java.util.function.BiPredicate;
import java.util.function.Consumer;

import org.apache.commons.lang.StringUtils;

public class OmitSimilarItems
    implements Consumer
{
    protected BiPredicate isTooSimilar;

    protected Consumer itemDelegate;
    protected Consumer skipCountDelegate;

    // TODO Turn these into Optionals
    protected T firstDistinguishedItem = null;
    protected T recentlySkippedItem = null;
    protected long numSkippedItems = 0;

    public OmitSimilarItems(Consumer itemDelegate, Consumer skipCountDelegate, BiPredicate isTooSimilar) {
        super();
        this.itemDelegate = itemDelegate;
        this.skipCountDelegate = skipCountDelegate;
        this.isTooSimilar = isTooSimilar;
    }


    @Override
    public void accept(T item) {

        boolean skip = isTooSimilar.test(firstDistinguishedItem, item);
        if(skip) {
            recentlySkippedItem = item;
            ++numSkippedItems;
        } else {

            if(numSkippedItems > 0) { // implies recentlySkippedItem != null
                boolean recentlySkippedItemDiffersFromCurrentOne = !isTooSimilar.test(recentlySkippedItem, item);
                if(recentlySkippedItemDiffersFromCurrentOne) {
                    // We are going to pass on the prior item after all, so decrement the skip count
                    --numSkippedItems;
                }

                if(numSkippedItems > 0) {
                    skipCountDelegate.accept(numSkippedItems);
                }

                // Send out the prior item if it differs significantly from the current one
                if(recentlySkippedItemDiffersFromCurrentOne) {
                    itemDelegate.accept(recentlySkippedItem);
                }
            }

            itemDelegate.accept(item);
            firstDistinguishedItem = item;

            recentlySkippedItem = null;
            numSkippedItems = 0;
        }
    }

//    public static int tmp(String a, String b) {
//        int result = StringUtils.getLevenshteinDistance(a, b);
//        System.out.println("  | " + a);
//        System.out.println("  | " + b);
//        System.out.println("  | " + "---------------------------------------");
//        System.out.println("  | " + result);
//        return result;
//
//    }
    public static Consumer forStrings(int maxLevenshteinDistance, Consumer delegate) {
        BiPredicate predicate =
            (a, b) -> a == null || b == null
                ? false
                : StringUtils.getLevenshteinDistance(a, b) <= maxLevenshteinDistance;

        Consumer result = new OmitSimilarItems<>(
            delegate,
            (itemSkipCount) -> delegate.accept("  ... " + itemSkipCount + " similar lines omitted ..."),
            predicate
        );

        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy