All downloads are free. Search and download functionality uses the official Maven repository.

org.deeplearning4j.spark.text.functions.FoldBetweenPartitionFunction Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta_spark_1
Show newest version
package org.deeplearning4j.spark.text.functions;

import org.apache.spark.api.java.function.Function2;
import org.apache.spark.broadcast.Broadcast;
import org.deeplearning4j.berkeley.Counter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

/**
 * @author jeffreytang
 */
public class FoldBetweenPartitionFunction implements Function2, Iterator>{
    private Broadcast> broadcastedMaxPerPartitionCounter;

    public FoldBetweenPartitionFunction(Broadcast> broadcastedMaxPerPartitionCounter) {
        this.broadcastedMaxPerPartitionCounter = broadcastedMaxPerPartitionCounter;
    }

    @Override
    public Iterator call(Integer ind, Iterator partition) throws Exception {
        int sumToAdd = 0;
        Counter maxPerPartitionCounterInScope = broadcastedMaxPerPartitionCounter.value();

        // Add the sum of counts of all the partition with an index lower than the current one
        if (ind != 0) {
            for (int i=0; i < ind; i++) { sumToAdd += maxPerPartitionCounterInScope.getCount(i); }
        }

        // Add the sum of counts to each element of the partition
        List itemsAddedToList = new ArrayList<>();
        while (partition.hasNext()) {
            itemsAddedToList.add(partition.next().get() + sumToAdd);
        }

        return itemsAddedToList.iterator();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy