gov.sandia.cognition.learning.data.SequentialDataMultiPartitioner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: SequentialDataMultiPartitioner.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Sep 22, 2008, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.data;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
/**
* This partitioner splits a Collection of data into a pre-defined number of
* approximately equal sequential partitions, with the nonzero remainder
* elements going into the final partition. For example, if we have ten
* data points in a Collection, and we want to split the Collection into
* three partitions, then the method will return three Collections
*
* <ul>
* <li>Partition 1: 1, 2, 3</li>
* <li>Partition 2: 4, 5, 6</li>
* <li>Partition 3: 7, 8, 9, 10</li>
* </ul>
*
*
* This class was primarily intended for splitting datasets for
* parallelization.
*
*
* @author Kevin R. Dixon
* @since 2.1
*/
public class SequentialDataMultiPartitioner
{

    /**
     * Creates a partition of the given data into "numPartitions" roughly equal
     * sets, preserving their pre-existing sequential ordering, with the
     * nonzero remainder elements going into the final partition.
     * <p>
     * The data is traversed exactly once, in the order produced by its
     * iterator. If numPartitions exceeds data.size(), every partition except
     * the last is empty and the last one receives all of the data.
     *
     * @param <DataType> Type of data to partition.
     * @param data Collection of data to partition.
     * @param numPartitions Number of partitions to create, must be at least 1.
     * @return
     * List of numPartitions Lists. Each of the first (numPartitions-1) Lists
     * has size data.size()/numPartitions (integer division), with the
     * remainder of data elements going into the final partition.
     * @throws IllegalArgumentException
     * If numPartitions is less than 1.
     */
    public static <DataType> ArrayList<ArrayList<DataType>> create(
        Collection<? extends DataType> data,
        int numPartitions )
    {
        // Fail fast with a descriptive message instead of an
        // ArithmeticException from the division below (numPartitions == 0)
        // or an "Illegal Capacity" error from ArrayList (numPartitions < 0).
        if( numPartitions < 1 )
        {
            throw new IllegalArgumentException(
                "numPartitions must be at least 1, but was " + numPartitions );
        }

        int numData = data.size();
        int numEach = numData / numPartitions;

        ArrayList<ArrayList<DataType>> retval =
            new ArrayList<ArrayList<DataType>>( numPartitions );

        // Number of elements consumed from the iterator so far
        int index = 0;
        Iterator<? extends DataType> iterator = data.iterator();
        for( int n = 0; n < numPartitions; n++ )
        {
            // The remainder goes into the final partition
            int numThis = (n < (numPartitions-1)) ? numEach : (numData-index);
            ArrayList<DataType> partition = new ArrayList<DataType>( numThis );
            for( int i = 0; i < numThis; i++ )
            {
                partition.add( iterator.next() );
                index++;
            }
            retval.add( partition );
        }

        return retval;
    }

}