All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.spark.data.BatchDataSetsFunction Maven / Gradle / Ivy

The newest version!
/*
 *  * Copyright 2016 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 */

package org.deeplearning4j.spark.data;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.nd4j.linalg.dataset.DataSet;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Function used to batch DataSet objects together. Typically used to combine singe-example DataSet objects out of
 * something like {@link org.deeplearning4j.spark.datavec.DataVecDataSetFunction} together into minibatches.
* * Usage: *
 * {@code
 *      RDD mySingleExampleDataSets = ...;
 *      RDD batchData = mySingleExampleDataSets.mapPartitions(new BatchDataSetsFunction(batchSize));
 * }
 * 
* * @author Alex Black */ public class BatchDataSetsFunction implements FlatMapFunction,DataSet> { private final int minibatchSize; public BatchDataSetsFunction(int minibatchSize) { this.minibatchSize = minibatchSize; } @Override public Iterable call(Iterator iter) throws Exception { List out = new ArrayList<>(); while(iter.hasNext()) { List list = new ArrayList<>(); int count = 0; while (count < minibatchSize && iter.hasNext()) { DataSet ds = iter.next(); count += ds.getFeatureMatrix().size(0); list.add(ds); } DataSet next; if (list.size() == 0) next = list.get(0); else next = DataSet.merge(list); out.add(next); } return out; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy