com.davidbracewell.apollo.ml.data.DatasetBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of apollo Show documentation
Show all versions of apollo Show documentation
A machine learning library for Java.
The newest version!
package com.davidbracewell.apollo.ml.data;
import com.davidbracewell.apollo.ml.*;
import com.davidbracewell.apollo.ml.data.source.DataSource;
import com.davidbracewell.apollo.ml.encoder.Encoder;
import com.davidbracewell.apollo.ml.encoder.IndexEncoder;
import com.davidbracewell.apollo.ml.encoder.LabelEncoder;
import com.davidbracewell.apollo.ml.preprocess.PreprocessorList;
import com.davidbracewell.guava.common.base.Throwables;
import com.davidbracewell.io.resource.Resource;
import com.davidbracewell.stream.MStream;
import com.davidbracewell.stream.StreamingContext;
import lombok.NonNull;
import java.io.IOException;
import java.util.Collection;
import java.util.stream.Stream;
/**
* Builder for {@link Dataset}
*
* @param the example type parameter
* @author David B. Bracewell
*/
public class DatasetBuilder {
private final LabelEncoder labelEncoder;
private final Class exampleType;
private DataSource dataSource;
private DatasetType type = DatasetType.InMemory;
private Encoder featureEncoder = new IndexEncoder();
private MStream source;
private Resource load;
/**
* Instantiates a new Dataset builder.
*
* @param labelEncoder the label encoder
* @param exampleType the example type
*/
protected DatasetBuilder(@NonNull LabelEncoder labelEncoder, @NonNull Class exampleType) {
this.labelEncoder = labelEncoder;
this.exampleType = exampleType;
}
private Dataset createDataset() {
switch (type) {
case Distributed:
return new DistributedDataset<>(featureEncoder, labelEncoder, PreprocessorList.empty());
case OffHeap:
return new OffHeapDataset<>(featureEncoder, labelEncoder, PreprocessorList.empty());
default:
return new InMemoryDataset<>(featureEncoder, labelEncoder, PreprocessorList.empty());
}
}
/**
* Sets the feature encoder to use.
*
* @param featureEncoder the feature encoder
*/
public DatasetBuilder featureEncoder(@NonNull Encoder featureEncoder) {
this.featureEncoder = featureEncoder;
return this;
}
/**
* Sets the feature encoder to use.
*
* @param datasetFile the feature encoder
*/
public Dataset load(@NonNull Resource datasetFile) {
try {
return createDataset().read(datasetFile, exampleType);
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
/**
* Sets the streaming source from a Java Stream.
*
* @param stream the stream
* @return the dataset builder
*/
public Dataset source(@NonNull Stream stream) {
Dataset dataset = createDataset();
dataset.addAll(StreamingContext.local().stream(stream));
return dataset;
}
/**
* Sets the streaming source from a collection of examples.
*
* @param collection the collection of examples
* @return the dataset builder
*/
public Dataset source(@NonNull Collection collection) {
Dataset dataset = createDataset();
dataset.addAll(StreamingContext.local().stream(collection));
return dataset;
}
/**
* Sets the examples to be read in from the given data source.
*
* @param dataSource the data source
* @return the dataset builder
*/
public Dataset source(@NonNull DataSource dataSource) {
Dataset dataset = createDataset();
dataSource.setStreamingContext(type.getStreamingContext());
try {
dataset.addAll(dataSource.stream());
} catch (IOException e) {
throw Throwables.propagate(e);
}
return dataset;
}
/**
* Sets the streaming source from a Mango Stream.
*
* @param stream the stream
* @return the dataset builder
*/
public Dataset source(@NonNull MStream stream) {
Dataset dataset = createDataset();
dataset.addAll(stream);
return dataset;
}
/**
* Sets the type (In-Memory, Distributed, or Off Heap) of the dataset
*
* @param type the type
* @return the dataset builder
*/
public DatasetBuilder type(@NonNull DatasetType type) {
this.type = type;
return this;
}
}// END OF DatasetBuilder
© 2015 - 2025 Weber Informatics LLC | Privacy Policy