
// ai.h2o.mojos.runtime.frame.MojoFrameBuilder Maven / Gradle / Ivy
package ai.h2o.mojos.runtime.frame;
import ai.h2o.mojos.runtime.api.MojoColumnMeta;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The builder is used for constructing a {@link MojoFrame}. A frame is constructed by the following procedure:
*
* 1. Get a MojoRowBuilder instance from the frame builder
* 2. Construct a row from the MojoRowBuilder
* 3. Append the resulting row to the frame builder
* 4. Repeat steps 1-3 until all rows are constructed
* 5. Construct the MojoFrame
*
* See {@link MojoRowBuilder}
*/
public class MojoFrameBuilder {
/** Class logger; used for trace output while a builder is being constructed. */
private static final Logger log = LoggerFactory.getLogger(MojoFrameBuilder.class);
/**
 * Column name to converter mapping this builder was created with. It is kept so that a builder
 * created from this one with shared columns can reuse the same mapping.
 */
private final Map<String, StringConverter> stringConvertersMap;
/**
 * Converter used for any column that has no explicit entry in the converter map.
 * It delegates parsing to the requested output type.
 */
public static final StringConverter DEFAULT_CONVERTER = new StringConverter() {
    @Override
    public Object convert(String s, MojoColumn.Type outputType) {
        return outputType.parse(s);
    }
};
/** Meta data describing the columns of the frame being built. */
private final MojoFrameMeta _meta;
/** Private copy of the strings that are interpreted as missing values. */
private final Set<String> _missingValues;
/** One column builder per column of {@link #_meta}, in column order. */
private final MojoColumnBuilder[] _columnBuilders;
/** One string converter per column of {@link #_meta}, in column order. */
private final StringConverter[] _stringConverters;
/**
 * Creates a frame builder with no missing-value strings and no custom string converters.
 *
 * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta})
 */
public MojoFrameBuilder(final MojoFrameMeta frameMeta) {
    this(
            frameMeta,
            Collections.<String>emptyList(),
            Collections.<String, StringConverter>emptyMap());
}
/**
 * Creates a frame builder that recognizes the given missing-value strings and uses no custom
 * string converters.
 *
 * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta})
 * @param missingValues Set of string values which are interpreted as missing value.
 */
public MojoFrameBuilder(final MojoFrameMeta frameMeta, final Set missingValues) {
    this(
            frameMeta,
            missingValues,
            Collections.<String, StringConverter>emptyMap());
}
/**
 * Creates a frame builder with explicit missing-value strings and per-column string converters.
 * The builder does not share any columns with another builder.
 *
 * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta})
 * @param missingValues Collection of string values which are interpreted as missing value.
 * @param stringConverters A `Map` that associates column names to their respective {@link StringConverter}.
 *                         `DEFAULT_CONVERTER` is used if a column's name is not found in the `Map`
 */
public MojoFrameBuilder(MojoFrameMeta frameMeta, Collection missingValues,
        Map stringConverters) {
    // No builder to share columns with: every column gets its own column builder.
    this(frameMeta, missingValues, stringConverters, (MojoFrameBuilder) null);
}
/**
 * Common constructor behind the public constructors and
 * {@link #withSharedColumns(MojoFrameMeta, MojoFrameBuilder)}.
 *
 * For each column of {@code frameMeta}, in order: if {@code shared} is given and its meta data
 * knows a column of the same name, the column builder and string converter of {@code shared}
 * are reused (the same instances, not copies). Otherwise a fresh column builder is created and
 * the converter is looked up by column name, falling back to {@link #DEFAULT_CONVERTER}.
 *
 * @param frameMeta        meta data of the frame to build
 * @param missingValues    strings interpreted as missing values; copied into a private set,
 *                         {@code null} is treated as "none"
 * @param stringConverters column name to converter map; when {@code null}, the map of
 *                         {@code shared} is used if available, otherwise an empty map
 * @param shared           builder whose columns are reused for matching names, or {@code null}
 */
private MojoFrameBuilder(MojoFrameMeta frameMeta, Collection<String> missingValues,
        Map<String, StringConverter> stringConverters, MojoFrameBuilder shared) {
    _meta = frameMeta;
    if (missingValues != null) {
        _missingValues = new HashSet<>(missingValues);
    } else {
        _missingValues = new HashSet<>(0);
    }

    final int columnCount = frameMeta.size();
    _columnBuilders = new MojoColumnBuilder[columnCount];
    _stringConverters = new StringConverter[columnCount];

    // Resolve the converter map: explicit argument first, then the shared builder's map,
    // and finally an empty map so a missing map means "use the default converter everywhere"
    // instead of failing with a NullPointerException on the first non-shared column.
    Map<String, StringConverter> converters = stringConverters;
    if (converters == null && shared != null) {
        converters = shared.stringConvertersMap;
    }
    if (converters == null) {
        converters = Collections.emptyMap();
    }

    log.trace("Contructing MojoFrameBuilder[{}]", columnCount);
    int i = 0;
    for (final MojoColumnMeta column : frameMeta.getColumns()) {
        final String name = column.getColumnName();
        if (shared != null) {
            // Reuse the shared builder's column when it has one of the same name.
            // NOTE(review): only the name is compared here, the column type is not checked.
            final Integer sharedColumnIndex = shared._meta.indexOf(name);
            if (sharedColumnIndex != null) {
                log.trace("shared: {} ({})", name, column.getColumnType());
                _columnBuilders[i] = shared._columnBuilders[sharedColumnIndex];
                _stringConverters[i] = shared._stringConverters[sharedColumnIndex];
                i++;
                continue;
            }
            log.trace("not shared: {} ({})", name, column.getColumnType());
        }
        _columnBuilders[i] = new MojoColumnBuilder(column.getColumnType());
        _stringConverters[i] = converters.getOrDefault(name, DEFAULT_CONVERTER);
        i++;
    }
    this.stringConvertersMap = converters;
}
/**
 * Creates an output frame builder in which some columns simply expose columns of an input
 * frame builder. Columns are matched by column name; matching columns reuse the column builder
 * and string converter of {@code shared}. The missing values and the converter map are taken
 * from {@code shared} as well.
 *
 * @param frameMeta output columns descriptor
 * @param shared the input frame builder to use for matching columns
 * @return output frame builder
 */
public static MojoFrameBuilder withSharedColumns(MojoFrameMeta frameMeta, MojoFrameBuilder shared) {
    // Passing no converter map makes the new builder fall back to the map held by `shared`.
    final Map<String, StringConverter> convertersFromShared = null;
    return new MojoFrameBuilder(frameMeta, shared._missingValues, convertersFromShared, shared);
}
/**
 * Builds a MojoFrame of `nrows` rows whose columns follow the given meta data. Each column is
 * obtained from the configured {@link MojoColumnFactory}; the values in this frame will all be NA.
 *
 * @param meta The meta data of the frame to be constructed
 * @param nrows The number of rows
 * @return A new MojoFrame filled with NA values
 */
public static MojoFrame getEmpty(MojoFrameMeta meta, int nrows) {
    final MojoColumnFactory factory = MojoColumnFactoryService.getInstance().getMojoColumnFactory();
    final int ncols = meta.size();
    final MojoColumn[] naColumns = new MojoColumn[ncols];
    for (int col = 0; col < ncols; col++) {
        // One factory-created column per meta entry, typed as the meta data prescribes.
        naColumns[col] = factory.create(meta.getColumnType(col), nrows);
    }
    return new MojoFrame(meta, naColumns, nrows);
}
/**
 * Wraps existing MojoColumns into a MojoFrame described by the provided meta data.
 *
 * The number of columns must equal the size of the meta data. The row count of the frame is
 * taken from the first column; every other non-null column must have the same number of rows
 * and the type declared for it in the meta data. Null entries in {@code columns} are not
 * validated.
 * NOTE(review): the first column is dereferenced unconditionally, so a null at index 0 fails
 * with a NullPointerException even though later null entries are skipped; confirm intent.
 *
 * @param meta The meta data to be used as a template
 * @param columns The columns to be used in the resulting frame
 * @return A new MojoFrame
 * @throws IllegalArgumentException if the column count, a row count or a column type does not match
 */
public static MojoFrame fromColumns(MojoFrameMeta meta, MojoColumn[] columns) {
    if (columns.length != meta.size()) {
        throw new IllegalArgumentException(String.format("Number of columns(%d) does not match size of frame meta (%d)",columns.length, meta.size()));
    }
    if (columns.length == 0) {
        // Nothing to validate: an empty frame has zero rows.
        return new MojoFrame(meta, columns, 0);
    }

    final int expectedRows = columns[0].size();
    final String referenceName = meta.getColumnName(0);
    int position = -1;
    for (MojoColumnMeta columnMeta : meta.getColumns()) {
        position++;
        final MojoColumn candidate = columns[position];
        if (candidate == null) {
            continue;
        }
        if (candidate.size() != expectedRows) {
            throw new IllegalArgumentException(String.format("Number of rows in columns %d ('%s') and 0 ('%s') do not match (%d != %d)",
                    position, columnMeta.getColumnName(),
                    referenceName,
                    candidate.size(), expectedRows));
        }
        if (candidate.getType() != columnMeta.getColumnType()) {
            throw new IllegalArgumentException(String.format("Type of column %d ('%s') does not match frame meta: %s != %s",
                    position, columnMeta.getColumnName(), candidate.getType(), columnMeta.getColumnType()));
        }
    }
    return new MojoFrame(meta, columns, expectedRows);
}
/**
 * Appends the row currently held by a MojoRowBuilder to this frame builder and then clears the
 * row builder so it can be reused for the next row.
 *
 * @param rowBuilder The MojoRowBuilder containing the row to be constructed and appended
 * @return The given MojoRowBuilder instance with its state reset
 */
public MojoRowBuilder addRow(MojoRowBuilder rowBuilder) {
    final MojoRow completedRow = rowBuilder.toMojoRow();
    addRow(completedRow);
    rowBuilder.clear();
    return rowBuilder;
}
/**
 * Appends the values of a row to the column builders, one value per column in column order.
 *
 * @param row the row to append
 * @throws IllegalArgumentException if the row does not hold exactly one value per column
 */
void addRow(MojoRow row) {
    final Object[] cells = row.getValues();
    final int ncols = _columnBuilders.length;
    if (cells.length != ncols) {
        throw new IllegalArgumentException("Row argument does not have the same column count as frame");
    }
    int col = 0;
    for (MojoColumnBuilder builder : _columnBuilders) {
        builder.pushValue(cells[col++]);
    }
}
/**
 * Returns a fresh, non-strict MojoRowBuilder for constructing rows of this frame builder.
 * Every call creates a new MojoRowBuilder instance.
 *
 * @return A MojoRowBuilder for constructing rows for this frame builder
 */
public MojoRowBuilder getMojoRowBuilder() {
    final boolean strictMode = false;
    return getMojoRowBuilder(strictMode);
}
/**
 * Returns a fresh MojoRowBuilder for constructing rows of this frame builder. Every call creates
 * a new MojoRowBuilder instance. The row builder is handed this builder's column name map,
 * column types, missing values and string converters.
 *
 * @param strictMode flag to determine if the created MojoRowBuilder should be in "strict" mode (see {@link MojoRowBuilder}).
 * @return A MojoRowBuilder for constructing rows for this frame builder
 */
public MojoRowBuilder getMojoRowBuilder(boolean strictMode) {
    final MojoFrameMeta meta = _meta;
    return new MojoRowBuilder(
            meta.getColumnNamesMap(),
            meta.getColumnTypes(),
            _missingValues,
            _stringConverters,
            strictMode);
}
/**
 * Builds a MojoFrame from everything appended to this builder so far. The number of rows is
 * the size of the first column builder, or zero when the frame has no columns.
 *
 * @return The constructed MojoFrame
 */
public MojoFrame toMojoFrame() {
    if (_columnBuilders.length == 0) {
        return toMojoFrame(0);
    }
    return toMojoFrame(_columnBuilders[0].size());
}
/**
 * Builds a MojoFrame from the current state of this builder with an explicit row count.
 * Each column builder is turned into a MojoColumn, which is then resized to {@code nrows}.
 *
 * @param nrows the number of rows of the resulting frame
 * @return The constructed MojoFrame
 */
public MojoFrame toMojoFrame(int nrows) {
    final MojoColumn[] built = new MojoColumn[_columnBuilders.length];
    int idx = 0;
    for (MojoColumnBuilder builder : _columnBuilders) {
        final MojoColumn column = builder.toMojoColumn();
        column.resize(nrows);
        built[idx++] = column;
    }
    return new MojoFrame(_meta, built, nrows);
}
private static class MojoColumnBuilder {
private final MojoColumn.Type colType;
private final List