All downloads are free. The search and download features use the official Maven repository.

ai.h2o.mojos.runtime.frame.MojoFrameBuilder Maven / Gradle / Ivy

There is a newer version: 2.8.7.1
Show newest version
package ai.h2o.mojos.runtime.frame;

import ai.h2o.mojos.runtime.api.MojoColumnMeta;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The builder is used for constructing a {@link MojoFrame}. A frame is constructed by the following procedure:
 * 

* 1. Get a MojoRowBuilder instance from the frame builder * 2. Construct a row from the MojoRowBuilder * 3. Append the resulting row to the frame builder * 4. Repeat steps 1-3 until all rows are constructed * 5. Construct the MojoFrame *

* See {@link MojoRowBuilder} */ public class MojoFrameBuilder { private static final Logger log = LoggerFactory.getLogger(MojoFrameBuilder.class); private final Map stringConvertersMap; public static final StringConverter DEFAULT_CONVERTER = new StringConverter() { @Override public Object convert(String s, MojoColumn.Type outputType) { return outputType.parse(s); } }; private final MojoFrameMeta _meta; private final Set _missingValues; private final MojoColumnBuilder[] _columnBuilders; private final StringConverter[] _stringConverters; /** * Constructor for a MojoFrameBuilder. * * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta}) */ public MojoFrameBuilder(final MojoFrameMeta frameMeta) { this(frameMeta, Collections.emptyList(), Collections.emptyMap()); } /** * Constructor for a MojoFrameBuilder. * * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta}) * @param missingValues List of string values which are interpreted as missing value. */ public MojoFrameBuilder(final MojoFrameMeta frameMeta, final Set missingValues) { this(frameMeta, missingValues, Collections.emptyMap()); } /** * Constructor for a MojoFrameBuilder. * * @param frameMeta The meta data for the resulting frame (see {@link MojoFrameMeta}) * @param missingValues List of string values which are interpreted as missing value. * @param stringConverters A `Map` that associates column names to their respective {@link StringConverter}. 
* `DEFAULT_CONVERTER` is used if a column's name is not found in the `Map` */ public MojoFrameBuilder(MojoFrameMeta frameMeta, Collection missingValues, Map stringConverters) { this(frameMeta, missingValues, stringConverters, null); } private MojoFrameBuilder(MojoFrameMeta frameMeta, Collection missingValues, Map stringConverters, MojoFrameBuilder shared) { _meta = frameMeta; if (missingValues != null) { _missingValues = new HashSet<>(missingValues); } else { _missingValues = new HashSet<>(0); } _columnBuilders = new MojoColumnBuilder[frameMeta.size()]; _stringConverters = new StringConverter[frameMeta.size()]; int i = 0; if (shared != null && stringConverters == null) { stringConverters = shared.stringConvertersMap; } log.trace("Contructing MojoFrameBuilder[{}]", frameMeta.size()); for (final MojoColumnMeta column : frameMeta.getColumns()) { final String name = column.getColumnName(); if (shared != null) { // if shared contains the same name, use its info final Integer sharedColumnIndex = shared._meta.indexOf(name); if (sharedColumnIndex != null) { log.trace("shared: {} ({})", name, column.getColumnType()); _columnBuilders[i] = shared._columnBuilders[sharedColumnIndex]; _stringConverters[i] = shared._stringConverters[sharedColumnIndex]; i++; continue; } log.trace("not shared: {} ({})", name, column.getColumnType()); } _columnBuilders[i] = new MojoColumnBuilder(column.getColumnType()); _stringConverters[i] = stringConverters.getOrDefault(name, DEFAULT_CONVERTER); i++; } this.stringConvertersMap = stringConverters; } /** * Use when you need some output columns to just expose input columns. 
* @param frameMeta output columns descriptor * @param shared the input frame builder to use for matching columns (= same name and type) * @return output frame builder */ public static MojoFrameBuilder withSharedColumns(MojoFrameMeta frameMeta, MojoFrameBuilder shared) { return new MojoFrameBuilder(frameMeta, shared._missingValues, null, shared); } /** * Create a MojoFrame with `nrows` rows based on the meta data provided. The values in this frame will all be NA. * * @param meta The meta data of the frame to be constructed * @param nrows The number of rows * @return A new MojoFrame filled with NA values */ public static MojoFrame getEmpty(MojoFrameMeta meta, int nrows) { final MojoColumnFactory mojoColumnFactory = MojoColumnFactoryService.getInstance().getMojoColumnFactory(); final MojoColumn[] columnsWithData = new MojoColumn[meta.size()]; for (int i = 0; i < meta.size(); i += 1) { final MojoColumn.Type colType = meta.getColumnType(i); columnsWithData[i] = mojoColumnFactory.create(colType, nrows); } return new MojoFrame(meta, columnsWithData, nrows); } /** * Create a MojoFrame from an array of MojoColumns as specified by the provided meta data. * * @param meta The meta data to be used as a template * @param columns The columns to be used in the resulting frame * @return A new MojoFrame */ public static MojoFrame fromColumns(MojoFrameMeta meta, MojoColumn[] columns) { if (columns.length != meta.size()) throw new IllegalArgumentException(String.format("Number of columns(%d) does not match size of frame meta (%d)",columns.length, meta.size())); final int nrows = columns.length == 0 ? 
0 : columns[0].size(); if (columns.length > 0) { final String firstColumnName = meta.getColumnName(0); int i = 0; for (MojoColumnMeta column : meta.getColumns()) { final MojoColumn c = columns[i]; if (c != null) { if (c.size() != nrows) { throw new IllegalArgumentException(String.format("Number of rows in columns %d ('%s') and 0 ('%s') do not match (%d != %d)", i, column.getColumnName(), firstColumnName, c.size(), nrows)); } if (c.getType() != column.getColumnType()) { throw new IllegalArgumentException(String.format("Type of column %d ('%s') does not match frame meta: %s != %s", i, column.getColumnName(), c.getType(), column.getColumnType())); } } i++; } } return new MojoFrame(meta, columns, nrows); } /** * Append a row from the current state of a MojoRowBuilder. The MojoRowBuilder will subsequently be reset. * * @param rowBuilder The MojoRowBuilder containing the row to be constructed and appended * @return The given MojoRowBuilder instance with its state reset */ public MojoRowBuilder addRow(MojoRowBuilder rowBuilder) { addRow(rowBuilder.toMojoRow()); rowBuilder.clear(); return rowBuilder; } void addRow(MojoRow row) { Object[] values = row.getValues(); if (values.length != _columnBuilders.length) throw new IllegalArgumentException("Row argument does not have the same column count as frame"); for (int i = 0; i < _columnBuilders.length; i += 1) { _columnBuilders[i].pushValue(values[i]); } } /** * Get an instance of a MojoRowBuilder that can be used to construct a row for this builder. Each call to this method * creates a new MojoRowBuilder instance * * @return A MojoRowBuilder for constructing rows for this frame builder */ public MojoRowBuilder getMojoRowBuilder() { return getMojoRowBuilder(false); } /** * Get an instance of a MojoRowBuilder that can be used to construct a row for this builder. 
Each call to this method * creates a new MojoRowBuilder instance * * @param strictMode flag to determine if the created MojoRowBuilder should be in "strict" mode (see {@link MojoRowBuilder}). * @return A MojoRowBuilder for constructing rows for this frame builder */ public MojoRowBuilder getMojoRowBuilder(boolean strictMode) { return new MojoRowBuilder(_meta.getColumnNamesMap(), _meta.getColumnTypes(), _missingValues, _stringConverters, strictMode); } /** * Create a MojoFrame from the current state of this builder * * @return The constructed MojoFrame */ public MojoFrame toMojoFrame() { final int nrows = _columnBuilders.length == 0 ? 0 : _columnBuilders[0].size(); return toMojoFrame(nrows); } public MojoFrame toMojoFrame(int nrows) { final MojoColumn[] columns = new MojoColumn[_columnBuilders.length]; for (int i = 0; i < columns.length; i += 1) { columns[i] = _columnBuilders[i].toMojoColumn(); columns[i].resize(nrows); } return new MojoFrame(_meta, columns, nrows); } private static class MojoColumnBuilder { private final MojoColumn.Type colType; private final List values = new ArrayList<>(); MojoColumnBuilder(MojoColumn.Type type) { colType = type; } void pushValue(Object value) { values.add(value == null ? colType.NULL : value); } MojoColumn toMojoColumn() { final MojoColumn col = MojoColumnFactoryService.getInstance().getMojoColumnFactory().create(colType, values.size()); col.fillFromParsedListData(values); return col; } int size() { return values.size(); } } }