
// ai.h2o.mojos.runtime.frame.MojoColumn Maven / Gradle / Ivy
package ai.h2o.mojos.runtime.frame;
import ai.h2o.mojos.runtime.utils.MojoDateTime;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Container for values across a column in a {@link MojoFrame}.
*
* MojoColumn are merely wrappers of an array of data, meaning that a column instance does not have a name property.
* Columns are only "named" when they are in a MojoFrame. These structures are immutable in terms of size, but the
* values in its underlying array may be modified.
*/
public abstract class MojoColumn implements Serializable {
private static final Logger log = LoggerFactory.getLogger(MojoColumn.class);
/**
* @deprecated
*/
@Deprecated
public abstract String debug();
/**
* Get the type of data the column instance contains
*
* @return The column type
*/
public abstract Type getType();
/**
* Get the array reference that the column instance is wrapping.
*
* @return The array that is wrapped by the column. This value should be cast into an array with a type corresponding
* to the column instance's `Type` value
*/
public abstract Object getData();
/**
* Retrieve the data in a column as an array of Strings.
*
* @return The column data in String format
*/
public abstract String[] getDataAsStrings();
/**
* Get the length of the array wrapped by the column instance
*
* @return The length of the internal array
*/
abstract public int size();
abstract void resize(int n);
abstract void fillFromParsedListData(List values);
/**
* MojoColumn Type Enum
* ====================
*
* An enum for determining the type of array a MojoColumn instance is wrapping.
*/
public enum Type {
Bool(byte.class, Byte.MIN_VALUE, false, false, new Class[]{Boolean.class, Byte.class, Short.class, Integer.class, Float.class, Long.class, Double.class}) {
@Override
protected Object castFromJavaType(Object value) {
if (value instanceof Boolean) {
return (Boolean) value ? (byte) 1 : (byte) 0;
} else {
byte b = ((Number) value).byteValue();
// it's Byte type, check if it's either 1 or 0
if (b == 1) {
return true;
} else if (b == 0) {
return false;
} else {
throw new IllegalArgumentException("Only 1 or 0 is allowed when storing data into Boolean column from numeric types.");
}
}
}
@Override
protected Object parseImpl(String val) {
return parseBoolean(val) ? (byte) 1 : (byte) 0;
}
@Override
public boolean isNA(Object val) {
return val instanceof Byte && (((Byte) val).byteValue() == ((Byte) ona).byteValue());
}
},
Int32(int.class, Integer.MIN_VALUE, true, false, new Class[]{Byte.class, Short.class, Integer.class}) {
@Override
protected Object castFromJavaType(Object value) {
// To ensure the backend type is consistent
if (value instanceof Number) {
return ((Number) value).intValue();
}
return value;
}
@Override
protected Integer parseImpl(String val) {
try {
return Integer.valueOf(val);
} catch (NumberFormatException nfe) {
try {
final int result = (int) parseDoubleOrBoolean(val);
log.warn("Int32: value '{}' parsed as {}", val, result);
return result;
} catch (NumberFormatException tmp) {
throw nfe;
}
}
}
@Override
public boolean isNA(Object val) {
return val instanceof Integer && (((Integer) val).intValue() == ((Integer) ona).intValue());
}
},
Int64(long.class, Long.MIN_VALUE, true, false, new Class[]{Byte.class, Short.class, Integer.class, Long.class}) {
@Override
protected Long parseImpl(String val) {
try {
return Long.valueOf(val);
} catch (NumberFormatException nfe) {
try {
final long result = (long) parseDoubleOrBoolean(val);
log.warn("Int64: value '{}' parsed as {}", val, result);
return result;
} catch (NumberFormatException tmp) {
throw nfe;
}
}
}
@Override
public boolean isNA(Object val) {
return val instanceof Long && (((Long) val).longValue() == ((Long) ona).longValue());
}
},
Float32(float.class, Float.NaN, true, true, new Class[]{Byte.class, Short.class, Integer.class, Float.class, Double.class}) {
@Override
protected Float parseImpl(String val) {
try {
return Float.valueOf(val);
} catch (NumberFormatException nfe) {
try {
return (float) parseDoubleOrBoolean(val);
} catch (NumberFormatException tmp) {
throw nfe;
}
}
}
@Override
protected Object castFromJavaType(Object value) {
if (value instanceof Number) {
return ((Number) value).floatValue();
}
return value;
}
@Override
public boolean isNA(Object val) {
return val instanceof Float && Float.isNaN((Float) val);
}
},
Float64(double.class, Double.NaN, true, true, new Class[]{Byte.class, Short.class, Integer.class, Float.class, Long.class, Double.class}) {
@Override
protected Double parseImpl(String val) {
return parseDoubleOrBoolean(val);
}
@Override
protected Object castFromJavaType(Object value) {
if (value instanceof Number) {
return ((Number) value).doubleValue();
}
return value;
}
@Override
public boolean isNA(Object val) {
return val instanceof Double && Double.isNaN((Double) val);
}
},
Str(String.class, null, false, false, new Class[]{String.class, Character.class}) {
@Override
protected Object castFromJavaType(Object value) {
return value != null ? value.toString() : null;
}
@Override
public Object parse(String val) {
if (val != null && val.length() > 1) {
// TODO: Is this necessary?
int eidx = val.length() - 1;
if (val.charAt(0) == '"' && val.charAt(eidx) == '"') {
val = val.substring(1, eidx);
}
}
return val;
}
@Override
protected Object parseImpl(String val) {
throw new UnsupportedOperationException("This method should not be called!");
}
@Override
public boolean isNA(Object val) {
return val == ona;
}
},
Time64(MojoDateTime.class, null, false, false, new Class[]{java.sql.Timestamp.class, java.sql.Date.class}) {
@Override
protected Object castFromJavaType(Object value) {
return value != null ? MojoDateTime.parse(value.toString()) : null;
}
@Override
protected Object parseImpl(String val) {
return MojoDateTime.parse(val);
}
@Override
public boolean isNA(Object val) {
return val == ona;
}
};
/**
* This map holds all recognized representations of {@link Type#Bool} values.
* All other strings are considered parsing error.
*/
private static final Map MOJO2_BOOL_STRINGS = new LinkedHashMap<>();
static {
MOJO2_BOOL_STRINGS.put("true", Boolean.TRUE);
MOJO2_BOOL_STRINGS.put("True", Boolean.TRUE);
MOJO2_BOOL_STRINGS.put("TRUE", Boolean.TRUE);
MOJO2_BOOL_STRINGS.put("1", Boolean.TRUE);
MOJO2_BOOL_STRINGS.put("1.0", Boolean.TRUE);
MOJO2_BOOL_STRINGS.put("false", Boolean.FALSE);
MOJO2_BOOL_STRINGS.put("False", Boolean.FALSE);
MOJO2_BOOL_STRINGS.put("FALSE", Boolean.FALSE);
MOJO2_BOOL_STRINGS.put("0", Boolean.FALSE);
MOJO2_BOOL_STRINGS.put("0.0", Boolean.FALSE);
}
public final Object NULL;
public final Class> javaclass;
public final Object ona;
public final boolean isnumeric;
public final boolean isfloat;
final HashSet assignableFromJavaTypes;
Type(Class> javaclass, Object ona, boolean isnumeric, boolean isfloat, Class[] assignableFromJavaTypes) {
this.javaclass = javaclass;
this.ona = ona;
this.NULL = parse(null);
this.isnumeric = isnumeric;
this.isfloat = isfloat;
this.assignableFromJavaTypes = new HashSet<>(Arrays.asList(assignableFromJavaTypes));
}
/**
* A custom implementation of parsing boolean values. Unlike {@link Boolean#parseBoolean(String)}, this method
* throws a {@link NumberFormatException} if the String argument doesn't match any valid value.
*
* Valid values are defined in {@link #MOJO2_BOOL_STRINGS} constant.
*
* @param val The String to be used to retrieve a boolean value
* @return The boolean value `val` falls under
*/
private static boolean parseBoolean(String val) {
final Boolean result = MOJO2_BOOL_STRINGS.get(val.trim());
if (result == null) {
throw new NumberFormatException(String.format("For input string: '%s'", val));
}
return result;
}
private static double parseDoubleOrBoolean(String s) {
try {
return Double.parseDouble(s);
} catch (NumberFormatException e) {
try {
final double result = parseBoolean(s) ? 1.0 : 0.0;
log.warn("Bool value '{}' parsed as '{}' (double)", s, result); // very doubtful fallback, discused at https://github.com/h2oai/mojo2/pull/1145
return result;
} catch (NumberFormatException ignored) {
throw e;
}
}
}
public Object parse(String val) {
if (val == null || val.isEmpty()) {
return ona;
} else {
return parseImpl(val);
}
}
public boolean isAssignableFrom(Class> javaClazz) {
return assignableFromJavaTypes.contains(javaClazz);
}
private void failIfNotAssignableFrom(Class> javaClazz) {
if (!isAssignableFrom(javaClazz)) {
throw new ClassCastException(String.format("Mojo column of type %s can be assigned Java values only from the following types: %s , Java class on the input was: %s",
this.name(),
this.assignableFromJavaTypes.toString(),
javaClazz.getSimpleName()));
}
}
protected Object castFromJavaType(Object value) {
return value;
}
protected Object fromJavaClass(Object value) {
if (value != null) {
failIfNotAssignableFrom(value.getClass());
return castFromJavaType(value);
} else {
return null;
}
}
protected abstract Object parseImpl(String val);
public abstract boolean isNA(Object val);
}
public enum Kind {
Feature,
Output,
Interim,
}
}