// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.bigml.mimir.utils.fields.Field (Maven / Gradle / Ivy)

package org.bigml.mimir.utils.fields;

import java.io.Serializable;
import java.util.List;

import com.fasterxml.jackson.databind.JsonNode;

/**
 * A class representing a BigML "field", which in turn represents a "column" of
 * the input data. Fields are aware of their name and BigML id. The primary
 * workflow for these fields is to be passed an "empty" double array along with
 * some input value, for the field to fill the array with values according to
 * the input. So for text fields, all positions in the array corresponding to
 * token counts for the given field will be filled with the token counts for
 * that field. This "output array" is then passed to the guts of the prediction
 * algorithm.
 *
 * Note that fields can be specific to the underlying prediction algorithm;
 * Deepnets, for example, expect an additional element to represent a "missing"
 * value whereas trees expect no such thing.
 *
 * @author [email protected]
 *
 */
public abstract class Field implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Name of this field; {@code null} when none was supplied. */
    public final String _name;

    /** BigML id of this field; {@code null} when none was supplied. */
    public final String _id;

    /** Index of this field in the original input, as given to the constructor. */
    protected final int _inputIndex;

    /** Position in the output array at which this field's values begin. */
    protected int _outputStart;

    /**
     * Number of output positions this field fills. This class never assigns
     * it; presumably subclasses set it during construction (a value of 0 is
     * treated as "not set" by the output-start accessors).
     */
    protected int _outputSize;

    /**
     * Creates a field with an explicit id and name.
     *
     * @param id      the field ID
     * @param name    the field name
     * @param inIndex the index of this field in the original input
     */
    public Field(String id, String name, int inIndex) {
        _name = name;
        _id = id;
        _inputIndex = inIndex;
    }

    /**
     * Creates a field whose name is read from the {@code "name"} property of
     * a JSON descriptor. The name is {@code null} when the descriptor is
     * {@code null} or has no {@code "name"} property.
     *
     * @param id         the field ID
     * @param descriptor the JSON description of the field, may be
     *                   {@code null}
     * @param inIndex    the index of this field in the original input
     */
    public Field(String id, JsonNode descriptor, int inIndex) {
        _id = id;
        _inputIndex = inIndex;

        // JsonNode.get returns null for an absent property; guard against it
        // so that a descriptor without "name" behaves like a null descriptor
        // instead of throwing a NullPointerException.
        JsonNode nameNode = (descriptor == null) ? null : descriptor.get("name");
        _name = (nameNode == null) ? null : nameNode.asText();
    }

    /**
     * Creates an anonymous field (no id, no name) at the given input index.
     *
     * @param inIndex the index of this field in the original input
     */
    public Field(int inIndex) {
        _name = null;
        _id = null;
        _inputIndex = inIndex;
    }

    /**
     * Creates a field from a JSON descriptor with an input index of -1.
     *
     * @param id         the field ID
     * @param descriptor the JSON description of the field, may be
     *                   {@code null}
     */
    public Field(String id, JsonNode descriptor) {
        this(id, descriptor, -1);
    }

    /**
     * Given an input value, fill the output array with one or more values
     * corresponding to the given input. For example, a text value will populate
     * all positions of the array corresponding to the token counts for that
     * field.
     *
     * @param value  the input field value
     * @param output the array to fill based on the input value
     * @param start  the index in {@code output} at which this field's values
     *               begin
     */
    public abstract void intoDoubles(Object value, double[] output, int start);

    /**
     * Fills the output array starting at this field's stored output start.
     * Note that the stored value is used directly, without the "not set"
     * check performed by {@link #getOutputStart()}.
     *
     * @param value  the input field value
     * @param output the array to fill based on the input value
     */
    public void intoDoubles(Object value, double[] output) {
        intoDoubles(value, output, _outputStart);
    }

    /**
     * Computes this field's output values into a temporary array of length
     * {@code _outputSize} and appends each of them, boxed, to the given list.
     *
     * @param value  the input field value
     * @param output the list to which the computed values are appended
     */
    // The raw List parameter is kept so existing callers and overriding
    // subclasses continue to compile unchanged.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void intoList(Object value, List output) {
        double[] values = new double[_outputSize];
        intoDoubles(value, values, 0);
        for (double d : values) {
            output.add(d);
        }
    }

    /**
     * Gets the ID of this field, passed into the constructor
     *
     * @return the field ID
     */
    public String getId() {
        return _id;
    }

    /**
     * Gets the name of this field, passed into the constructor
     *
     * @return the field name
     */
    public String getName() {
        return _name;
    }

    /**
     * Gets the original input index of this field, passed into the constructor
     *
     * @return the input index
     */
    public int getInputIndex() {
        return _inputIndex;
    }

    /**
     * Gets the number of positions of the output array that will be filled by
     * this field, such as the number of vocabulary words for a text field.
     *
     * @return the number of positions
     */
    public int getOutputSize() {
        return _outputSize;
    }

    /**
     * Gets the starting position in the output array for this field, such that
     * all positions from {@code getOutputStart()} to
     * {@code getOutputStart() + getOutputSize()} are determined by the given
     * input to this field.
     *
     * @return the starting index in the output array
     * @throws IllegalStateException if the output size is still 0
     */
    public int getOutputStart() {
        if (_outputSize == 0) {
            throw new IllegalStateException("_outputStart not set!");
        }

        return _outputStart;
    }

    /**
     * Sets the starting position of the output array for this field, such that
     * all positions from {@code index} to {@code index + getOutputSize()} are
     * determined by the given input to this field.
     *
     * @param index the starting index in the output array
     * @throws IllegalStateException if the output size is still 0
     */
    public void setOutputStart(int index) {
        if (_outputSize == 0) {
            throw new IllegalStateException("_outputSize not set!");
        }

        _outputStart = index;
    }
}