All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.plot.vega.Transform Maven / Gradle / Ivy

There is a newer version: 4.3.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */
package smile.plot.vega;

import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;

/**
 * View-level data transformations such as filter and new field calculation.
 * When both view-level transforms and field transforms inside encoding are
 * specified, the view-level transforms are executed first based on the order
 * in the array. Then the inline transforms are executed in this order: bin,
 * timeUnit, aggregate, sort, and stack.
 *
 * @author Haifeng Li
 */
public class Transform {
    /** VegaLite's Transform definition object. */
    final ArrayNode spec;

    /**
     * Hides the constructor so that users cannot create the instances directly.
     */
    Transform(ArrayNode spec) {
        this.spec = spec;
    }

    @Override
    public String toString() {
        return spec.toString();
    }

    /**
     * Returns the specification in pretty print.
     * @return the specification in pretty print.
     */
    public String toPrettyString() {
        return spec.toPrettyString();
    }

    /**
     * Aggregate summarizes a table as one record for each group.
     * To preserve the original table structure and instead add
     * a new column with the aggregate values, use the join aggregate
     * transform.
     *
     * @param op The aggregation operation to apply to the fields
     *          (e.g., "sum", "average", or "count").
     * @param field The data field for which to compute aggregate function.
     *             This is required for all aggregation operations except "count".
     * @param as The output field names to use for each aggregated field.
     * @param groupby The data fields to group by. If not specified, a single
     *               group containing all data objects will be used.
     * @return this object.
     */
    public Transform aggregate(String op, String field, String as, String... groupby) {
        ObjectNode node = spec.addObject();
        ArrayNode a = node.putArray("aggregate");
        a.addObject()
                .put("op", op)
                .put("field", field)
                .put("as", as);
        ArrayNode g = node.putArray("groupby");
        for (var f : groupby) {
            g.add(f);
        }
        return this;
    }

    /**
     * The join-aggregate transform extends the input data objects with
     * aggregate values in a new field. Aggregation is performed and the
     * results are then joined with the input data. This transform can be
     * helpful for creating derived values that combine both raw data and
     * aggregate calculations, such as percentages of group totals. This
     * transform is a special case of the window transform where the frame
     * is always [null, null]. Compared with the regular aggregate transform,
     * join-aggregate preserves the original table structure and augments
     * records with aggregate values rather than summarizing the data in
     * one record for each group.
     *
     * @param op The aggregation operation to apply to the fields
     *          (e.g., "sum", "average", or "count").
     * @param field The data field for which to compute aggregate function.
     *             This is required for all aggregation operations except "count".
     * @param as The output field names to use for each aggregated field.
     * @param groupby The data fields to group by. If not specified, a single
     *               group containing all data objects will be used.
     * @return this object.
     */
    public Transform joinAggregate(String op, String field, String as, String... groupby) {
        ObjectNode node = spec.addObject();
        ArrayNode a = node.putArray("joinaggregate");
        a.addObject()
                .put("op", op)
                .put("field", field)
                .put("as", as);
        ArrayNode g = node.putArray("groupby");
        for (var f : groupby) {
            g.add(f);
        }
        return this;
    }

    /**
     * Adds a bin transformation.
     *
     * @param field The data field to bin.
     * @param as The output fields at which to write the start and end bin values.
     * @return this object.
     */
    public Transform bin(String field, String as) {
        ObjectNode node = spec.addObject();
        node.put("bin", true)
                .put("field", field)
                .put("as", as);
        return this;
    }

    /**
     * Adds a formula transform extends data objects with new fields
     * (columns) according to an expression.
     * @param expr an expression string. Use the variable datum to refer
     *            to the current data object.
     * @param field the field for storing the computed formula value.
     * @return this object.
     */
    public Transform calculate(String expr, String field) {
        ObjectNode node = spec.addObject();
        node.put("calculate", expr);
        node.put("as", field);
        return this;
    }

    /**
     * Adds a density transformation.
     *
     * @param field The data field for which to perform density estimation.
     * @param groupby The data fields to group by. If not specified, a single
     *               group containing all data objects will be used.
     * @return this object.
     */
    public DensityTransform density(String field, String... groupby) {
        ObjectNode node = spec.addObject().put("density", field);
        if (groupby.length > 0) {
            ArrayNode array = node.putArray("groupby");
            for (var f : groupby) {
                array.add(f);
            }
        }
        return new DensityTransform(node);
    }

    /**
     * Adds an extent transform. The extent transform finds the extent
     * of a field and stores the result in a parameter.
     *
     * @param field The field of which to get the extent.
     * @param param The output parameter produced by the extent transform.
     * @return this object.
     */
    public Transform extent(String field, String param) {
        ObjectNode node = spec.addObject();
        node.put("extent", field).put("param", param);
        return this;
    }

    /**
     * Adds a flatten transform. The flatten transform maps array-valued fields
     * to a set of individual data objects, one per array entry. This transform
     * generates a new data stream in which each data object consists of an
     * extracted array value as well as all the original fields of the
     * corresponding input data object.
     *
     * @param fields An array of one or more data fields containing arrays to
     *              flatten. If multiple fields are specified, their array
     *              values should have a parallel structure, ideally with the
     *              same length. If the lengths of parallel arrays do not
     *              match, the longest array will be used with null values
     *              added for missing entries.
     * @param output The output parameter produced by the extent transform.
     * @return this object.
     */
    public Transform flatten(String[] fields, String[] output) {
        ObjectNode node = spec.addObject();
        ArrayNode array = node.putArray("flatten");
        for (var field : fields) {
            array.add(field);
        }

        array = node.putArray("as");
        for (var field : output) {
            array.add(field);
        }
        return this;
    }

    /**
     * Adds a fold transform. The fold transform collapses (or "folds") one or
     * more data fields into two properties: a key property (containing the
     * original data field name) and a value property (containing the data value).
     * 

* The fold transform is useful for mapping matrix or cross-tabulation data * into a standardized format. *

* This transform generates a new data stream in which each data object * consists of the key and value properties as well as all the original * fields of the corresponding input data object. *

* Note: The fold transform only applies to a list of known fields (set * using the fields parameter). If your data objects instead contain * array-typed fields, you may wish to use the flatten transform instead. * * @param fields An array of data fields indicating the properties to fold. * @param output The output field names for the key and value properties * produced by the fold transform. * @return this object. */ public Transform fold(String[] fields, String[] output) { ObjectNode node = spec.addObject(); ArrayNode array = node.putArray("fold"); for (var field : fields) { array.add(field); } array = node.putArray("as"); for (var field : output) { array.add(field); } return this; } /** * Adds a filter transform. * @param predicate an expression string, where datum can be used to refer * to the current data object. For example, "datum.b2 > 60" * would make the output data includes only items that have * values in the field b2 over 60. * @return this object. */ public Transform filter(String predicate) { ObjectNode node = spec.addObject(); node.put("filter", predicate); return this; } /** * Adds a filter transform. * @param predicate a predicate object. * @return this object. */ public Transform filter(Predicate predicate) { ObjectNode node = spec.addObject(); node.set("filter", predicate.spec); return this; } /** * Adds an impute transform. * @param field The data field for which the missing values should be imputed. * @param key A key field that uniquely identifies data objects within a group. * Missing key values (those occurring in the data but not in the * current group) will be imputed. * @return an impute transform object. */ public ImputeTransform impute(String field, String key) { ObjectNode node = spec.addObject().put("impute", field); node.put("key", key); return new ImputeTransform(node); } /** * Adds a loess transform. * @param field The data field of the dependent variable to smooth. 
* @param on The data field of the independent variable to use a predictor. * @return a loess transform object. */ public LoessTransform loess(String field, String on) { ObjectNode node = spec.addObject().put("loess", field); node.put("on", on); return new LoessTransform(node); } /** * Adds a lookup transformation. * @param key the key in primary data source. * @param param Selection parameter name to look up. * @return this object. */ public Transform lookup(String key, String param) { ObjectNode node = spec.addObject(); node.put("lookup", key); node.putObject("from").put("param", param); return this; } /** * Adds a lookup transformation. * @param key the key in primary data source. * @param from the data source or selection for secondary data reference. * @return this object. */ public Transform lookup(String key, LookupData from) { ObjectNode node = spec.addObject(); node.put("lookup", key).set("from", from.spec); return this; } /** * Creates a lookup data. * * @param key the key in data to lookup. * @return a lookup data. */ public LookupData lookupData(String key) { ObjectNode node = VegaLite.mapper.createObjectNode().put("key", key); Data data = new Data(); node.set("data", data.spec); return new LookupData(node, data); } /** * Adds a pivot transform. * @param field The data field to pivot on. The unique values of this * field become new field names in the output stream. * @param value The data field to populate pivoted fields. The aggregate * values of this field become the values of the new pivoted * fields. * @return a pivot transform object. */ public PivotTransform pivot(String field, String value) { ObjectNode node = spec.addObject().put("pivot", field); node.put("value", value); return new PivotTransform(node); } /** * Adds a quantile transform. * @param field The data field for which to perform quantile estimation. * @return a quantile transform object. 
*/ public QuantileTransform quantile(String field) { ObjectNode node = spec.addObject().put("quantile", field); return new QuantileTransform(node); } /** * Adds a regression transform. * @param field The data field of the dependent variable to predict. * @param on The data field of the independent variable to use a predictor. * @return a regression transform object. */ public RegressionTransform regression(String field, String on) { ObjectNode node = spec.addObject().put("regression", field); node.put("on", on); return new RegressionTransform(node); } /** * Adds a sample transform. The sample transform filters random rows from * the data source to reduce its size. As input data objects are added and * removed, the sampled values may change in first-in, first-out manner. * This transform uses reservoir sampling to maintain a representative * sample of the stream. * * @param size The maximum number of data objects to include in the sample. * @return this object. */ public Transform sample(int size) { spec.addObject().put("sample", size); return this; } /** * Adds a stack transform. * * @param field The field which is stacked. * @param groupby The data fields to group by. * @param as the output start field name. The end field will be "$as_end". * @return a stack transform object. */ public StackTransform stack(String field, String as, String... groupby) { ObjectNode node = spec.addObject().put("stack", field); ArrayNode g = node.putArray("groupby"); for (var f : groupby) { g.add(f); } node.put("as", as); return new StackTransform(node); } /** * Adds a time unit transform. * * @param timeUnit The timeUnit. * @param field The data field to apply time unit. * @param as The output field to write the timeUnit value. * @return this object. */ public Transform timeUnit(String timeUnit, String field, String as) { spec.addObject().put("timeUnit", timeUnit).put("field", field).put("as", as); return this; } /** * Creates a data specification object. 
* @return a data specification object. */ public WindowTransform window(WindowTransformField... fields) { ObjectNode node = spec.addObject(); ArrayNode array = node.putArray("window"); for (var field : fields) { array.addObject() .put("op", field.op()) .put("field", field.field()) .put("param", field.param()) .put("as", field.as()); } return new WindowTransform(node); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy