All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hex.schemas.KMeansV3 Maven / Gradle / Ivy

There is a newer version: 3.46.0.6
Show newest version
package hex.schemas;

import hex.kmeans.KMeans;
import hex.kmeans.KMeansModel.KMeansParameters;
import water.api.API;
import water.api.schemas3.ClusteringModelParametersSchemaV3;
import water.api.schemas3.KeyV3;

public class KMeansV3 extends ClusteringModelBuilderSchema {

  public static final class KMeansParametersV3 extends ClusteringModelParametersSchemaV3 {
    static public String[] fields = new String[] {
        "model_id",
        "training_frame",
        "validation_frame",
        "nfolds",
        "keep_cross_validation_models",
        "keep_cross_validation_predictions",
        "keep_cross_validation_fold_assignment",
        "fold_assignment",
        "fold_column",
        "ignored_columns",
        "ignore_const_cols",
        "score_each_iteration",
        "k",
        "estimate_k",
        "user_points",
        "max_iterations",
        "standardize",
        "seed",
        "init",
        "max_runtime_secs",
        "categorical_encoding",
        "export_checkpoints_dir", 
        "cluster_size_constraints"
    };

    // Input fields
    @API(help = "This option allows you to specify a dataframe, where each row represents an initial cluster center. " +
            "The user-specified points must have the same number of columns as the training observations. " +
            "The number of rows must equal the number of clusters", required = false, level = API.Level.expert)
    public KeyV3.FrameKeyV3 user_points;

    @API(help="Maximum training iterations (if estimate_k is enabled, then this is for each inner Lloyds iteration)", gridable = true)
    public int max_iterations;        // Max iterations

    @API(help = "Standardize columns before computing distances", level = API.Level.critical, gridable = true)
    public boolean standardize = true;

    @API(help = "RNG Seed", level = API.Level.secondary /* tested, works: , dependsOn = {"k", "max_iterations"} */, gridable = true)
    public long seed;

    @API(help = "Initialization mode", values = { "Random", "PlusPlus", "Furthest", "User" }, gridable = true) // TODO: pull out of categorical class. . .
    public KMeans.Initialization init;

    @API(help = "Whether to estimate the number of clusters (<=k) iteratively and deterministically.", level = API.Level.critical, gridable = true)
    public boolean estimate_k = false;

    @API(help = "An array specifying the minimum number of points that should be in each cluster. The length of the constraints array has to be the same as the number of clusters.", level = API.Level.expert)
    public int[] cluster_size_constraints = null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy