// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
package googleapis.bigquery
import io.circe._
import io.circe.syntax._
/** Set of training options for a model in the `googleapis.bigquery` package.
  *
  * Every field is an `Option` that defaults to `None`, so an instance can be built by naming only the options that
  * are actually set. JSON codecs for this type are provided by the companion object.
  *
  * NOTE(review): the parameter order is part of the public interface (positional construction, pattern matching);
  * do not reorder fields.
  */
final case class TrainingOptions(
/** Optional. Names of the columns to slice on. Applies to contribution analysis models.
*/
dimensionIdColumns: Option[List[String]] = None,
/** Number of trials to run this hyperparameter tuning job.
*/
numTrials: Option[Long] = None,
/** L2 regularization coefficient.
*/
l2Regularization: Option[Double] = None,
/** The geographical region based on which the holidays are considered in time series modeling. If a valid value is specified, then holiday effects modeling is enabled.
*/
holidayRegion: Option[TrainingOptionsHolidayRegion] = None,
/** The version aliases to apply in Vertex AI model registry. Always overwrite if the version aliases exist in an existing model.
*/
vertexAiModelVersionAliases: Option[List[String]] = None,
/** Column to be designated as time series data for ARIMA model.
*/
timeSeriesDataColumn: Option[String] = None,
/** Optimizer used for training the neural nets.
*/
optimizer: Option[String] = None,
/** Specifies the initial learning rate for the line search learn rate strategy.
*/
initialLearnRate: Option[Double] = None,
/** The fraction of the interpolated length of the time series that's used to model the time series trend component. All of the time points of the time series are used to model the non-trend component. This training option accelerates modeling training without sacrificing much forecasting accuracy. You can use this option with `minTimeSeriesLength` but not with `maxTimeSeriesLength`.
*/
timeSeriesLengthFraction: Option[Double] = None,
/** A specification of the non-seasonal part of the ARIMA model: the three components (p, d, q) are the AR order, the degree of differencing, and the MA order.
*/
nonSeasonalOrder: Option[ArimaOrder] = None,
/** Batch size for dnn models.
*/
batchSize: Option[Long] = None,
/** The number of periods ahead that need to be forecasted.
*/
horizon: Option[Long] = None,
/** The column used to provide the initial centroids for kmeans algorithm when kmeans_initialization_method is CUSTOM.
*/
kmeansInitializationColumn: Option[String] = None,
/** If true, enable global explanation during training.
*/
enableGlobalExplain: Option[Boolean] = None,
/** The data split type for training and evaluation, e.g. RANDOM.
*/
dataSplitMethod: Option[TrainingOptionsDataSplitMethod] = None,
/** Feedback type that specifies which algorithm to run for matrix factorization.
*/
feedbackType: Option[TrainingOptionsFeedbackType] = None,
/** Whether to calculate class weights automatically based on the popularity of each label.
*/
autoClassWeights: Option[Boolean] = None,
/** The target evaluation metrics to optimize the hyperparameters for.
*/
hparamTuningObjectives: Option[List[TrainingOptionsHparamTuningObjective]] = None,
/** The maximum number of time points in a time series that can be used in modeling the trend component of the time series. Don't use this option with the `timeSeriesLengthFraction` or `minTimeSeriesLength` options.
*/
maxTimeSeriesLength: Option[Long] = None,
/** Dropout probability for dnn models.
*/
dropout: Option[Double] = None,
/** Minimum sum of instance weight needed in a child for boosted tree models.
*/
minTreeChildWeight: Option[Long] = None,
/** Whether to standardize numerical features. Defaults to true.
*/
standardizeFeatures: Option[Boolean] = None,
/** Subsample ratio of columns for each level for boosted tree models.
*/
colsampleBylevel: Option[Double] = None,
/** Number of integral steps for the integrated gradients explain method.
*/
integratedGradientsNumSteps: Option[Long] = None,
/** Distance type for clustering models.
*/
distanceType: Option[TrainingOptionsDistanceType] = None,
/** Subsample fraction of the training data to grow tree to prevent overfitting for boosted tree models.
*/
subsample: Option[Double] = None,
/** Google Cloud Storage URI from which the model was imported. Only applicable for imported models.
*/
modelUri: Option[String] = None,
/** Type of normalization algorithm for boosted tree models using dart booster.
*/
dartNormalizeType: Option[TrainingOptionsDartNormalizeType] = None,
/** Name of the instance weight column for training data. This column isn't used as a feature.
*/
instanceWeightColumn: Option[String] = None,
/** Number of paths for the sampled Shapley explain method.
*/
sampledShapleyNumPaths: Option[Long] = None,
/** Whether to train a model from the last checkpoint.
*/
warmStart: Option[Boolean] = None,
/** The method used to initialize the centroids for kmeans algorithm.
*/
kmeansInitializationMethod: Option[TrainingOptionsKmeansInitializationMethod] = None,
/** Number of clusters for clustering models.
*/
numClusters: Option[Long] = None,
/** Activation function of the neural nets.
*/
activationFn: Option[String] = None,
/** The maximum number of iterations in training. Used only for iterative training algorithms.
*/
maxIterations: Option[Long] = None,
/** The time series id column that was used during ARIMA model training.
*/
timeSeriesIdColumn: Option[String] = None,
/** Budget in hours for AutoML training.
*/
budgetHours: Option[Double] = None,
/** The contribution metric. Applies to contribution analysis models. Allowed formats supported are for summable and summable ratio contribution metrics. These include expressions such as "SUM(x)" or "SUM(x)/SUM(y)", where x and y are column names from the base table.
*/
contributionMetric: Option[String] = None,
/** User column specified for matrix factorization models.
*/
userColumn: Option[String] = None,
/** The time series id columns that were used during ARIMA model training.
*/
timeSeriesIdColumns: Option[List[String]] = None,
/** Maximum number of trials to run in parallel.
*/
maxParallelTrials: Option[Long] = None,
/** The minimum ratio of cumulative explained variance that needs to be given by the PCA model.
*/
pcaExplainedVarianceRatio: Option[Double] = None,
/** Item column specified for matrix factorization models.
*/
itemColumn: Option[String] = None,
/** If true, detect step changes and make data adjustment in the input time series.
*/
adjustStepChanges: Option[Boolean] = None,
/** Whether the model should include intercept during model training.
*/
fitIntercept: Option[Boolean] = None,
/** Names of the input label columns in training data.
*/
inputLabelColumns: Option[List[String]] = None,
/** Booster type for boosted tree models.
*/
boosterType: Option[TrainingOptionsBoosterType] = None,
/** The model registry.
*/
modelRegistry: Option[TrainingOptionsModelRegistry] = None,
/** Column to be designated as time series timestamp for ARIMA model.
*/
timeSeriesTimestampColumn: Option[String] = None,
/** Subsample ratio of columns for each node(split) for boosted tree models.
*/
colsampleBynode: Option[Double] = None,
/** Hyperparameter for matrix factorization when implicit feedback type is specified.
*/
walsAlpha: Option[Double] = None,
/** L1 regularization coefficient to activations.
*/
l1RegActivation: Option[Double] = None,
/** The strategy to determine learn rate for the current iteration.
*/
learnRateStrategy: Option[TrainingOptionsLearnRateStrategy] = None,
/** Based on the selected TF version, the corresponding docker image is used to train external models.
*/
tfVersion: Option[String] = None,
/** Minimum split loss for boosted tree models.
*/
minSplitLoss: Option[Double] = None,
/** Learning rate in training. Used only for iterative training algorithms.
*/
learnRate: Option[Double] = None,
/** Whether or not p-value test should be computed for this model. Only available for linear and logistic regression models.
*/
calculatePValues: Option[Boolean] = None,
/** A list of geographical regions that are used for time series modeling.
*/
holidayRegions: Option[List[TrainingOptionsHolidayRegion]] = None,
/** Number of principal components to keep in the PCA model. Must be <= the number of features.
*/
numPrincipalComponents: Option[Long] = None,
/** L1 regularization coefficient.
*/
l1Regularization: Option[Double] = None,
/** Number of parallel trees constructed during each iteration for boosted tree models.
*/
numParallelTree: Option[Long] = None,
/** Num factors specified for matrix factorization models.
*/
numFactors: Option[Long] = None,
/** Subsample ratio of columns when constructing each tree for boosted tree models.
*/
colsampleBytree: Option[Double] = None,
/** Tree construction algorithm for boosted tree models.
*/
treeMethod: Option[TrainingOptionsTreeMethod] = None,
/** If true, scale the feature values by dividing the feature standard deviation. Currently only applies to PCA.
*/
scaleFeatures: Option[Boolean] = None,
/** Whether to stop early when the loss doesn't improve significantly any more (compared to min_relative_progress). Used only for iterative training algorithms.
*/
earlyStop: Option[Boolean] = None,
/** User-selected XGBoost versions for training of XGBoost models.
*/
xgboostVersion: Option[String] = None,
/** Name of the column used to determine the rows corresponding to control and test. Applies to contribution analysis models.
*/
isTestColumn: Option[String] = None,
/** The fraction of evaluation data over the whole input data. The rest of data will be used as training data. The format should be double. Accurate to two decimal places. Default value is 0.2.
*/
dataSplitEvalFraction: Option[Double] = None,
/** The data frequency of a time series.
*/
dataFrequency: Option[TrainingOptionsDataFrequency] = None,
/** Whether to use approximate feature contribution method in XGBoost model explanation for global explain.
*/
approxGlobalFeatureContrib: Option[Boolean] = None,
/** Smoothing window size for the trend component. When a positive value is specified, a center moving average smoothing is applied on the history trend. When the smoothing window is out of the boundary at the beginning or the end of the trend, the first element or the last element is padded to fill the smoothing window before the average is applied.
*/
trendSmoothingWindowSize: Option[Long] = None,
/** The min value of the sum of non-seasonal p and q.
*/
autoArimaMinOrder: Option[Long] = None,
/** Include drift when fitting an ARIMA model.
*/
includeDrift: Option[Boolean] = None,
/** When early_stop is true, stops training when accuracy improvement is less than 'min_relative_progress'. Used only for iterative training algorithms.
*/
minRelativeProgress: Option[Double] = None,
/** Whether to enable auto ARIMA or not.
*/
autoArima: Option[Boolean] = None,
/** The column to split data with. This column won't be used as a feature. 1. When data_split_method is CUSTOM, the corresponding column should be boolean. The rows with true value tag are eval data, and the false are training data. 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the corresponding column are used as training data, and the rest are eval data. It respects the order in Orderable data types: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
*/
dataSplitColumn: Option[String] = None,
/** Hidden units for dnn models.
*/
hiddenUnits: Option[List[Long]] = None,
/** Optimization strategy for training linear regression models.
*/
optimizationStrategy: Option[TrainingOptionsOptimizationStrategy] = None,
/** If true, clean spikes and dips in the input time series.
*/
cleanSpikesAndDips: Option[Boolean] = None,
/** The minimum number of time points in a time series that are used in modeling the trend component of the time series. If you use this option you must also set the `timeSeriesLengthFraction` option. This training option ensures that enough time points are available when you use `timeSeriesLengthFraction` in trend modeling. This is particularly important when forecasting multiple time series in a single query using `timeSeriesIdColumn`. If the total number of time points is less than the `minTimeSeriesLength` value, then the query uses all available time points.
*/
minTimeSeriesLength: Option[Long] = None,
/** The apriori support minimum. Applies to contribution analysis models.
*/
minAprioriSupport: Option[Double] = None,
/** If true, decompose the time series and save the results.
*/
decomposeTimeSeries: Option[Boolean] = None,
/** Maximum depth of a tree for boosted tree models.
*/
maxTreeDepth: Option[Long] = None,
/** Type of loss function used during training run.
*/
lossType: Option[TrainingOptionsLossType] = None,
/** Weights associated with each label class, for rebalancing the training data. Only applicable for classification models.
*/
labelClassWeights: Option[Map[String, Double]] = None,
/** The solver for PCA.
*/
pcaSolver: Option[TrainingOptionsPcaSolver] = None,
/** Categorical feature encoding method.
*/
categoryEncodingMethod: Option[TrainingOptionsCategoryEncodingMethod] = None,
/** The max value of the sum of non-seasonal p and q.
*/
autoArimaMaxOrder: Option[Long] = None,
/** Enums for color space, used for processing images in Object Table. See more details at https://www.tensorflow.org/io/tutorials/colorspace.
*/
colorSpace: Option[TrainingOptionsColorSpace] = None,
)
/** circe JSON codecs for [[TrainingOptions]]. */
object TrainingOptions {

  /** Encodes a [[TrainingOptions]] as a JSON object with one key per field, in declaration order.
    *
    * Each value goes through the implicit encoder of its `Option` type, so every key is always present in the
    * output object.
    */
  implicit val encoder: Encoder[TrainingOptions] = Encoder.instance { options =>
    Json.obj(
      "dimensionIdColumns" -> options.dimensionIdColumns.asJson,
      "numTrials" -> options.numTrials.asJson,
      "l2Regularization" -> options.l2Regularization.asJson,
      "holidayRegion" -> options.holidayRegion.asJson,
      "vertexAiModelVersionAliases" -> options.vertexAiModelVersionAliases.asJson,
      "timeSeriesDataColumn" -> options.timeSeriesDataColumn.asJson,
      "optimizer" -> options.optimizer.asJson,
      "initialLearnRate" -> options.initialLearnRate.asJson,
      "timeSeriesLengthFraction" -> options.timeSeriesLengthFraction.asJson,
      "nonSeasonalOrder" -> options.nonSeasonalOrder.asJson,
      "batchSize" -> options.batchSize.asJson,
      "horizon" -> options.horizon.asJson,
      "kmeansInitializationColumn" -> options.kmeansInitializationColumn.asJson,
      "enableGlobalExplain" -> options.enableGlobalExplain.asJson,
      "dataSplitMethod" -> options.dataSplitMethod.asJson,
      "feedbackType" -> options.feedbackType.asJson,
      "autoClassWeights" -> options.autoClassWeights.asJson,
      "hparamTuningObjectives" -> options.hparamTuningObjectives.asJson,
      "maxTimeSeriesLength" -> options.maxTimeSeriesLength.asJson,
      "dropout" -> options.dropout.asJson,
      "minTreeChildWeight" -> options.minTreeChildWeight.asJson,
      "standardizeFeatures" -> options.standardizeFeatures.asJson,
      "colsampleBylevel" -> options.colsampleBylevel.asJson,
      "integratedGradientsNumSteps" -> options.integratedGradientsNumSteps.asJson,
      "distanceType" -> options.distanceType.asJson,
      "subsample" -> options.subsample.asJson,
      "modelUri" -> options.modelUri.asJson,
      "dartNormalizeType" -> options.dartNormalizeType.asJson,
      "instanceWeightColumn" -> options.instanceWeightColumn.asJson,
      "sampledShapleyNumPaths" -> options.sampledShapleyNumPaths.asJson,
      "warmStart" -> options.warmStart.asJson,
      "kmeansInitializationMethod" -> options.kmeansInitializationMethod.asJson,
      "numClusters" -> options.numClusters.asJson,
      "activationFn" -> options.activationFn.asJson,
      "maxIterations" -> options.maxIterations.asJson,
      "timeSeriesIdColumn" -> options.timeSeriesIdColumn.asJson,
      "budgetHours" -> options.budgetHours.asJson,
      "contributionMetric" -> options.contributionMetric.asJson,
      "userColumn" -> options.userColumn.asJson,
      "timeSeriesIdColumns" -> options.timeSeriesIdColumns.asJson,
      "maxParallelTrials" -> options.maxParallelTrials.asJson,
      "pcaExplainedVarianceRatio" -> options.pcaExplainedVarianceRatio.asJson,
      "itemColumn" -> options.itemColumn.asJson,
      "adjustStepChanges" -> options.adjustStepChanges.asJson,
      "fitIntercept" -> options.fitIntercept.asJson,
      "inputLabelColumns" -> options.inputLabelColumns.asJson,
      "boosterType" -> options.boosterType.asJson,
      "modelRegistry" -> options.modelRegistry.asJson,
      "timeSeriesTimestampColumn" -> options.timeSeriesTimestampColumn.asJson,
      "colsampleBynode" -> options.colsampleBynode.asJson,
      "walsAlpha" -> options.walsAlpha.asJson,
      "l1RegActivation" -> options.l1RegActivation.asJson,
      "learnRateStrategy" -> options.learnRateStrategy.asJson,
      "tfVersion" -> options.tfVersion.asJson,
      "minSplitLoss" -> options.minSplitLoss.asJson,
      "learnRate" -> options.learnRate.asJson,
      "calculatePValues" -> options.calculatePValues.asJson,
      "holidayRegions" -> options.holidayRegions.asJson,
      "numPrincipalComponents" -> options.numPrincipalComponents.asJson,
      "l1Regularization" -> options.l1Regularization.asJson,
      "numParallelTree" -> options.numParallelTree.asJson,
      "numFactors" -> options.numFactors.asJson,
      "colsampleBytree" -> options.colsampleBytree.asJson,
      "treeMethod" -> options.treeMethod.asJson,
      "scaleFeatures" -> options.scaleFeatures.asJson,
      "earlyStop" -> options.earlyStop.asJson,
      "xgboostVersion" -> options.xgboostVersion.asJson,
      "isTestColumn" -> options.isTestColumn.asJson,
      "dataSplitEvalFraction" -> options.dataSplitEvalFraction.asJson,
      "dataFrequency" -> options.dataFrequency.asJson,
      "approxGlobalFeatureContrib" -> options.approxGlobalFeatureContrib.asJson,
      "trendSmoothingWindowSize" -> options.trendSmoothingWindowSize.asJson,
      "autoArimaMinOrder" -> options.autoArimaMinOrder.asJson,
      "includeDrift" -> options.includeDrift.asJson,
      "minRelativeProgress" -> options.minRelativeProgress.asJson,
      "autoArima" -> options.autoArima.asJson,
      "dataSplitColumn" -> options.dataSplitColumn.asJson,
      "hiddenUnits" -> options.hiddenUnits.asJson,
      "optimizationStrategy" -> options.optimizationStrategy.asJson,
      "cleanSpikesAndDips" -> options.cleanSpikesAndDips.asJson,
      "minTimeSeriesLength" -> options.minTimeSeriesLength.asJson,
      "minAprioriSupport" -> options.minAprioriSupport.asJson,
      "decomposeTimeSeries" -> options.decomposeTimeSeries.asJson,
      "maxTreeDepth" -> options.maxTreeDepth.asJson,
      "lossType" -> options.lossType.asJson,
      "labelClassWeights" -> options.labelClassWeights.asJson,
      "pcaSolver" -> options.pcaSolver.asJson,
      "categoryEncodingMethod" -> options.categoryEncodingMethod.asJson,
      "autoArimaMaxOrder" -> options.autoArimaMaxOrder.asJson,
      "colorSpace" -> options.colorSpace.asJson
    )
  }

  /** Decodes a [[TrainingOptions]] from a JSON object, reading each field as an `Option` under its own key.
    *
    * Fields are read in declaration order inside a single for-comprehension, so decoding stops at the first
    * field that fails and that failure is the result.
    */
  implicit val decoder: Decoder[TrainingOptions] = Decoder.instance { cursor =>
    for {
      dimensionIdColumns <- cursor.get[Option[List[String]]]("dimensionIdColumns")
      numTrials <- cursor.get[Option[Long]]("numTrials")
      l2Regularization <- cursor.get[Option[Double]]("l2Regularization")
      holidayRegion <- cursor.get[Option[TrainingOptionsHolidayRegion]]("holidayRegion")
      vertexAiModelVersionAliases <- cursor.get[Option[List[String]]]("vertexAiModelVersionAliases")
      timeSeriesDataColumn <- cursor.get[Option[String]]("timeSeriesDataColumn")
      optimizer <- cursor.get[Option[String]]("optimizer")
      initialLearnRate <- cursor.get[Option[Double]]("initialLearnRate")
      timeSeriesLengthFraction <- cursor.get[Option[Double]]("timeSeriesLengthFraction")
      nonSeasonalOrder <- cursor.get[Option[ArimaOrder]]("nonSeasonalOrder")
      batchSize <- cursor.get[Option[Long]]("batchSize")
      horizon <- cursor.get[Option[Long]]("horizon")
      kmeansInitializationColumn <- cursor.get[Option[String]]("kmeansInitializationColumn")
      enableGlobalExplain <- cursor.get[Option[Boolean]]("enableGlobalExplain")
      dataSplitMethod <- cursor.get[Option[TrainingOptionsDataSplitMethod]]("dataSplitMethod")
      feedbackType <- cursor.get[Option[TrainingOptionsFeedbackType]]("feedbackType")
      autoClassWeights <- cursor.get[Option[Boolean]]("autoClassWeights")
      hparamTuningObjectives <- cursor.get[Option[List[TrainingOptionsHparamTuningObjective]]]("hparamTuningObjectives")
      maxTimeSeriesLength <- cursor.get[Option[Long]]("maxTimeSeriesLength")
      dropout <- cursor.get[Option[Double]]("dropout")
      minTreeChildWeight <- cursor.get[Option[Long]]("minTreeChildWeight")
      standardizeFeatures <- cursor.get[Option[Boolean]]("standardizeFeatures")
      colsampleBylevel <- cursor.get[Option[Double]]("colsampleBylevel")
      integratedGradientsNumSteps <- cursor.get[Option[Long]]("integratedGradientsNumSteps")
      distanceType <- cursor.get[Option[TrainingOptionsDistanceType]]("distanceType")
      subsample <- cursor.get[Option[Double]]("subsample")
      modelUri <- cursor.get[Option[String]]("modelUri")
      dartNormalizeType <- cursor.get[Option[TrainingOptionsDartNormalizeType]]("dartNormalizeType")
      instanceWeightColumn <- cursor.get[Option[String]]("instanceWeightColumn")
      sampledShapleyNumPaths <- cursor.get[Option[Long]]("sampledShapleyNumPaths")
      warmStart <- cursor.get[Option[Boolean]]("warmStart")
      kmeansInitializationMethod <- cursor.get[Option[TrainingOptionsKmeansInitializationMethod]]("kmeansInitializationMethod")
      numClusters <- cursor.get[Option[Long]]("numClusters")
      activationFn <- cursor.get[Option[String]]("activationFn")
      maxIterations <- cursor.get[Option[Long]]("maxIterations")
      timeSeriesIdColumn <- cursor.get[Option[String]]("timeSeriesIdColumn")
      budgetHours <- cursor.get[Option[Double]]("budgetHours")
      contributionMetric <- cursor.get[Option[String]]("contributionMetric")
      userColumn <- cursor.get[Option[String]]("userColumn")
      timeSeriesIdColumns <- cursor.get[Option[List[String]]]("timeSeriesIdColumns")
      maxParallelTrials <- cursor.get[Option[Long]]("maxParallelTrials")
      pcaExplainedVarianceRatio <- cursor.get[Option[Double]]("pcaExplainedVarianceRatio")
      itemColumn <- cursor.get[Option[String]]("itemColumn")
      adjustStepChanges <- cursor.get[Option[Boolean]]("adjustStepChanges")
      fitIntercept <- cursor.get[Option[Boolean]]("fitIntercept")
      inputLabelColumns <- cursor.get[Option[List[String]]]("inputLabelColumns")
      boosterType <- cursor.get[Option[TrainingOptionsBoosterType]]("boosterType")
      modelRegistry <- cursor.get[Option[TrainingOptionsModelRegistry]]("modelRegistry")
      timeSeriesTimestampColumn <- cursor.get[Option[String]]("timeSeriesTimestampColumn")
      colsampleBynode <- cursor.get[Option[Double]]("colsampleBynode")
      walsAlpha <- cursor.get[Option[Double]]("walsAlpha")
      l1RegActivation <- cursor.get[Option[Double]]("l1RegActivation")
      learnRateStrategy <- cursor.get[Option[TrainingOptionsLearnRateStrategy]]("learnRateStrategy")
      tfVersion <- cursor.get[Option[String]]("tfVersion")
      minSplitLoss <- cursor.get[Option[Double]]("minSplitLoss")
      learnRate <- cursor.get[Option[Double]]("learnRate")
      calculatePValues <- cursor.get[Option[Boolean]]("calculatePValues")
      holidayRegions <- cursor.get[Option[List[TrainingOptionsHolidayRegion]]]("holidayRegions")
      numPrincipalComponents <- cursor.get[Option[Long]]("numPrincipalComponents")
      l1Regularization <- cursor.get[Option[Double]]("l1Regularization")
      numParallelTree <- cursor.get[Option[Long]]("numParallelTree")
      numFactors <- cursor.get[Option[Long]]("numFactors")
      colsampleBytree <- cursor.get[Option[Double]]("colsampleBytree")
      treeMethod <- cursor.get[Option[TrainingOptionsTreeMethod]]("treeMethod")
      scaleFeatures <- cursor.get[Option[Boolean]]("scaleFeatures")
      earlyStop <- cursor.get[Option[Boolean]]("earlyStop")
      xgboostVersion <- cursor.get[Option[String]]("xgboostVersion")
      isTestColumn <- cursor.get[Option[String]]("isTestColumn")
      dataSplitEvalFraction <- cursor.get[Option[Double]]("dataSplitEvalFraction")
      dataFrequency <- cursor.get[Option[TrainingOptionsDataFrequency]]("dataFrequency")
      approxGlobalFeatureContrib <- cursor.get[Option[Boolean]]("approxGlobalFeatureContrib")
      trendSmoothingWindowSize <- cursor.get[Option[Long]]("trendSmoothingWindowSize")
      autoArimaMinOrder <- cursor.get[Option[Long]]("autoArimaMinOrder")
      includeDrift <- cursor.get[Option[Boolean]]("includeDrift")
      minRelativeProgress <- cursor.get[Option[Double]]("minRelativeProgress")
      autoArima <- cursor.get[Option[Boolean]]("autoArima")
      dataSplitColumn <- cursor.get[Option[String]]("dataSplitColumn")
      hiddenUnits <- cursor.get[Option[List[Long]]]("hiddenUnits")
      optimizationStrategy <- cursor.get[Option[TrainingOptionsOptimizationStrategy]]("optimizationStrategy")
      cleanSpikesAndDips <- cursor.get[Option[Boolean]]("cleanSpikesAndDips")
      minTimeSeriesLength <- cursor.get[Option[Long]]("minTimeSeriesLength")
      minAprioriSupport <- cursor.get[Option[Double]]("minAprioriSupport")
      decomposeTimeSeries <- cursor.get[Option[Boolean]]("decomposeTimeSeries")
      maxTreeDepth <- cursor.get[Option[Long]]("maxTreeDepth")
      lossType <- cursor.get[Option[TrainingOptionsLossType]]("lossType")
      labelClassWeights <- cursor.get[Option[Map[String, Double]]]("labelClassWeights")
      pcaSolver <- cursor.get[Option[TrainingOptionsPcaSolver]]("pcaSolver")
      categoryEncodingMethod <- cursor.get[Option[TrainingOptionsCategoryEncodingMethod]]("categoryEncodingMethod")
      autoArimaMaxOrder <- cursor.get[Option[Long]]("autoArimaMaxOrder")
      colorSpace <- cursor.get[Option[TrainingOptionsColorSpace]]("colorSpace")
    } yield TrainingOptions(
      // Named arguments tie each decoded value to its field explicitly instead of relying on position.
      dimensionIdColumns = dimensionIdColumns,
      numTrials = numTrials,
      l2Regularization = l2Regularization,
      holidayRegion = holidayRegion,
      vertexAiModelVersionAliases = vertexAiModelVersionAliases,
      timeSeriesDataColumn = timeSeriesDataColumn,
      optimizer = optimizer,
      initialLearnRate = initialLearnRate,
      timeSeriesLengthFraction = timeSeriesLengthFraction,
      nonSeasonalOrder = nonSeasonalOrder,
      batchSize = batchSize,
      horizon = horizon,
      kmeansInitializationColumn = kmeansInitializationColumn,
      enableGlobalExplain = enableGlobalExplain,
      dataSplitMethod = dataSplitMethod,
      feedbackType = feedbackType,
      autoClassWeights = autoClassWeights,
      hparamTuningObjectives = hparamTuningObjectives,
      maxTimeSeriesLength = maxTimeSeriesLength,
      dropout = dropout,
      minTreeChildWeight = minTreeChildWeight,
      standardizeFeatures = standardizeFeatures,
      colsampleBylevel = colsampleBylevel,
      integratedGradientsNumSteps = integratedGradientsNumSteps,
      distanceType = distanceType,
      subsample = subsample,
      modelUri = modelUri,
      dartNormalizeType = dartNormalizeType,
      instanceWeightColumn = instanceWeightColumn,
      sampledShapleyNumPaths = sampledShapleyNumPaths,
      warmStart = warmStart,
      kmeansInitializationMethod = kmeansInitializationMethod,
      numClusters = numClusters,
      activationFn = activationFn,
      maxIterations = maxIterations,
      timeSeriesIdColumn = timeSeriesIdColumn,
      budgetHours = budgetHours,
      contributionMetric = contributionMetric,
      userColumn = userColumn,
      timeSeriesIdColumns = timeSeriesIdColumns,
      maxParallelTrials = maxParallelTrials,
      pcaExplainedVarianceRatio = pcaExplainedVarianceRatio,
      itemColumn = itemColumn,
      adjustStepChanges = adjustStepChanges,
      fitIntercept = fitIntercept,
      inputLabelColumns = inputLabelColumns,
      boosterType = boosterType,
      modelRegistry = modelRegistry,
      timeSeriesTimestampColumn = timeSeriesTimestampColumn,
      colsampleBynode = colsampleBynode,
      walsAlpha = walsAlpha,
      l1RegActivation = l1RegActivation,
      learnRateStrategy = learnRateStrategy,
      tfVersion = tfVersion,
      minSplitLoss = minSplitLoss,
      learnRate = learnRate,
      calculatePValues = calculatePValues,
      holidayRegions = holidayRegions,
      numPrincipalComponents = numPrincipalComponents,
      l1Regularization = l1Regularization,
      numParallelTree = numParallelTree,
      numFactors = numFactors,
      colsampleBytree = colsampleBytree,
      treeMethod = treeMethod,
      scaleFeatures = scaleFeatures,
      earlyStop = earlyStop,
      xgboostVersion = xgboostVersion,
      isTestColumn = isTestColumn,
      dataSplitEvalFraction = dataSplitEvalFraction,
      dataFrequency = dataFrequency,
      approxGlobalFeatureContrib = approxGlobalFeatureContrib,
      trendSmoothingWindowSize = trendSmoothingWindowSize,
      autoArimaMinOrder = autoArimaMinOrder,
      includeDrift = includeDrift,
      minRelativeProgress = minRelativeProgress,
      autoArima = autoArima,
      dataSplitColumn = dataSplitColumn,
      hiddenUnits = hiddenUnits,
      optimizationStrategy = optimizationStrategy,
      cleanSpikesAndDips = cleanSpikesAndDips,
      minTimeSeriesLength = minTimeSeriesLength,
      minAprioriSupport = minAprioriSupport,
      decomposeTimeSeries = decomposeTimeSeries,
      maxTreeDepth = maxTreeDepth,
      lossType = lossType,
      labelClassWeights = labelClassWeights,
      pcaSolver = pcaSolver,
      categoryEncodingMethod = categoryEncodingMethod,
      autoArimaMaxOrder = autoArimaMaxOrder,
      colorSpace = colorSpace
    )
  }
}