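// NOTE: this listing was extracted from a Maven repository browser page. The original page
// chrome is summarized here as comments so that the file remains valid Scala.
// Source file: org.apache.spark.ml.regression.GeneralizedLinearRegression.scala
// (Maven / Gradle / Ivy). Downloads and search use the official Maven repository.
// A newer version of this artifact exists: 4.0.0-preview2.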
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.ml.regression

import java.util.Locale

import breeze.stats.{distributions => dist}
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkException
import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.feature.{Instance, OffsetInstance}
import org.apache.spark.ml.functions.checkNonNegativeWeight
import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.ml.optim._
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util._
import org.apache.spark.ml.util.Instrumentation.instrumented
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Column, DataFrame, Dataset, Row}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}

/**
 * Params shared by the Generalized Linear Regression estimator and its fitted model.
 */
private[regression] trait GeneralizedLinearRegressionBase extends PredictorParams
  with HasFitIntercept with HasMaxIter with HasTol with HasRegParam with HasWeightCol
  with HasSolver with HasAggregationDepth with Logging {

  import GeneralizedLinearRegression._

  /**
   * Name of the family, i.e. the description of the error distribution used in the model.
   * Supported options: "gaussian", "binomial", "poisson", "gamma" and "tweedie"; the value
   * is lower-cased before it is validated.
   * Default is "gaussian".
   *
   * @group param
   */
  @Since("2.0.0")
  final val family: Param[String] = new Param(
    this,
    "family",
    "The name of family which is a description of the error distribution to be used in the " +
      s"model. Supported options: ${supportedFamilyNames.mkString(", ")}.",
    (name: String) => supportedFamilyNames.contains(name.toLowerCase(Locale.ROOT)))

  /** @group getParam */
  @Since("2.0.0")
  def getFamily: String = $(family)

  /**
   * Power in the variance function of the Tweedie distribution, which describes how the
   * variance depends on the mean of the distribution.
   * Only applicable to the Tweedie family.
   * (see <a href="https://en.wikipedia.org/wiki/Tweedie_distribution">
   * Tweedie Distribution (Wikipedia)</a>)
   * Supported values: 0 and [1, Inf).
   * Note that variance power 0, 1, or 2 corresponds to the Gaussian, Poisson or Gamma
   * family, respectively.
   *
   * @group param
   */
  @Since("2.2.0")
  final val variancePower: DoubleParam = new DoubleParam(
    this,
    "variancePower",
    "The power in the variance function of the Tweedie distribution which characterizes " +
    "the relationship between the variance and mean of the distribution. " +
    "Only applicable to the Tweedie family. Supported values: 0 and [1, Inf).",
    (power: Double) => power == 0.0 || power >= 1.0)

  /** @group getParam */
  @Since("2.2.0")
  def getVariancePower: Double = $(variancePower)

  /**
   * Name of the link function, which relates the linear predictor to the mean of the
   * distribution function.
   * Supported options: "identity", "log", "inverse", "logit", "probit", "cloglog" and "sqrt".
   * This is used only when family is not "tweedie". The link function for the "tweedie" family
   * must be specified through [[linkPower]].
   *
   * @group param
   */
  @Since("2.0.0")
  final val link: Param[String] = new Param(
    this,
    "link",
    "The name of link function " +
    "which provides the relationship between the linear predictor and the mean of the " +
    s"distribution function. Supported options: ${supportedLinkNames.mkString(", ")}",
    (name: String) => supportedLinkNames.contains(name.toLowerCase(Locale.ROOT)))

  /** @group getParam */
  @Since("2.0.0")
  def getLink: String = $(link)

  /**
   * Index in the power link function. Only applicable to the Tweedie family.
   * Note that link power 0, 1, -1 or 0.5 corresponds to the Log, Identity, Inverse or Sqrt
   * link, respectively.
   * When not set, this value defaults to 1 - [[variancePower]], which matches the R "statmod"
   * package.
   *
   * @group param
   */
  @Since("2.2.0")
  final val linkPower: DoubleParam = new DoubleParam(
    this,
    "linkPower",
    "The index in the power link function. Only applicable to the Tweedie family.")

  /** @group getParam */
  @Since("2.2.0")
  def getLinkPower: Double = $(linkPower)

  /**
   * Name of the output column holding the link prediction (linear predictor).
   * Default is not set, which means we do not output link prediction.
   *
   * @group param
   */
  @Since("2.0.0")
  final val linkPredictionCol: Param[String] = new Param[String](
    this,
    "linkPredictionCol",
    "link prediction (linear predictor) column name")

  /** @group getParam */
  @Since("2.0.0")
  def getLinkPredictionCol: String = $(linkPredictionCol)

  /**
   * Name of the offset column. If this is not set or empty, we treat all instance offsets
   * as 0.0. The feature specified as offset has a constant coefficient of 1.0.
   *
   * @group param
   */
  @Since("2.3.0")
  final val offsetCol: Param[String] = new Param[String](
    this,
    "offsetCol",
    "The offset " +
    "column name. If this is not set or empty, we treat all instance offsets as 0.0")

  /** @group getParam */
  @Since("2.3.0")
  def getOffsetCol: String = $(offsetCol)

  /** True when the weight column has been explicitly set to a non-empty name. */
  private[regression] def hasWeightCol: Boolean =
    get(weightCol).exists(_.nonEmpty)

  /** True when the offset column has been explicitly set to a non-empty name. */
  private[regression] def hasOffsetCol: Boolean =
    get(offsetCol).exists(_.nonEmpty)

  /** True when a non-empty link prediction column is defined, i.e. it should be output. */
  private[regression] def hasLinkPredictionCol: Boolean = {
    if (isDefined(linkPredictionCol)) $(linkPredictionCol).nonEmpty else false
  }

  /**
   * The solver algorithm for optimization.
   * Supported options: "irls" (iteratively reweighted least squares).
   * Default: "irls"
   *
   * @group param
   */
  @Since("2.0.0")
  final override val solver: Param[String] = new Param[String](
    this,
    "solver",
    "The solver algorithm for optimization. Supported options: " +
      s"${supportedSolvers.mkString(", ")}. (Default irls)",
    ParamValidators.inArray[String](supportedSolvers))

  setDefault(
    family -> Gaussian.name,
    variancePower -> 0.0,
    maxIter -> 25,
    tol -> 1E-6,
    regParam -> 0.0,
    solver -> IRLS)

  /**
   * Validates the family/link related params against each other, delegates the remaining
   * schema validation to the parent implementation, checks that the offset column (if used)
   * is numeric, and appends the link prediction column when one is requested.
   *
   * Params that are set but irrelevant for the chosen family only produce a warning; an
   * unsupported (family, link) pair for a non-tweedie family fails the `require`.
   */
  @Since("2.0.0")
  override def validateAndTransformSchema(
      schema: StructType,
      fitting: Boolean,
      featuresDataType: DataType): StructType = {
    val isTweedie = $(family).toLowerCase(Locale.ROOT) == "tweedie"

    if (isTweedie) {
      if (isSet(link)) {
        logWarning("When family is tweedie, use param linkPower to specify link function. " +
          "Setting param link will take no effect.")
      }
    } else {
      if (isSet(variancePower)) {
        logWarning("When family is not tweedie, setting param variancePower will take no effect.")
      }
      if (isSet(linkPower)) {
        logWarning("When family is not tweedie, use param link to specify link function. " +
          "Setting param linkPower will take no effect.")
      }
      if (isSet(link)) {
        val requestedPair = Family.fromParams(this) -> Link.fromParams(this)
        require(supportedFamilyAndLinkPairs.contains(requestedPair),
          s"Generalized Linear Regression with ${$(family)} family " +
            s"does not support ${$(link)} link function.")
      }
    }

    val baseSchema = super.validateAndTransformSchema(schema, fitting, featuresDataType)

    if (hasOffsetCol) {
      SchemaUtils.checkNumericType(schema, $(offsetCol))
    }

    if (!hasLinkPredictionCol) {
      baseSchema
    } else {
      val linkField = NumericAttribute.defaultAttr
        .withName($(linkPredictionCol))
        .toStructField()
      SchemaUtils.appendColumn(baseSchema, linkField)
    }
  }
}

/**
 * Fit a Generalized Linear Model
 * (see <a href="https://en.wikipedia.org/wiki/Generalized_linear_model">
 * Generalized linear model (Wikipedia)</a>)
 * specified by giving a symbolic description of the linear
 * predictor (link function) and a description of the error distribution (family).
 * It supports "gaussian", "binomial", "poisson", "gamma" and "tweedie" as family.
 * Valid link functions for each family are listed below. The first link function of each family
 * is the default one.
 *  - "gaussian" : "identity", "log", "inverse"
 *  - "binomial" : "logit", "probit", "cloglog"
 *  - "poisson"  : "log", "identity", "sqrt"
 *  - "gamma"    : "inverse", "identity", "log"
 *  - "tweedie"  : power link function specified through "linkPower". The default link power in
 *  the tweedie family is 1 - variancePower.
 */
@Since("2.0.0")
class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val uid: String)
  extends Regressor[Vector, GeneralizedLinearRegression, GeneralizedLinearRegressionModel]
  with GeneralizedLinearRegressionBase with DefaultParamsWritable with Logging {

  import GeneralizedLinearRegression._

  @Since("2.0.0")
  def this() = this(Identifiable.randomUID("glm"))

  /**
   * Sets the value of param [[family]].
   * Default is "gaussian".
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setFamily(value: String): this.type = set(family, value)

  /**
   * Sets the value of param [[variancePower]].
   * Used only when family is "tweedie".
   * Default is 0.0, which corresponds to the "gaussian" family.
   *
   * @group setParam
   */
  @Since("2.2.0")
  def setVariancePower(value: Double): this.type = set(variancePower, value)

  /**
   * Sets the value of param [[linkPower]].
   * Used only when family is "tweedie".
   *
   * @group setParam
   */
  @Since("2.2.0")
  def setLinkPower(value: Double): this.type = set(linkPower, value)

  /**
   * Sets the value of param [[link]].
   * Used only when family is not "tweedie".
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setLink(value: String): this.type = set(link, value)

  /**
   * Sets if we should fit the intercept.
   * Default is true.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)

  /**
   * Sets the maximum number of iterations (applicable for solver "irls").
   * Default is 25.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setMaxIter(value: Int): this.type = set(maxIter, value)

  /**
   * Sets the convergence tolerance of iterations.
   * Smaller value will lead to higher accuracy with the cost of more iterations.
   * Default is 1E-6.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setTol(value: Double): this.type = set(tol, value)

  /**
   * Sets the regularization parameter for L2 regularization.
   * The regularization term is
   * <blockquote>
   *    $$
   *    0.5 * regParam * L2norm(coefficients)^2
   *    $$
   * </blockquote>
   * Default is 0.0.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setRegParam(value: Double): this.type = set(regParam, value)

  /**
   * Sets the value of param [[weightCol]].
   * If this is not set or empty, we treat all instance weights as 1.0.
   * Default is not set, so all instances have weight one.
   * In the Binomial family, weights correspond to number of trials and should be integer.
   * Non-integer weights are rounded to integer in AIC calculation.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setWeightCol(value: String): this.type = set(weightCol, value)

  /**
   * Sets the value of param [[offsetCol]].
   * If this is not set or empty, we treat all instance offsets as 0.0.
   * Default is not set, so all instances have offset 0.0.
   *
   * @group setParam
   */
  @Since("2.3.0")
  def setOffsetCol(value: String): this.type = set(offsetCol, value)

  /**
   * Sets the solver algorithm used for optimization.
   * Currently only supports "irls" which is also the default solver.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setSolver(value: String): this.type = set(solver, value)

  /**
   * Sets the link prediction (linear predictor) column name.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setLinkPredictionCol(value: String): this.type = set(linkPredictionCol, value)

  /** @group expertSetParam */
  @Since("3.0.0")
  def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value)

  /**
   * Fits the model on `dataset`.
   *
   * The Gaussian family with the identity link is fitted with a single weighted least squares
   * solve on `label - offset` (its training summary is created with 1 iteration). Every other
   * family/link combination is fitted by iteratively reweighted least squares, starting from
   * the initial model produced by `FamilyAndLink.initialize`.
   *
   * Throws a `SparkException` when the number of features exceeds
   * `WeightedLeastSquares.MAX_NUM_FEATURES`, and fails the `require` when there are no
   * features and no intercept to fit.
   */
  override protected def train(
      dataset: Dataset[_]): GeneralizedLinearRegressionModel = instrumented { instr =>
    val familyAndLink = FamilyAndLink(this)

    instr.logPipelineStage(this)
    instr.logDataset(dataset)
    instr.logParams(this, labelCol, featuresCol, weightCol, offsetCol, predictionCol,
      linkPredictionCol, family, solver, fitIntercept, link, maxIter, regParam, tol,
      aggregationDepth)

    val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol))
    instr.logNumFeatures(numFeatures)

    if (numFeatures > WeightedLeastSquares.MAX_NUM_FEATURES) {
      val msg = "Currently, GeneralizedLinearRegression only supports number of features" +
        s" <= ${WeightedLeastSquares.MAX_NUM_FEATURES}. Found $numFeatures in the input dataset."
      throw new SparkException(msg)
    }

    require(numFeatures > 0 || $(fitIntercept),
      "GeneralizedLinearRegression was given data with 0 features, and with Param fitIntercept " +
        "set to false. To fit a model with 0 features, fitIntercept must be set to true.")

    // Missing weight/offset columns are replaced by the neutral constants 1.0 and 0.0.
    val w = if (!hasWeightCol) lit(1.0) else checkNonNegativeWeight(col($(weightCol)))
    val offset = if (!hasOffsetCol) lit(0.0) else col($(offsetCol)).cast(DoubleType)

    val model = if (familyAndLink.family == Gaussian && familyAndLink.link == Identity) {
      // TODO: Make standardizeFeatures and standardizeLabel configurable.
      val instances: RDD[Instance] =
        dataset.select(col($(labelCol)), w, offset, col($(featuresCol))).rdd.map {
          case Row(label: Double, weight: Double, offset: Double, features: Vector) =>
            Instance(label - offset, weight, features)
        }
      val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), elasticNetParam = 0.0,
        standardizeFeatures = true, standardizeLabel = true)
      val wlsModel = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr),
        depth = $(aggregationDepth))
      val model = copyValues(
        new GeneralizedLinearRegressionModel(uid, wlsModel.coefficients, wlsModel.intercept)
          .setParent(this))
      val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
        wlsModel.diagInvAtWA.toArray, 1, getSolver)
      model.setSummary(Some(trainingSummary))
    } else {
      val instances: RDD[OffsetInstance] =
        dataset.select(col($(labelCol)), w, offset, col($(featuresCol))).rdd.map {
          case Row(label: Double, weight: Double, offset: Double, features: Vector) =>
            OffsetInstance(label, weight, offset, features)
        }
      // Fit Generalized Linear Model by iteratively reweighted least squares (IRLS).
      val initialModel = familyAndLink.initialize(instances, $(fitIntercept), $(regParam),
        instr = OptionalInstrumentation.create(instr), $(aggregationDepth))
      val optimizer = new IterativelyReweightedLeastSquares(initialModel,
        familyAndLink.reweightFunc, $(fitIntercept), $(regParam), $(maxIter), $(tol))
      val irlsModel = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr))
      val model = copyValues(
        new GeneralizedLinearRegressionModel(uid, irlsModel.coefficients, irlsModel.intercept)
          .setParent(this))
      val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model,
        irlsModel.diagInvAtWA.toArray, irlsModel.numIterations, getSolver)
      model.setSummary(Some(trainingSummary))
    }

    model
  }

  @Since("2.0.0")
  override def copy(extra: ParamMap): GeneralizedLinearRegression = defaultCopy(extra)
}

@Since("2.0.0")
object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLinearRegression] {

  @Since("2.0.0")
  override def load(path: String): GeneralizedLinearRegression = super.load(path)

  /**
   * Set of family (except for tweedie) and link pairs that GeneralizedLinearRegression supports.
   * The link function of the Tweedie family is specified through param linkPower.
   */
  private[regression] lazy val supportedFamilyAndLinkPairs = Set(
    Gaussian -> Identity, Gaussian -> Log, Gaussian -> Inverse,
    Binomial -> Logit, Binomial -> Probit, Binomial -> CLogLog,
    Poisson -> Log, Poisson -> Identity, Poisson -> Sqrt,
    Gamma -> Inverse, Gamma -> Identity, Gamma -> Log
  )

  /** String name for "irls" (iteratively reweighted least squares) solver. */
  private[regression] val IRLS = "irls"

  /** Set of solvers that GeneralizedLinearRegression supports. */
  private[regression] val supportedSolvers = Array(IRLS)

  /** Set of family names that GeneralizedLinearRegression supports. */
  private[regression] lazy val supportedFamilyNames =
    supportedFamilyAndLinkPairs.map(_._1.name).toArray :+ "tweedie"

  /** Set of link names that GeneralizedLinearRegression supports.
*/
  private[regression] lazy val supportedLinkNames =
    supportedFamilyAndLinkPairs.map(_._2.name).toArray

  /** Lower bound used by `project` implementations to keep fitted means away from 0 (and 1). */
  private[regression] val epsilon: Double = 1E-16

  /** Computes `y * log(y / mu)`, returning 0.0 when `y` is 0. */
  private[regression] def ylogy(y: Double, mu: Double): Double = {
    if (y == 0) 0.0 else y * math.log(y / mu)
  }

  /**
   * Wrapper of family and link combination used in the model.
   */
  private[regression] class FamilyAndLink(val family: Family, val link: Link)
    extends Serializable {

    /** Linear predictor based on given mu. */
    def predict(mu: Double): Double = link.link(family.project(mu))

    /** Fitted value based on linear predictor eta. */
    def fitted(eta: Double): Double = family.project(link.unlink(eta))

    /**
     * Get the initial guess model for [[IterativelyReweightedLeastSquares]].
     *
     * Each instance's label is replaced by the linear predictor of the family's initial mean
     * (minus the instance offset), and a weighted least squares model is fitted on the result.
     */
    def initialize(
        instances: RDD[OffsetInstance],
        fitIntercept: Boolean,
        regParam: Double,
        instr: OptionalInstrumentation = OptionalInstrumentation.create(
          classOf[GeneralizedLinearRegression]),
        depth: Int = 2
      ): WeightedLeastSquaresModel = {
      val newInstances = instances.map { instance =>
        val mu = family.initialize(instance.label, instance.weight)
        val eta = predict(mu) - instance.offset
        Instance(eta, instance.weight, instance.features)
      }
      // TODO: Make standardizeFeatures and standardizeLabel configurable.
      val initialModel = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam = 0.0,
        standardizeFeatures = true, standardizeLabel = true)
        .fit(newInstances, instr, depth)
      initialModel
    }

    /**
     * The reweight function used to update working labels and weights
     * at each iteration of [[IterativelyReweightedLeastSquares]].
     *
     * @return the pair (working label, working weight) for the given instance
     */
    def reweightFunc(
        instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, Double) = {
      val eta = model.predict(instance.features) + instance.offset
      val mu = fitted(eta)
      val newLabel = eta - instance.offset + (instance.label - mu) * link.deriv(mu)
      val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * family.variance(mu))
      (newLabel, newWeight)
    }
  }

  private[regression] object FamilyAndLink {

    /**
     * Constructs the FamilyAndLink object from a parameter map.
     *
     * The link is taken from the params only when the param relevant for the family is set
     * (`link` for non-tweedie families, `linkPower` for tweedie); otherwise the family's
     * default link is used.
     */
    def apply(params: GeneralizedLinearRegressionBase): FamilyAndLink = {
      val familyObj = Family.fromParams(params)
      val linkObj =
        if ((params.getFamily.toLowerCase(Locale.ROOT) != "tweedie" &&
              params.isSet(params.link)) ||
            (params.getFamily.toLowerCase(Locale.ROOT) == "tweedie" &&
              params.isSet(params.linkPower))) {
          Link.fromParams(params)
        } else {
          familyObj.defaultLink
        }
      new FamilyAndLink(familyObj, linkObj)
    }
  }

  /**
   * A description of the error distribution to be used in the model.
   *
   * @param name the name of the family.
   */
  private[regression] abstract class Family(val name: String) extends Serializable {

    /** The default link instance of this family. */
    val defaultLink: Link

    /** Initialize the starting value for mu. */
    def initialize(y: Double, weight: Double): Double

    /** The variance of the endogenous variable's mean, given the value mu. */
    def variance(mu: Double): Double

    /** Deviance of (y, mu) pair. */
    def deviance(y: Double, mu: Double, weight: Double): Double

    /**
     * Akaike Information Criterion (AIC) value of the family for a given dataset.
     *
     * @param predictions an RDD of (y, mu, weight) of instances in evaluation dataset
     * @param deviance the deviance for the fitted model in evaluation dataset
     * @param numInstances number of instances in evaluation dataset
     * @param weightSum weights sum of instances in evaluation dataset
     */
    def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double

    /** Trim the fitted value so that it will be in valid range. */
    def project(mu: Double): Double = mu
  }

  private[regression] object Family {

    /**
     * Gets the [[Family]] object based on param family and variancePower.
     * If param family is set with "gaussian", "binomial", "poisson" or "gamma",
     * return the corresponding object directly; otherwise, construct a Tweedie object
     * according to variancePower.
     *
     * @param params the parameter map containing family name and variance power
     */
    def fromParams(params: GeneralizedLinearRegressionBase): Family = {
      params.getFamily.toLowerCase(Locale.ROOT) match {
        case Gaussian.name => Gaussian
        case Binomial.name => Binomial
        case Poisson.name => Poisson
        case Gamma.name => Gamma
        case "tweedie" =>
          params.getVariancePower match {
            case 0.0 => Gaussian
            case 1.0 => Poisson
            case 2.0 => Gamma
            case others => new Tweedie(others)
          }
      }
    }
  }

  /**
   * Tweedie exponential family distribution.
   * This includes the special cases of Gaussian, Poisson and Gamma.
*/
  private[regression] class Tweedie(val variancePower: Double)
    extends Family("tweedie") {

    /** Power link with exponent `1 - variancePower`. */
    override val defaultLink: Link = new Power(1.0 - variancePower)

    /**
     * Validates the response for the given variance power and returns the starting mean.
     * A response of exactly 0 is replaced by `Tweedie.delta`.
     */
    override def initialize(y: Double, weight: Double): Double = {
      if (variancePower >= 1.0 && variancePower < 2.0) {
        require(y >= 0.0, s"The response variable of $name($variancePower) family " +
          s"should be non-negative, but got $y")
      } else if (variancePower >= 2.0) {
        require(y > 0.0, s"The response variable of $name($variancePower) family " +
          s"should be positive, but got $y")
      }
      if (y == 0) Tweedie.delta else y
    }

    override def variance(mu: Double): Double = math.pow(mu, variancePower)

    /** Helper for the deviance: `log(y / mu)` when `p` is 0, else `(y^p - mu^p) / p`. */
    private def yp(y: Double, mu: Double, p: Double): Double = {
      if (p == 0) {
        math.log(y / mu)
      } else {
        (math.pow(y, p) - math.pow(mu, p)) / p
      }
    }

    override def deviance(y: Double, mu: Double, weight: Double): Double = {
      // Force y >= delta for Poisson or compound Poisson
      val y1 = if (variancePower >= 1.0 && variancePower < 2.0) {
        math.max(y, Tweedie.delta)
      } else {
        y
      }
      2.0 * weight *
        (y * yp(y1, mu, 1.0 - variancePower) - yp(y, mu, 2.0 - variancePower))
    }

    /** Always throws: no AIC is available for the general tweedie family. */
    override def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double = {
      /*
       This depends on the density of the Tweedie distribution.
       Only implemented for Gaussian, Poisson and Gamma at this point.
      */
      throw new UnsupportedOperationException("No AIC available for the tweedie family")
    }

    /** Clamps the mean into `[epsilon, Double.MaxValue]`. */
    override def project(mu: Double): Double = {
      if (mu < epsilon) {
        epsilon
      } else if (mu.isInfinity) {
        Double.MaxValue
      } else {
        mu
      }
    }
  }

  private[regression] object Tweedie{

    /** Constant used in initialization and deviance to avoid numerical issues. */
    val delta: Double = 0.1
  }

  /**
   * Gaussian exponential family distribution.
   * The default link for the Gaussian family is the identity link.
*/
  private[regression] object Gaussian extends Tweedie(0.0) {

    override val name: String = "gaussian"

    override val defaultLink: Link = Identity

    override def initialize(y: Double, weight: Double): Double = y

    override def variance(mu: Double): Double = 1.0

    override def deviance(y: Double, mu: Double, weight: Double): Double = {
      weight * (y - mu) * (y - mu)
    }

    override def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double = {
      val wt = predictions.map(x => math.log(x._3)).sum()
      numInstances * (math.log(deviance / numInstances * 2.0 * math.Pi) + 1.0) + 2.0 - wt
    }

    /** Replaces infinite means by the largest/smallest finite Double. */
    override def project(mu: Double): Double = {
      if (mu.isNegInfinity) {
        Double.MinValue
      } else if (mu.isPosInfinity) {
        Double.MaxValue
      } else {
        mu
      }
    }
  }

  /**
   * Binomial exponential family distribution.
   * The default link for the Binomial family is the logit link.
   */
  private[regression] object Binomial extends Family("binomial") {

    val defaultLink: Link = Logit

    /**
     * Starting mean `(weight * y + 0.5) / (weight + 1.0)`, which must lie strictly
     * between 0 and 1.
     */
    override def initialize(y: Double, weight: Double): Double = {
      val mu = (weight * y + 0.5) / (weight + 1.0)
      // The trailing space after "family" keeps the two message fragments from running together.
      require(mu > 0.0 && mu < 1.0, "The response variable of Binomial family " +
        s"should be in range (0, 1), but got $mu")
      mu
    }

    override def variance(mu: Double): Double = mu * (1.0 - mu)

    override def deviance(y: Double, mu: Double, weight: Double): Double = {
      2.0 * weight * (ylogy(y, mu) + ylogy(1.0 - y, 1.0 - mu))
    }

    override def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double = {
      -2.0 * predictions.map { case (y: Double, mu: Double, weight: Double) =>
        // weights for Binomial distribution correspond to number of trials
        val wt = math.round(weight).toInt
        if (wt == 0) {
          0.0
        } else {
          dist.Binomial(wt, mu).logProbabilityOf(math.round(y * weight).toInt)
        }
      }.sum()
    }

    /** Clamps the mean into `[epsilon, 1 - epsilon]`. */
    override def project(mu: Double): Double = {
      if (mu < epsilon) {
        epsilon
      } else if (mu > 1.0 - epsilon) {
        1.0 - epsilon
      } else {
        mu
      }
    }
  }

  /**
   * Poisson exponential family
distribution.
   * The default link for the Poisson family is the log link.
   */
  private[regression] object Poisson extends Tweedie(1.0) {

    override val name: String = "poisson"

    override val defaultLink: Link = Log

    override def initialize(y: Double, weight: Double): Double = {
      require(y >= 0.0, "The response variable of Poisson family " +
        s"should be non-negative, but got $y")
      /*
        Force Poisson mean > 0 to avoid numerical instability in IRLS.
        R uses y + delta for initialization. See poisson()$initialize.
       */
      math.max(y, Tweedie.delta)
    }

    override def variance(mu: Double): Double = mu

    override def deviance(y: Double, mu: Double, weight: Double): Double = {
      2.0 * weight * (ylogy(y, mu) - (y - mu))
    }

    override def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double = {
      -2.0 * predictions.map { case (y: Double, mu: Double, weight: Double) =>
        weight * dist.Poisson(mu).logProbabilityOf(y.toInt)
      }.sum()
    }
  }

  /**
   * Gamma exponential family distribution.
   * The default link for the Gamma family is the inverse link.
   */
  private[regression] object Gamma extends Tweedie(2.0) {

    override val name: String = "gamma"

    override val defaultLink: Link = Inverse

    override def initialize(y: Double, weight: Double): Double = {
      require(y > 0.0, "The response variable of Gamma family " +
        s"should be positive, but got $y")
      y
    }

    override def variance(mu: Double): Double = mu * mu

    override def deviance(y: Double, mu: Double, weight: Double): Double = {
      -2.0 * weight * (math.log(y / mu) - (y - mu)/mu)
    }

    override def aic(
        predictions: RDD[(Double, Double, Double)],
        deviance: Double,
        numInstances: Double,
        weightSum: Double): Double = {
      // Dispersion is the deviance divided by the weight sum.
      val disp = deviance / weightSum
      -2.0 * predictions.map { case (y: Double, mu: Double, weight: Double) =>
        weight * dist.Gamma(1.0 / disp, mu * disp).logPdf(y)
      }.sum() + 2.0
    }
  }

  /**
   * A description of the link function to be used in the model.
* The link function provides the relationship between the linear predictor
   * and the mean of the distribution function.
   *
   * @param name the name of link function.
   */
  private[regression] abstract class Link(val name: String) extends Serializable {

    /** The link function. */
    def link(mu: Double): Double

    /** Derivative of the link function. */
    def deriv(mu: Double): Double

    /** The inverse link function. */
    def unlink(eta: Double): Double
  }

  private[regression] object Link {

    /**
     * Gets the [[Link]] object based on param family, link and linkPower.
     * If param family is set with "tweedie", return or construct link function object
     * according to linkPower; otherwise, return link function object according to link.
     *
     * @param params the parameter map containing family, link and linkPower
     */
    def fromParams(params: GeneralizedLinearRegressionBase): Link = {
      if (params.getFamily.toLowerCase(Locale.ROOT) == "tweedie") {
        params.getLinkPower match {
          case 0.0 => Log
          case 1.0 => Identity
          case -1.0 => Inverse
          case 0.5 => Sqrt
          case others => new Power(others)
        }
      } else {
        params.getLink.toLowerCase(Locale.ROOT) match {
          case Identity.name => Identity
          case Logit.name => Logit
          case Log.name => Log
          case Inverse.name => Inverse
          case Probit.name => Probit
          case CLogLog.name => CLogLog
          case Sqrt.name => Sqrt
        }
      }
    }
  }

  /** Power link function class; a link power of 0.0 is treated as the log link. */
  private[regression] class Power(val linkPower: Double)
    extends Link("power") {

    override def link(mu: Double): Double = {
      if (linkPower == 0.0) {
        math.log(mu)
      } else {
        math.pow(mu, linkPower)
      }
    }

    override def deriv(mu: Double): Double = {
      if (linkPower == 0.0) {
        1.0 / mu
      } else {
        linkPower * math.pow(mu, linkPower - 1.0)
      }
    }

    override def unlink(eta: Double): Double = {
      if (linkPower == 0.0) {
        math.exp(eta)
      } else {
        math.pow(eta, 1.0 / linkPower)
      }
    }
  }

  private[regression] object Identity extends Power(1.0) {

    override val name: String = "identity"

    override def link(mu: Double): Double = mu

    override def deriv(mu: Double): Double = 1.0

    override def unlink(eta: Double): Double = eta
  }

  private[regression] object Logit extends Link("logit") {

    override def link(mu: Double): Double = math.log(mu / (1.0 - mu))

    override def deriv(mu: Double): Double = 1.0 / (mu * (1.0 - mu))

    override def unlink(eta: Double): Double = 1.0 / (1.0 + math.exp(-1.0 * eta))
  }

  private[regression] object Log extends Power(0.0) {

    override val name: String = "log"

    override def link(mu: Double): Double = math.log(mu)

    override def deriv(mu: Double): Double = 1.0 / mu

    override def unlink(eta: Double): Double = math.exp(eta)
  }

  private[regression] object Inverse extends Power(-1.0) {

    override val name: String = "inverse"

    override def link(mu: Double): Double = 1.0 / mu

    override def deriv(mu: Double): Double = -1.0 * math.pow(mu, -2.0)

    override def unlink(eta: Double): Double = 1.0 / eta
  }

  private[regression] object Probit extends Link("probit") {

    override def link(mu: Double): Double = dist.Gaussian(0.0, 1.0).inverseCdf(mu)

    override def deriv(mu: Double): Double = {
      1.0 / dist.Gaussian(0.0, 1.0).pdf(dist.Gaussian(0.0, 1.0).inverseCdf(mu))
    }

    override def unlink(eta: Double): Double = dist.Gaussian(0.0, 1.0).cdf(eta)
  }

  private[regression] object CLogLog extends Link("cloglog") {

    override def link(mu: Double): Double = math.log(-math.log1p(-mu))

    override def deriv(mu: Double): Double = 1.0 / ((mu - 1.0) * math.log1p(-mu))

    override def unlink(eta: Double): Double = 1.0 - math.exp(-1.0 * math.exp(eta))
  }

  private[regression] object Sqrt extends Power(0.5) {

    override val name: String = "sqrt"

    override def link(mu: Double): Double = math.sqrt(mu)

    override def deriv(mu: Double): Double = 1.0 / (2.0 * math.sqrt(mu))

    override def unlink(eta: Double): Double = eta * eta
  }
}

/**
 * Model produced by [[GeneralizedLinearRegression]].
*/
@Since("2.0.0")
class GeneralizedLinearRegressionModel private[ml] (
    @Since("2.0.0") override val uid: String,
    @Since("2.0.0") val coefficients: Vector,
    @Since("2.0.0") val intercept: Double)
  extends RegressionModel[Vector, GeneralizedLinearRegressionModel]
  with GeneralizedLinearRegressionBase with MLWritable
  with HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary] {

  /**
   * Sets the link prediction (linear predictor) column name.
   *
   * @group setParam
   */
  @Since("2.0.0")
  def setLinkPredictionCol(value: String): this.type = set(linkPredictionCol, value)

  import GeneralizedLinearRegression._

  // Resolved lazily so that it reflects the params copied onto the model after construction.
  private lazy val familyAndLink = FamilyAndLink(this)

  /** Predicts the mean response for `features` with an offset of 0.0. */
  override def predict(features: Vector): Double = {
    predict(features, 0.0)
  }

  /**
   * Calculates the predicted value when offset is set.
   */
  private def predict(features: Vector, offset: Double): Double = {
    val eta = predictLink(features, offset)
    familyAndLink.fitted(eta)
  }

  /**
   * Calculates the link prediction (linear predictor) of the given instance.
   */
  private def predictLink(features: Vector, offset: Double): Double = {
    BLAS.dot(features, coefficients) + intercept + offset
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema)
    transformImpl(dataset)
  }

  /**
   * Appends the link prediction column (when configured) and the prediction column (when its
   * name is non-empty). When both are produced, the prediction is derived from the link
   * prediction column instead of recomputing the linear predictor.
   */
  override protected def transformImpl(dataset: Dataset[_]): DataFrame = {
    val outputSchema = transformSchema(dataset.schema, logging = true)

    val offset = if (!hasOffsetCol) lit(0.0) else col($(offsetCol)).cast(DoubleType)
    var outputData = dataset
    var numColsOutput = 0

    if (hasLinkPredictionCol) {
      val predLinkUDF = udf((features: Vector, offset: Double) => predictLink(features, offset))
      outputData = outputData
        .withColumn($(linkPredictionCol), predLinkUDF(col($(featuresCol)), offset),
          outputSchema($(linkPredictionCol)).metadata)
      numColsOutput += 1
    }

    if ($(predictionCol).nonEmpty) {
      if (hasLinkPredictionCol) {
        val predUDF = udf((eta: Double) => familyAndLink.fitted(eta))
        outputData = outputData
          .withColumn($(predictionCol), predUDF(col($(linkPredictionCol))),
            outputSchema($(predictionCol)).metadata)
      } else {
        val predUDF = udf((features: Vector, offset: Double) => predict(features, offset))
        outputData = outputData
          .withColumn($(predictionCol), predUDF(col($(featuresCol)), offset),
            outputSchema($(predictionCol)).metadata)
      }
      numColsOutput += 1
    }

    if (numColsOutput == 0) {
      this.logWarning(s"$uid: GeneralizedLinearRegressionModel.transform() does nothing" +
        " because no output columns were set.")
    }
    outputData.toDF
  }

  /**
   * Gets R-like summary of model on training set. An exception is
   * thrown if there is no summary available.
   */
  @Since("2.0.0")
  override def summary: GeneralizedLinearRegressionTrainingSummary = super.summary

  /**
   * Evaluate the model on the given dataset, returning a summary of the results.
   */
  @Since("2.0.0")
  def evaluate(dataset: Dataset[_]): GeneralizedLinearRegressionSummary = {
    new GeneralizedLinearRegressionSummary(dataset, this)
  }

  @Since("2.0.0")
  override def copy(extra: ParamMap): GeneralizedLinearRegressionModel = {
    val copied = copyValues(new GeneralizedLinearRegressionModel(uid, coefficients, intercept),
      extra)
    copied.setSummary(trainingSummary).setParent(parent)
  }

  /**
   * Returns a [[org.apache.spark.ml.util.MLWriter]] instance for this ML instance.
   *
   * For [[GeneralizedLinearRegressionModel]], this does NOT currently save the
   * training [[summary]]. An option to save [[summary]] may be added in the future.
   *
   */
  @Since("2.0.0")
  override def write: MLWriter =
    new GeneralizedLinearRegressionModel.GeneralizedLinearRegressionModelWriter(this)

  override val numFeatures: Int = coefficients.size

  /**
   * Describes the model by uid, family, effective link name and number of features.
   */
  @Since("3.0.0")
  override def toString: String = {
    // `link` has no default value, so `$(link)` throws NoSuchElementException whenever the
    // model relies on the family's default link (or on linkPower for the tweedie family).
    // Report the name of the link that is actually in effect instead.
    s"GeneralizedLinearRegressionModel: uid=$uid, family=${$(family)}, " +
      s"link=${familyAndLink.link.name}, numFeatures=$numFeatures"
  }
}

@Since("2.0.0")
object GeneralizedLinearRegressionModel extends MLReadable[GeneralizedLinearRegressionModel] {

  @Since("2.0.0")
  override def read: MLReader[GeneralizedLinearRegressionModel] =
    new GeneralizedLinearRegressionModelReader

  @Since("2.0.0")
  override def load(path: String): GeneralizedLinearRegressionModel = super.load(path)

  /** [[MLWriter]] instance for [[GeneralizedLinearRegressionModel]] */
  private[GeneralizedLinearRegressionModel]
  class GeneralizedLinearRegressionModelWriter(instance: GeneralizedLinearRegressionModel)
    extends MLWriter with Logging {

    private case class Data(intercept: Double, coefficients: Vector)

    override protected def saveImpl(path: String): Unit = {
      // Save metadata and Params
      DefaultParamsWriter.saveMetadata(instance, path, sc)
      // Save model data: intercept, coefficients
      val data = Data(instance.intercept, instance.coefficients)
      val dataPath = new Path(path, "data").toString
      sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath)
    }
  }
private class GeneralizedLinearRegressionModelReader extends MLReader[GeneralizedLinearRegressionModel] { /** Checked against metadata when loading model */ private val className = classOf[GeneralizedLinearRegressionModel].getName override def load(path: String): GeneralizedLinearRegressionModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath) .select("intercept", "coefficients").head() val intercept = data.getDouble(0) val coefficients = data.getAs[Vector](1) val model = new GeneralizedLinearRegressionModel(metadata.uid, coefficients, intercept) metadata.getAndSetParams(model) model } } } /** * Summary of [[GeneralizedLinearRegression]] model and predictions. * * @param dataset Dataset to be summarized. * @param origModel Model to be summarized. This is copied to create an internal * model which cannot be modified from outside. */ @Since("2.0.0") class GeneralizedLinearRegressionSummary private[regression] ( dataset: Dataset[_], origModel: GeneralizedLinearRegressionModel) extends Serializable { import GeneralizedLinearRegression._ /** * Field in "predictions" which gives the predicted value of each instance. * This is set to a new column name if the original model's `predictionCol` is not set. */ @Since("2.0.0") val predictionCol: String = { if (origModel.isDefined(origModel.predictionCol) && origModel.getPredictionCol.nonEmpty) { origModel.getPredictionCol } else { "prediction_" + java.util.UUID.randomUUID.toString } } /** * Private copy of model to ensure Params are not modified outside this class. * Coefficients is not a deep copy, but that is acceptable. * * @note [[predictionCol]] must be set correctly before the value of [[model]] is set, * and [[model]] must be set before [[predictions]] is set! 
*/ protected val model: GeneralizedLinearRegressionModel = origModel.copy(ParamMap.empty).setPredictionCol(predictionCol) /** * Predictions output by the model's `transform` method. */ @Since("2.0.0") @transient val predictions: DataFrame = model.transform(dataset) private[regression] lazy val familyLink: FamilyAndLink = FamilyAndLink(model) private[regression] lazy val family: Family = familyLink.family private[regression] lazy val link: Link = familyLink.link /** * summary row containing: * numInstances, weightSum, deviance, rss, weighted average of label - offset. */ private lazy val glrSummary = { val devUDF = udf { (label: Double, pred: Double, weight: Double) => family.deviance(label, pred, weight) } val devCol = sum(devUDF(label, prediction, weight)) val rssCol = if (model.getFamily.toLowerCase(Locale.ROOT) != Binomial.name && model.getFamily.toLowerCase(Locale.ROOT) != Poisson.name) { val rssUDF = udf { (label: Double, pred: Double, weight: Double) => (label - pred) * (label - pred) * weight / family.variance(pred) } sum(rssUDF(label, prediction, weight)) } else { lit(Double.NaN) } val avgCol = if (model.getFitIntercept && (!model.hasOffsetCol || (model.hasOffsetCol && family == Gaussian && link == Identity))) { sum((label - offset) * weight) / sum(weight) } else { lit(Double.NaN) } predictions .select(count(label), sum(weight), devCol, rssCol, avgCol) .head() } /** Number of instances in DataFrame predictions. */ @Since("2.2.0") lazy val numInstances: Long = glrSummary.getLong(0) /** * Name of features. If the name cannot be retrieved from attributes, * set default names to feature column name with numbered suffix "_0", "_1", and so on. 
*/ private[ml] lazy val featureNames: Array[String] = { val featureAttrs = AttributeGroup.fromStructField( dataset.schema(model.getFeaturesCol)).attributes if (featureAttrs.isDefined) { featureAttrs.get.map(_.name.get) } else { Array.tabulate[String](origModel.numFeatures)((x: Int) => model.getFeaturesCol + "_" + x) } } /** The numeric rank of the fitted linear model. */ @Since("2.0.0") lazy val rank: Long = if (model.getFitIntercept) { model.coefficients.size + 1 } else { model.coefficients.size } /** Degrees of freedom. */ @Since("2.0.0") lazy val degreesOfFreedom: Long = numInstances - rank /** The residual degrees of freedom. */ @Since("2.0.0") lazy val residualDegreeOfFreedom: Long = degreesOfFreedom /** The residual degrees of freedom for the null model. */ @Since("2.0.0") lazy val residualDegreeOfFreedomNull: Long = { if (model.getFitIntercept) numInstances - 1 else numInstances } private def label: Column = col(model.getLabelCol).cast(DoubleType) private def prediction: Column = col(predictionCol) private def weight: Column = { if (!model.hasWeightCol) lit(1.0) else col(model.getWeightCol) } private def offset: Column = { if (!model.hasOffsetCol) lit(0.0) else col(model.getOffsetCol).cast(DoubleType) } private[regression] lazy val devianceResiduals: DataFrame = { val drUDF = udf { (y: Double, mu: Double, weight: Double) => val r = math.sqrt(math.max(family.deviance(y, mu, weight), 0.0)) if (y > mu) r else -1.0 * r } predictions.select( drUDF(label, prediction, weight).as("devianceResiduals")) } private[regression] lazy val pearsonResiduals: DataFrame = { val prUDF = udf { mu: Double => family.variance(mu) } predictions.select(label.minus(prediction) .multiply(sqrt(weight)).divide(sqrt(prUDF(prediction))).as("pearsonResiduals")) } private[regression] lazy val workingResiduals: DataFrame = { val wrUDF = udf { (y: Double, mu: Double) => (y - mu) * link.deriv(mu) } predictions.select(wrUDF(label, prediction).as("workingResiduals")) } private[regression] lazy 
val responseResiduals: DataFrame = { predictions.select(label.minus(prediction).as("responseResiduals")) } /** * Get the default residuals (deviance residuals) of the fitted model. */ @Since("2.0.0") def residuals(): DataFrame = devianceResiduals /** * Get the residuals of the fitted model by type. * * @param residualsType The type of residuals which should be returned. * Supported options: deviance, pearson, working and response. */ @Since("2.0.0") def residuals(residualsType: String): DataFrame = { residualsType match { case "deviance" => devianceResiduals case "pearson" => pearsonResiduals case "working" => workingResiduals case "response" => responseResiduals case other => throw new UnsupportedOperationException( s"The residuals type $other is not supported by Generalized Linear Regression.") } } /** * The deviance for the null model. */ @Since("2.0.0") lazy val nullDeviance: Double = { val intercept: Double = if (!model.getFitIntercept) { 0.0 } else { /* Estimate intercept analytically when there is no offset, or when there is offset but the model is Gaussian family with identity link. Otherwise, fit an intercept only model. */ if (!model.hasOffsetCol || (model.hasOffsetCol && family == Gaussian && link == Identity)) { link.link(glrSummary.getDouble(4)) } else { // Create empty feature column and fit intercept only model using param setting from model val featureNull = "feature_" + java.util.UUID.randomUUID.toString val paramMap = model.extractParamMap() paramMap.put(model.featuresCol, featureNull) if (family.name != "tweedie") { paramMap.remove(model.variancePower) } val emptyVectorUDF = udf{ () => Vectors.zeros(0) } model.parent.fit( dataset.withColumn(featureNull, emptyVectorUDF()), paramMap ).intercept } } predictions.select(label, offset, weight).rdd.map { case Row(y: Double, offset: Double, weight: Double) => family.deviance(y, link.unlink(intercept + offset), weight) }.sum() } /** * The deviance for the fitted model. 
*/ @Since("2.0.0") lazy val deviance: Double = glrSummary.getDouble(2) /** * The dispersion of the fitted model. * It is taken as 1.0 for the "binomial" and "poisson" families, and otherwise * estimated by the residual Pearson's Chi-Squared statistic (which is defined as * sum of the squares of the Pearson residuals) divided by the residual degrees of freedom. */ @Since("2.0.0") lazy val dispersion: Double = if ( model.getFamily.toLowerCase(Locale.ROOT) == Binomial.name || model.getFamily.toLowerCase(Locale.ROOT) == Poisson.name) { 1.0 } else { val rss = glrSummary.getDouble(3) rss / degreesOfFreedom } /** Akaike Information Criterion (AIC) for the fitted model. */ @Since("2.0.0") lazy val aic: Double = { val weightSum = glrSummary.getDouble(1) val t = predictions.select( label, prediction, weight).rdd.map { case Row(label: Double, pred: Double, weight: Double) => (label, pred, weight) } family.aic(t, deviance, numInstances, weightSum) + 2 * rank } } /** * Summary of [[GeneralizedLinearRegression]] fitting and model. * * @param dataset Dataset to be summarized. * @param origModel Model to be summarized. This is copied to create an internal * model which cannot be modified from outside. * @param diagInvAtWA diagonal of matrix (A^T * W * A)^-1 in the last iteration * @param numIterations number of iterations * @param solver the solver algorithm used for model training */ @Since("2.0.0") class GeneralizedLinearRegressionTrainingSummary private[regression] ( dataset: Dataset[_], origModel: GeneralizedLinearRegressionModel, private val diagInvAtWA: Array[Double], @Since("2.0.0") val numIterations: Int, @Since("2.0.0") val solver: String) extends GeneralizedLinearRegressionSummary(dataset, origModel) with Serializable { import GeneralizedLinearRegression._ /** * Whether the underlying `WeightedLeastSquares` using the "normal" solver. 
*/ private[ml] val isNormalSolver: Boolean = { diagInvAtWA.length != 1 || diagInvAtWA(0) != 0 } /** * Standard error of estimated coefficients and intercept. * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") lazy val coefficientStandardErrors: Array[Double] = { if (isNormalSolver) { diagInvAtWA.map(_ * dispersion).map(math.sqrt) } else { throw new UnsupportedOperationException( "No Std. Error of coefficients available for this GeneralizedLinearRegressionModel") } } /** * T-statistic of estimated coefficients and intercept. * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") lazy val tValues: Array[Double] = { if (isNormalSolver) { val estimate = if (model.getFitIntercept) { Array.concat(model.coefficients.toArray, Array(model.intercept)) } else { model.coefficients.toArray } estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } } else { throw new UnsupportedOperationException( "No t-statistic available for this GeneralizedLinearRegressionModel") } } /** * Two-sided p-value of estimated coefficients and intercept. * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. 
*/ @Since("2.0.0") lazy val pValues: Array[Double] = { if (isNormalSolver) { if (model.getFamily.toLowerCase(Locale.ROOT) == Binomial.name || model.getFamily.toLowerCase(Locale.ROOT) == Poisson.name) { tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) } } else { tValues.map { x => 2.0 * (1.0 - dist.StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } } } else { throw new UnsupportedOperationException( "No p-value available for this GeneralizedLinearRegressionModel") } } /** * Coefficients with statistics: feature name, coefficients, standard error, tValue and pValue. */ private[ml] lazy val coefficientsWithStatistics: Array[ (String, Double, Double, Double, Double)] = { var featureNamesLocal = featureNames var coefficientsArray = model.coefficients.toArray var index = Array.range(0, coefficientsArray.length) if (model.getFitIntercept) { featureNamesLocal = featureNamesLocal :+ "(Intercept)" coefficientsArray = coefficientsArray :+ model.intercept // Reorder so that intercept comes first index = (coefficientsArray.length - 1) +: index } index.map { i => (featureNamesLocal(i), coefficientsArray(i), coefficientStandardErrors(i), tValues(i), pValues(i)) } } override def toString: String = { if (isNormalSolver) { def round(x: Double): String = { BigDecimal(x).setScale(4, BigDecimal.RoundingMode.HALF_UP).toString } val colNames = Array("Feature", "Estimate", "Std Error", "T Value", "P Value") val data = coefficientsWithStatistics.map { row => val strRow = row.productIterator.map { cell => val str = cell match { case s: String => s case n: Double => round(n) } // Truncate if length > 20 if (str.length > 20) { str.substring(0, 17) + "..." 
} else { str } } strRow.toArray } // Compute the width of each column val colWidths = colNames.map(_.length) data.foreach { strRow => strRow.zipWithIndex.foreach { case (cell: String, i: Int) => colWidths(i) = math.max(colWidths(i), cell.length) } } val sb = new StringBuilder // Output coefficients with statistics sb.append("Coefficients:\n") colNames.zipWithIndex.map { case (colName: String, i: Int) => StringUtils.leftPad(colName, colWidths(i)) }.addString(sb, "", " ", "\n") data.foreach { case strRow: Array[String] => strRow.zipWithIndex.map { case (cell: String, i: Int) => StringUtils.leftPad(cell.toString, colWidths(i)) }.addString(sb, "", " ", "\n") } sb.append("\n") sb.append(s"(Dispersion parameter for ${family.name} family taken to be " + s"${round(dispersion)})") sb.append("\n") val nd = s"Null deviance: ${round(nullDeviance)} on $degreesOfFreedom degrees of freedom" val rd = s"Residual deviance: ${round(deviance)} on $residualDegreeOfFreedom degrees of " + "freedom" val l = math.max(nd.length, rd.length) sb.append(StringUtils.leftPad(nd, l)) sb.append("\n") sb.append(StringUtils.leftPad(rd, l)) if (family.name != "tweedie") { sb.append("\n") sb.append(s"AIC: " + round(aic)) } sb.toString() } else { throw new UnsupportedOperationException( "No summary available for this GeneralizedLinearRegressionModel") } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy