All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.microsoft.ml.spark.lime.BreezeUtils.scala Maven / Gradle / Ivy

The newest version!
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.lime

import breeze.generic.UFunc
import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.regression.{LassoResult, LeastSquaresRegressionResult, leastSquaresDestructive}
import spire.implicits.cfor

/*
This is a copy of the LassoCalculator class in Breeze, with two differences:
 - one requirement check of the Breeze version has been removed
 - IMPROVE_THRESHOLD is actually used as the convergence tolerance (it was an unused parameter before)
 */
//scalastyle: off
/**
 * Fits lasso coefficients one column at a time: each sweep visits every column, fits that column
 * alone against the residual left by all the other columns, and soft-thresholds the fitted
 * coefficient by `lambda`. Sweeps repeat until no coefficient moves by more than
 * `IMPROVE_THRESHOLD` or `MAX_ITER` sweeps have run.
 *
 * @param data              matrix with one row per element of `outputs`; one coefficient is fitted per column
 * @param outputs           target values, one per row of `data`
 * @param lambda            soft-thresholding level applied to every fitted coefficient
 * @param workArray         scratch buffer handed to `leastSquaresDestructive`; must hold at least
 *                          `2 * data.rows * data.cols` elements
 * @param MAX_ITER          maximum number of sweeps over all columns
 * @param IMPROVE_THRESHOLD a coefficient whose absolute change within a sweep exceeds this value
 *                          triggers another sweep; `0.0` means "any change at all"
 */
private case class LassoCalculator2(data: DenseMatrix[Double],
                                    outputs: DenseVector[Double],
                                    lambda: Double,
                                    workArray: Array[Double],
                                    MAX_ITER: Int = 100,
                                    IMPROVE_THRESHOLD: Double = 1e-8) {
  /*
   * The main purpose of this complicated calculator object is to recycle all the assorted work arrays.
   * If we didn't write it this way, we'd have to manually thread all the work arrays
   * throughout a slew of functions.
   */
  require(
    data.rows == outputs.size,
    s"data has ${data.rows} rows but outputs has ${outputs.size} elements"
  )
  require(
    workArray.length >= 2 * data.rows * data.cols,
    s"workArray must hold at least ${2 * data.rows * data.cols} elements but holds ${workArray.length}"
  )

  // Reusable buffers: both are refilled by copyColumn before every single-column fit.
  private val outputCopy = DenseVector.zeros[Double](outputs.size)
  private val singleColumnMatrix = new DenseMatrix[Double](data.rows, 1)
  // Current coefficient estimates, one per column of data; starts at all zeros.
  private val resultVec = DenseVector.zeros[Double](data.cols)

  /**
   * The fitted result: the coefficient vector, the value produced by `computeRsquared`, and `lambda`.
   * Computed once, on first access.
   */
  lazy val result: LassoResult = {

    var improvedResult = true
    var iter = 0

    while (improvedResult && (iter < MAX_ITER)) {
      iter += 1
      improvedResult = false
      cfor(0)(i => i < data.cols, i => i + 1)(i => {
        val eoc = estimateOneColumn(i)
        val oldCoefficient = resultVec(i)
        resultVec(i) = shrink(eoc.coefficients(0))
        // Compare against the tolerance instead of using exact floating-point inequality, so that
        // changes at rounding-noise level do not keep the loop running until MAX_ITER.
        if (math.abs(resultVec(i) - oldCoefficient) > IMPROVE_THRESHOLD) {
          improvedResult = true
        }
      })
    }

    LassoResult(resultVec, computeRsquared, lambda)
  }

  /**
   * Soft thresholding: moves `x` towards zero by `lambda`, returning `0.0` when `|x|` does not
   * exceed `lambda`.
   */
  private def shrink(x: Double): Double = {
    val sb = math.signum(x)
    val ab = sb * x // |x|
    if (ab > lambda) {
      sb * (ab - lambda)
    } else {
      0.0
    }
  }

  private def copyColumn(column: Int): Unit = {
    /* After running this routine, outputCopy should consist of the residuals after multiplying
     * data against resultVec, excluding the specified column.
     *
     * The single column matrix should then be set to equal the data from that column.
     */
    require(column < data.cols)
    require(column >= 0)
    cfor(0)(i => i < outputs.size, i => i + 1)(i => {
      singleColumnMatrix(i, 0) = data(i, column)

      var o = outputs(i)
      cfor(0)(j => j < data.cols, j => j + 1)(j => {
        if (j != column) {
          o -= data(i, j) * resultVec(j)
        }
      })
      outputCopy(i) = o
    })
  }

  /**
   * Sum over all rows of the squared difference between `outputs` and `data * resultVec`.
   * NOTE: despite the name this is the residual sum of squares, not a normalised R-squared;
   * the value is passed to `LassoResult` unchanged.
   */
  private def computeRsquared: Double = {
    var r2 = 0.0
    cfor(0)(i => i < outputs.size, i => i + 1)(i => {
      var o = outputs(i)
      cfor(0)(j => j < data.cols, j => j + 1)(j => {
        o -= data(i, j) * resultVec(j)
      })
      r2 += o * o
    })
    r2
  }

  private def estimateOneColumn(column: Int): LeastSquaresRegressionResult = {
    /*
     * Goal of this routine is to use the specified column to explain as much of the residual
     * as possible, after using the already specified values in other columns.
     */
    copyColumn(column)
    // The "destructive" solver is handed the two scratch buffers that copyColumn just refilled,
    // never data or outputs themselves.
    leastSquaresDestructive(singleColumnMatrix, outputCopy, workArray)
  }
}

/** Public entry point for the lasso fit implemented by `LassoCalculator2`. */
object LassoUtils {

  /**
   * Runs `LassoCalculator2` on copies of the given inputs and returns its result.
   *
   * @param data    matrix passed (as a copy) to the calculator
   * @param outputs vector passed (as a copy) to the calculator
   * @param lambda  soft-thresholding level forwarded to the calculator
   * @return the calculator's `result`
   */
  def lasso(data: DenseMatrix[Double], outputs: DenseVector[Double], lambda: Double): LassoResult = {
    // The calculator works on private copies rather than on the caller's objects.
    val dataCopy = data.copy
    val outputsCopy = outputs.copy

    // Scratch space of 2 * rows * cols elements, but never an empty array.
    val scratchLength = math.max(1, data.rows * data.cols * 2)
    val scratch = new Array[Double](scratchLength)

    val calculator = LassoCalculator2(dataCopy, outputsCopy, lambda, scratch)
    calculator.result
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy