All downloads are free. The search and download features use the official Maven repository.

com.alpine.model.pack.preprocess.PolynomialModel.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2015 Alpine Data Labs
 * All rights reserved.
 */
package com.alpine.model.pack.preprocess

import com.alpine.model.RowModel
import com.alpine.model.pack.sql.SimpleSQLTransformer
import com.alpine.model.pack.util.TransformerUtil
import com.alpine.plugin.core.io.{ColumnDef, ColumnType}
import com.alpine.sql.SQLGenerator
import com.alpine.transformer.Transformer
import com.alpine.transformer.sql.{ColumnarSQLExpression, SQLTransformer}

/**
  * Model that creates output features that are polynomial combinations of the input features.
  *
  *
  * We use java.lang.Double for the type of the numeric values, because the scala Double type information
  * is lost by scala/Gson and the deserialization fails badly for edge cases (e.g. Double.NaN).
  *
  * If the exponents are a matrix
  * {{{[[1,2,0], [0.5,3,2]]}}}
  * Then the transformation of a row (x1, x2, x3) will be
  * {{{(y1, y2) = (x1 * x2 pow 2, sqrt(x1) * x2 pow 3 * x3 pow 2).}}}
  */
case class PolynomialModel(exponents: Seq[Seq[java.lang.Double]], inputFeatures: Seq[ColumnDef], override val identifier: String = "") extends RowModel {

  /** Builds the in-memory transformer that evaluates this model on individual rows. */
  override def transformer: Transformer = PolynomialTransformer(this)

  /**
    * Builds the SQL counterpart of this model.
    *
    * @param sqlGenerator generator handed on to the SQL transformer.
    * @return always a defined Option wrapping a [[PolynomialSQLTransformer]].
    */
  override def sqlTransformer(sqlGenerator: SQLGenerator): Option[SQLTransformer] = {
    val sqlScorer = PolynomialSQLTransformer(this, sqlGenerator)
    Some(sqlScorer)
  }

  /**
    * One Double-typed output column per row of the exponent matrix,
    * named "y_0", "y_1", ... after the index of that row.
    */
  def outputFeatures = {
    for (termIndex <- exponents.indices) yield ColumnDef("y_" + termIndex, ColumnType.Double)
  }

}

case class PolynomialTransformer(model: PolynomialModel) extends Transformer {

  // Scratch buffer with one slot per input feature; it is overwritten on every call to apply.
  // NOTE(review): the buffer is shared between calls, so concurrent use of one instance
  // presumably is not safe - confirm against how callers use transformers.
  private val inputBuffer = Array.ofDim[Double](model.transformationSchema.inputFeatures.length)

  // The model's exponent matrix converted once to primitive arrays, one inner array per output term.
  private val powers: Array[Array[Double]] = model.exponents.map(termPowers => TransformerUtil.javaDoubleSeqToArray(termPowers)).toArray

  /**
    * Evaluates every polynomial term of the model on the given row.
    *
    * @param row input row; it is copied into the scratch buffer through
    *            TransformerUtil.fillRowToDoubleArray before any term is evaluated.
    * @return one value per row of the exponent matrix, in the same order.
    */
  override def apply(row: Row): Row = {
    TransformerUtil.fillRowToDoubleArray(row, inputBuffer)
    val outputs = new Array[Double](powers.length)
    var term = 0
    while (term < powers.length) {
      outputs(term) = evaluateTerm(powers(term))
      term += 1
    }
    outputs
  }

  // Product over all input features of (feature value raised to its exponent), accumulated
  // left to right starting from 1.0. math.pow(0, 0) is 1, so a zero exponent needs no special case.
  private def evaluateTerm(termPowers: Array[Double]): Double = {
    var product = 1.0
    var feature = 0
    while (feature < inputBuffer.length) {
      product *= math.pow(inputBuffer(feature), termPowers(feature))
      feature += 1
    }
    product
  }
}

case class PolynomialSQLTransformer(model: PolynomialModel, sqlGenerator: SQLGenerator) extends SimpleSQLTransformer {

  /**
    * Builds one SQL expression per row of the model's exponent matrix.
    *
    * Each expression is the product (joined with " * ") of one factor per input column:
    *  - exponent 1 contributes the escaped column name on its own,
    *  - exponent 0 contributes nothing, which avoids "ERROR: zero raised to zero is undefined",
    *  - any other exponent contributes POWER(column, exponent).
    * When no factor remains the expression is the literal 1, the identity for multiplication.
    */
  override def getSQLExpressions: Seq[ColumnarSQLExpression] = {
    model.exponents.map { termPowers =>
      val factors = (termPowers zip inputColumnNames).collect {
        // The bare column is slightly more efficient than POWER(column, 1).
        case (power, column) if power == 1 =>
          column.escape(sqlGenerator)
        // Zero exponents match neither case and are therefore dropped.
        case (power, column) if power != 0 =>
          s"POWER(${column.escape(sqlGenerator)}, $power)"
      }
      val product = factors.mkString(" * ")
      ColumnarSQLExpression(if (product.isEmpty) "1" else product)
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy