All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.ml.feature.PolynomialExpansion.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.ml.feature

import scala.collection.mutable

import org.apache.commons.math3.util.CombinatoricsUtils

import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.sql.types.DataType

/**
 * Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion,
 * which is available at
 * <a href="http://en.wikipedia.org/wiki/Polynomial_expansion">Polynomial expansion (Wikipedia)</a>
 * , "In mathematics, an expansion of a product of sums expresses it as a sum of products by using
 * the fact that multiplication distributes over addition". Take a 2-variable feature vector
 * as an example: `(x, y)`, if we want to expand it with degree 2, then we get
 * `(x, x * x, y, x * y, y * y)`.
 */
@Since("1.4.0")
class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable {

  /** Creates an instance with a randomly generated uid prefixed by "poly". */
  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("poly"))

  /**
   * Degree of the polynomial expansion. Must be greater than or equal to 1; a value of 1 leaves
   * the features unexpanded.
   * Default: 2
   * @group param
   */
  @Since("1.4.0")
  val degree: IntParam = new IntParam(
    this,
    "degree",
    "the polynomial degree to expand (>= 1)",
    ParamValidators.gtEq(1))

  setDefault(degree, 2)

  /** @group getParam */
  @Since("1.4.0")
  def getDegree: Int = getOrDefault(degree)

  /** @group setParam */
  @Since("1.4.0")
  def setDegree(value: Int): this.type = set(degree, value)

  /**
   * Builds the per-row function. The degree is looked up inside the closure, i.e. each time the
   * returned function is applied, and the work is delegated to the companion object's `expand`.
   */
  override protected def createTransformFunc: Vector => Vector =
    (features: Vector) => PolynomialExpansion.expand(features, getDegree)

  /** Rejects any input column whose type is not a [[VectorUDT]]. */
  override protected def validateInputType(inputType: DataType): Unit = {
    val isVectorColumn = inputType match {
      case _: VectorUDT => true
      case _ => false
    }
    // The message is a by-name argument of `require`, so it is only built on failure.
    require(isVectorColumn,
      s"Input type must be ${(new VectorUDT).catalogString} but got ${inputType.catalogString}.")
  }

  /** The output column is always a vector column. */
  override protected def outputDataType: DataType = {
    new VectorUDT()
  }

  @Since("1.4.1")
  override def copy(extra: ParamMap): PolynomialExpansion = {
    defaultCopy(extra)
  }

  /** Short description made of the uid and the current degree. */
  @Since("3.0.0")
  override def toString: String = {
    val currentDegree = getDegree
    s"PolynomialExpansion: uid=$uid, degree=$currentDegree"
  }
}

/**
 * The expansion is done via recursion. Given n features and degree d, the size after expansion is
 * (n + d choose d) (including 1 and first-order values). For example, let f([a, b, c], 3) be the
 * function that expands [a, b, c] to their monomials of degree 3. We have the following recursion:
 *
 * <blockquote>
 *    $$
 *    f([a, b, c], 3) &= f([a, b], 3) ++ f([a, b], 2) * c ++ f([a, b], 1) * c^2 ++ [c^3]
 *    $$
 * </blockquote>
 *
 * To handle sparsity, if c is zero, we can skip all monomials that contain it. We remember the
 * current index and increment it properly for sparse input.
 */
@Since("1.6.0")
object PolynomialExpansion extends DefaultParamsReadable[PolynomialExpansion] {

  /**
   * Number of monomials of total degree at most `degree` over `numFeatures` variables, i.e.
   * (numFeatures + degree choose degree). The count includes the constant term 1.
   *
   * @param numFeatures number of input variables
   * @param degree maximum total degree
   * @return the binomial coefficient as an `Int`
   * @throws IllegalArgumentException if the coefficient does not fit in an `Int`
   */
  private def getPolySize(numFeatures: Int, degree: Int): Int = {
    val n = CombinatoricsUtils.binomialCoefficient(numFeatures + degree, degree)
    // The message is passed by name, so it costs nothing on the (hot) success path.
    require(n <= Integer.MAX_VALUE,
      s"The polynomial expansion of $numFeatures features with degree $degree has $n terms, " +
        s"which exceeds the maximum supported size ${Integer.MAX_VALUE}.")
    n.toInt
  }

  /**
   * Recursively writes the expansion of `values(0 to lastIdx)` up to `degree`, scaled by
   * `multiplier`, into `polyValues` starting at `curPolyIdx`.
   *
   * @param values the dense input values
   * @param lastIdx index of the last input value still to be expanded; negative when none is left
   * @param degree remaining degree available to the monomials of this sub-expansion
   * @param multiplier product of the powers already chosen for the values after `lastIdx`
   * @param polyValues output array, written in place
   * @param curPolyIdx output position where this sub-expansion starts; a negative position
   *                   denotes the leading constant term 1, which is not stored
   * @return the output position right after this sub-expansion, namely
   *         `curPolyIdx + getPolySize(lastIdx + 1, degree)`
   */
  private def expandDense(
      values: Array[Double],
      lastIdx: Int,
      degree: Int,
      multiplier: Double,
      polyValues: Array[Double],
      curPolyIdx: Int): Int = {
    if (multiplier == 0.0) {
      // do nothing: every monomial of this sub-expansion is zero, so nothing is written and only
      // the position is advanced below
    } else if (degree == 0 || lastIdx < 0) {
      if (curPolyIdx >= 0) { // skip the very first 1
        polyValues(curPolyIdx) = multiplier
      }
    } else {
      val v = values(lastIdx)
      val lastIdx1 = lastIdx - 1
      var alpha = multiplier
      var i = 0
      var curStart = curPolyIdx
      // Iteration i expands the values before lastIdx with degree (degree - i), scaled by
      // multiplier * v^i. Once alpha reaches zero all remaining terms vanish.
      while (i <= degree && alpha != 0.0) {
        curStart = expandDense(values, lastIdx1, degree - i, alpha, polyValues, curStart)
        i += 1
        alpha *= v
      }
    }
    curPolyIdx + getPolySize(lastIdx + 1, degree)
  }

  /**
   * Sparse counterpart of `expandDense`: appends the (position, value) pairs of the expansion to
   * the two builders instead of writing into a preallocated array.
   *
   * @param indices feature indices of the stored entries of the input vector
   * @param values stored values, parallel to `indices`
   * @param lastIdx position in `indices`/`values` of the last stored entry still to be expanded;
   *                negative when none is left
   * @param lastFeatureIdx feature index of the last feature covered by this sub-expansion; it is
   *                       used to size the sub-expansion in the full feature space
   * @param degree remaining degree available to the monomials of this sub-expansion
   * @param multiplier product of the powers already chosen for the later entries
   * @param polyIndices builder receiving the output positions
   * @param polyValues builder receiving the output values
   * @param curPolyIdx output position where this sub-expansion starts; a negative position
   *                   denotes the leading constant term 1, which is not stored
   * @return the output position right after this sub-expansion, namely
   *         `curPolyIdx + getPolySize(lastFeatureIdx + 1, degree)`
   */
  private def expandSparse(
      indices: Array[Int],
      values: Array[Double],
      lastIdx: Int,
      lastFeatureIdx: Int,
      degree: Int,
      multiplier: Double,
      polyIndices: mutable.ArrayBuilder[Int],
      polyValues: mutable.ArrayBuilder[Double],
      curPolyIdx: Int): Int = {
    if (multiplier == 0.0) {
      // do nothing
    } else if (degree == 0 || lastIdx < 0) {
      if (curPolyIdx >= 0) { // skip the very first 1
        polyIndices += curPolyIdx
        polyValues += multiplier
      }
    } else {
      // Skip all zeros at the tail: the recursion continues from the feature just before the
      // last stored entry, so features without a stored value are never visited.
      val v = values(lastIdx)
      val lastIdx1 = lastIdx - 1
      val lastFeatureIdx1 = indices(lastIdx) - 1
      var alpha = multiplier
      var curStart = curPolyIdx
      var i = 0
      while (i <= degree && alpha != 0.0) {
        curStart = expandSparse(indices, values, lastIdx1, lastFeatureIdx1, degree - i, alpha,
          polyIndices, polyValues, curStart)
        i += 1
        alpha *= v
      }
    }
    curPolyIdx + getPolySize(lastFeatureIdx + 1, degree)
  }

  /**
   * Expands a dense vector. The result has `getPolySize(dv.size, degree) - 1` entries because the
   * constant term 1 is dropped.
   */
  private def expand(dv: DenseVector, degree: Int): DenseVector = {
    val n = dv.size
    val polySize = getPolySize(n, degree)
    val polyValues = new Array[Double](polySize - 1)
    // Starting at position -1 makes the constant term land on the skipped (negative) slot.
    expandDense(dv.values, n - 1, degree, 1.0, polyValues, -1)
    new DenseVector(polyValues)
  }

  /**
   * Expands a sparse vector. The result has `getPolySize(sv.size, degree) - 1` dimensions; the
   * builders are pre-sized from the number of stored entries of the input.
   */
  private def expand(sv: SparseVector, degree: Int): SparseVector = {
    val polySize = getPolySize(sv.size, degree)
    val nnz = sv.values.length
    val nnzPolySize = getPolySize(nnz, degree)
    val polyIndices = mutable.ArrayBuilder.make[Int]
    polyIndices.sizeHint(nnzPolySize - 1)
    val polyValues = mutable.ArrayBuilder.make[Double]
    polyValues.sizeHint(nnzPolySize - 1)
    expandSparse(
      sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, -1)
    new SparseVector(polySize - 1, polyIndices.result(), polyValues.result())
  }

  /**
   * Expands `v` in polynomial space up to `degree`, dispatching on the concrete vector type.
   *
   * @param v the vector to expand
   * @param degree the polynomial degree
   * @return the expanded vector, dense for dense input and sparse for sparse input
   * @throws IllegalArgumentException if `v` is neither a dense nor a sparse vector
   */
  private[feature] def expand(v: Vector, degree: Int): Vector = {
    v match {
      case dv: DenseVector => expand(dv, degree)
      case sv: SparseVector => expand(sv, degree)
      case other =>
        // `other` may be null, so the class name is looked up defensively.
        val actual = Option(other).map(_.getClass.getName).getOrElse("null")
        throw new IllegalArgumentException(
          s"Unsupported vector: expected a DenseVector or SparseVector but got $actual.")
    }
  }

  @Since("1.6.0")
  override def load(path: String): PolynomialExpansion = super.load(path)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy