![JAR search and dependency download from the Maven repository](/logo.png)
com.tencent.angel.sona.ml.feature.PolynomialExpansion.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.tencent.angel.sona.ml.feature
import scala.collection.mutable
import org.apache.commons.math3.util.CombinatoricsUtils
import org.apache.spark.linalg.{DenseVector, IntSparseVector, LongSparseVector, VectorUDT}
import com.tencent.angel.sona.ml.param.{IntParam, ParamMap, ParamValidators}
import com.tencent.angel.sona.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import com.tencent.angel.sona.ml.UnaryTransformer
import org.apache.spark.sql.types.DataType
import org.apache.spark.linalg
/**
* Perform feature expansion in a polynomial space. As described in the Wikipedia article
* [[https://en.wikipedia.org/wiki/Polynomial_expansion Polynomial expansion]],
* "In mathematics, an expansion of a product of sums expresses it as a sum of products by using
* the fact that multiplication distributes over addition". Take a 2-variable feature vector
* as an example: `(x, y)`, if we want to expand it with degree 2, then we get
* `(x, x * x, y, x * y, y * y)`.
*/
class PolynomialExpansion(override val uid: String)
extends UnaryTransformer[linalg.Vector, linalg.Vector, PolynomialExpansion] with DefaultParamsWritable {
/** Auxiliary constructor: takes the uid from `Identifiable.randomUID("poly")`. */
def this() = this(Identifiable.randomUID("poly"))
/**
* The polynomial degree to expand, which should be greater than or equal to 1. A value of 1 means
* no expansion.
* Default: 2
*
* @group param
*/
val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)",
ParamValidators.gtEq(1))
// Register 2 as the default value of `degree`.
setDefault(degree -> 2)
/** @group getParam */
def getDegree: Int = $(degree)
/** @group setParam */
def setDegree(value: Int): this.type = set(degree, value)
/**
* Builds the transform function: every input vector is handed to `PolynomialExpansion.expand`
* together with `$(degree)`, which is looked up inside the closure on each invocation.
*/
override protected def createTransformFunc: linalg.Vector => linalg.Vector = { v =>
PolynomialExpansion.expand(v, $(degree))
}
/** Declares the output data type as a new `VectorUDT` instance. */
override protected def outputDataType: DataType = new VectorUDT()
/** Copies this instance through `defaultCopy`, forwarding the `extra` param map. */
override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra)
}
/**
* The expansion is done via recursion. Given n features and degree d, the size after expansion is
* (n + d choose d) (including 1 and first-order values). For example, let f([a, b, c], 3) be the
* function that expands [a, b, c] to their monomials of degree 3. We have the following recursion:
*
*
* $$
* f([a, b, c], 3) = f([a, b], 3) ++ f([a, b], 2) * c ++ f([a, b], 1) * c^2 ++ [c^3]
* $$
*
*
* To handle sparsity, if c is zero, we can skip all monomials that contain it. We remember the
* current index and increment it properly for sparse input.
*/
object PolynomialExpansion extends DefaultParamsReadable[PolynomialExpansion] {
/**
 * Size of the polynomial space for `numFeatures` variables and the given `degree`,
 * i.e. the binomial coefficient C(numFeatures + degree, degree).
 *
 * The `require` rejects any count that does not fit in an `Int`.
 */
private def getPolySize(numFeatures: Int, degree: Int): Int = {
  val monomialCount = CombinatoricsUtils.binomialCoefficient(numFeatures + degree, degree)
  require(monomialCount <= Int.MaxValue)
  monomialCount.toInt
}
/**
 * Size of the polynomial space for `numFeatures` variables and the given `degree`,
 * i.e. the binomial coefficient C(numFeatures + degree, degree), as a `Long`.
 *
 * The value is accumulated in `BigInt`: after step `i` the accumulator equals
 * C(numFeatures + i, i), so every division is exact and neither the running product nor
 * the divisor can wrap around silently (a plain `Int` divisor already overflows at 13!).
 *
 * @param numFeatures number of input variables (expected to be non-negative)
 * @param degree      maximum total degree of the monomials
 * @return C(numFeatures + degree, degree); 1 when `degree` is 0
 * @throws IllegalArgumentException if the result does not fit in a `Long`
 */
private def getPolySizeLong(numFeatures: Long, degree: Int): Long = {
  val base = BigInt(numFeatures)
  val total = (1 to degree).foldLeft(BigInt(1)) { (acc, i) =>
    // C(n + i, i) = C(n + i - 1, i - 1) * (n + i) / i, and the division is always exact.
    acc * (base + i) / i
  }
  require(total.isValidLong,
    s"Polynomial size for $numFeatures features and degree $degree does not fit in a Long.")
  total.toLong
}
/**
 * Recursive worker for dense input.
 *
 * Writes into `polyValues`, starting at `curPolyIdx`, the monomials of total degree at most
 * `degree` built from `values(0 .. lastIdx)`, each scaled by `multiplier`. The last feature is
 * peeled off: for every power `p` of it (0 to `degree`) the remaining features are expanded with
 * degree `degree - p`.
 *
 * @param values      input feature values
 * @param lastIdx     index of the last feature still to be considered (negative when none left)
 * @param degree      remaining degree budget
 * @param multiplier  product of the feature powers chosen so far
 * @param polyValues  output buffer
 * @param curPolyIdx  output position of this sub-expansion; a negative value marks the constant
 *                    term, which is not stored
 * @return `curPolyIdx` advanced by the full size of this sub-expansion, whether or not anything
 *         was written
 */
private def expandDense(
    values: Array[Double],
    lastIdx: Int,
    degree: Int,
    multiplier: Double,
    polyValues: Array[Double],
    curPolyIdx: Int): Int = {
  val atLeaf = degree == 0 || lastIdx < 0
  // A zero multiplier contributes nothing; the output slots keep their current contents.
  if (multiplier != 0.0) {
    if (atLeaf) {
      // skip the very first 1
      if (curPolyIdx >= 0) {
        polyValues(curPolyIdx) = multiplier
      }
    } else {
      val feature = values(lastIdx)
      var coeff = multiplier
      var writePos = curPolyIdx
      var power = 0
      // Stop early once the coefficient hits zero: every higher power would be zero as well.
      while (power <= degree && coeff != 0.0) {
        writePos = expandDense(values, lastIdx - 1, degree - power, coeff, polyValues, writePos)
        coeff *= feature
        power += 1
      }
    }
  }
  curPolyIdx + getPolySize(lastIdx + 1, degree)
}
/**
 * Recursive worker for Int-indexed sparse input.
 *
 * Appends to `polyIndices` / `polyValues` the non-zero monomials of total degree at most `degree`
 * built from the active entries `0 .. lastIdx`, each scaled by `multiplier`.
 *
 * @param indices        feature indices of the active entries
 * @param values         values of the active entries
 * @param lastIdx        position (in `indices` / `values`) of the last active entry to consider
 * @param lastFeatureIdx index of the last feature covered by this sub-expansion
 * @param degree         remaining degree budget
 * @param multiplier     product of the feature powers chosen so far
 * @param polyIndices    builder receiving output indices
 * @param polyValues     builder receiving output values
 * @param curPolyIdx     output index of this sub-expansion; a negative value marks the constant
 *                       term, which is not emitted
 * @return `curPolyIdx` advanced by the full size of the sub-expansion over
 *         `lastFeatureIdx + 1` features, whether or not anything was appended
 */
private def expandSparse(
    indices: Array[Int],
    values: Array[Double],
    lastIdx: Int,
    lastFeatureIdx: Int,
    degree: Int,
    multiplier: Double,
    polyIndices: mutable.ArrayBuilder[Int],
    polyValues: mutable.ArrayBuilder[Double],
    curPolyIdx: Int): Int = {
  val atLeaf = degree == 0 || lastIdx < 0
  if (multiplier != 0.0) {
    if (atLeaf) {
      // skip the very first 1
      if (curPolyIdx >= 0) {
        polyIndices += curPolyIdx
        polyValues += multiplier
      }
    } else {
      val feature = values(lastIdx)
      // The sub-problem covers the features strictly below the current active one.
      val prevFeatureIdx = indices(lastIdx) - 1
      var coeff = multiplier
      var writePos = curPolyIdx
      var power = 0
      while (power <= degree && coeff != 0.0) {
        writePos = expandSparse(indices, values, lastIdx - 1, prevFeatureIdx, degree - power,
          coeff, polyIndices, polyValues, writePos)
        coeff *= feature
        power += 1
      }
    }
  }
  curPolyIdx + getPolySize(lastFeatureIdx + 1, degree)
}
/**
 * Recursive worker for Long-indexed sparse input.
 *
 * Appends to `polyIndices` / `polyValues` the non-zero monomials of total degree at most `degree`
 * built from the active entries `0 .. lastIdx`, each scaled by `multiplier`.
 *
 * @param indices        feature indices of the active entries
 * @param values         values of the active entries
 * @param lastIdx        position (in `indices` / `values`) of the last active entry to consider
 * @param lastFeatureIdx index of the last feature covered by this sub-expansion
 * @param degree         remaining degree budget
 * @param multiplier     product of the feature powers chosen so far
 * @param polyIndices    builder receiving output indices
 * @param polyValues     builder receiving output values
 * @param curPolyIdx     output index of this sub-expansion; a negative value marks the constant
 *                       term, which is not emitted
 * @return `curPolyIdx` advanced by the full size of the sub-expansion over
 *         `lastFeatureIdx + 1` features, whether or not anything was appended
 */
private def expandSparseLong(
    indices: Array[Long],
    values: Array[Double],
    lastIdx: Int,
    lastFeatureIdx: Long,
    degree: Int,
    multiplier: Double,
    polyIndices: mutable.ArrayBuilder[Long],
    polyValues: mutable.ArrayBuilder[Double],
    curPolyIdx: Long): Long = {
  val atLeaf = degree == 0 || lastIdx < 0
  if (multiplier != 0.0) {
    if (atLeaf) {
      // skip the very first 1
      if (curPolyIdx >= 0) {
        polyIndices += curPolyIdx
        polyValues += multiplier
      }
    } else {
      val feature = values(lastIdx)
      // The sub-problem covers the features strictly below the current active one.
      val prevFeatureIdx = indices(lastIdx) - 1
      var coeff = multiplier
      var writePos = curPolyIdx
      var power = 0
      while (power <= degree && coeff != 0.0) {
        writePos = expandSparseLong(indices, values, lastIdx - 1, prevFeatureIdx, degree - power,
          coeff, polyIndices, polyValues, writePos)
        coeff *= feature
        power += 1
      }
    }
  }
  curPolyIdx + getPolySizeLong(lastFeatureIdx + 1, degree)
}
/**
 * Expands a dense vector to the given `degree`.
 *
 * The output buffer has one slot fewer than the polynomial size because the constant term is
 * not stored; the recursion is started at output position -1 for the same reason.
 */
private def expandDense(dv: DenseVector, degree: Int): DenseVector = {
  val numFeatures = dv.size.toInt
  val expanded = Array.ofDim[Double](getPolySize(numFeatures, degree) - 1)
  expandDense(dv.values, numFeatures - 1, degree, 1.0, expanded, -1)
  new DenseVector(expanded)
}
/**
 * Expands an Int-indexed sparse vector to the given `degree`.
 *
 * The result has dimension `getPolySize(size, degree) - 1` (constant term dropped). The builders
 * are pre-sized from the number of stored entries, since only those can produce non-zero
 * monomials.
 */
private def expandSparseInt(sv: IntSparseVector, degree: Int): IntSparseVector = {
  val numFeatures = sv.size.toInt
  val expandedDim = getPolySize(numFeatures, degree) - 1
  val activeCount = sv.values.length
  val capacityHint = getPolySize(activeCount, degree) - 1
  val idxBuilder = mutable.ArrayBuilder.make[Int]
  idxBuilder.sizeHint(capacityHint)
  val valBuilder = mutable.ArrayBuilder.make[Double]
  valBuilder.sizeHint(capacityHint)
  // Start at output index -1 so the constant term is skipped.
  expandSparse(
    sv.indices, sv.values, activeCount - 1, numFeatures - 1, degree, 1.0,
    idxBuilder, valBuilder, -1)
  new IntSparseVector(expandedDim, idxBuilder.result(), valBuilder.result())
}
/**
 * Expands a Long-indexed sparse vector to the given `degree`.
 *
 * The result has dimension `getPolySizeLong(size, degree) - 1` (constant term dropped). The
 * builders are pre-sized from the number of stored entries, since only those can produce
 * non-zero monomials.
 *
 * @param sv     input vector
 * @param degree polynomial degree to expand to
 * @return the expanded vector
 */
private def expandSparseLong(sv: LongSparseVector, degree: Int): LongSparseVector = {
  // Pass the dimension as-is: narrowing it with `.toInt` would truncate sizes above
  // Int.MaxValue and yield an output dimension inconsistent with the emitted indices.
  val polySize = getPolySizeLong(sv.size, degree)
  val nnz = sv.values.length
  val nnzPolySize = getPolySize(nnz, degree)
  val polyIndices = mutable.ArrayBuilder.make[Long]
  polyIndices.sizeHint(nnzPolySize - 1)
  val polyValues = mutable.ArrayBuilder.make[Double]
  polyValues.sizeHint(nnzPolySize - 1)
  // Start at output index -1 so the constant term is skipped.
  expandSparseLong(
    sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, -1)
  new LongSparseVector(polySize - 1, polyIndices.result(), polyValues.result())
}
/**
 * Expands `v` to the given polynomial `degree`, dispatching on the concrete vector type.
 *
 * @param v      input vector; dense, Int-indexed sparse and Long-indexed sparse are handled
 * @param degree polynomial degree to expand to
 * @return the expanded vector, of the same kind as the input
 * @throws IllegalArgumentException if `v` is none of the handled vector types
 */
private[sona] def expand(v: linalg.Vector, degree: Int): linalg.Vector = {
  v match {
    case dv: DenseVector => expandDense(dv, degree)
    case sv: IntSparseVector => expandSparseInt(sv, degree)
    case sv: LongSparseVector => expandSparseLong(sv, degree)
    case other =>
      // Name the offending runtime type so the failure can be diagnosed; null also lands here.
      val typeName = if (other == null) "null" else other.getClass.getName
      throw new IllegalArgumentException(
        s"PolynomialExpansion does not support vector type $typeName.")
  }
}
/** Loads a `PolynomialExpansion` from `path` by forwarding to the inherited `load`. */
override def load(path: String): PolynomialExpansion = super.load(path)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy