All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.featran.MultiFeatureExtractor.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.featran

import scala.reflect.ClassTag

/**
 * Holder for the features extracted through a [[MultiFeatureSpec]].
 * The results can be split back out per spec, either as feature names or as feature vectors.
 *
 * @tparam M input collection type, e.g. `Array`, `List`
 * @tparam T input record type to extract features from
 */
class MultiFeatureExtractor[M[_]: CollectionType, T] private[featran] (
  private val fs: M[MultiFeatureSet[T]],
  @transient private val input: M[T],
  @transient private val settings: Option[M[String]]
) extends Serializable {
  import CollectionType.ops._

  // Underlying extractor this class delegates to. It is built over the same feature set
  // (cast to `M[FeatureSet[T]]`), input and settings that were handed to this class.
  private[this] val extractor =
    new FeatureExtractor(fs.asInstanceOf[M[FeatureSet[T]]], input, settings)

  /**
   * JSON settings of the [[MultiFeatureSpec]] together with the aggregated feature summary.
   *
   * Pass these to [[MultiFeatureSpec.extractWithSettings]] to skip the `reduce` step when
   * extracting further records of the same type.
   */
  @transient lazy val featureSettings: M[String] = extractor.featureSettings

  /** Names of the extracted features; ordering matches the values in [[featureValues]]. */
  @transient lazy val featureNames: M[Seq[Seq[String]]] =
    extractor.aggregate
      .cross(fs)
      .map { case (aggregate, featureSet) => featureSet.multiFeatureNames(aggregate) }

  /**
   * Values of the extracted features; ordering matches the names in [[featureNames]].
   *
   * This is the first component of every element of [[featureResults]].
   *
   * @tparam F output data type, e.g. `Array[Float]`, `Array[Double]`, `DenseVector[Float]`,
   *           `DenseVector[Double]`
   */
  def featureValues[F: FeatureBuilder: ClassTag]: M[Seq[F]] =
    featureResults[F].map { case (values, _, _) => values }

  /**
   * Values of the extracted features, ordered like the names in [[featureNames]], paired with
   * the rejections keyed on feature name and with the original input record.
   *
   * @tparam F output data type, e.g. `Array[Float]`, `Array[Double]`, `DenseVector[Float]`,
   *           `DenseVector[Double]`
   */
  def featureResults[F: FeatureBuilder: ClassTag]
    : M[(Seq[F], Seq[Map[String, FeatureRejection]], T)] = {
    // Attach the aggregate, then the feature set, to every element of `extractor.as`.
    val withAggregate = extractor.as.cross(extractor.aggregate)

    withAggregate
      .cross(fs)
      .map { case (((record, recordData), aggregate), featureSet) =>
        val builders = featureSet.multiFeatureBuilders
        // Called for its effect on `builders`; results and rejections are read back below.
        featureSet.multiFeatureValues(recordData, aggregate, builders)
        val values = builders.map(_.result).toSeq
        val rejections = builders.map(_.rejections)
        (values, rejections, record)
      }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy