com.linkedin.dagli.transformer.MappedIterable Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of core Show documentation
DAG-oriented machine learning framework for bug-resistant, readable, efficient, maintainable and trivially deployable models in Java and other JVM languages
There is a newer version: 15.0.0-beta9
Show newest version
package com.linkedin.dagli.transformer;

import com.linkedin.dagli.annotation.equality.ValueEquality;
import com.linkedin.dagli.dag.DynamicDAG;
import com.linkedin.dagli.objectio.ObjectReader;
import com.linkedin.dagli.placeholder.Placeholder;
import com.linkedin.dagli.preparer.AbstractPreparerDynamic;
import com.linkedin.dagli.preparer.PreparerContext;
import com.linkedin.dagli.preparer.PreparerDynamic;
import com.linkedin.dagli.preparer.PreparerMode;
import com.linkedin.dagli.preparer.PreparerResult;
import com.linkedin.dagli.preparer.PreparerResultMixed;
import com.linkedin.dagli.producer.MissingInput;
import com.linkedin.dagli.producer.Producer;
import com.linkedin.dagli.util.invariant.Arguments;
import com.linkedin.dagli.util.collection.Iterables;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

/**
 * Executes a wrapped preparable transformer by mapping the values of a particular "mapped input", consuming each of the
 * elements of an inputted iterable (collection, list, etc.) and producing the result as a list.  Except for this
 * single "mapped" input, all the inputs are "inherited" from the wrapped transformer.
 *
 * If MappedIterable is provided the iterables [1, 2, 3], [4], [5, 6], the wrapped transformer simply sees the inputs
 * 1, 2, 3, 4, 5, 6, and its output is packaged into the lists [t(1), t(2), t(3)], [t(4)], [t(5), t(6)], where "t" is
 * the transformation.
 *
 * The mapped input always occupies index 0 of this transformer's input list; the code below reads and writes that
 * slot directly.
 *
 * If your transformer is already prepared, use {@link MappedIterable.Prepared}.
 *
 * NOTE(review): this copy of the file appears to have lost its generic type arguments (the text between angle
 * brackets is missing in the class header, in method signatures, and inside the {@code ...} examples in the Javadoc
 * below).  Restore them from the upstream source before compiling.  The two type parameters, whose names are missing
 * here, were documented as:
 *   - the type of value accepted by the wrapped transformer's mapped input
 *   - the type of the transformed value
 */
@ValueEquality
public class MappedIterable
    extends AbstractPreparableTransformerDynamic, MappedIterable.Prepared, MappedIterable> {
  private static final long serialVersionUID = 1;

  // The wrapped transformer; remains null until setPreparable(...) has been called (via the one-argument constructor
  // or withTransformer(...)).
  private PreparableTransformer _preparable = null;

  /**
   * Builds the wrapped transformer from the given factory function and adopts its inputs.
   *
   * A fresh placeholder is handed to the factory function, and the resulting transformer is turned into a DAG via
   * {@code DynamicDAG.fromMinimalInputBoundedSubgraph(...)} with that placeholder as a DAG input.  This instance's
   * input list is then replaced by a copy of the DAG's input list, except that index 0 keeps whatever mapped input
   * this instance already had.
   *
   * @param preparableWithInputFunction a function that obtains a (possibly preparable) transformer given a
   *                                    placeholder representing the value to be mapped
   */
  private void setPreparable(
      Function, ? extends Transformer> preparableWithInputFunction) {
    Placeholder iterablePlaceholder = new Placeholder<>("Mapped Input Placeholder");
    _preparable = PreparableTransformer.cast(
        DynamicDAG.fromMinimalInputBoundedSubgraph(preparableWithInputFunction.apply(iterablePlaceholder),
            iterablePlaceholder));
    // remember the current mapped input before _inputs is replaced...
    Producer existingMappedInput = getMappedInput();
    _inputs = new ArrayList<>(_preparable.internalAPI().getInputList());
    // ...and put it back at index 0 (presumably the slot of the placeholder in the DAG's input list -- confirm)
    _inputs.set(0, existingMappedInput);
  }

  /**
   * Returns a copy of this instance that will accept the given producer as its mapped input.  All other inputs are
   * inherited from the wrapped transformer.
   *
   * @param mappedInput the mapped input
   * @return a copy of this instance that will accept the given producer as its mapped input
   */
  public MappedIterable withMappedInput(Producer> mappedInput) {
    return clone(c -> c._inputs.set(0, mappedInput));
  }

  /**
   * @return the producer currently stored at index 0 of the input list, i.e. the mapped input
   */
  @SuppressWarnings("unchecked")
  private Producer getMappedInput() {
    return (Producer) _inputs.get(0);
  }

  /**
   * Delegates to the wrapped transformer.
   * NOTE(review): dereferences {@code _preparable} without a null check, so this presumably must not be called before
   * a transformer has been set -- confirm.
   */
  @Override
  protected boolean hasIdempotentPreparer() {
    return _preparable.internalAPI().hasIdempotentPreparer();
  }

  /**
   * Delegates to the wrapped transformer.
   * NOTE(review): dereferences {@code _preparable} without a null check, so this presumably must not be called before
   * a transformer has been set -- confirm.
   */
  @Override
  protected boolean hasAlwaysConstantResult() {
    return _preparable.internalAPI().hasAlwaysConstantResult();
  }

  /**
   * Creates an instance that will obtain a (possibly preparable) transformer from the given "factory
   * function", which will almost always be a {@code withInput(...)}-type method corresponding to the input you wish to
   * map.
   *
   * Let's say we're training a multinomial {@code LiblinearClassification}, but our data are packaged in such a way
   * that each String label is associated with a list of feature vectors, with each [label, feature vector] pair
   * constituting a training example.  Then we can write something like this:
   * {@code
   *   Placeholder label = new Placeholder<>();
   *   Placeholder> featureVectors = new Placeholder<>();
   *   LiblinearClassification liblinear =
   *     new LiblinearClassification().withLabelInput(label);
   *   MappedIterable> classification =
   *      new MappedIterable<>(liblinear::withFeaturesInput).withMappedInput(featureVectors);
   * }
   *
   * During preparation, {@code classification} will then provide a [String label, DenseVector features] pair for every
   * element in the {@code featureVectors} list, and, during inference, it will correspondingly produce a list of
   * predicted labels (one for each feature vector).
   *
   * @param preparableWithMappedInputFunction a function that obtains a (possibly preparable) transformer given a
   *                                          provided placeholder representing the value to be mapped
   */
  public MappedIterable(
      Function, ? extends Transformer> preparableWithMappedInputFunction) {
    this();
    setPreparable(preparableWithMappedInputFunction);
  }

  /**
   * Creates a new MappedIterable whose only input is a {@code MissingInput}.  You must specify a preparable using
   * {@link #withTransformer(Function)} prior to using this instance.
   */
  public MappedIterable() {
    super(MissingInput.get());
  }

  /**
   * Returns a copy of this instance that will obtain a (possibly preparable) transformer from the given "factory
   * function", which will almost always be a {@code withInput(...)}-type method corresponding to the input you wish to
   * map.
   *
   * Let's say we're training a multinomial {@code LiblinearClassification}, but our data are packaged in such a way
   * that each String label is associated with a list of feature vectors, with each [label, feature vector] pair
   * constituting a training example.  Then we can write something like this:
   * {@code
   *   Placeholder label = new Placeholder<>();
   *   Placeholder> featureVectors = new Placeholder<>();
   *   LiblinearClassification liblinear =
   *     new LiblinearClassification().withLabelInput(label);
   *   MappedIterable> classification =
   *      new MappedIterable<>(liblinear::withFeaturesInput).withMappedInput(featureVectors);
   * }
   *
   * During preparation, {@code classification} will then provide a [String label, DenseVector features] pair for every
   * element in the {@code featureVectors} list, and, during inference, it will correspondingly produce a list of
   * predicted labels (one for each feature vector).
   *
   * @param preparableWithInputFunction a function that obtains a (possibly preparable) transformer given a provided
   *                                    placeholder representing the value to be mapped
   * @return a copy of this instance that will map the specified transformer
   */
  public MappedIterable withTransformer(
      Function, ? extends Transformer> preparableWithInputFunction) {
    return clone(c -> c.setPreparable(preparableWithInputFunction));
  }

  /**
   * Preparer that adapts the wrapped transformer's preparer: every incoming example, whose first value is an
   * iterable, is expanded into one example per element of that iterable before being passed on.
   */
  private static class Preparer
      extends AbstractPreparerDynamic, Prepared> {
    // the wrapped transformer's preparer, which receives the expanded ("exploded") examples
    com.linkedin.dagli.preparer.Preparer _preparer;

    /**
     * Wraps the given preparer.
     *
     * @param preparer the wrapped transformer's preparer; its mode is checked against BATCH and STREAM
     *                 (presumably {@code Arguments.inSet} throws for any other mode -- confirm)
     */
    Preparer(com.linkedin.dagli.preparer.Preparer preparer) {
      Arguments.inSet(preparer.getMode(), () -> "Preparer mode " + preparer.getMode() + " is unknown to MappedIterable",
          PreparerMode.BATCH, PreparerMode.STREAM);
      _preparer = preparer;
    }

    /**
     * Passes one example per element of the mapped iterable to the wrapped preparer.
     *
     * The passed array is modified in place: index 0 is overwritten with each element in turn, so on return it holds
     * the last element (or still the iterable itself, if the iterable was empty).
     *
     * @param values the input values for one example; index 0 must be an {@link Iterable}
     */
    @Override
    public void processUnsafe(Object[] values) {
      Iterable iterable = (Iterable) values[0]; // mapped iterable input is always first
      for (Object val : iterable) {
        values[0] = val;
        _preparer.processUnsafe(values);
      }
    }

    /**
     * Finishes preparation of the wrapped preparer and wraps both of the prepared transformers it returns in
     * {@link Prepared} instances.
     *
     * @param inputs the examples seen during preparation, or null; when non-null, each example is expanded into one
     *               example per element of its mapped iterable before being handed to the wrapped preparer
     * @return a result holding the {@link Prepared} wrappers for new data and for preparation data
     */
    @Override
    public PreparerResult> finishUnsafe(
        ObjectReader inputs) {

      final PreparerResultMixed, ? extends PreparedTransformer>
          prepResult;

      if (inputs != null) {
        // each element of the mapped iterable gets its own clone of the example's array, with index 0 replaced by
        // that element, so the arrays provided by the original reader are never modified
        ObjectReader explodedInputs =
            inputs.lazyFlatMap(inputsArray -> Iterables.map((Iterable) inputsArray[0], val -> {
              Object[] res = inputsArray.clone();
              res[0] = val;
              return res;
            }));
        prepResult = _preparer.finishUnsafe(explodedInputs);
      } else {
        // no inputs were provided to us, so none are provided to the wrapped preparer either
        prepResult = _preparer.finishUnsafe(null);
      }

      return new PreparerResult.Builder>()
          .withTransformerForNewData(new Prepared<>(prepResult.getPreparedTransformerForNewData()))
          .withTransformerForPreparationData(new Prepared<>(prepResult.getPreparedTransformerForPreparationData()))
          .build();
    }

    /**
     * @return the mode of the wrapped preparer
     */
    @Override
    public PreparerMode getMode() {
      return _preparer.getMode(); // inherit the mode of our wrapped preparer
    }
  }

  /**
   * Creates a preparer that wraps the preparer of the wrapped transformer.
   *
   * @param context the preparer context; the wrapped preparer receives a copy whose example-count bounds have been
   *                widened to [0, Long.MAX_VALUE]
   * @return a preparer that expands each mapped iterable into individual examples for the wrapped preparer
   */
  @Override
  protected PreparerDynamic, Prepared> getPreparer(PreparerContext context) {
    // we have no way of knowing how many mapped examples will actually be seen by the preparer a priori, although
    // we'll keep the existing estimate (which implies a guess that each mapped iterable input will contain ~1 item).
    context = context.withExampleCountLowerBound(0).withExampleCountUpperBound(Long.MAX_VALUE);
    return new Preparer<>(_preparable.internalAPI().getPreparer(context));
  }

  /**
   * Executes a wrapped prepared transformer by mapping the values of a particular "mapped input", consuming each of the
   * elements of an inputted iterable (collection, list, etc.) and producing the result as a list.  Except for this
   * single "mapped" input, all the inputs are "inherited" from the wrapped transformer.
   *
   * Given a mapped input of [1, 2, 3], [4], [5, 6], the wrapped transformer is provided the inputs 1, 2, 3, 4, 5, 6
   * and its output is packaged into the lists [t(1), t(2), t(3)], [t(4)], [t(5), t(6)], where "t" is the
   * transformation.
   *
   * If your transformer is preparable, use {@link MappedIterable}.
   *
   * NOTE(review): as in the enclosing class, the generic type arguments (including the names of the two type
   * parameters) are missing from this copy of the file.  The type parameters were documented as:
   *   - the type of value accepted by the wrapped transformer's mapped input
   *   - the type of the transformed value
   */
  @ValueEquality
  public static class Prepared
      extends AbstractPreparedStatefulTransformerDynamic, Object, Prepared> {
    private static final long serialVersionUID = 1;

    // The wrapped prepared transformer; set by the private constructor or by setPrepared(...).
    private PreparedTransformer _prepared;

    /**
     * Builds the wrapped prepared transformer from the given factory function and adopts its inputs.
     *
     * A fresh placeholder is handed to the factory function, and the resulting transformer is turned into a prepared
     * DAG via {@code DynamicDAG.Prepared.fromMinimalInputBoundedSubgraph(...)} with that placeholder as a DAG input.
     * This instance's input list is then replaced by a copy of the DAG's input list, except that index 0 keeps
     * whatever mapped input this instance already had.
     *
     * @param preparedWithInputFunction a function that obtains a prepared transformer given a placeholder
     *                                  representing the value to be mapped
     */
    private void setPrepared(
        Function, ? extends PreparedTransformer> preparedWithInputFunction) {
      Placeholder iterablePlaceholder = new Placeholder<>("Mapped Input Placeholder");
      _prepared = PreparedTransformer.cast(
          DynamicDAG.Prepared.fromMinimalInputBoundedSubgraph(preparedWithInputFunction.apply(iterablePlaceholder),
              iterablePlaceholder));
      // remember the current mapped input before _inputs is replaced...
      Producer existingMappedInput = getMappedInput();
      _inputs = new ArrayList<>(_prepared.internalAPI().getInputList());
      // ...and put it back at index 0 (presumably the slot of the placeholder in the DAG's input list -- confirm)
      _inputs.set(0, existingMappedInput);
    }

    /**
     * Returns a copy of this instance that will accept the given producer as its mapped input.  All other inputs are
     * inherited from the wrapped transformer.
     *
     * @param mappedInput the mapped input
     * @return a copy of this instance that will accept the given producer as its mapped input
     */
    public Prepared withMappedInput(Producer> mappedInput) {
      return clone(c -> c._inputs.set(0, mappedInput));
    }

    /**
     * @return the producer currently stored at index 0 of the input list, i.e. the mapped input
     */
    @SuppressWarnings("unchecked")
    private Producer getMappedInput() {
      return (Producer) _inputs.get(0);
    }

    /**
     * Delegates to the wrapped prepared transformer.
     * NOTE(review): dereferences {@code _prepared} without a null check, so this presumably must not be called before
     * a transformer has been set -- confirm.
     */
    @Override
    protected boolean hasAlwaysConstantResult() {
      return _prepared.internalAPI().hasAlwaysConstantResult();
    }

    /**
     * Creates a new instance that will wrap the provided (prepared) transformer.
     *
     * NOTE(review): unlike the public no-argument constructor, this one makes no explicit {@code super(...)} call and
     * does not assign {@code _inputs}; presumably the inputs are supplied later by the framework -- confirm.
     *
     * @param prepared the transformer to be wrapped
     */
    private Prepared(PreparedTransformer prepared) {
      _prepared = PreparedTransformer.cast(prepared);
    }

    /**
     * Creates a new instance whose only input is a {@code MissingInput}.  The wrapped transformer will need to be set
     * using {@link #witPrepared(Function)}.
     */
    public Prepared() {
      super(MissingInput.get());
    }

    /**
     * Creates a new instance that will obtain a prepared transformer from the given "factory function", which
     * will almost always be a {@code withInput(...)}-type method.
     *
     * For example, if we had a hypothetical {@code Concatenation} transformer that concatenates its two String inputs
     * provided as {@code Concatenation::withInputA(...)} and {@code Concatenation::withInputB(...)}, and wanted to
     * concatenate the String {@code "PREFIX"} to every String in lists of Strings, we could write something like:
     * {@code
     *    Placeholder> stringList = new Placeholder<>();
     *    Concatenation prefixedString = new Concatenation().withInputA(new Constant<>("PREFIX"));
     *    MappedIterable.Prepared prefixedStrings =
     *      new MappedIterable.Prepared<>(prefixedString::withInputB).withMappedInput(stringList);
     * }
     *
     * @param preparedWithMappedInputFunction a function that obtains the prepared transformer to wrap, given a
     *                                        placeholder representing the value to be mapped
     */
    public Prepared(
        Function, ? extends PreparedTransformer> preparedWithMappedInputFunction) {
      this();
      setPrepared(preparedWithMappedInputFunction);
    }

    /**
     * Returns a copy of this instance that will obtain a prepared transformer from the given "factory
     * function", which will almost always be a {@code withInput(...)}-type method.
     *
     * For example, if we had a hypothetical {@code Concatenation} transformer that concatenates its two String inputs
     * provided as {@code Concatenation::withInputA(...)} and {@code Concatenation::withInputB(...)}, and wanted to
     * concatenate the String {@code "PREFIX"} to every String in lists of Strings, we could write something like:
     * {@code
     *    Placeholder> stringList = new Placeholder<>();
     *    Concatenation prefixedString = new Concatenation().withInputA(new Constant<>("PREFIX"));
     *    MappedIterable.Prepared prefixedStrings =
     *      new MappedIterable.Prepared<>(prefixedString::withInputB).withMappedInput(stringList);
     * }
     *
     * NOTE(review): the method name {@code witPrepared} looks like a misspelling (the enclosing class names its
     * counterpart {@code withTransformer}); renaming would change the public API, so it is only flagged here.
     *
     * @param preparedWithMappedInputFunction a function that obtains the prepared transformer to wrap, given a
     *                                        placeholder representing the value to be mapped
     * @return a copy of this instance that will wrap the specified prepared transformer
     */
    public MappedIterable.Prepared witPrepared(
        Function, ? extends PreparedTransformer> preparedWithMappedInputFunction) {
      return clone(c -> c.setPrepared(preparedWithMappedInputFunction));
    }

    /**
     * Applies the wrapped transformer once per element of the mapped iterable.
     *
     * @param executionCache the execution cache, passed through unchanged to the wrapped transformer
     * @param values the input values for one example; index 0 must be an {@link Iterable}
     * @return the result of {@code Iterables.map(...)} over the mapped iterable, i.e. the wrapped transformer's
     *         result for each element
     */
    @Override
    protected List apply(Object executionCache, List values) {
      Iterable iterable = (Iterable) values.get(0);
      // a single scratch copy of the inputs is reused for every element; only index 0 changes between calls
      // NOTE(review): this assumes applyUnsafe does not retain the array and that the mapping function is not run
      // concurrently -- confirm against Iterables.map
      Object[] valuesArray = values.toArray();
      return Iterables.map(iterable, val -> {
        valuesArray[0] = val;
        return _prepared.internalAPI().applyUnsafe(executionCache, valuesArray);
      });
    }

    /**
     * Delegates creation of the execution cache to the wrapped transformer, passing the example count guess through
     * unchanged.
     *
     * @param exampleCountGuess the example count guess forwarded to the wrapped transformer
     * @return the execution cache created by the wrapped transformer
     */
    @Override
    protected Object createExecutionCache(long exampleCountGuess) {
      return _prepared.internalAPI().createExecutionCache(exampleCountGuess);
    }
  }
}