All downloads are free. Search and download functionality uses the official Maven repository.

org.canova.spark.functions.pairdata.PathToKeyFunction Maven / Gradle / Ivy

There is a newer version: 0.0.0.17
Show newest version
package org.canova.spark.functions.pairdata;

import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.input.PortableDataStream;
import scala.Tuple2;
import scala.Tuple3;

/** Given a Tuple2, where the first value is the full path, map this
 * to a Tuple3 where the first value is a key (using a {@link PathToKeyConverter}),
 * second is an index, and third is the original data stream
 */
public class PathToKeyFunction implements PairFunction, String, Tuple3> {

    private PathToKeyConverter converter;
    private int index;

    public PathToKeyFunction(int index, PathToKeyConverter converter){
        this.index = index;
        this.converter = converter;
    }

    @Override
    public Tuple2> call(Tuple2 in) throws Exception {
        Tuple3 out = new Tuple3<>(in._1(),index,in._2());
        String newKey = converter.getKey(in._1());
        return new Tuple2<>(newKey,out);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy