datahub.spark.model.LineageUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datahub-spark-lineage Show documentation
Library to push data lineage from Spark to DataHub
package datahub.spark.model;
import com.linkedin.common.FabricType;
import com.linkedin.common.urn.DataFlowUrn;
import com.linkedin.common.urn.DataPlatformUrn;
import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.common.urn.TupleKey;
import com.linkedin.common.urn.Urn;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkContext$;
import org.apache.spark.SparkEnv;
import org.apache.spark.sql.SparkSession;
import scala.Option;
import scala.runtime.AbstractFunction0;
import scala.runtime.AbstractFunction1;
@Slf4j
public class LineageUtils {
// Registry of lineage consumers, keyed by name. ConcurrentHashMap because Spark
// listener callbacks may register/look up consumers from multiple threads.
// NOTE(review): raw Map type — values are presumably LineageConsumer instances
// registered elsewhere in this package; confirm and parameterize the generics.
private static Map consumers = new ConcurrentHashMap<>();
// hook for replacing paths during testing. Not the cleanest way, TODO improve.
/* This is for generating urn from a hash of the plan */
// private static Function PATH_REPLACER = (x -> x);
// Static utility class — private constructor prevents instantiation.
private LineageUtils() {}
/**
 * Builds a generic {@code dataPlatformInstance} URN for the given platform and
 * instance name, e.g. to attach lineage to a specific platform deployment.
 *
 * @param platform data platform name (wrapped in a {@link DataPlatformUrn})
 * @param instance platform instance identifier
 * @return the assembled {@link Urn}
 * @throws URISyntaxException if the URN parts cannot form a valid URN
 */
public static Urn dataPlatformInstanceUrn(String platform, String instance)
    throws URISyntaxException {
  // Build the key parts separately for readability: [platformUrn, instance].
  String platformUrn = new DataPlatformUrn(platform).toString();
  TupleKey key = new TupleKey(Arrays.asList(platformUrn, instance));
  return new Urn("dataPlatformInstance", key);
}
/**
 * Builds the DataFlow URN identifying a Spark application.
 *
 * <p>The Spark master URL is sanitized into a cluster identifier: ':' and '/'
 * become '_' and consecutive underscores are collapsed to one.
 *
 * @param master Spark master URL (e.g. {@code spark://host:7077})
 * @param appName Spark application name
 * @return DataFlow URN of the form (spark, appName, sanitizedMaster)
 */
public static DataFlowUrn flowUrn(String master, String appName) {
  // Single character class replaces the two separate literal substitutions;
  // the final pass collapses any run of underscores.
  String cluster = master.replaceAll("[:/]", "_").replaceAll("_+", "_");
  return new DataFlowUrn("spark", appName, cluster);
}
public static Option findSparkCtx() {
return SparkSession.getActiveSession()
.map(
new AbstractFunction1() {
@Override
public SparkContext apply(SparkSession sess) {
return sess.sparkContext();
}
})
.orElse(
new AbstractFunction0