
// io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor Maven / Gradle / Ivy
/*
/* Copyright 2018-2024 contributors to the OpenLineage project
/* SPDX-License-Identifier: Apache-2.0
*/
package io.openlineage.spark.agent.lifecycle.plan;
import io.openlineage.client.OpenLineage;
import io.openlineage.client.utils.DatasetIdentifier;
import io.openlineage.spark.agent.util.PathUtils;
import io.openlineage.spark.api.OpenLineageContext;
import io.openlineage.spark.api.QueryPlanVisitor;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.catalyst.catalog.CatalogTable;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand;
import scala.Option;
/**
 * {@link LogicalPlan} visitor that handles an {@link InsertIntoHadoopFsRelationCommand} and
 * reports the dataset being written as a single {@link OpenLineage.OutputDataset}.
 *
 * <p>The dataset is identified by the command's catalog table when one is attached, and by the
 * command's output path otherwise.
 */
public class InsertIntoHadoopFsRelationVisitor
    extends QueryPlanVisitor<InsertIntoHadoopFsRelationCommand, OpenLineage.OutputDataset> {

  /**
   * Creates the visitor.
   *
   * @param context the OpenLineage context, handed to the superclass constructor
   */
  public InsertIntoHadoopFsRelationVisitor(OpenLineageContext context) {
    super(context);
  }

  /**
   * Builds the output dataset for the given command.
   *
   * <ul>
   *   <li>Without a catalog table: the dataset is identified by the output path and described by
   *       the schema of the command's query; {@link SaveMode#Overwrite} adds the {@code
   *       OVERWRITE} lifecycle state change.
   *   <li>With a catalog table: the dataset is identified by and described with the catalog
   *       table; {@link SaveMode#Overwrite} adds the {@code CREATE} lifecycle state change.
   * </ul>
   *
   * @param x the logical plan; cast to {@link InsertIntoHadoopFsRelationCommand} without a check,
   *     so the caller must only pass plans of that type
   * @return a singleton list holding the output dataset
   */
  @Override
  public List<OpenLineage.OutputDataset> apply(LogicalPlan x) {
    InsertIntoHadoopFsRelationCommand command = (InsertIntoHadoopFsRelationCommand) x;
    Option<CatalogTable> catalogTable = command.catalogTable();
    DatasetIdentifier di = resolveIdentifier(command);
    boolean overwrite = SaveMode.Overwrite == command.mode();

    if (catalogTable.isEmpty()) {
      // Path-based write: the schema comes from the query that produces the data.
      if (overwrite) {
        return Collections.singletonList(
            outputDataset()
                .getDataset(
                    di,
                    command.query().schema(),
                    OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.OVERWRITE));
      }
      return Collections.singletonList(
          outputDataset().getDataset(di, command.query().schema()));
    }

    // Catalog-backed write: the schema comes from the catalog table itself.
    if (overwrite) {
      // NOTE(review): this branch reports CREATE while the path-based branch reports OVERWRITE.
      // The asymmetry is kept exactly as it was; confirm that it is intended.
      return Collections.singletonList(
          outputDataset()
              .getDataset(
                  di,
                  catalogTable.get().schema(),
                  OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE));
    }
    return Collections.singletonList(
        outputDataset().getDataset(di, catalogTable.get().schema()));
  }

  /**
   * Derives the job name suffix from the name of the dataset written by the command.
   *
   * @param command the insert command being executed
   * @return the dataset name after {@code trimPath} has been applied to it
   */
  @Override
  public Optional<String> jobNameSuffix(InsertIntoHadoopFsRelationCommand command) {
    return Optional.of(trimPath(resolveIdentifier(command).getName()));
  }

  /**
   * Resolves the identifier of the dataset the command writes to: the catalog table when the
   * command carries one, otherwise the output path (with {@code "file"} passed as the second
   * argument of {@link PathUtils#fromURI}).
   */
  private static DatasetIdentifier resolveIdentifier(InsertIntoHadoopFsRelationCommand command) {
    Option<CatalogTable> catalogTable = command.catalogTable();
    if (catalogTable.isEmpty()) {
      return PathUtils.fromURI(command.outputPath().toUri(), "file");
    }
    return PathUtils.fromCatalogTable(catalogTable.get());
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy