// org.openmetadata.transport.OpenMetadataTransport (Maven / Gradle / Ivy)
// The newest version!
/*
* Copyright 2024 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This code has been referenced from
* https://github.com/Natural-Intelligence/openLineage-openMetadata-transporter.git
*/
package org.openmetadata.transport;
import static org.apache.http.Consts.UTF_8;
import static org.apache.http.HttpHeaders.ACCEPT;
import static org.apache.http.HttpHeaders.AUTHORIZATION;
import static org.apache.http.HttpHeaders.CONTENT_TYPE;
import static org.apache.http.entity.ContentType.APPLICATION_JSON;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.openlineage.client.OpenLineage;
import io.openlineage.client.OpenLineageClientException;
import io.openlineage.client.transports.Transport;
import java.io.Closeable;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
@Slf4j
public final class OpenMetadataTransport extends Transport implements Closeable {
// Spark-related string constants; their use sites are outside this excerpt
// (presumably sent to OpenMetadata as source/type labels — TODO confirm).
private static final String SPARK_LINEAGE_SOURCE = "SparkLineage";
private static final String PIPELINE_SOURCE_TYPE = "Spark";
// HTTP client, either passed in by the caller or built by withTimeout(...).
private final CloseableHttpClient http;
// Set from OpenMetadataConfig.getHostPort().
private final URI uri;
// Set from OpenMetadataConfig.getPipelineServiceName().
private final String pipelineServiceName;
// Filled in the constructor from the comma-separated
// OpenMetadataConfig.getDatabaseServiceNames() value; an empty list when that value is null.
private List databaseServiceNames;
// Set from OpenMetadataConfig.getPipelineName().
private final String pipelineName;
// Set from OpenMetadataConfig.getJwtToken().
private final String authToken;
// Optional settings copied from the config; either may be null.
private @Nullable final String pipelineSourceUrl;
private @Nullable final String pipelineDescription;
/**
 * Creates a transport whose HTTP client is built by {@code withTimeout} from the configured
 * timeout, then delegates to the two-argument constructor.
 *
 * @param openMetadataConfig connection and pipeline settings; annotated {@code @NonNull}
 */
public OpenMetadataTransport(@NonNull final OpenMetadataConfig openMetadataConfig) {
this(withTimeout(openMetadataConfig.getTimeout()), openMetadataConfig);
}
/**
 * Creates a transport that uses the supplied HTTP client and copies its settings from the
 * given configuration, then calls {@code createOrUpdatePipelineService()}.
 *
 * <p>The database service names are read from a single comma-separated configuration string.
 * Each entry is trimmed and blank entries are dropped, so values such as {@code "a, b"} or a
 * trailing comma do not produce service names with stray whitespace or empty names. A
 * {@code null} configuration value yields an empty list.
 *
 * @param httpClient HTTP client used by this transport; annotated {@code @NonNull}
 * @param openMetadataConfig connection and pipeline settings; annotated {@code @NonNull}
 */
public OpenMetadataTransport(
    @NonNull final CloseableHttpClient httpClient,
    @NonNull final OpenMetadataConfig openMetadataConfig) {
  this.http = httpClient;
  this.uri = openMetadataConfig.getHostPort();
  this.authToken = openMetadataConfig.getJwtToken();
  this.pipelineName = openMetadataConfig.getPipelineName();
  this.pipelineServiceName = openMetadataConfig.getPipelineServiceName();
  this.pipelineSourceUrl = openMetadataConfig.getPipelineSourceUrl();
  this.pipelineDescription = openMetadataConfig.getPipelineDescription();

  final String dbServiceNameStr = openMetadataConfig.getDatabaseServiceNames();
  if (dbServiceNameStr == null) {
    this.databaseServiceNames = new ArrayList<>();
  } else {
    // Splitting a non-null string cannot fail, so no try/catch is needed here; the field is
    // therefore always assigned a non-null list.
    this.databaseServiceNames =
        Arrays.stream(dbServiceNameStr.split(","))
            .map(String::trim)
            .filter(serviceName -> !serviceName.isEmpty())
            .collect(Collectors.toList());
  }

  createOrUpdatePipelineService();
}
/**
 * Builds an HTTP client whose connect, connection-request and socket timeouts all share one
 * value.
 *
 * @param timeout timeout that is multiplied by 1000 to obtain milliseconds, or {@code null} to
 *     use 5000 ms
 * @return a new client configured with that default request config
 */
private static CloseableHttpClient withTimeout(Double timeout) {
  final int timeoutMs = (timeout == null) ? 5000 : (int) (timeout * 1000);

  final RequestConfig.Builder requestConfig = RequestConfig.custom();
  requestConfig.setConnectTimeout(timeoutMs);
  requestConfig.setConnectionRequestTimeout(timeoutMs);
  requestConfig.setSocketTimeout(timeoutMs);

  final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
  clientBuilder.setDefaultRequestConfig(requestConfig.build());
  return clientBuilder.build();
}
/**
 * Forwards lineage to OpenMetadata for {@code COMPLETE} run events that carry at least one
 * input and one output dataset; every other event is ignored.
 *
 * <p>An event with no event type, or with null or empty inputs/outputs, is skipped quietly
 * instead of raising a {@code NullPointerException} that would be logged as an emit failure.
 * Any exception thrown while sending is logged and not propagated.
 *
 * @param runEvent the OpenLineage run event to inspect; annotated {@code @NonNull}
 */
@Override
public void emit(@NonNull OpenLineage.RunEvent runEvent) {
  try {
    // Constant-first equals tolerates an event whose type is not set.
    if (!OpenLineage.RunEvent.EventType.COMPLETE.equals(runEvent.getEventType())) {
      return;
    }
    if (runEvent.getInputs() == null || runEvent.getInputs().isEmpty()) {
      return;
    }
    if (runEvent.getOutputs() == null || runEvent.getOutputs().isEmpty()) {
      return;
    }
    sendToOpenMetadata(runEvent.getInputs(), runEvent.getOutputs());
  } catch (Exception e) {
    log.error("failed to emit event to OpenMetadata: {}", e.getMessage(), e);
  }
}
/**
 * Resolves a table name for a dataset, trying its symlinks first and the dataset's own
 * name/namespace second.
 *
 * @param dataset dataset to inspect; may be {@code null}
 * @return the resolved name, or {@code null} when the dataset is null or neither lookup
 *     yields a name
 */
private String getTableNames(OpenLineage.Dataset dataset) {
  if (dataset == null) {
    return null;
  }
  final String fromSymlinks = extractTableNamesFromSymlinks(dataset);
  // Handle table names from JDBC queries that don't have symlinks
  return fromSymlinks != null ? fromSymlinks : extractTableNamesFromDataSet(dataset);
}
/**
 * Returns the name of the first symlink identifier attached to the dataset.
 *
 * @param dataset dataset whose facets are inspected; must not be {@code null}
 * @return the first identifier's name, or {@code null} when the facets, the symlinks facet or
 *     its identifier collection is missing or empty
 */
private String extractTableNamesFromSymlinks(OpenLineage.Dataset dataset) {
  if (dataset.getFacets() == null
      || dataset.getFacets().getSymlinks() == null
      || dataset.getFacets().getSymlinks().getIdentifiers() == null) {
    return null;
  }
  for (OpenLineage.SymlinksDatasetFacetIdentifiers symlink :
      dataset.getFacets().getSymlinks().getIdentifiers()) {
    // Only the first identifier is consulted; the loop exits on its first iteration.
    return symlink.getName();
  }
  return null;
}
/**
 * Derives a table name from the dataset's own name and namespace via
 * {@code generateTableName}.
 *
 * @param dataset dataset to inspect; may be {@code null}
 * @return the generated name, or {@code null} when the dataset, its name or its namespace is
 *     {@code null}
 */
private String extractTableNamesFromDataSet(OpenLineage.Dataset dataset) {
  if (dataset == null || dataset.getName() == null || dataset.getNamespace() == null) {
    return null;
  }
  return generateTableName(dataset.getName(), dataset.getNamespace());
}
/**
 * Prefixes an undotted name with the database name obtained from the namespace.
 *
 * @param name dataset name; returned unchanged when it already contains a {@code '.'}
 * @param namespace dataset namespace passed to {@code extractDbNameFromUrl}
 * @return {@code dbName + "." + name} when the name has no dot and a database name is found,
 *     otherwise {@code name}
 */
private String generateTableName(String name, String namespace) {
  if (name.contains(".")) {
    return name;
  }
  final String dbName = extractDbNameFromUrl(namespace);
  return dbName == null ? name : dbName + "." + name;
}
public void sendToOpenMetadata(
List extends OpenLineage.Dataset> inputTables,
List extends OpenLineage.Dataset> outputTables) {
String pipelineId = createOrUpdatePipeline();
for (OpenLineage.Dataset fromTable : inputTables) {
String inputTableName = getTableNames(fromTable);
if (inputTableName == null) {
continue;
}
Map fromTableEntity = getTableEntity(inputTableName);
for (OpenLineage.Dataset toTable : outputTables) {
String outputTableName = getTableNames(toTable);
if (outputTableName == null) {
continue;
}
Map toTableEntity = getTableEntity(outputTableName);
createOrUpdateLineage(pipelineId, fromTableEntity, toTableEntity, fromTable, toTable);
log.info(
String.format(
"lineage was sent successfully to OpenMetadata for fromTable: %s, toTable: %s",
inputTableName, outputTableName));
}
}
}
private Map getTableEntity(String tableName, String dbServiceName) {
try {
HttpGet request = createGetTableRequest(tableName, dbServiceName);
Map response = sendRequest(request);
Map hitsResult = (Map) response.get("hits");
int totalHits =
Integer.parseInt(((Map) hitsResult.get("total")).get("value").toString());
if (totalHits == 0) {
log.debug("Failed to get id of table {} from OpenMetadata.", tableName);
return null;
}
List