/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.kafka.bridge;

import com.google.common.annotations.VisibleForTesting;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.kafka.model.KafkaDataTypes;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.utils.AtlasConfigurationUtil;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.atlas.utils.KafkaUtils;
import org.apache.avro.Schema;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;
public class KafkaBridge {
private static final Logger LOG = LoggerFactory.getLogger(KafkaBridge.class);
private static final String KAFKA_SCHEMA_REGISTRY_ENV_VARIABLE = System.getenv("KAFKA_SCHEMA_REGISTRY");
public static String KAFKA_SCHEMA_REGISTRY_HOSTNAME = "localhost"; // overridden by the KAFKA_SCHEMA_REGISTRY env variable when set (see main)
private static final int EXIT_CODE_SUCCESS = 0;
private static final int EXIT_CODE_FAILED = 1;
private static final String ATLAS_ENDPOINT = "atlas.rest.address";
private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
private static final String CLUSTER_NAME_KEY = "atlas.cluster.name";
private static final String KAFKA_METADATA_NAMESPACE = "atlas.metadata.namespace";
private static final String DEFAULT_CLUSTER_NAME = "primary";
private static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";
private static final String DESCRIPTION_ATTR = "description";
private static final String PARTITION_COUNT = "partitionCount";
private static final String REPLICATION_FACTOR = "replicationFactor";
private static final String NAME = "name";
private static final String URI = "uri";
private static final String CLUSTERNAME = "clusterName";
private static final String TOPIC = "topic";
private static final String FORMAT_KAFKA_TOPIC_QUALIFIED_NAME = "%s@%s";
private static final String TYPE = "type";
private static final String NAMESPACE = "namespace";
private static final String FIELDS = "fields";
private static final String AVRO_SCHEMA = "avroSchema";
private static final String SCHEMA_VERSION_ID = "versionId";
private static final String FORMAT_KAFKA_SCHEMA_QUALIFIED_NAME = "%s@%s@%s";
private static final String FORMAT_KAFKA_FIELD_QUALIFIED_NAME = "%s@%s@%s@%s";
private final List<String> availableTopics;
private final String metadataNamespace;
private final AtlasClientV2 atlasClientV2;
private final KafkaUtils kafkaUtils;
private final CloseableHttpClient httpClient;
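/**
 * Command-line entry point. Connects to Atlas (basic auth or Kerberos), then imports
 * either a single topic/regex (-t) or every topic listed in a file (-f), one per line.
 */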
public static void main(String[] args) {
int exitCode = EXIT_CODE_FAILED;
AtlasClientV2 atlasClientV2 = null;
KafkaUtils kafkaUtils = null;
CloseableHttpClient httpClient = null;
System.out.print("\n################################\n");
System.out.print("# Custom Kafka bridge #\n");
System.out.print("################################\n\n");
try {
Options options = new Options();
options.addOption("t","topic", true, "topic");
options.addOption("f", "filename", true, "filename");
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args);
String topicToImport = cmd.getOptionValue("t");
String fileToImport = cmd.getOptionValue("f");
Configuration atlasConf = ApplicationProperties.get();
String[] urls = atlasConf.getStringArray(ATLAS_ENDPOINT);
if (urls == null || urls.length == 0) {
urls = new String[] { DEFAULT_ATLAS_URL };
}
if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput();
atlasClientV2 = new AtlasClientV2(urls, basicAuthUsernamePassword);
} else {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), urls);
}
kafkaUtils = new KafkaUtils(atlasConf);
KafkaBridge importer = new KafkaBridge(atlasConf, atlasClientV2, kafkaUtils);
httpClient = importer.httpClient; // hand the bridge's HTTP client to the finally block so it gets closed
if (StringUtils.isNotEmpty(KAFKA_SCHEMA_REGISTRY_ENV_VARIABLE)) {
KAFKA_SCHEMA_REGISTRY_HOSTNAME = KAFKA_SCHEMA_REGISTRY_ENV_VARIABLE;
}
if (StringUtils.isNotEmpty(fileToImport)) {
File f = new File(fileToImport);
if (f.exists() && f.canRead()) {
try (BufferedReader br = new BufferedReader(new FileReader(f))) { // ensure the reader is closed even on failure
String line;
while ((line = br.readLine()) != null) {
topicToImport = line.trim();
importer.importTopic(topicToImport);
}
}
exitCode = EXIT_CODE_SUCCESS;
} else {
LOG.error("Failed to read file: {}", fileToImport);
}
} else {
importer.importTopic(topicToImport);
exitCode = EXIT_CODE_SUCCESS;
}
} catch (ParseException e) {
LOG.error("Failed to parse arguments: {}", e.getMessage());
printUsage();
} catch (Exception e) {
System.out.println("ImportKafkaEntities failed. Please check the log file for the detailed error message");
e.printStackTrace();
LOG.error("ImportKafkaEntities failed", e);
} finally {
if (atlasClientV2 != null) {
atlasClientV2.close();
}
if (kafkaUtils != null) {
kafkaUtils.close();
}
if (httpClient != null) {
try {
httpClient.close();
} catch (IOException e) {
LOG.error("Could not close http client: ", e);
}
}
}
System.out.print("\n\n");
System.exit(exitCode);
}
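/**
 * Builds a bridge instance: resolves the metadata namespace from the Atlas configuration,
 * caches the list of topics available in Kafka, and opens the HTTP client used for
 * schema registry lookups.
 */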
public KafkaBridge(Configuration atlasConf, AtlasClientV2 atlasClientV2, KafkaUtils kafkaUtils) throws Exception {
this.atlasClientV2 = atlasClientV2;
this.metadataNamespace = getMetadataNamespace(atlasConf);
this.kafkaUtils = kafkaUtils;
this.availableTopics = this.kafkaUtils.listAllTopics();
this.httpClient = HttpClientBuilder.create().build();
}
private String getMetadataNamespace(Configuration config) {
return AtlasConfigurationUtil.getRecentString(config, KAFKA_METADATA_NAMESPACE, getClusterName(config));
}
private String getClusterName(Configuration config) {
return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME);
}
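/**
 * Imports all available topics whose names match the given regex; a null or empty
 * argument imports every topic returned by Kafka.
 */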
public void importTopic(String topicToImport) throws Exception {
List<String> topics = availableTopics;
if (StringUtils.isNotEmpty(topicToImport)) {
List<String> topicsSubset = new ArrayList<>();
Pattern topicPattern = Pattern.compile(topicToImport); // compile the regex once, not per topic
for (String topic : topics) {
if (topicPattern.matcher(topic).matches()) {
topicsSubset.add(topic);
}
}
topics = topicsSubset;
}
if (CollectionUtils.isNotEmpty(topics)) {
for (String topic : topics) {
createOrUpdateTopic(topic);
}
}
}
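/**
 * Creates the kafka_topic entity in Atlas, or updates it if an entity with the same
 * qualified name already exists.
 */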
@VisibleForTesting
AtlasEntityWithExtInfo createOrUpdateTopic(String topic) throws Exception {
String topicQualifiedName = getTopicQualifiedName(metadataNamespace, topic);
AtlasEntityWithExtInfo topicEntity = findEntityInAtlas(KafkaDataTypes.KAFKA_TOPIC.getName(), topicQualifiedName);
System.out.print("\n"); // add a new line for each topic
if (topicEntity == null) {
System.out.println("Adding Kafka topic " + topic);
LOG.info("Importing Kafka topic: {}", topicQualifiedName);
AtlasEntity entity = getTopicEntity(topic, null);
topicEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
} else {
System.out.println("Updating Kafka topic " + topic);
LOG.info("Kafka topic {} already exists in Atlas. Updating it..", topicQualifiedName);
AtlasEntity entity = getTopicEntity(topic, topicEntity.getEntity());
topicEntity.setEntity(entity);
topicEntity = updateEntityInAtlas(topicEntity);
}
return topicEntity;
}
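/**
 * Creates or updates the avro_schema entity for one version of a schema registry subject.
 * The qualified name is derived from the "<schemaName>-value" subject and the version.
 */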
@VisibleForTesting
AtlasEntityWithExtInfo createOrUpdateSchema(String schema, String schemaName, String namespace, int version) throws Exception {
String schemaQualifiedName = getSchemaQualifiedName(metadataNamespace, schemaName + "-value", "v" + version);
AtlasEntityWithExtInfo schemaEntity = findEntityInAtlas(KafkaDataTypes.AVRO_SCHEMA.getName(), schemaQualifiedName);
if (schemaEntity == null) {
System.out.println("---Adding Kafka schema " + schema);
LOG.info("Importing Kafka schema: {}", schemaQualifiedName);
AtlasEntity entity = getSchemaEntity(schema, schemaName, namespace, version, null);
schemaEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
} else {
System.out.println("---Updating Kafka schema " + schema);
LOG.info("Kafka schema {} already exists in Atlas. Updating it..", schemaQualifiedName);
AtlasEntity entity = getSchemaEntity(schema, schemaName, namespace, version, schemaEntity.getEntity());
schemaEntity.setEntity(entity);
schemaEntity = updateEntityInAtlas(schemaEntity);
}
return schemaEntity;
}
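/**
 * Creates or updates a single avro_field entity. The field's qualified name embeds its
 * dotted full name, so nested fields stay unique within a schema version.
 */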
@VisibleForTesting
AtlasEntityWithExtInfo createOrUpdateField(Schema.Field field, String schemaName, String namespace, int version, String fullname) throws Exception {
fullname = concatFullname(field.name(), fullname, "");
String fieldQualifiedName = getFieldQualifiedName(metadataNamespace, fullname, schemaName + "-value", "v" + version);
AtlasEntityWithExtInfo fieldEntity = findEntityInAtlas(KafkaDataTypes.AVRO_FIELD.getName(), fieldQualifiedName);
if (fieldEntity == null) {
System.out.println("---Adding Avro field " + fullname);
LOG.info("Importing Avro field: {}", fieldQualifiedName);
AtlasEntity entity = getFieldEntity(field, schemaName, namespace, version, null, fullname);
fieldEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
} else {
System.out.println("---Updating Avro field " + fullname);
LOG.info("Avro field {} already exists in Atlas. Updating it..", fieldQualifiedName);
AtlasEntity entity = getFieldEntity(field, schemaName, namespace, version, fieldEntity.getEntity(), fullname);
fieldEntity.setEntity(entity);
fieldEntity = updateEntityInAtlas(fieldEntity);
}
return fieldEntity;
}
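/**
 * Populates a kafka_topic entity with its attributes (partition count, replication factor,
 * etc.) and attaches any avro_schema entities found for the topic in the schema registry.
 */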
@VisibleForTesting
AtlasEntity getTopicEntity(String topic, AtlasEntity topicEntity) throws Exception {
final AtlasEntity ret;
List<AtlasEntity> createdSchemas;
if (topicEntity == null) {
ret = new AtlasEntity(KafkaDataTypes.KAFKA_TOPIC.getName());
} else {
ret = topicEntity;
}
String qualifiedName = getTopicQualifiedName(metadataNamespace, topic);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName);
ret.setAttribute(CLUSTERNAME, metadataNamespace);
ret.setAttribute(TOPIC, topic);
ret.setAttribute(NAME, topic);
ret.setAttribute(DESCRIPTION_ATTR, topic);
ret.setAttribute(URI, topic);
try {
ret.setAttribute(PARTITION_COUNT, kafkaUtils.getPartitionCount(topic));
ret.setAttribute(REPLICATION_FACTOR, kafkaUtils.getReplicationFactor(topic));
} catch (ExecutionException | InterruptedException e) {
LOG.error("Error while getting partition data for topic :" + topic, e);
throw new Exception("Error while getting partition data for topic :" + topic, e);
}
createdSchemas = findOrCreateAtlasSchema(topic);
if (CollectionUtils.isNotEmpty(createdSchemas)) {
ret.setAttribute(AVRO_SCHEMA, createdSchemas);
ret.setRelationshipAttribute(AVRO_SCHEMA, createdSchemas);
}
return ret;
}
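/**
 * Populates an avro_schema entity from the Avro schema text and recursively creates
 * entities for all of its fields.
 */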
@VisibleForTesting
AtlasEntity getSchemaEntity(String schema, String schemaName, String namespace, int version, AtlasEntity schemaEntity) throws Exception {
final AtlasEntity ret;
List<AtlasEntity> createdFields = new ArrayList<>();
if (schemaEntity == null) {
ret = new AtlasEntity(KafkaDataTypes.AVRO_SCHEMA.getName());
} else {
ret = schemaEntity;
}
Schema parsedSchema = new Schema.Parser().parse(schema);
String qualifiedName = getSchemaQualifiedName(metadataNamespace, schemaName + "-value", "v" + version);
if (namespace == null) {
// fall back to the schema's own namespace, then to the bridge's metadata namespace
namespace = (parsedSchema.getNamespace() != null) ? parsedSchema.getNamespace() : metadataNamespace;
}
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName);
ret.setAttribute(TYPE, parsedSchema.getType());
ret.setAttribute(NAMESPACE, namespace);
ret.setAttribute(NAME, parsedSchema.getName() + "(v" + version + ")");
ret.setAttribute(SCHEMA_VERSION_ID, version);
createdFields = createNestedFields(parsedSchema, schemaName, namespace, version, "");
if (CollectionUtils.isNotEmpty(createdFields)) {
ret.setRelationshipAttribute(FIELDS, createdFields);
}
return ret;
}
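/**
 * Walks the fields of a parsed Avro schema, recursing into arrays of records and nested
 * records, and returns the created avro_field entities sorted by name.
 */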
List<AtlasEntity> createNestedFields(Schema parsedSchema, String schemaName, String namespace, int version, String fullname) throws Exception {
List<AtlasEntity> entityArray = new ArrayList<>();
AtlasEntityWithExtInfo fieldInAtlas;
JSONParser parser = new JSONParser();
for (Schema.Field field : parsedSchema.getFields()) {
if (field.schema().getType() == Schema.Type.ARRAY) {
System.out.println("---Array type detected in field " + field.name());
// an array of records: parse the "items" sub-schema and recurse into it
String subfields = ((JSONObject) parser.parse(field.schema().toString())).get("items").toString();
Schema parsedSubSchema = new Schema.Parser().parse(subfields);
// use a local variable so one field's prefix does not leak into later loop iterations
String nestedFullname = concatFullname(field.name(), fullname, parsedSubSchema.getName());
entityArray.addAll(createNestedFields(parsedSubSchema, schemaName, namespace, version, nestedFullname));
} else if (field.schema().getType() == Schema.Type.RECORD && !schemaName.equals(field.name())) {
System.out.println("---Nested record detected in field " + field.name());
String nestedFullname = concatFullname(field.name(), fullname, "");
entityArray.addAll(createNestedFields(field.schema(), schemaName, namespace, version, nestedFullname));
} else {
fieldInAtlas = createOrUpdateField(field, schemaName, namespace, version, fullname);
entityArray.add(fieldInAtlas.getEntity());
}
}
entityArray.sort((o1, o2) -> {
if (o1.getAttribute(NAME) != null && o2.getAttribute(NAME) != null) {
String str1 = o1.getAttribute(NAME).toString();
String str2 = o2.getAttribute(NAME).toString();
return str1.compareTo(str2);
} else {
return 0;
}
});
return entityArray;
}
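/**
 * Populates an avro_field entity; the Avro type is stored in the description because the
 * avro_field type attribute expects an array of types.
 */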
@VisibleForTesting
AtlasEntity getFieldEntity(Schema.Field field, String schemaName, String namespace, int version, AtlasEntity fieldEntity, String fullname) throws Exception {
AtlasEntity ret;
if (fieldEntity == null) {
ret = new AtlasEntity(KafkaDataTypes.AVRO_FIELD.getName());
} else {
ret = fieldEntity;
}
String qualifiedName = getFieldQualifiedName(metadataNamespace, fullname, schemaName + "-value", "v" + version);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName);
ret.setAttribute(NAME, fullname + "(v" + version + ")");
// the avro_field "type" attribute expects an array of types, so the Avro type goes into the description instead
ret.setAttribute(DESCRIPTION_ATTR, field.schema().getType());
return ret;
}
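/**
 * Qualified-name helpers. Topics use "<topic>@<namespace>", schemas
 * "<schema>@<version>@<namespace>", and fields "<field>@<schema>@<version>@<namespace>".
 */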
@VisibleForTesting
static String getTopicQualifiedName(String metadataNamespace, String topic) {
return String.format(FORMAT_KAFKA_TOPIC_QUALIFIED_NAME, topic.toLowerCase(), metadataNamespace);
}
@VisibleForTesting
static String getSchemaQualifiedName(String metadataNamespace, String schema, String version) {
return String.format(FORMAT_KAFKA_SCHEMA_QUALIFIED_NAME, schema.toLowerCase(), version, metadataNamespace);
}
@VisibleForTesting
static String getFieldQualifiedName(String metadataNamespace, String field, String schemaName, String version) {
return String.format(FORMAT_KAFKA_FIELD_QUALIFIED_NAME, field.toLowerCase(), schemaName.toLowerCase(), version, metadataNamespace);
}
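/**
 * Looks up an entity by type and qualified name; returns null (rather than throwing)
 * when the entity does not exist in Atlas.
 */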
@VisibleForTesting
AtlasEntityWithExtInfo findEntityInAtlas(String typeName, String qualifiedName) throws Exception {
AtlasEntityWithExtInfo ret = null;
try {
ret = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName));
} catch (Exception e) {
LOG.info("Entity {} of type {} not found in Atlas: {}", qualifiedName, typeName, e.getMessage());
}
return ret;
}
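/**
 * Creates the entity in Atlas and re-fetches it by GUID so the returned object carries
 * the server-assigned state.
 */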
@VisibleForTesting
AtlasEntityWithExtInfo createEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception {
AtlasEntityWithExtInfo ret = null;
EntityMutationResponse response = atlasClientV2.createEntity(entity);
List<AtlasEntityHeader> entities = response.getCreatedEntities();
if (CollectionUtils.isNotEmpty(entities)) {
ret = atlasClientV2.getEntityByGuid(entities.get(0).getGuid());
LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
}
return ret;
}
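/**
 * Updates the entity in Atlas. If the server reports no updated entities (i.e. nothing
 * changed), the input entity is returned unchanged.
 */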
@VisibleForTesting
AtlasEntityWithExtInfo updateEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception {
AtlasEntityWithExtInfo ret = entity;
EntityMutationResponse response = atlasClientV2.updateEntity(entity);
if (response != null) {
List<AtlasEntityHeader> entities = response.getUpdatedEntities();
if (CollectionUtils.isNotEmpty(entities)) {
ret = atlasClientV2.getEntityByGuid(entities.get(0).getGuid());
LOG.info("Updated {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
} else {
LOG.info("Entity {} not updated as it is unchanged from what is in Atlas", entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
}
} else {
LOG.info("Entity {} not updated as it is unchanged from what is in Atlas", entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
}
return ret;
}
private static void printUsage() {
System.out.println("Usage 1: import-kafka.sh");
System.out.println("Usage 2: import-kafka.sh [-t <topic regex> OR --topic <topic regex>]");
System.out.println("Usage 3: import-kafka.sh [-f <filename>]");
System.out.println("   Format:");
System.out.println("        topic1 OR topic1 regex");
System.out.println("        topic2 OR topic2 regex");
System.out.println("        topic3 OR topic3 regex");
}
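// Helpers for stripping relationship attributes before re-submitting entities to Atlas;
// retained from the upstream bridge code, but not currently invoked in this class.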
private void clearRelationshipAttributes(AtlasEntityWithExtInfo entity) {
if (entity != null) {
clearRelationshipAttributes(entity.getEntity());
if (entity.getReferredEntities() != null) {
clearRelationshipAttributes(entity.getReferredEntities().values());
}
}
}
private void clearRelationshipAttributes(Collection<AtlasEntity> entities) {
if (entities != null) {
for (AtlasEntity entity : entities) {
clearRelationshipAttributes(entity);
}
}
}
private void clearRelationshipAttributes(AtlasEntity entity) {
if (entity != null && entity.getRelationshipAttributes() != null) {
entity.getRelationshipAttributes().clear();
}
}
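/**
 * Queries the Kafka Schema Registry (via SchemaRegistryConnector, assumed to live in this
 * package) for every version of the "<topic>-value" subject and creates or updates the
 * corresponding avro_schema entities in Atlas.
 */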
private List<AtlasEntity> findOrCreateAtlasSchema(String schemaName) throws Exception {
List<AtlasEntity> entities = new ArrayList<>();
// look up every registered version of the "<schemaName>-value" subject in the schema registry
List<Integer> versions = SchemaRegistryConnector.getVersionsKafkaSchemaRegistry(httpClient, schemaName + "-value");
for (int version : versions) {
String kafkaSchema = SchemaRegistryConnector.getSchemaFromKafkaSchemaRegistry(httpClient, schemaName + "-value", version);
if (kafkaSchema != null) {
// schema exists in the Kafka Schema Registry
System.out.println("---Found Schema " + schemaName + "-value in Kafka Schema Registry with Version " + version);
LOG.info("Found Schema {}-value in Kafka Schema Registry with Version {}", schemaName, version);
AtlasEntityWithExtInfo atlasSchemaEntity = findEntityInAtlas(KafkaDataTypes.AVRO_SCHEMA.getName(), getSchemaQualifiedName(metadataNamespace, schemaName + "-value", "v" + version));
if (atlasSchemaEntity != null) {
System.out.println("---Found entity avro_schema " + schemaName + " in Atlas");
LOG.info("Found entity avro_schema {} in Atlas", schemaName);
} else {
System.out.println("---Entity avro_schema " + schemaName + " not found in Atlas");
LOG.info("Entity avro_schema {} not found in Atlas", schemaName);
}
// createOrUpdateSchema covers both branches: it creates the entity when missing and updates it otherwise
AtlasEntityWithExtInfo createdSchema = createOrUpdateSchema(kafkaSchema, schemaName, null, version);
entities.add(createdSchema.getEntity());
}
}
return entities;
}
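/**
 * Builds the dotted full name of a (possibly nested) field, e.g. "parent.subRecord.field".
 */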
private String concatFullname(String fieldName, String fullname, String subSchemaName) {
if (fullname.isEmpty()) {
return subSchemaName.isEmpty() ? fieldName : fieldName + "." + subSchemaName;
}
return subSchemaName.isEmpty() ? fullname + "." + fieldName : fullname + "." + subSchemaName + "." + fieldName;
}
}