// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.discovery;
import org.apache.atlas.AtlasConfiguration;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.annotation.GraphTransaction;
import org.apache.atlas.authorize.AtlasAuthorizationUtils;
import org.apache.atlas.authorize.AtlasEntityAccessRequest;
import org.apache.atlas.authorize.AtlasPrivilege;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.lineage.AtlasLineageInfo;
import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageInfoOnDemand;
import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection;
import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation;
import org.apache.atlas.model.lineage.LineageOnDemandConstraints;
import org.apache.atlas.repository.graphdb.AtlasEdge;
import org.apache.atlas.repository.graphdb.AtlasEdgeDirection;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.repository.store.graph.v2.EntityGraphRetriever;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.util.AtlasGremlinQueryProvider;
import org.apache.atlas.v1.model.lineage.SchemaResponse.SchemaDetails;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.inject.Inject;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import static org.apache.atlas.AtlasClient.DATA_SET_SUPER_TYPE;
import static org.apache.atlas.AtlasClient.PROCESS_SUPER_TYPE;
import static org.apache.atlas.AtlasErrorCode.INSTANCE_LINEAGE_QUERY_FAILED;
import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.BOTH;
import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.INPUT;
import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.OUTPUT;
import static org.apache.atlas.repository.Constants.RELATIONSHIP_GUID_PROPERTY_KEY;
import static org.apache.atlas.repository.graphdb.AtlasEdgeDirection.IN;
import static org.apache.atlas.repository.graphdb.AtlasEdgeDirection.OUT;
import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.FULL_LINEAGE_DATASET;
import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.FULL_LINEAGE_PROCESS;
import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.PARTIAL_LINEAGE_DATASET;
import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.PARTIAL_LINEAGE_PROCESS;
@Service
public class EntityLineageService implements AtlasLineageService {
// Logger for this service.
private static final Logger LOG = LoggerFactory.getLogger(EntityLineageService.class);
// Names of the process input/output edges.
// NOTE(review): presumably graph edge labels used during traversal; usages are outside this section - confirm.
private static final String PROCESS_INPUTS_EDGE = "__Process.inputs";
private static final String PROCESS_OUTPUTS_EDGE = "__Process.outputs";
// NOTE(review): presumably the attribute name under which a table's columns are stored - confirm against getColumnIds().
private static final String COLUMNS = "columns";
// Evaluated once when the class is initialized; getAtlasLineageInfo(guid, direction, depth) uses it to choose
// between getLineageInfoV1 (true) and getLineageInfoV2 (false).
private static final boolean LINEAGE_USING_GREMLIN = AtlasConfiguration.LINEAGE_USING_GREMLIN.getBoolean();
// NOTE(review): usage is outside this section; the name suggests a cap on lineage nodes - confirm.
private static final Integer DEFAULT_LINEAGE_MAX_NODE_COUNT = 9000;
// NOTE(review): presumably the defaults applied by getDefaultLineageConstraints(); usages are outside this section - confirm.
private static final int LINEAGE_ON_DEMAND_DEFAULT_DEPTH = 3;
private static final int LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT = AtlasConfiguration.LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT.getInt();
// NOTE(review): usage is outside this section - confirm what is joined with this separator.
private static final String SEPARATOR = "->";
// Collaborators; all four are assigned exactly once, in the constructor.
private final AtlasGraph graph;
private final AtlasGremlinQueryProvider gremlinQueryProvider;
private final EntityGraphRetriever entityRetriever;
private final AtlasTypeRegistry atlasTypeRegistry;
/**
 * Builds the lineage service from the injected type registry and graph.
 * The entity retriever is created here from those same two collaborators, and the
 * Gremlin query provider is taken from its shared {@code INSTANCE}.
 *
 * @param typeRegistry registry used for entity-type lookups and access requests
 * @param atlasGraph   graph handle kept by this service and passed to the entity retriever
 */
@Inject
EntityLineageService(AtlasTypeRegistry typeRegistry, AtlasGraph atlasGraph) {
    this.atlasTypeRegistry    = typeRegistry;
    this.graph                = atlasGraph;
    this.entityRetriever      = new EntityGraphRetriever(atlasGraph, typeRegistry);
    this.gremlinQueryProvider = AtlasGremlinQueryProvider.INSTANCE;
}
/**
 * Returns the lineage of the entity identified by {@code guid}.
 * The entity is validated (type and read access) first; the lineage is then computed by
 * {@code getLineageInfoV1} when {@code LINEAGE_USING_GREMLIN} is set, otherwise by
 * {@code getLineageInfoV2}.
 *
 * @param guid      guid of the entity whose lineage is requested
 * @param direction lineage direction, passed through to the selected implementation
 * @param depth     traversal depth, passed through to the selected implementation
 * @return the lineage produced by the selected implementation
 * @throws AtlasBaseException if validation fails or the selected implementation throws
 */
@Override
@GraphTransaction
public AtlasLineageInfo getAtlasLineageInfo(String guid, LineageDirection direction, int depth) throws AtlasBaseException {
    // Validation always runs before either lineage implementation is invoked.
    final boolean dataSet = validateEntityTypeAndCheckIfDataSet(guid);

    return LINEAGE_USING_GREMLIN
            ? getLineageInfoV1(guid, direction, depth, dataSet)
            : getLineageInfoV2(guid, direction, depth, dataSet);
}
/**
 * Returns on-demand lineage for the entity identified by {@code guid}.
 * When {@code lineageConstraintsMap} is null or empty, a fresh map holding only the
 * default constraints for {@code guid} is used instead. The constraints actually used
 * are attached to the result, and relations-on-demand entries that report neither more
 * inputs nor more outputs are removed before returning.
 *
 * NOTE(review): the parameter is a raw {@code Map}; presumably keyed by guid with
 * {@code LineageOnDemandConstraints} values - confirm against {@code AtlasLineageService}.
 *
 * @param guid                  guid of the entity whose lineage is requested
 * @param lineageConstraintsMap constraints to apply; may be null or empty
 * @return the on-demand lineage as returned by {@code getLineageInfoOnDemand}
 * @throws AtlasBaseException if validation or lineage retrieval fails
 */
@Override
@GraphTransaction
public AtlasLineageInfo getAtlasLineageInfo(String guid, Map lineageConstraintsMap) throws AtlasBaseException {
    Map effectiveConstraints = lineageConstraintsMap;

    // Defaults are resolved before the entity is validated, matching the established call order.
    if (MapUtils.isEmpty(effectiveConstraints)) {
        effectiveConstraints = new HashMap<>();
        effectiveConstraints.put(guid, getDefaultLineageConstraints(guid));
    }

    final boolean dataSet = validateEntityTypeAndCheckIfDataSet(guid);
    final AtlasLineageInfo lineage = getLineageInfoOnDemand(guid, effectiveConstraints, dataSet);

    appendLineageOnDemandPayload(lineage, effectiveConstraints);

    // Drop on-demand relations whose input and output nodes are all within the limit.
    cleanupRelationsOnDemand(lineage);

    return lineage;
}
/**
 * Verifies that the caller may read the entity and that the entity can take part in
 * lineage, i.e. that its type (or one of its super types) is the data-set or the
 * process super type.
 *
 * @param guid guid of the entity to validate
 * @return {@code true} if the entity is a data set, {@code false} if it is a process
 * @throws AtlasBaseException if the access check fails, the entity's type is not registered
 *                            ({@code TYPE_NAME_NOT_FOUND}), or the type is neither a data set
 *                            nor a process ({@code INVALID_LINEAGE_ENTITY_TYPE})
 */
private boolean validateEntityTypeAndCheckIfDataSet(String guid) throws AtlasBaseException {
    final AtlasEntityHeader header = entityRetriever.toAtlasEntityHeaderWithClassifications(guid);

    AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, header), "read entity lineage: guid=", guid);

    final AtlasEntityType type = atlasTypeRegistry.getEntityTypeByName(header.getTypeName());

    if (type == null) {
        throw new AtlasBaseException(AtlasErrorCode.TYPE_NAME_NOT_FOUND, header.getTypeName());
    }

    if (type.getTypeAndAllSuperTypes().contains(DATA_SET_SUPER_TYPE)) {
        return true;
    }

    // Not a data set: the only other type accepted for lineage is a process.
    if (type.getTypeAndAllSuperTypes().contains(PROCESS_SUPER_TYPE)) {
        return false;
    }

    throw new AtlasBaseException(AtlasErrorCode.INVALID_LINEAGE_ENTITY_TYPE, guid, header.getTypeName());
}
/**
 * Attaches the constraints map to the lineage result as its on-demand payload.
 * Nothing happens when the result is null or the map is null or empty.
 *
 * @param lineageInfo           lineage result to update; may be null
 * @param lineageConstraintsMap constraints to attach; may be null or empty
 */
private void appendLineageOnDemandPayload(AtlasLineageInfo lineageInfo, Map lineageConstraintsMap) {
    if (lineageInfo != null && MapUtils.isNotEmpty(lineageConstraintsMap)) {
        lineageInfo.setLineageOnDemandPayload(lineageConstraintsMap);
    }
}
/**
 * Keeps only those relations-on-demand entries that have more inputs or more outputs
 * than the given limit; every entry reporting neither is removed in place.
 * Nothing happens when the result is null or has no relations-on-demand.
 *
 * @param lineageInfo lineage result to prune; may be null
 */
private void cleanupRelationsOnDemand(AtlasLineageInfo lineageInfo) {
    if (lineageInfo == null || MapUtils.isEmpty(lineageInfo.getRelationsOnDemand())) {
        return;
    }

    lineageInfo.getRelationsOnDemand()
               .entrySet()
               .removeIf(entry -> !entry.getValue().hasMoreInputs() && !entry.getValue().hasMoreOutputs());
}
/**
 * Returns the schema of the hive table whose {@code qualifiedName} equals {@code datasetName}.
 * The table's guid is resolved through a unique-attribute lookup on the {@code hive_table}
 * type and the request is then delegated to {@link #getSchemaForHiveTableByGuid(String)}.
 *
 * @param datasetName qualified name of the hive table; must not be null or empty
 * @return the schema details produced by {@link #getSchemaForHiveTableByGuid(String)}
 * @throws AtlasBaseException with {@code BAD_REQUEST} if {@code datasetName} is null or empty,
 *                            with {@code TYPE_NAME_NOT_FOUND} if the {@code hive_table} type is
 *                            not registered, or whatever the guid lookup / schema retrieval throws
 */
@Override
@GraphTransaction
public SchemaDetails getSchemaForHiveTableByName(final String datasetName) throws AtlasBaseException {
    if (StringUtils.isEmpty(datasetName)) {
        // Tell the caller which argument was rejected instead of raising a bare BAD_REQUEST.
        throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST, "datasetName must not be null or empty");
    }

    final String hiveTableTypeName = "hive_table";
    final AtlasEntityType hiveTableType = atlasTypeRegistry.getEntityTypeByName(hiveTableTypeName);

    // Same handling as validateEntityTypeAndCheckIfDataSet: an unregistered type is reported
    // explicitly rather than being handed to the lookup as null.
    if (hiveTableType == null) {
        throw new AtlasBaseException(AtlasErrorCode.TYPE_NAME_NOT_FOUND, hiveTableTypeName);
    }

    final Map<String, Object> lookupAttributes = new HashMap<>();
    lookupAttributes.put("qualifiedName", datasetName);

    final String guid = AtlasGraphUtilsV2.getGuidByUniqueAttributes(hiveTableType, lookupAttributes);

    return getSchemaForHiveTableByGuid(guid);
}
@Override
@GraphTransaction
public SchemaDetails getSchemaForHiveTableByGuid(final String guid) throws AtlasBaseException {
if (StringUtils.isEmpty(guid)) {
throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST);
}
SchemaDetails ret = new SchemaDetails();
AtlasEntityType hive_column = atlasTypeRegistry.getEntityTypeByName("hive_column");
ret.setDataType(AtlasTypeUtil.toClassTypeDefinition(hive_column));
AtlasEntityWithExtInfo entityWithExtInfo = entityRetriever.toAtlasEntityWithExtInfo(guid);
AtlasEntity entity = entityWithExtInfo.getEntity();
AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, new AtlasEntityHeader(entity)),
"read entity schema: guid=", guid);
Map referredEntities = entityWithExtInfo.getReferredEntities();
List columnIds = getColumnIds(entity);
if (MapUtils.isNotEmpty(referredEntities)) {
List