All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.atlan.pkg.aim.ProductImporter.kt Maven / Gradle / Ivy

There is a newer version: 3.0.1
Show newest version
/* SPDX-License-Identifier: Apache-2.0
   Copyright 2023 Atlan Pte. Ltd. */
package com.atlan.pkg.aim

import com.atlan.model.assets.Asset
import com.atlan.model.assets.DataDomain
import com.atlan.model.assets.DataProduct
import com.atlan.model.fields.AtlanField
import com.atlan.pkg.cache.DataDomainCache
import com.atlan.pkg.cache.DataProductCache
import com.atlan.pkg.serde.RowDeserializer
import com.atlan.pkg.serde.cell.DataDomainXformer
import com.atlan.pkg.serde.cell.DataDomainXformer.DATA_PRODUCT_DELIMITER
import com.atlan.pkg.serde.csv.CSVImporter
import com.atlan.pkg.serde.csv.ImportResults
import mu.KotlinLogging

/**
 * Import data products (only) into Atlan from a provided CSV file.
 *
 * Only the data products and attributes in the provided CSV file will attempt to be loaded.
 * By default, any blank values in a cell in the CSV file will be ignored. If you would like any
 * particular column's blank values to actually overwrite (i.e. remove) existing values for that
 * asset in Atlan, then add that column's field to getAttributesToOverwrite.
 *
 * @param filename name of the file to import
 * @param attrsToOverwrite list of fields that should be overwritten in Atlan, if their value is empty in the CSV
 * @param updateOnly if true, only update an asset (first check it exists), if false allow upserts (create if it does not exist)
 * @param batchSize maximum number of records to save per API request
 * @param failOnErrors if true, fail if errors are encountered, otherwise continue processing
 * @param fieldSeparator character to use to separate fields (for example ',' or ';')
 */
class ProductImporter(
    private val filename: String,
    private val attrsToOverwrite: List,
    private val updateOnly: Boolean,
    private val batchSize: Int,
    private val failOnErrors: Boolean,
    private val fieldSeparator: Char,
) : CSVImporter(
        filename = filename,
        attrsToOverwrite = attrsToOverwrite,
        updateOnly = updateOnly,
        batchSize = batchSize,
        typeNameFilter = DataProduct.TYPE_NAME,
        logger = KotlinLogging.logger {},
        failOnErrors = failOnErrors,
        fieldSeparator = fieldSeparator,
    ) {
    private val cache = DataProductCache

    /** {@inheritDoc} */
    override fun import(columnsToSkip: Set): ImportResults? {
        // Also ignore any inbound qualifiedName
        val colsToSkip = columnsToSkip.toMutableSet()
        colsToSkip.add(DataProduct.QUALIFIED_NAME.atlanFieldName)
        colsToSkip.add(DataDomain.PARENT_DOMAIN.atlanFieldName)
        colsToSkip.add(DataDomain.ASSET_ICON.atlanFieldName)
        colsToSkip.add(DataDomain.ASSET_THEME_HEX.atlanFieldName)
        return super.import(colsToSkip)
    }

    /** {@inheritDoc} */
    override fun getBuilder(deserializer: RowDeserializer): Asset.AssetBuilder<*, *> {
        val name = deserializer.getValue(DataProduct.NAME.atlanFieldName) as String
        val dataDomainMinimal = deserializer.getValue(DataProduct.DATA_DOMAIN.atlanFieldName)?.let { it as DataDomain }
        val dataDomain = if (dataDomainMinimal != null) DataDomainCache.getByGuid(dataDomainMinimal.guid) as DataDomain else null
        val dataProductAssetsDSL = deserializer.getValue(DataProduct.DATA_PRODUCT_ASSETS_DSL.atlanFieldName) as String?
        val qualifiedName = generateQualifiedName(deserializer, dataDomain)
        val candidateDP = DataProduct.creator(name, dataDomain?.qualifiedName, dataProductAssetsDSL)
        return if (qualifiedName != getCacheId(deserializer, dataDomain)) {
            // If there is an existing qualifiedName, use it, otherwise we will get a conflict exception
            candidateDP.qualifiedName(qualifiedName)
        } else {
            // If there is no existing qualifiedName, we MUST use the generated qualifiedName,
            // otherwise we will get a permission exception
            candidateDP
        }
    }

    /**
     * Determine the qualifiedName for the data product, irrespective of whether it is
     * present in the input file or not. Since these qualifiedNames are generated, and the object may
     * have been created in a previous pass (and cached), we can resolve to its known qualifiedName
     * here based on the information in the row of the input file.
     *
     * @param deserializer a row of deserialized values
     * @param dataDomain domain in which this data product should be contained
     * @return the qualifiedName, calculated from the deserialized values
     */
    private fun generateQualifiedName(
        deserializer: RowDeserializer,
        dataDomain: DataDomain?,
    ): String {
        val cacheId = getCacheId(deserializer, dataDomain)
        return cache.getByIdentity(cacheId)?.qualifiedName ?: cacheId
    }

    /**
     * Calculate the cache identity for this row of the CSV, based purely on the information in the CSV.
     *
     * @param deserializer a row of deserialized values
     * @param dataDomain domain in which this data product should be contained
     * @return the cache identity for the row
     */
    private fun getCacheId(
        deserializer: RowDeserializer,
        dataDomain: DataDomain?,
    ): String {
        val productName = deserializer.getValue(DataProduct.NAME.atlanFieldName)
        return if (dataDomain != null) {
            "${productName}$DATA_PRODUCT_DELIMITER${DataDomainXformer.encode(dataDomain)}"
        } else {
            "$productName"
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy