com.atlan.pkg.aim.GTCImporter.kt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of asset-import Show documentation
Atlan custom package for importing asset metadata
There is a newer version: 3.0.1
/* SPDX-License-Identifier: Apache-2.0
   Copyright 2023 Atlan Pte. Ltd. */
package com.atlan.pkg.aim

import com.atlan.model.assets.Asset
import com.atlan.model.fields.AtlanField
import com.atlan.pkg.cache.AssetCache
import com.atlan.pkg.serde.FieldSerde
import com.atlan.pkg.serde.RowDeserializer
import com.atlan.pkg.serde.csv.CSVImporter
import mu.KLogger

/**
 * Import glossaries, terms and categories (only) into Atlan from a provided CSV file.
 *
 * Only the terms and attributes in the provided CSV file will attempt to be loaded.
 * By default, any blank values in a cell in the CSV file will be ignored. If you would like any
 * particular column's blank values to actually overwrite (i.e. remove) existing values for that
 * asset in Atlan, then include that column's field in `attrsToOverwrite`.
 *
 * Batch tracking is always enabled for these importers, so that the cache can be kept up to date
 * with whatever was created while processing.
 *
 * @param filename name of the file to import
 * @param attrsToOverwrite list of fields that should be overwritten in Atlan, if their value is empty in the CSV
 * @param updateOnly if true, only update an asset (first check it exists), if false allow upserts (create if it does not exist)
 * @param batchSize maximum number of records to save per API request
 * @param cache cache of existing glossaries, terms or categories (will be preloaded by import)
 * @param typeNameFilter name of the specific type that should be handled by this importer
 * @param logger through which to log any problems
 * @param failOnErrors if true, fail if errors are encountered, otherwise continue processing
 * @param fieldSeparator character to use to separate fields (for example ',' or ';')
 */
abstract class GTCImporter(
    filename: String,
    attrsToOverwrite: List<AtlanField>,
    updateOnly: Boolean,
    batchSize: Int,
    protected val cache: AssetCache,
    typeNameFilter: String,
    logger: KLogger,
    failOnErrors: Boolean,
    fieldSeparator: Char,
) : CSVImporter(
        filename,
        logger,
        typeNameFilter,
        attrsToOverwrite,
        updateOnly = updateOnly,
        batchSize = batchSize,
        failOnErrors = failOnErrors,
        // Note: Always track batches for GTC importers, to ensure cache is managed
        trackBatches = true,
        fieldSeparator = fieldSeparator,
    ) {
    /**
     * Cache every asset that was created by processing.
     *
     * Each created asset is looked up again by its GUID (with up to 5 retries) and the looked-up
     * result, rather than the asset passed in, is what gets added to the cache.
     *
     * @param list assets that were created
     * @throws IllegalStateException if the lookup by GUID returns nothing for any of the assets
     */
    override fun cacheCreated(list: List<Asset>) {
        list.forEach { asset ->
            // We must look up the asset and then cache to ensure we have the necessary identity
            // characteristics and status
            val result =
                cache.lookupAssetByGuid(asset.guid, maxRetries = 5)
                    ?: throw IllegalStateException("Result of searching by GUID for ${asset.guid} was null.")
            // Only a failed lookup is an error: the outcome of adding to the cache is not inspected
            cache.addByGuid(asset.guid, result)
        }
    }

    /**
     * Create a builder for the type handled by this importer, with its qualifiedName already set
     * to the value resolved for the row.
     *
     * @param deserializer a row of deserialized values
     * @return a builder for the asset, populated with the row's qualifiedName
     */
    override fun getBuilder(deserializer: RowDeserializer): Asset.AssetBuilder<*, *> {
        val qualifiedName = generateQualifiedName(deserializer)
        return FieldSerde.getBuilderForType(typeNameFilter)
            .qualifiedName(qualifiedName)
    }

    /**
     * Determine the qualifiedName for the glossary, term or category, irrespective of whether it is
     * present in the input file or not. Since these qualifiedNames are generated, and the object may
     * have been created in a previous pass (and cached), we can resolve to its known qualifiedName
     * here based on the information in the row of the input file.
     *
     * When nothing is cached under the row's identity, the cache identity itself is returned.
     *
     * @param deserializer a row of deserialized values
     * @return the qualifiedName, calculated from the deserialized values
     */
    private fun generateQualifiedName(deserializer: RowDeserializer): String {
        val cacheId = getCacheId(deserializer)
        return cache.getByIdentity(cacheId)?.qualifiedName ?: cacheId
    }

    /**
     * Calculate the cache identity for this row of the CSV, based purely on the information in the CSV.
     *
     * @param deserializer a row of deserialized values
     * @return the cache identity for the row
     */
    abstract fun getCacheId(deserializer: RowDeserializer): String
}