et-export-basic.2.2.4.source-code.GlossaryExporter.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of asset-export-basic Show documentation
Show all versions of asset-export-basic Show documentation
Atlan custom package for exporting enriched asset metadata
/* SPDX-License-Identifier: Apache-2.0
Copyright 2023 Atlan Pte. Ltd. */
import com.atlan.model.assets.Asset
import com.atlan.model.assets.Glossary
import com.atlan.model.assets.GlossaryCategory
import com.atlan.model.assets.GlossaryTerm
import com.atlan.model.assets.Link
import com.atlan.model.fields.AtlanField
import com.atlan.pkg.cache.CategoryCache
import com.atlan.pkg.serde.FieldSerde
import com.atlan.pkg.serde.RowSerde
import com.atlan.pkg.serde.csv.CSVWriter
import com.atlan.pkg.serde.csv.RowGenerator
import mu.KLogger
import mu.KotlinLogging
import java.util.stream.Collectors
import java.util.stream.Stream
/**
* Export glossary assets from Atlan (including terms and categories).
*
* @param ctx context containing the resolved configuration
* @param filename name of the file into which to export assets
* @param batchSize maximum number of assets to request per API call
*/
class GlossaryExporter(
private val ctx: Exporter.Context,
private val filename: String,
private val batchSize: Int,
) : RowGenerator {
private val logger = KotlinLogging.logger {}
fun export() {
CSVWriter(filename).use { csv ->
// TODO: qualifiedName is not a good way to do this for glossary objects...
val headerNames =
Stream.of(Asset.QUALIFIED_NAME, Asset.TYPE_NAME)
.map(AtlanField::getAtlanFieldName)
.collect(Collectors.toList())
headerNames.addAll(
getAttributesToExtract().stream()
.map { f -> RowSerde.getHeaderForField(f) }
.collect(Collectors.toList()),
)
csv.writeHeader(headerNames)
val start = System.currentTimeMillis()
// Retrieve all glossaries up-front
val glossaries =
Glossary.select(ctx.includeArchived)
.pageSize(batchSize)
.includesOnResults(getAttributesToExtract())
.includesOnRelations(getRelatedAttributesToExtract())
.stream(true)
.toList()
logger.info { "Appending ${glossaries.size} glossaries..." }
csv.appendAssets(glossaries, this, glossaries.size.toLong(), batchSize, logger)
// Then extract all categories, per glossary, up-front (caching them
// as we go, for later reference)
glossaries.parallelStream().forEach {
val categories = CategoryCache.traverseAndCacheHierarchy(it.name, getAttributesToExtract(), getRelatedAttributesToExtract())
if (categories.isNotEmpty()) {
logger.info { "Appending ${categories.size} categories from ${it.name}..." }
csv.appendAssets(
categories,
this,
categories.size.toLong(),
batchSize,
logger,
)
}
}
// And finally extract all the terms
val assets =
GlossaryTerm.select(ctx.includeArchived)
.pageSize(batchSize)
.includesOnResults(getAttributesToExtract())
.includesOnRelations(getRelatedAttributesToExtract())
csv.streamAssets(assets.stream(true), this, assets.count(), batchSize, logger)
logger.info { "Total time taken: ${System.currentTimeMillis() - start} ms" }
}
}
private fun getAttributesToExtract(): MutableList {
val attributeList: MutableList =
mutableListOf(
Asset.NAME,
GlossaryTerm.ANCHOR,
GlossaryCategory.PARENT_CATEGORY,
GlossaryTerm.CATEGORIES,
Asset.DISPLAY_NAME,
Asset.DESCRIPTION,
Asset.USER_DESCRIPTION,
Asset.OWNER_USERS,
Asset.OWNER_GROUPS,
Asset.CERTIFICATE_STATUS,
Asset.CERTIFICATE_STATUS_MESSAGE,
Asset.ANNOUNCEMENT_TYPE,
Asset.ANNOUNCEMENT_TITLE,
Asset.ANNOUNCEMENT_MESSAGE,
Asset.ATLAN_TAGS,
Asset.LINKS,
Asset.README,
Asset.STARRED_DETAILS,
GlossaryTerm.SEE_ALSO,
GlossaryTerm.PREFERRED_TERMS,
GlossaryTerm.SYNONYMS,
GlossaryTerm.ANTONYMS,
GlossaryTerm.TRANSLATED_TERMS,
GlossaryTerm.VALID_VALUES_FOR,
GlossaryTerm.CLASSIFIES,
)
for (cmField in ctx.cmFields) {
attributeList.add(cmField)
}
return attributeList
}
private fun getRelatedAttributesToExtract(): MutableList {
// Needed for:
// - asset referencing
// - Link embedding
// - README embedding
return mutableListOf(
Asset.QUALIFIED_NAME,
Asset.NAME,
Asset.DESCRIPTION,
Link.LINK,
)
}
/**
* Generate a set of values for a row of CSV, based on the provided asset.
*
* @param asset the asset from which to generate the values
* @return the values, as an iterable set of strings
*/
override fun buildFromAsset(asset: Asset): Iterable {
return GlossaryRowSerializer(asset, getAttributesToExtract(), logger).getRow()
}
/**
* Class to serialize glossary assets into a row of tabular data.
* Note: this replaces the general asset row serializer to handle nuances of glossary objects.
*
* @param asset the asset to be serialized
* @param fields the full list of fields to be serialized from the asset, in the order they should be serialized
* @param logger through which to record any problems
*/
class GlossaryRowSerializer(
private val asset: Asset,
private val fields: List,
private val logger: KLogger,
) {
/**
* Actually serialize the provided inputs into a list of string values.
*
* @return the list of string values giving a row-based tabular representation of the asset
*/
fun getRow(): Iterable {
val row = mutableListOf()
row.add(FieldSerde.getValueForField(asset, Asset.QUALIFIED_NAME, logger))
row.add(FieldSerde.getValueForField(asset, Asset.TYPE_NAME, logger))
for (field in fields) {
if (field != Asset.QUALIFIED_NAME && field != Asset.TYPE_NAME) {
if (asset !is GlossaryTerm && field == GlossaryTerm.CATEGORIES) {
// Only serialize the categories attribute for terms, no other
// glossary object types
row.add("")
} else {
row.add(FieldSerde.getValueForField(asset, field, logger))
}
}
}
return row
}
}
}