All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.atlan.util.ParallelBatch Maven / Gradle / Ivy

There is a newer version: 4.2.1
Show newest version
/* SPDX-License-Identifier: Apache-2.0
   Copyright 2023 Atlan Pte. Ltd. */
package com.atlan.util;

import com.atlan.AtlanClient;
import com.atlan.cache.OffHeapAssetCache;
import com.atlan.exception.AtlanException;
import com.atlan.model.assets.Asset;
import com.atlan.model.core.AssetMutationResponse;
import com.atlan.model.enums.AssetCreationHandling;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Function;
import java.util.function.ToLongFunction;

/**
 * Utility class for managing bulk updates across multiple parallel-running batches.
 *
 * <p>Every thread that calls {@link #add(Asset)} is given its own {@link AssetBatch}, keyed by the
 * calling thread's ID. The aggregate views exposed here (counts, tracked assets, failures and resolved
 * identities) are combined across all of those per-thread batches on demand.</p>
 *
 * <p>The tracked-asset caches are merged once, on first retrieval, and then retained; the failures and
 * resolved maps are merged only while they are still empty. Retrieve them only after all processing
 * (including {@link #flush()}) has completed, as later results are not folded into an already-merged view.</p>
 */
public class ParallelBatch implements Closeable {

    /** Guards the aggregate views: count and result readers share the read lock, merges and flushes take the write lock. */
    protected final ReadWriteLock lock = new ReentrantReadWriteLock();

    /** Connectivity to an Atlan tenant. */
    private final AtlanClient client;

    /** Maximum number of assets to submit in each batch. */
    private final int maxSize;

    /** Whether to replace Atlan tags (true), or ignore them (false). */
    private final boolean replaceAtlanTags;

    /** How to handle any custom metadata on assets (ignore, replace, or merge). */
    private final AssetBatch.CustomMetadataHandling customMetadataHandling;

    /** Whether to capture details about any failures (true) or throw exceptions for any failures (false). */
    private final boolean captureFailures;

    /** Whether to track the basic information about every asset that is created or updated (true) or only track counts (false). */
    private final boolean track;

    /** Whether to allow assets to be created (false) or only allow existing assets to be updated (true). */
    private final boolean updateOnly;

    /** When running with {@link #updateOnly} as true, whether to consider only exact matches (true) or ignore case (false). */
    private final boolean caseSensitive;

    /** When allowing assets to be created, how to handle those creations (full assets or partial assets). */
    private final AssetCreationHandling creationHandling;

    /** Whether tables and views should be treated interchangeably (an asset in the batch marked as a table will attempt to match a view if not found as a table, and vice versa). */
    private final boolean tableViewAgnostic;

    /** One batch per calling thread, keyed by that thread's ID. */
    private final ConcurrentHashMap<Long, AssetBatch> batchMap = new ConcurrentHashMap<>();

    // NOTE(review): the element / key-value types of the three collections below are not visible in this
    // file (they look to have been lost along with other generic parameters) -- restore them to match the
    // types returned by the corresponding AssetBatch getters, and update the public getters to match.
    private final List failures = Collections.synchronizedList(new ArrayList<>());
    private final Map resolvedGuids = new ConcurrentHashMap<>();
    private final Map resolvedQualifiedNames = new ConcurrentHashMap<>();

    // Merged views of the per-thread caches; each is built on first retrieval and released in close().
    private OffHeapAssetCache created = null;
    private OffHeapAssetCache updated = null;
    private OffHeapAssetCache restored = null;
    private OffHeapAssetCache skipped = null;

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Atlan tags are ignored and custom metadata is ignored.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     */
    public ParallelBatch(AtlanClient client, int maxSize) {
        this(client, maxSize, false, AssetBatch.CustomMetadataHandling.IGNORE);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Failures are not captured (exceptions are raised instead).
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling) {
        this(client, maxSize, replaceAtlanTags, customMetadataHandling, false);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Assets may be created as well as updated.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures) {
        this(client, maxSize, replaceAtlanTags, customMetadataHandling, captureFailures, false);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Details of every created and updated asset are tracked.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     * @param updateOnly when true, only attempt to update existing assets and do not create any assets (note: this will incur a performance penalty)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures,
            boolean updateOnly) {
        this(client, maxSize, replaceAtlanTags, customMetadataHandling, captureFailures, updateOnly, true);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Assets are matched case-sensitively.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     * @param updateOnly when true, only attempt to update existing assets and do not create any assets (note: this will incur a performance penalty)
     * @param track when false, details about each created and updated asset will no longer be tracked (only an overall count of each) -- useful if you intend to send close to (or more than) 1 million assets through a batch
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures,
            boolean updateOnly,
            boolean track) {
        this(client, maxSize, replaceAtlanTags, customMetadataHandling, captureFailures, updateOnly, track, true);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Any assets that are created are created as full assets.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     * @param updateOnly when true, only attempt to update existing assets and do not create any assets (note: this will incur a performance penalty)
     * @param track when false, details about each created and updated asset will no longer be tracked (only an overall count of each) -- useful if you intend to send close to (or more than) 1 million assets through a batch
     * @param caseSensitive (only applies when updateOnly is true) attempt to match assets case-sensitively (true) or case-insensitively (false)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures,
            boolean updateOnly,
            boolean track,
            boolean caseSensitive) {
        this(
                client,
                maxSize,
                replaceAtlanTags,
                customMetadataHandling,
                captureFailures,
                updateOnly,
                track,
                caseSensitive,
                AssetCreationHandling.FULL);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * Tables and views are not treated interchangeably.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     * @param updateOnly when true, only attempt to update existing assets and do not create any assets (note: this will incur a performance penalty)
     * @param track when false, details about each created and updated asset will no longer be tracked (only an overall count of each) -- useful if you intend to send close to (or more than) 1 million assets through a batch
     * @param caseSensitive (only applies when updateOnly is true) attempt to match assets case-sensitively (true) or case-insensitively (false)
     * @param creationHandling if assets are to be created, how they should be created (as full assets or only partial assets)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures,
            boolean updateOnly,
            boolean track,
            boolean caseSensitive,
            AssetCreationHandling creationHandling) {
        this(
                client,
                maxSize,
                replaceAtlanTags,
                customMetadataHandling,
                captureFailures,
                updateOnly,
                track,
                caseSensitive,
                creationHandling,
                false);
    }

    /**
     * Create a new batch of assets to be bulk-saved, in parallel (across threads).
     * This is the constructor every other constructor ultimately delegates to.
     *
     * @param client connectivity to Atlan
     * @param maxSize maximum size of each batch that should be processed (per API call)
     * @param replaceAtlanTags if true, all Atlan tags on an existing asset will be overwritten; if false, all Atlan tags will be ignored
     * @param customMetadataHandling how to handle custom metadata (ignore it, replace it (wiping out anything pre-existing), or merge it)
     * @param captureFailures when true, any failed batches will be captured and retained rather than exceptions being raised (for large amounts of processing this could cause memory issues!)
     * @param updateOnly when true, only attempt to update existing assets and do not create any assets (note: this will incur a performance penalty)
     * @param track when false, details about each created and updated asset will no longer be tracked (only an overall count of each) -- useful if you intend to send close to (or more than) 1 million assets through a batch
     * @param caseSensitive (only applies when updateOnly is true) attempt to match assets case-sensitively (true) or case-insensitively (false)
     * @param creationHandling if assets are to be created, how they should be created (as full assets or only partial assets)
     * @param tableViewAgnostic if true, tables and views will be treated interchangeably (an asset in the batch marked as a table will attempt to match a view if not found as a table, and vice versa)
     */
    public ParallelBatch(
            AtlanClient client,
            int maxSize,
            boolean replaceAtlanTags,
            AssetBatch.CustomMetadataHandling customMetadataHandling,
            boolean captureFailures,
            boolean updateOnly,
            boolean track,
            boolean caseSensitive,
            AssetCreationHandling creationHandling,
            boolean tableViewAgnostic) {
        this.client = client;
        this.maxSize = maxSize;
        this.replaceAtlanTags = replaceAtlanTags;
        this.customMetadataHandling = customMetadataHandling;
        this.captureFailures = captureFailures;
        this.updateOnly = updateOnly;
        this.track = track;
        this.caseSensitive = caseSensitive;
        this.creationHandling = creationHandling;
        this.tableViewAgnostic = tableViewAgnostic;
    }

    /**
     * Add an asset to the batch to be processed.
     * The asset is routed to the batch owned by the calling thread, which is created on first use.
     *
     * @param single the asset to add to a batch
     * @return the assets that were created or updated in this batch, or null if the batch is still queued
     * @throws AtlanException on any problems adding the asset to or processing the batch
     */
    public AssetMutationResponse add(Asset single) throws AtlanException {
        long threadId = Thread.currentThread().getId();
        // Note: these are thread-specific operations, so not explicitly locked or synchronized
        AssetBatch batch = batchMap.computeIfAbsent(
                threadId,
                k -> new AssetBatch(
                        client,
                        maxSize,
                        replaceAtlanTags,
                        customMetadataHandling,
                        captureFailures,
                        updateOnly,
                        track,
                        // NOTE(review): negated on purpose -- AssetBatch appears to take a case-INsensitive
                        // flag in this position; confirm against AssetBatch's constructor.
                        !caseSensitive,
                        creationHandling,
                        tableViewAgnostic));
        return batch.add(single);
    }

    /**
     * Flush any remaining assets in the parallel batches.
     * All per-thread batches are flushed via a parallel stream while the write lock is held.
     *
     * @throws AtlanException declared on the signature; failures of the individual batches are surfaced
     *                        as {@link IllegalStateException} instead (see below)
     * @throws IllegalStateException on any problems flushing (submitting) any of the parallel batches,
     *                               wrapping the underlying {@link AtlanException} as its cause
     */
    public void flush() throws AtlanException {
        lock.writeLock().lock();
        try {
            batchMap.values().parallelStream().forEach(ParallelBatch::flushUnchecked);
        } finally {
            lock.writeLock().unlock();
        }
    }

    /** Flush a single batch, translating the checked exception so it can cross the stream boundary. */
    private static void flushUnchecked(AssetBatch batch) {
        try {
            batch.flush();
        } catch (AtlanException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Number of assets that were created (no details, only a count).
     *
     * @return a count of the number of created assets, across all parallel batches
     */
    public long getNumCreated() {
        return sumAcross(batch -> batch.getNumCreated().get());
    }

    /**
     * Number of assets that were updated (no details, only a count).
     *
     * @return a count of the number of updated assets, across all parallel batches
     */
    public long getNumUpdated() {
        return sumAcross(batch -> batch.getNumUpdated().get());
    }

    /**
     * Number of assets that were potentially restored from being archived, or otherwise touched
     * without actually being updated (no details, just a count).
     *
     * @return a count of the number of potentially restored assets, across all parallel batches
     */
    public long getNumRestored() {
        return sumAcross(batch -> batch.getNumRestored().get());
    }

    /**
     * Number of assets that were skipped during processing (no details, just a count).
     *
     * @return a count of the number of skipped assets, across all parallel batches
     */
    public long getNumSkipped() {
        return sumAcross(batch -> batch.getNumSkipped().get());
    }

    /** Total one per-batch counter across every per-thread batch, under the read lock. */
    private long sumAcross(ToLongFunction<AssetBatch> counter) {
        lock.readLock().lock();
        try {
            long total = 0;
            for (AssetBatch batch : batchMap.values()) {
                total += counter.applyAsLong(batch);
            }
            return total;
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Assets that were created (minimal info only).
     * The merged cache is built on the first call and the same instance is returned thereafter.
     *
     * @return all created assets, across all parallel batches, or null if tracking is disabled
     * @throws IllegalStateException if merging a per-thread cache fails with an {@link IOException}
     */
    public OffHeapAssetCache getCreated() {
        if (!track) {
            return null;
        }
        lock.readLock().lock();
        try {
            if (created != null) {
                return created;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check now that we hold the write lock: another thread may have merged in the meantime,
            // and merging twice would replace (and leak) the first merged cache.
            if (created == null) {
                created = new OffHeapAssetCache(client, "p-created");
                mergeInto(created, AssetBatch::getCreated);
            }
            return created;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Assets that were updated (minimal info only).
     * The merged cache is built on the first call and the same instance is returned thereafter.
     *
     * @return all updated assets, across all parallel batches, or null if tracking is disabled
     * @throws IllegalStateException if merging a per-thread cache fails with an {@link IOException}
     */
    public OffHeapAssetCache getUpdated() {
        if (!track) {
            return null;
        }
        lock.readLock().lock();
        try {
            if (updated != null) {
                return updated;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock so that only one thread ever performs the merge.
            if (updated == null) {
                updated = new OffHeapAssetCache(client, "p-updated");
                mergeInto(updated, AssetBatch::getUpdated);
            }
            return updated;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Assets that were potentially restored from being archived, or otherwise touched without actually
     * being updated (minimal info only).
     * The merged cache is built on the first call and the same instance is returned thereafter.
     *
     * @return all potentially restored assets, across all parallel batches, or null if tracking is disabled
     * @throws IllegalStateException if merging a per-thread cache fails with an {@link IOException}
     */
    public OffHeapAssetCache getRestored() {
        if (!track) {
            return null;
        }
        lock.readLock().lock();
        try {
            if (restored != null) {
                return restored;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock so that only one thread ever performs the merge.
            if (restored == null) {
                restored = new OffHeapAssetCache(client, "p-restored");
                mergeInto(restored, AssetBatch::getRestored);
            }
            return restored;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Batches that failed to be committed (only populated when captureFailures is set to true).
     * Failures are gathered from the per-thread batches only while the combined list is still empty.
     *
     * @return all batches that failed, across all parallel batches
     */
    public List getFailures() {
        lock.readLock().lock();
        try {
            if (!failures.isEmpty()) {
                return failures;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock: without it two concurrent callers could both append every
            // batch's failures, leaving duplicates in the combined list.
            if (failures.isEmpty()) {
                for (AssetBatch batch : batchMap.values()) {
                    failures.addAll(batch.getFailures());
                }
            }
            return failures;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Assets that were skipped, when updateOnly is requested and the asset does not exist in Atlan.
     * The merged cache is built on the first call and the same instance is returned thereafter.
     *
     * @return all assets that were skipped, across all parallel batches, or null if tracking is disabled
     * @throws IllegalStateException if merging a per-thread cache fails with an {@link IOException}
     */
    public OffHeapAssetCache getSkipped() {
        if (!track) {
            return null;
        }
        lock.readLock().lock();
        try {
            if (skipped != null) {
                return skipped;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock so that only one thread ever performs the merge.
            if (skipped == null) {
                skipped = new OffHeapAssetCache(client, "p-skipped");
                mergeInto(skipped, AssetBatch::getSkipped);
            }
            return skipped;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Fold one kind of per-thread cache from every batch into the combined cache.
     * Must be called with the write lock held.
     *
     * @param target combined cache to extend
     * @param source selects which of a batch's caches to merge
     * @throws IllegalStateException if extending the combined cache fails with an {@link IOException}
     */
    private void mergeInto(OffHeapAssetCache target, Function<AssetBatch, OffHeapAssetCache> source) {
        for (AssetBatch batch : batchMap.values()) {
            OffHeapAssetCache perThread = source.apply(batch);
            // Only caches that are still open are merged (e.g. not ones already consumed or closed).
            if (perThread.isNotClosed()) {
                try {
                    // NOTE(review): the 'true' flag presumably closes the source cache once merged
                    // (going by the error message below) -- confirm against OffHeapAssetCache.
                    target.extendedWith(perThread, true);
                } catch (IOException e) {
                    throw new IllegalStateException("Unable to close underlying off-heap cache.", e);
                }
            }
        }
    }

    /**
     * Map from placeholder GUID to resolved (actual) GUID, for all assets that were processed through the batch.
     * Entries are gathered from the per-thread batches only while the combined map is still empty.
     *
     * @return all resolved GUIDs, across all parallel batches
     */
    public Map getResolvedGuids() {
        lock.readLock().lock();
        try {
            if (!resolvedGuids.isEmpty()) {
                return resolvedGuids;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock to avoid a redundant second merge by a concurrent caller.
            if (resolvedGuids.isEmpty()) {
                for (AssetBatch batch : batchMap.values()) {
                    resolvedGuids.putAll(batch.getResolvedGuids());
                }
            }
            return resolvedGuids;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Map from case-insensitive qualifiedName to resolved (actual) qualifiedName,
     * for all assets that were processed through the batch.
     * Note: this is only populated when caseSensitive is false, and will otherwise be empty
     * Entries are gathered from the per-thread batches only while the combined map is still empty.
     *
     * @return all resolved qualifiedNames, across all parallel batches
     */
    public Map getResolvedQualifiedNames() {
        lock.readLock().lock();
        try {
            if (!resolvedQualifiedNames.isEmpty()) {
                return resolvedQualifiedNames;
            }
        } finally {
            lock.readLock().unlock();
        }
        lock.writeLock().lock();
        try {
            // Re-check under the write lock to avoid a redundant second merge by a concurrent caller.
            if (resolvedQualifiedNames.isEmpty()) {
                for (AssetBatch batch : batchMap.values()) {
                    resolvedQualifiedNames.putAll(batch.getResolvedQualifiedNames());
                }
            }
            return resolvedQualifiedNames;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Close the batch by freeing up any resources it has used.
     * Note: this will clear any internal caches of results, so only call this after you have processed those!
     *
     * <p>Every per-thread batch and every merged cache is closed even if an earlier close fails; the
     * first failure is thrown, with any later failures attached to it as suppressed exceptions.</p>
     *
     * @throws IOException on any problems freeing up resources
     */
    @Override
    public void close() throws IOException {
        IOException failure = null;
        for (AssetBatch batch : batchMap.values()) {
            try {
                batch.close();
            } catch (IOException e) {
                failure = accumulate(failure, e);
            }
        }
        OffHeapAssetCache[] merged = {created, updated, restored, skipped};
        for (OffHeapAssetCache cache : merged) {
            if (cache == null) {
                continue; // never retrieved, so never built
            }
            try {
                cache.close();
            } catch (IOException e) {
                // Accumulate rather than overwrite, so a failure from an earlier close is never lost.
                failure = accumulate(failure, e);
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /** Keep the first failure as primary and attach any subsequent failure to it as suppressed. */
    private static IOException accumulate(IOException primary, IOException next) {
        if (primary == null) {
            return next;
        }
        primary.addSuppressed(next);
        return primary;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy