All downloads are free. Search and download functionality uses the official Maven repository.

com.atlan.pkg.objectstore.GCSSync.kt Maven / Gradle / Ivy

There is a newer version: 3.1.2
Show newest version
/* SPDX-License-Identifier: Apache-2.0
   Copyright 2023 Atlan Pte. Ltd. */
package com.atlan.pkg.objectstore

import com.google.auth.oauth2.GoogleCredentials
import com.google.cloud.storage.Storage
import com.google.cloud.storage.StorageOptions
import mu.KLogger
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.IOException

/**
 * Class to generally move data between GCS and local storage.
 *
 * Files are matched between the two sides by their path relative to the remote prefix /
 * local directory, and are only transferred when they are missing on the receiving side
 * or the sending side has a more recent modification time.
 *
 * NOTE(review): object keys are derived with [File] path handling, which uses the platform
 * separator; this presumably assumes a host where that separator is `/` — confirm.
 *
 * @param projectId identifier of the GCP project
 * @param bucketName name of the bucket in GCS to use for syncing
 * @param logger through which to record any problems
 * @param credentials JSON application credentials (as a string), or empty to use Atlan's backing store
 */
class GCSSync(
    private val projectId: String,
    private val bucketName: String,
    private val logger: KLogger,
    private val credentials: String,
) : ObjectStorageSyncer {
    private val storage =
        if (credentials.isNotBlank()) {
            StorageOptions.newBuilder().setProjectId(projectId)
                .setCredentials(GoogleCredentials.fromStream(credentials.byteInputStream()))
                .build().service
        } else {
            StorageOptions.newBuilder().setProjectId(projectId).build().service
        }

    /**
     * Downloads every object under [prefix] that is missing from [localDirectory], or whose
     * remote update time is later than the local file's last-modified time.
     *
     * @param prefix remote prefix under which to look for objects
     * @param localDirectory local directory into which to download the objects
     * @return local paths of the files that were downloaded
     * @throws IOException if the bucket cannot be found or any download fails
     */
    override fun copyFrom(
        prefix: String,
        localDirectory: String,
    ): List<String> {
        logger.info { "Syncing files from gcs://$bucketName/$prefix to $localDirectory" }

        val bucket = requireBucket()

        val localRoot = File(localDirectory)
        val localFilesLastModified =
            localRoot.walkTopDown().filter { it.isFile }.associate {
                it.relativeTo(localRoot).path to it.lastModified()
            }

        val filesToDownload = mutableListOf<String>()
        bucket.list(Storage.BlobListOption.prefix(prefix)).iterateAll().forEach { file ->
            val info = file.asBlobInfo()
            val key = relativeKey(info.name, prefix)
            if (key.isNotBlank()) {
                val localModified = localFilesLastModified[key]
                // Only consult the remote timestamp when there is a local file to compare against
                if (localModified == null ||
                    info.updateTimeOffsetDateTime.toInstant().toEpochMilli() > localModified
                ) {
                    filesToDownload.add(key)
                }
            }
        }

        return filesToDownload.map { key ->
            val target = File(localDirectory, key).path
            // `key` is relative to the prefix, so the prefix must be restored to address the object
            downloadFrom(remoteKeyFor(prefix, key), target)
            target
        }
    }

    /**
     * Downloads the single object under [prefix] whose prefix-relative key ends with
     * [extension] and sorts last lexicographically.
     *
     * @param prefix remote prefix under which to look for objects
     * @param extension suffix the object's key must end with
     * @param localDirectory local directory into which to download the object
     * @return local path of the downloaded file, or an empty string if no object matched
     * @throws IOException if the bucket cannot be found or the download fails
     */
    override fun copyLatestFrom(
        prefix: String,
        extension: String,
        localDirectory: String,
    ): String {
        logger.info { "Copying latest $extension file from gcs://$bucketName/$prefix to $localDirectory" }

        val latestFileKey =
            requireBucket().list(Storage.BlobListOption.prefix(prefix)).iterateAll()
                .map { relativeKey(it.asBlobInfo().name, prefix) }
                .filter { it.isNotBlank() && it.endsWith(extension) }
                .maxOrNull()
                ?: return ""

        val local = File(localDirectory, latestFileKey).path
        downloadFrom(remoteKeyFor(prefix, latestFileKey), local)
        return local
    }

    /**
     * Downloads a single object to a local file, replacing the local file if it already exists
     * and creating any missing parent directories.
     *
     * @param remoteKey full key of the object within the bucket
     * @param localFile path of the local file to write
     * @throws IOException if the object does not exist or cannot be downloaded
     */
    override fun downloadFrom(
        remoteKey: String,
        localFile: String,
    ) {
        logger.info { " ... downloading gcs://$bucketName/$remoteKey to $localFile" }
        try {
            // Resolve the remote object first, so a missing object does not cost us the local copy
            val blob =
                storage.get(bucketName, remoteKey)
                    ?: throw IOException("No object found at gcs://$bucketName/$remoteKey")
            val local = File(localFile)
            if (local.exists()) {
                local.delete()
            }
            // parentFile is null for a bare relative filename, in which case there is nothing to create
            local.parentFile?.mkdirs()
            FileOutputStream(local).use { fos ->
                blob.downloadTo(fos)
            }
        } catch (e: IOException) {
            throw e
        } catch (e: Exception) {
            throw IOException(e)
        }
    }

    /**
     * Uploads every file under [localDirectory] that is missing under [prefix], or whose local
     * last-modified time is later than the remote object's update time.
     *
     * @param localDirectory local directory from which to upload files
     * @param prefix remote prefix under which to place the files
     * @return true if at least one file was uploaded, otherwise false
     * @throws IOException if the bucket cannot be found or any upload fails
     */
    override fun copyTo(
        localDirectory: String,
        prefix: String,
    ): Boolean {
        logger.info { "Syncing files from $localDirectory to gcs://$bucketName/$prefix" }

        val filesLastModified =
            requireBucket().list(Storage.BlobListOption.prefix(prefix)).iterateAll().associate {
                val info = it.asBlobInfo()
                relativeKey(info.name, prefix) to info.updateTimeOffsetDateTime.toInstant().toEpochMilli()
            }

        val localRoot = File(localDirectory)
        val localFilesToUpload =
            localRoot.walkTopDown()
                .filter { it.isFile }
                .map { it to it.relativeTo(localRoot).path }
                .filter { (file, key) ->
                    val remoteModified = filesLastModified[key]
                    key.isNotBlank() && (remoteModified == null || file.lastModified() > remoteModified)
                }
                .map { (_, key) -> key }
                .toList()

        localFilesToUpload.forEach { key ->
            uploadTo(File(localDirectory, key).path, remoteKeyFor(prefix, key))
        }
        return localFilesToUpload.isNotEmpty()
    }

    /**
     * Uploads a single local file to the bucket.
     *
     * @param localFile path of the local file to upload
     * @param remoteKey full key under which to store the object within the bucket
     * @throws IOException if the bucket cannot be found or the upload fails
     */
    override fun uploadTo(
        localFile: String,
        remoteKey: String,
    ) {
        logger.info { " ... uploading $localFile to gcs://$bucketName/$remoteKey" }
        // Note: no need to delete files first (creating an object overwrites any existing one,
        // including auto-versioning if enabled on the bucket), and no need to create parent
        // prefixes in GCS
        try {
            val bucket = requireBucket()
            FileInputStream(File(localFile)).use { fis ->
                bucket.create(remoteKey, fis)
            }
        } catch (e: IOException) {
            throw e
        } catch (e: Exception) {
            throw IOException(e)
        }
    }

    /** Looks up the configured bucket, failing with a descriptive error rather than a null dereference. */
    private fun requireBucket() =
        storage.get(bucketName) ?: throw IOException("Unable to find bucket: gcs://$bucketName")

    /** Translates a full object name into its path relative to the given prefix. */
    private fun relativeKey(
        objectName: String,
        prefix: String,
    ): String = File(objectName).relativeTo(File(prefix)).path

    /**
     * Rebuilds the full object key from a prefix and a prefix-relative key.
     * An empty prefix is handled explicitly because `File("", child)` resolves the child
     * against the filesystem root, which would prepend a separator to the key.
     */
    private fun remoteKeyFor(
        prefix: String,
        relativeKey: String,
    ): String = if (prefix.isEmpty()) relativeKey else File(prefix, relativeKey).path
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy