All downloads are free. Search and download functionality uses the official Maven repository.

org.http4k.connect.langchain.S3DocumentLoader.kt Maven / Gradle / Ivy

package org.http4k.connect.langchain.org.http4k.connect.langchain

import dev.forkhandles.result4k.allValues
import dev.forkhandles.result4k.flatMap
import dev.forkhandles.result4k.map
import dev.forkhandles.result4k.peek
import dev.langchain4j.data.document.DocumentParser
import org.http4k.client.JavaHttpClient
import org.http4k.cloudnative.env.Environment
import org.http4k.connect.amazon.AWS_REGION
import org.http4k.connect.amazon.CredentialsProvider
import org.http4k.connect.amazon.Environment
import org.http4k.connect.amazon.s3.Http
import org.http4k.connect.amazon.s3.S3Bucket
import org.http4k.connect.amazon.s3.listObjectsV2Paginated
import org.http4k.connect.amazon.s3.model.BucketKey
import org.http4k.connect.amazon.s3.model.BucketName
import org.http4k.core.HttpHandler
import org.http4k.core.Uri
import org.http4k.filter.Payload
import java.time.Clock

/**
 * Loads objects from S3 and turns them into LangChain4j documents using a caller-supplied
 * [DocumentParser].
 *
 * A fresh [S3Bucket] HTTP client is built for every call from the constructor arguments; the
 * region is obtained by applying [AWS_REGION] to [environment] and the payload mode is always
 * [Payload.Mode.Signed].
 *
 * @param environment used to resolve the AWS region and, by default, the credentials.
 * @param credentialsProvider credentials handed to the S3 client; defaults to one built from [environment].
 * @param http the HTTP handler handed to the S3 client.
 * @param clock the clock handed to the S3 client.
 * @param overrideEndpoint optional endpoint handed to the S3 client (`null` by default).
 * @param forcePathStyle path-style flag handed to the S3 client (`false` by default).
 */
class S3DocumentLoader(
    private val environment: Environment,
    private val credentialsProvider: CredentialsProvider = CredentialsProvider.Environment(environment),
    private val http: HttpHandler = JavaHttpClient(),
    private val clock: Clock = Clock.systemUTC(),
    private val overrideEndpoint: Uri? = null,
    private val forcePathStyle: Boolean = false
) {
    /**
     * Fetches the object stored under [key] in [bucket], parses it with [parser] and, on success,
     * adds a `source` metadata entry of the form `s3://<bucket>/<key>` to the parsed document.
     *
     * NOTE(review): whatever the bucket lookup yields is passed straight to [parser] and is not
     * closed here — confirm the parser copes with a missing object and releases the stream.
     *
     * @return the result of the bucket lookup, mapped to the parsed document.
     */
    operator fun invoke(bucket: BucketName, key: BucketKey, parser: DocumentParser) =
        s3Client(bucket)[key]
            .map(parser::parse)
            .peek { document -> document.metadata().add("source", "s3://$bucket/$key") }

    /**
     * Loads every object listed in [bucket]; equivalent to calling the prefix overload with a
     * `null` prefix.
     */
    operator fun invoke(bucket: BucketName, parser: DocumentParser) = this(bucket, null, parser)

    /**
     * Lists [bucket] via `listObjectsV2Paginated`, restricted to [prefix] when it is non-null,
     * and loads each listed key through the single-key overload.
     *
     * The per-key and per-page results are combined with `allValues`, and the per-page document
     * lists are flattened into one list.
     *
     * @return a single result wrapping the flattened list of parsed documents.
     */
    operator fun invoke(
        bucket: BucketName,
        prefix: String?,
        parser: DocumentParser
    ) = s3Client(bucket)
        .listObjectsV2Paginated(prefix = prefix)
        .map { pageResult ->
            pageResult.flatMap { items ->
                // The single-key overload already records the `source` metadata entry,
                // so there is no need to add it a second time here.
                items.map { item -> this(bucket, item.Key, parser) }.allValues()
            }
        }
        .allValues()
        .map { pages -> pages.flatten() }

    /** Builds the S3 bucket client for [bucket] from this loader's configuration. */
    private fun s3Client(bucket: BucketName) = S3Bucket.Http(
        bucket,
        AWS_REGION(environment),
        credentialsProvider,
        http,
        clock,
        Payload.Mode.Signed,
        overrideEndpoint,
        forcePathStyle
    )
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy