All downloads are free. Search and download functionality uses the official Maven repository.

cc.unitmesh.rag.DocumentDsl.kt Maven / Gradle / Ivy

Go to download

Chocolate Factory is a cutting-edge LLM toolkit designed to empower you in creating your very own AI assistant.

The newest version!
package cc.unitmesh.rag

import cc.unitmesh.document.DocumentFactory
import cc.unitmesh.rag.document.Document
import cc.unitmesh.rag.document.DocumentParser
import cc.unitmesh.rag.store.EmbeddingMatch
import java.io.File

/**
 * Document DSL for indexing documents.
 *
 * Wraps a filesystem [path] and converts the file it points at (or every regular file below it,
 * when it is a directory) into [Document]s using the parser selected by each file's extension.
 *
 * @property path filesystem path of the file or directory to index.
 * @property isDir `true` when the instance was created with [byDir], `false` when created with
 *   [byFile]. [split] does not consult this flag; it inspects the filesystem directly.
 */
class DocumentDsl(val path: String, val isDir: Boolean) {
    /**
     * Parses whatever [path] points at.
     *
     * - Regular file: the file is parsed with the parser for its extension.
     * - Directory: the tree is walked recursively and every regular file is parsed; the results
     *   are concatenated in walk order.
     * - Anything else (e.g. a path that does not exist): an empty list is returned.
     *
     * @return the parsed documents, possibly empty.
     * @throws IllegalArgumentException if any encountered file has an extension for which no
     *   parser is available (see [parserByExt]); in the directory case this aborts the whole walk.
     */
    fun split(): List<Document> {
        val file = File(path)
        return when {
            file.isFile -> parseFile(file)
            file.isDirectory ->
                file.walk()
                    .filter { it.isFile }
                    .map { parseFile(it) }
                    .flatten()
                    .toList()
            else -> emptyList()
        }
    }

    /** Parses a single regular [file], closing its input stream once the parser returns. */
    private fun parseFile(file: File): List<Document> {
        // Resolve the parser first so that an unsupported extension fails before the file is opened.
        val parser = parserByExt(file.extension)
        return file.inputStream().use { stream -> parser.parse(stream) }
    }

    companion object {
        /** Creates a [DocumentDsl] for a single file; the resulting instance has `isDir == false`. */
        fun byFile(file: String): DocumentDsl {
            return DocumentDsl(file, isDir = false)
        }

        /** Creates a [DocumentDsl] for a directory; the resulting instance has `isDir == true`. */
        fun byDir(directory: String): DocumentDsl {
            return DocumentDsl(directory, isDir = true)
        }

        /**
         * Looks up the parser for a file [extension] via [DocumentFactory.parserByExt].
         *
         * @throws IllegalArgumentException if the factory returns `null` for [extension].
         */
        fun parserByExt(extension: String): DocumentParser {
            return DocumentFactory.parserByExt(extension)
                ?: throw IllegalArgumentException("Unsupported file type: $extension")
        }
    }
}


// TODO: add order by score value
/**
 * Reorders the matches so that elements from the front of the receiver end up at the two ends of
 * the result, while elements from the back of the receiver end up in the middle.
 *
 * The receiver is walked from its last element to its first; elements are alternately prepended
 * and appended to the result, starting with a prepend. For example, `[a, b, c, d, e]` becomes
 * `[a, c, e, d, b]`.
 *
 * Only the receiver's iteration order is used; scores are not inspected here.
 *
 * @return a new list holding the same elements in the reordered positions; the receiver is not
 *   modified.
 */
fun <T> Iterable<EmbeddingMatch<T>>.lowInMiddle(): List<EmbeddingMatch<T>> {
    // A deque gives constant-time insertion at both ends, unlike add(0, …) on an array-backed list.
    val reordered = ArrayDeque<EmbeddingMatch<T>>()

    for ((index, match) in this.reversed().withIndex()) {
        if (index % 2 == 1) {
            reordered.addLast(match)
        } else {
            reordered.addFirst(match)
        }
    }

    return reordered
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy