All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tri.promptfx.docs.SnippetJoiner.kt Maven / Gradle / Ivy

/*-
 * #%L
 * tri.promptfx:promptfx
 * %%
 * Copyright (C) 2023 - 2024 Johns Hopkins University Applied Physics Laboratory
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package tri.promptfx.docs

import tri.ai.embedding.EmbeddingMatch
import tri.ai.prompt.AiPromptLibrary
import tri.ai.text.chunks.TextDoc

/** Field name under which the list of matching snippets is passed to a snippet joiner template. */
const val MATCHES_TEMPLATE: String = "matches"

/**
 * Attribute key on a [TextDoc] whose value is contextual information for that document.
 * The joiner adds this information to any chunk information it returns.
 */
const val TEXT_DOC_ATTRIBUTE_CONTEXT_PREFIX: String = "context-prefix"

/**
 * Strategy for constructing a context from a set of matches.
 * This may rearrange the snippets and/or construct the concatenated text for an LLM query.
 *
 * @property id identifier of this joiner; the joiners declared in this file use it as the key
 * when looking up their prompt template in [AiPromptLibrary]
 */
sealed class SnippetJoiner(val id: String) {
    /**
     * Constructs the context from the given matches.
     *
     * @param matches the embedding matches to combine into a single context
     * @return the context text built from [matches]
     */
    abstract fun constructContext(matches: List<EmbeddingMatch>): String
}

/**
 * A basic joiner with content of each chunk in a separate match.
 *
 * Looks up the prompt registered under [id] in [AiPromptLibrary] and fills its
 * [MATCHES_TEMPLATE] field with one entry per match, in the order the matches are given.
 */
class BasicTemplateJoiner(_id: String) : SnippetJoiner(_id) {
    /**
     * Builds the context with one template entry per match.
     *
     * @param matches the embedding matches to render, each as its own entry
     * @return the prompt template filled with the entries
     */
    override fun constructContext(matches: List<EmbeddingMatch>): String {
        val template = AiPromptLibrary.lookupPrompt(id)
        // Entries are numbered from 1 rather than 0, hence index + 1.
        val entries = matches.mapIndexed { index, match -> NameText(index + 1, match) }
        return template.fill(MATCHES_TEMPLATE to entries)
    }
}

/**
 * A joiner with content of each chunk grouped by document, along with document metadata.
 *
 * Matches are grouped by their short document name. Each group becomes a single template entry
 * whose text is the trimmed chunk texts joined by a `...` separator line, and whose prefix is the
 * document's [TEXT_DOC_ATTRIBUTE_CONTEXT_PREFIX] attribute when that attribute is a string.
 */
class GroupingTemplateJoiner(_id: String) : SnippetJoiner(_id) {
    /**
     * Builds the context with one template entry per document.
     *
     * @param matches the embedding matches to group and render
     * @return the prompt template filled with the per-document entries
     */
    override fun constructContext(matches: List<EmbeddingMatch>): String {
        // groupBy preserves the order in which keys first appear, so documents are numbered
        // in the order their first match occurs in the input.
        val entries = matches.groupBy { it.shortDocName }.entries.mapIndexed { index, (docName, docMatches) ->
            // The first match's document is taken as the representative for the whole group.
            val doc = docMatches.first().document
            val joinedText = docMatches.joinToString("\n...\n") { it.chunkText.trim() }.trim()
            // A missing or non-string attribute falls back to an empty prefix.
            val docPrefix = doc.attributes[TEXT_DOC_ATTRIBUTE_CONTEXT_PREFIX] as? String ?: ""
            NameText(index + 1, docName, docPrefix, joinedText)
        }
        return AiPromptLibrary.lookupPrompt(id).fill(MATCHES_TEMPLATE to entries)
    }
}

/**
 * Utility class for holding the name and text of a match.
 * This is used when filling in joiner templates with Mustache templates,
 * so the property names must stay as they are.
 *
 * @property number number of the entry, as supplied by the joiner
 * @property name display name for the entry
 * @property prefix contextual text associated with the entry; may be empty
 * @property text snippet text of the entry
 */
private class NameText(val number: Int, val name: String, val prefix: String, val text: String) {
    /**
     * Builds an entry from a single [match], using the short name (without extension) of the
     * match's browsable document, an empty prefix, and the trimmed chunk text.
     *
     * @throws IllegalArgumentException if `browsable()` returns null for the match's document
     */
    constructor(index: Int, match: EmbeddingMatch) : this(
        index,
        // Fail with a descriptive message instead of a bare NullPointerException from `!!`.
        requireNotNull(match.document.browsable()) {
            "Cannot build a snippet entry: browsable() returned null for the matched document."
        }.shortNameWithoutExtension,
        "",
        match.chunkText.trim()
    )
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy