// tri.promptfx.docs.SnippetJoiner.kt
/*-
* #%L
* tri.promptfx:promptfx
* %%
* Copyright (C) 2023 - 2024 Johns Hopkins University Applied Physics Laboratory
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
package tri.promptfx.docs
import tri.ai.embedding.EmbeddingMatch
import tri.ai.prompt.AiPromptLibrary
import tri.ai.text.chunks.TextDoc
/**
 * Name used in snippet joiner template for matching text.
 * The joiners in this file pass it as the key when filling the looked-up prompt with the list of matches.
 */
const val MATCHES_TEMPLATE = "matches"
/**
 * Key used in [TextDoc] attributes for contextual information that will be added to any chunk information returned by the joiner.
 * [GroupingTemplateJoiner] reads this attribute from the document of the first match in each group and
 * falls back to an empty string when the attribute is absent or is not a [String].
 */
const val TEXT_DOC_ATTRIBUTE_CONTEXT_PREFIX = "context-prefix"
/**
 * Strategy for constructing a context from a set of matches.
 * This may rearrange the snippets and/or construct the concatenated text for an LLM query.
 *
 * @property id identifier of this joiner; the implementations in this file pass it to
 * [AiPromptLibrary.lookupPrompt] to find the template that is filled with the matches.
 */
sealed class SnippetJoiner(val id: String) {
    /**
     * Constructs the context from the given matches.
     *
     * @param matches the embedding matches to assemble into a context
     * @return the constructed context text
     */
    abstract fun constructContext(matches: List<EmbeddingMatch>): String
}
/**
 * A basic joiner with content of each chunk in a separate match.
 *
 * Each match becomes its own numbered template entry (numbering starts at 1, in input order),
 * and the prompt looked up by [id] is filled with those entries under the [MATCHES_TEMPLATE] key.
 */
class BasicTemplateJoiner(_id: String) : SnippetJoiner(_id) {
    /**
     * Fills the prompt identified by [id] with one entry per match.
     *
     * @param matches the embedding matches to render, in the order they should be numbered
     * @return the filled prompt text
     */
    override fun constructContext(matches: List<EmbeddingMatch>): String =
        AiPromptLibrary.lookupPrompt(id)
            .fill(MATCHES_TEMPLATE to matches.mapIndexed { index, match -> NameText(index + 1, match) })
}
/**
 * A joiner with content of each chunk grouped by document, along with document metadata.
 *
 * Matches are grouped by their short document name. Groups keep first-appearance order and are
 * numbered starting at 1. The prompt looked up by [id] is filled with one entry per group under
 * the [MATCHES_TEMPLATE] key.
 */
class GroupingTemplateJoiner(_id: String) : SnippetJoiner(_id) {
    /**
     * Fills the prompt identified by [id] with one entry per source document.
     *
     * @param matches the embedding matches to group and render
     * @return the filled prompt text
     */
    override fun constructContext(matches: List<EmbeddingMatch>): String {
        val groupedEntries = matches.groupBy { it.shortDocName }.entries.mapIndexed { index, (docName, docMatches) ->
            // All matches in a group share the same short name, so the first one supplies the document metadata.
            val doc = docMatches.first().document
            // Trimmed chunk texts from the same document, separated by an elision marker line.
            val joinedChunks = docMatches.joinToString("\n...\n") { it.chunkText.trim() }.trim()
            // Optional per-document context; empty when the attribute is missing or not a String.
            val docPrefix = doc.attributes[TEXT_DOC_ATTRIBUTE_CONTEXT_PREFIX] as? String ?: ""
            NameText(index + 1, docName, docPrefix, joinedChunks)
        }
        return AiPromptLibrary.lookupPrompt(id).fill(MATCHES_TEMPLATE to groupedEntries)
    }
}
/**
 * Utility class for holding the name and text of a match. This is used when filling in joiner templates with Mustache templates.
 *
 * @property number number of the entry; the joiners in this file pass a 1-based position
 * @property name display name of the entry (a document name in this file's usages)
 * @property prefix contextual text associated with the entry; may be empty
 * @property text the snippet text of the entry
 */
private class NameText(val number: Int, val name: String, val prefix: String, val text: String) {
    /**
     * Builds an entry from a single match: the name is the short name (without extension) of the
     * match's browsable document, the prefix is empty, and the text is the trimmed chunk text.
     *
     * Throws a [NullPointerException] if the match's document has no browsable form (`browsable()` returns null).
     */
    constructor(index: Int, match: EmbeddingMatch) : this(
        index,
        match.document.browsable()!!.shortNameWithoutExtension,
        "",
        match.chunkText.trim()
    )
}