All Downloads are FREE. Search and download functionality uses the official Maven repository.

io.cequence.cohereapi.model.ChatSettings.scala Maven / Gradle / Ivy

The newest version!
package io.cequence.cohereapi.model

import io.cequence.wsclient.domain.EnumValue

/**
 * Settings for a Cohere chat request.
 *
 * Only `model` is mandatory; every other field has a default value.
 * Field names use snake_case, matching the parameter names used in the descriptions below.
 *
 * @param model
 *   The name of a compatible Cohere model or the ID of a fine-tuned model.
 *
 *   Compatible deployments: Cohere Platform, Private Deployments.
 *
 * @param preamble
 *   When specified, the default Cohere preamble is replaced with the provided one.
 *   Preambles are a part of the prompt used to adjust the model's overall behavior and
 *   conversation style, and use the SYSTEM role.
 *
 *   The SYSTEM role is also used for the contents of the optional `chat_history` parameter.
 *   Used with `chat_history`, it adds content throughout a conversation; used with `preamble`,
 *   it adds content at the start of the conversation only.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param chat_history
 *   Previous messages in the conversation, giving the model context of the ongoing dialogue.
 *   Each message should be a tuple of (role, content), where role is either "USER" or "CHATBOT".
 *   The `chat_history` parameter uses the SYSTEM role when sent to the model.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param stream
 *   When true, the response is a JSON stream of events. The final event contains the complete
 *   response and has an event_type of "stream-end". Streaming is beneficial for user interfaces
 *   that render the contents of the response piece by piece, as it gets generated.
 *   Defaults to false.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param conversation_id
 *   An alternative to `chat_history`. Providing a conversation_id creates or resumes a persisted
 *   conversation with the specified ID. The ID can be any non-empty string.
 *
 *   Compatible deployments: Cohere Platform.
 *
 * @param prompt_truncation
 *   Dictates how the prompt is constructed.
 *   Defaults to AUTO when connectors are specified and OFF in all other cases.
 *
 *    - AUTO: some elements from chat_history and documents are dropped to fit within the model's
 *      context length limit. The order of documents and chat history may change and be ranked
 *      by relevance.
 *    - AUTO_PRESERVE_ORDER: similar to AUTO, but preserves the input order of documents and
 *      chat history.
 *    - OFF: no elements are dropped. If inputs exceed the model's context length limit,
 *      a TooManyTokens error is returned.
 *
 *   Compatible deployments:
 *    - AUTO: Cohere Platform only
 *    - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments
 *
 * @param connectors
 *   Connectors used to enrich the model's reply with information.
 *   Accepts {"id": "web-search"}, and/or the "id" of a custom connector, if you've created one.
 *   When specified, the model's reply is enriched with information found by querying each of
 *   the connectors (RAG).
 *
 *   Compatible deployments: Cohere Platform.
 *
 * @param search_queries_only
 *   When true, the response only contains a list of generated search queries; no search takes
 *   place and no reply from the model to the user's message is generated.
 *   Defaults to false.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param documents
 *   Relevant documents that the model can cite to generate a more accurate reply.
 *   Each document is a string-string dictionary, for example:
 *   {{{
 *   [
 *     { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
 *     { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }
 *   ]
 *   }}}
 *   Keys and values from each document are serialized to a string and passed to the model.
 *   The resulting generation includes citations that reference some of these documents.
 *
 *   Some suggested keys are "text", "author", and "date". For better generation quality, it is
 *   recommended to keep the total word count of the strings in the dictionary under 300 words.
 *
 *   An `id` field (string) can optionally be supplied to identify the document in the citations.
 *   This field is not passed to the model.
 *
 *   An `_excludes` field (array of strings) can optionally be supplied to omit some key-value
 *   pairs from being shown to the model. The omitted fields still show up in the citation
 *   object. The `_excludes` field itself is not passed to the model.
 *
 *   See 'Document Mode' in the guide for more information.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param citation_quality
 *   Dictates the approach taken to generating citations as part of the RAG flow: the user can
 *   ask for "accurate" results, "fast" results or no results. Defaults to "accurate".
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param temperature
 *   A non-negative float that tunes the degree of randomness in generation. Lower temperatures
 *   mean less random generations, higher temperatures mean more random generations.
 *   Defaults to 0.3.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param max_tokens
 *   The maximum number of tokens the model will generate as part of the response.
 *   Note: setting a low value may result in incomplete generations.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param max_input_tokens
 *   The maximum number of input tokens to send to the model. If not specified, it is the model's
 *   context length limit minus a small buffer. Input is truncated according to the
 *   `prompt_truncation` parameter.
 *
 *   Compatible deployments: Cohere Platform.
 *
 * @param k
 *   Ensures only the top k most likely tokens are considered for generation at each step.
 *   Defaults to 0, min value of 0, max value of 500.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param p
 *   Ensures that only the most likely tokens, with total probability mass of p, are considered
 *   for generation at each step. If both k and p are enabled, p acts after k.
 *   Defaults to 0.75, min value of 0.01, max value of 0.99.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param seed
 *   If specified, the backend makes a best effort to sample tokens deterministically, such that
 *   repeated requests with the same seed and parameters should return the same result.
 *   However, determinism cannot be totally guaranteed.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param stop_sequences
 *   Up to 5 strings that the model uses to stop generation. If the model generates a string
 *   that matches any of them, it stops generating tokens and returns the generated text up to
 *   that point, not including the stop sequence.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param frequency_penalty
 *   Used to reduce repetitiveness of generated tokens. The higher the value, the stronger the
 *   penalty applied to previously present tokens, proportional to how many times they have
 *   already appeared in the prompt or prior generation.
 *   Defaults to 0.0, min value of 0.0, max value of 1.0.
 *   Note: unlike the other optional settings of this class, the default here is `Some(0.0)`
 *   rather than `None`.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param presence_penalty
 *   Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except
 *   that this penalty is applied equally to all tokens that have already appeared, regardless
 *   of their exact frequencies.
 *   Defaults to 0.0, min value of 0.0, max value of 1.0.
 *   Note: unlike the other optional settings of this class, the default here is `Some(0.0)`
 *   rather than `None`.
 *
 *   Compatible deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments.
 *
 * @param response_type
 *   Configuration for forcing the model output to adhere to the specified format.
 *   Supported on Command R 03-2024, Command R+ 04-2024 and newer models.
 *
 *   The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by
 *   setting { "type": "json_object" }. A JSON Schema can optionally be provided, to ensure a
 *   specific structure.
 *
 *   Compatible deployments: Cohere Platform.
 */
case class ChatSettings(
  model: String,
  preamble: Option[String] = None,
  chat_history: Seq[ChatMessage] = Nil,
  stream: Boolean = false,
  conversation_id: Option[String] = None,
  prompt_truncation: Option[PromptTruncation] = None,
  connectors: Seq[Connector] = Nil,
  search_queries_only: Boolean = false,
  documents: Seq[Map[String, String]] = Nil,
  citation_quality: Option[CitationQuality] = None,
  temperature: Option[Double] = None,
  max_tokens: Option[Int] = None,
  max_input_tokens: Option[Int] = None,
  k: Option[Int] = None,
  p: Option[Double] = None,
  seed: Option[Int] = None,
  stop_sequences: Seq[String] = Nil,
  frequency_penalty: Option[Double] = Some(0.0),
  presence_penalty: Option[Double] = Some(0.0),
  response_type: Option[ResponseType] = None

  // Fields below are commented out in the source and therefore not part of the class:
//   tools: Option[String] = None,
//   tool_results: Option[String] = None,
//   force_single_step: Option[Int] = None,
//   safety_mode: Option[Seq[String]] = None,
//   returnLikelihoods: Option[String] = None,
)

/**
 * Output formats selectable through `ChatSettings.response_type`.
 *
 * The hierarchy is closed: the trait is `sealed` and its case-class member is `final`,
 * so pattern matches over `ResponseType` can be checked for exhaustiveness.
 */
sealed trait ResponseType

object ResponseType {

  /** Text variant; carries no parameters. */
  case object Text extends ResponseType

  /**
   * JSON-object variant.
   *
   * Declared `final` so that the sealed hierarchy cannot be re-opened by subclassing
   * this case class from another file.
   *
   * @param schema optional JSON Schema describing the structure of the object;
   *               `None` (the default) means no schema is supplied
   */
  final case class JsonObject(schema: Option[Map[String, Any]] = None) extends ResponseType
}

/**
 * Prompt-construction modes for the `prompt_truncation` setting of `ChatSettings`.
 */
sealed trait PromptTruncation extends EnumValue

// Plain `object` (not `case object`): the companion only groups the enum values,
// like the other companions in this file (`ResponseType`, `CitationQuality`).
object PromptTruncation {

  /**
   * Some elements from chat_history and documents will be dropped to fit within the model's
   * context length limit. The order of documents and chat history may change and be ranked
   * by relevance.
   */
  case object AUTO extends PromptTruncation

  /** Similar to AUTO, but preserves the input order of documents and chat history. */
  case object AUTO_PRESERVE_ORDER extends PromptTruncation

  /**
   * No elements will be dropped. If inputs exceed the model's context length limit,
   * a TooManyTokens error will be returned.
   */
  case object OFF extends PromptTruncation
}

/**
 * Citation-generation approaches for the `citation_quality` setting of `ChatSettings`:
 * "accurate" results, "fast" results or no results.
 *
 * NOTE(review): the value names are lower-case, presumably because they double as the
 * serialized values via `EnumValue` — confirm before renaming.
 */
sealed trait CitationQuality extends EnumValue

object CitationQuality {
  /** Requests "accurate" citation results; documented as the default on `ChatSettings.citation_quality`. */
  case object accurate extends CitationQuality
  /** Requests "fast" citation results. */
  case object fast extends CitationQuality
  /** Requests no citation results. */
  case object off extends CitationQuality
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy