package com.theokanning.openai.finetune;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;
import java.util.List;
/**
 * A request for OpenAI to create a fine-tuned model.
* All fields except trainingFile are nullable.
*
* https://beta.openai.com/docs/api-reference/fine-tunes/create
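 *
 * A minimal builder sketch (the file ID below is a placeholder, not a real upload):
 * <pre>{@code
 * FineTuneRequest request = FineTuneRequest.builder()
 *         .trainingFile("file-abc123") // placeholder ID of an uploaded training file
 *         .model("curie")
 *         .nEpochs(4)
 *         .build();
 * }</pre>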
*/
@Deprecated
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Data
public class FineTuneRequest {
/**
* The ID of an uploaded file that contains training data.
*/
@NonNull
@JsonProperty("training_file")
String trainingFile;
/**
* The ID of an uploaded file that contains validation data.
*/
@JsonProperty("validation_file")
String validationFile;
/**
* The name of the base model to fine-tune. You can select one of "ada", "babbage", "curie", or "davinci".
* To learn more about these models, see the Engines documentation.
*/
String model;
/**
* The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset.
*/
@JsonProperty("n_epochs")
Integer nEpochs;
/**
* The batch size to use for training.
* The batch size is the number of training examples used to train a single forward and backward pass.
*
* By default, the batch size will be dynamically configured to be ~0.2% of the number of examples in the training
 * set, capped at 256. In general, we've found that larger batch sizes tend to work better for larger datasets.
*/
@JsonProperty("batch_size")
Integer batchSize;
/**
* The learning rate multiplier to use for training.
* The fine-tuning learning rate is the original learning rate used for pretraining multiplied by this value.
*
 * By default, the learning rate multiplier is 0.05, 0.1, or 0.2, depending on the final batch_size
 * (larger learning rates tend to perform better with larger batch sizes).
* We recommend experimenting with values in the range 0.02 to 0.2 to see what produces the best results.
*/
@JsonProperty("learning_rate_multiplier")
Double learningRateMultiplier;
/**
* The weight to use for loss on the prompt tokens.
* This controls how much the model tries to learn to generate the prompt
 * (as compared to the completion, which always has a weight of 1.0),
* and can add a stabilizing effect to training when completions are short.
*
* If prompts are extremely long (relative to completions), it may make sense to reduce this weight so as to
* avoid over-prioritizing learning the prompt.
*/
@JsonProperty("prompt_loss_weight")
Double promptLossWeight;
/**
* If set, we calculate classification-specific metrics such as accuracy and F-1 score using the validation set
* at the end of every epoch. These metrics can be viewed in the results file.
*
* In order to compute classification metrics, you must provide a validation_file.
* Additionally, you must specify {@link FineTuneRequest#classificationNClasses} for multiclass
* classification or {@link FineTuneRequest#classificationPositiveClass} for binary classification.
*/
@JsonProperty("compute_classification_metrics")
Boolean computeClassificationMetrics;
/**
* The number of classes in a classification task.
*
* This parameter is required for multiclass classification.
*/
@JsonProperty("classification_n_classes")
Integer classificationNClasses;
/**
* The positive class in binary classification.
*
* This parameter is needed to generate precision, recall, and F1 metrics when doing binary classification.
*/
@JsonProperty("classification_positive_class")
String classificationPositiveClass;
/**
* If this is provided, we calculate F-beta scores at the specified beta values.
 * The F-beta score is a generalization of the F-1 score. This is only used for binary classification.
*
* With a beta of 1 (i.e. the F-1 score), precision and recall are given the same weight.
* A larger beta score puts more weight on recall and less on precision.
* A smaller beta score puts more weight on precision and less on recall.
*/
@JsonProperty("classification_betas")
List<Double> classificationBetas;
/**
* A string of up to 40 characters that will be added to your fine-tuned model name.
*/
String suffix;
}
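For reference, a standalone sketch of how the Jackson annotations above shape the outgoing JSON. The file IDs and class label are placeholders, and the ObjectMapper setup here is illustrative rather than the one the library configures internally:

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.theokanning.openai.finetune.FineTuneRequest;

public class FineTuneRequestDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder IDs and label; real values come from the file-upload endpoint.
        FineTuneRequest request = FineTuneRequest.builder()
                .trainingFile("file-train123")
                .validationFile("file-valid456")
                .model("curie")
                .computeClassificationMetrics(true)
                .classificationPositiveClass("spam")
                .build();

        // Skip null fields, since every field except training_file is optional.
        ObjectMapper mapper = new ObjectMapper()
                .setSerializationInclusion(JsonInclude.Include.NON_NULL);

        // The @JsonProperty annotations map the camelCase fields to the
        // snake_case keys the API expects, e.g. trainingFile -> "training_file".
        System.out.println(mapper.writeValueAsString(request));
    }
}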