All downloads are free. Search and download functionality uses the official Maven repository.

io.quarkiverse.langchain4j.jlama.JlamaModel Maven / Gradle / Ivy

There is a newer version: 0.23.0.CR1
Show newest version
package io.quarkiverse.langchain4j.jlama;

import java.io.File;
import java.nio.file.Path;
import java.util.Map;
import java.util.Optional;

import com.github.tjake.jlama.model.AbstractModel;
import com.github.tjake.jlama.model.ModelSupport;
import com.github.tjake.jlama.model.functions.Generator;
import com.github.tjake.jlama.safetensors.DType;
import com.github.tjake.jlama.safetensors.SafeTensorSupport;
import com.github.tjake.jlama.safetensors.prompt.Function;
import com.github.tjake.jlama.safetensors.prompt.Tool;

import dev.langchain4j.agent.tool.ToolSpecification;
import dev.langchain4j.model.chat.request.json.JsonSchemaElement;
import dev.langchain4j.model.chat.request.json.JsonSchemaElementHelper;
import dev.langchain4j.model.output.FinishReason;

/**
 * A Jlama model. Very basic information. Allows the model to be loaded with different options.
 * <p>
 * Instances carry the descriptive attributes handed to the constructor (type, name, optional owner,
 * id and the {@code isLocal} flag) together with the {@link JlamaModelRegistry} they came from.
 * Use {@link #loader()} to configure and load the actual {@link AbstractModel}.
 */
public class JlamaModel {
    private final JlamaModelRegistry registry;

    private final ModelSupport.ModelType modelType;
    private final String modelName;
    // NOTE(review): element type restored as String (owner name) -- confirm against JlamaModelRegistry.
    private final Optional<String> owner;
    private final String modelId;
    private final boolean isLocal;

    /**
     * Creates a model descriptor. All arguments are stored as-is; nothing is validated or loaded here.
     *
     * @param registry the registry this model belongs to; used later to resolve the model cache path
     * @param modelType the Jlama model type
     * @param modelName the model name; also used as the directory name under the registry cache path
     * @param owner the owner of the model, if any
     * @param modelId the model identifier
     * @param isLocal value returned by {@link #isLocal()}
     */
    JlamaModel(JlamaModelRegistry registry, ModelSupport.ModelType modelType, String modelName, Optional<String> owner,
            String modelId, boolean isLocal) {
        this.registry = registry;
        this.modelType = modelType;
        this.modelName = modelName;
        this.owner = owner;
        this.modelId = modelId;
        this.isLocal = isLocal;
    }

    /** @return the Jlama model type given at construction */
    ModelSupport.ModelType getModelType() {
        return modelType;
    }

    /** @return the model name given at construction */
    String getModelName() {
        return modelName;
    }

    /** @return the owner given at construction, possibly empty */
    Optional<String> getOwner() {
        return owner;
    }

    /** @return the model identifier given at construction */
    String getModelId() {
        return modelId;
    }

    /** @return the {@code isLocal} flag given at construction */
    boolean isLocal() {
        return isLocal;
    }

    /**
     * Creates a new {@link Loader} bound to this model's registry and name.
     *
     * @return a fresh loader with default options
     */
    Loader loader() {
        return new Loader(registry, modelName);
    }

    /**
     * Fluent builder that collects loading options and finally calls
     * {@link ModelSupport#loadModel} in {@link #load()}.
     * <p>
     * Defaults: working quantization type {@link DType#I8}, inference type
     * {@link AbstractModel.InferenceType#FULL_GENERATION}; working directory, model quantization
     * and thread count are unset.
     */
    static class Loader {
        private final JlamaModelRegistry registry;
        private final String modelName;

        private Path workingDirectory;
        private DType workingQuantizationType = DType.I8;
        private DType quantizationType;
        private Integer threadCount;
        private AbstractModel.InferenceType inferenceType = AbstractModel.InferenceType.FULL_GENERATION;

        private Loader(JlamaModelRegistry registry, String modelName) {
            this.registry = registry;
            this.modelName = modelName;
        }

        /**
         * Requests quantization of the model when it is loaded.
         *
         * @return this loader
         */
        public Loader quantized() {
            //For now only allow Q4 quantization at runtime
            this.quantizationType = DType.Q4;
            return this;
        }

        /**
         * Set the working quantization type. This is the type that the model will use for working inference memory.
         *
         * @param workingQuantizationType the type to pass to the model loader
         * @return this loader
         */
        public Loader workingQuantizationType(DType workingQuantizationType) {
            this.workingQuantizationType = workingQuantizationType;
            return this;
        }

        /**
         * Sets the working directory handed to the model loader.
         *
         * @param workingDirectory the directory, or {@code null} to pass no working directory
         * @return this loader
         */
        public Loader workingDirectory(Path workingDirectory) {
            this.workingDirectory = workingDirectory;
            return this;
        }

        /**
         * Sets the thread count handed to the model loader.
         *
         * @param threadCount the thread count, or {@code null} to leave it unspecified
         * @return this loader
         */
        public Loader threadCount(Integer threadCount) {
            this.threadCount = threadCount;
            return this;
        }

        /**
         * Sets the inference type handed to the model loader.
         *
         * @param inferenceType the inference type
         * @return this loader
         */
        public Loader inferenceType(AbstractModel.InferenceType inferenceType) {
            this.inferenceType = inferenceType;
            return this;
        }

        /**
         * Loads the model with the options collected so far.
         * <p>
         * The model location is the directory named after the model inside the registry's model
         * cache path. Unset optional settings (model quantization, thread count) are passed as
         * empty {@link Optional}s, and an unset working directory is passed as {@code null}.
         *
         * @return the model returned by {@link ModelSupport#loadModel}
         */
        public AbstractModel load() {
            return ModelSupport.loadModel(
                    inferenceType,
                    new File(registry.getModelCachePath().toFile(), modelName),
                    workingDirectory == null ? null : workingDirectory.toFile(),
                    // NOTE(review): fixed F32 argument, presumably the working memory type -- confirm
                    // against the ModelSupport.loadModel signature.
                    DType.F32,
                    workingQuantizationType,
                    Optional.ofNullable(quantizationType),
                    Optional.ofNullable(threadCount),
                    Optional.empty(),
                    SafeTensorSupport::loadWeights);
        }
    }

    /**
     * Converts a LangChain4j {@link ToolSpecification} into a Jlama {@link Tool}.
     * <p>
     * The tool's name and description are copied. When the specification has parameters, each
     * property is added as a function parameter with its schema converted through
     * {@link JsonSchemaElementHelper#toMap}, and is flagged as required when its name is contained
     * in the specification's list of required properties.
     *
     * @param toolSpecification the specification to convert
     * @return the equivalent Jlama tool
     */
    static Tool toTool(ToolSpecification toolSpecification) {
        Function.Builder builder = Function.builder()
                .name(toolSpecification.name())
                .description(toolSpecification.description());

        var parameters = toolSpecification.parameters();
        if (parameters != null) {
            // Typed entry so that key/value are String/JsonSchemaElement rather than Object.
            for (Map.Entry<String, JsonSchemaElement> p : parameters.properties().entrySet()) {
                builder.addParameter(p.getKey(), JsonSchemaElementHelper.toMap(p.getValue()),
                        parameters.required().contains(p.getKey()));
            }
        }

        return Tool.from(builder.build());
    }

    /**
     * Maps a Jlama generator finish reason onto the LangChain4j {@link FinishReason}.
     *
     * @param reason the Jlama finish reason
     * @return {@code STOP} for {@code STOP_TOKEN}, {@code LENGTH} for {@code MAX_TOKENS},
     *         {@code OTHER} for {@code ERROR}, {@code TOOL_EXECUTION} for {@code TOOL_CALL}
     * @throws IllegalArgumentException if {@code reason} is any other value
     */
    static FinishReason toFinishReason(Generator.FinishReason reason) {
        return switch (reason) {
            case STOP_TOKEN -> FinishReason.STOP;
            case MAX_TOKENS -> FinishReason.LENGTH;
            case ERROR -> FinishReason.OTHER;
            case TOOL_CALL -> FinishReason.TOOL_EXECUTION;
            default -> throw new IllegalArgumentException("Unknown reason: " + reason);
        };
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy