dev.langchain4j.model.jlama.JlamaModel
Jlama: Pure Java LLM Inference Engine - Requires Java 21
package dev.langchain4j.model.jlama;
import com.github.tjake.jlama.model.AbstractModel;
import com.github.tjake.jlama.model.ModelSupport;
import com.github.tjake.jlama.safetensors.DType;
import com.github.tjake.jlama.safetensors.SafeTensorSupport;
import com.github.tjake.jlama.safetensors.prompt.Function;
import com.github.tjake.jlama.safetensors.prompt.Parameters;
import com.github.tjake.jlama.safetensors.prompt.Tool;
import dev.langchain4j.agent.tool.ToolParameters;
import dev.langchain4j.agent.tool.ToolSpecification;
import lombok.Getter;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
/**
 * Basic metadata about a Jlama model, along with a fluent {@link Loader} that
 * allows the model to be loaded with different options.
 */
class JlamaModel {

    private final JlamaModelRegistry registry;

    @Getter
    private final ModelSupport.ModelType modelType;

    @Getter
    private final String modelName;

    @Getter
    private final Optional<String> owner;

    @Getter
    private final String modelId;

    private final boolean isLocal;

    public JlamaModel(JlamaModelRegistry registry,
                      ModelSupport.ModelType modelType,
                      String modelName,
                      Optional<String> owner,
                      String modelId,
                      boolean isLocal) {
        this.registry = registry;
        this.modelType = modelType;
        this.modelName = modelName;
        this.owner = owner;
        this.modelId = modelId;
        this.isLocal = isLocal;
    }

    public boolean isLocal() {
        return isLocal;
    }

    public Loader loader() {
        return new Loader();
    }
    public void download(Optional<String> authToken) throws IOException {
        // Downloads the model into the registry's cache directory if it is not already present.
        SafeTensorSupport.maybeDownloadModel(
                registry.getModelCachePath().toString(),
                owner,
                modelName,
                true,
                Optional.empty(),
                authToken,
                Optional.empty());
    }
    public class Loader {
        private Path workingDirectory;

        private DType workingQuantizationType = DType.I8;
        private DType quantizationType;
        private Integer threadCount;
        private AbstractModel.InferenceType inferenceType = AbstractModel.InferenceType.FULL_GENERATION;

        private Loader() {
        }

        public Loader quantized() {
            // For now, only Q4 quantization is allowed at runtime
            this.quantizationType = DType.Q4;
            return this;
        }

        /**
         * Sets the working quantization type: the type the model will use for its working inference memory.
         */
        public Loader workingQuantizationType(DType workingQuantizationType) {
            this.workingQuantizationType = workingQuantizationType;
            return this;
        }

        public Loader workingDirectory(Path workingDirectory) {
            this.workingDirectory = workingDirectory;
            return this;
        }

        public Loader threadCount(Integer threadCount) {
            this.threadCount = threadCount;
            return this;
        }

        public Loader inferenceType(AbstractModel.InferenceType inferenceType) {
            this.inferenceType = inferenceType;
            return this;
        }

        public AbstractModel load() {
            return ModelSupport.loadModel(
                    inferenceType,
                    new File(registry.getModelCachePath().toFile(), modelName),
                    workingDirectory == null ? null : workingDirectory.toFile(),
                    DType.F32,
                    workingQuantizationType,
                    Optional.ofNullable(quantizationType),
                    Optional.ofNullable(threadCount),
                    Optional.empty(),
                    SafeTensorSupport::loadWeights);
        }
    }
    public static Tool toTool(ToolSpecification toolSpecification) {
        Function.Builder builder = Function.builder()
                .name(toolSpecification.name())
                .description(toolSpecification.description());

        for (Map.Entry<String, Map<String, Object>> p : toolSpecification.parameters().properties().entrySet()) {
            builder.addParameter(p.getKey(), p.getValue(), toolSpecification.parameters().required().contains(p.getKey()));
        }

        return Tool.from(builder.build());
    }
}
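
A minimal usage sketch of the API above, for orientation. This file does not show how a JlamaModel instance is obtained from a JlamaModelRegistry, so the registry lookup and the model name below are hypothetical stand-ins; everything after that line uses only the members defined in this class.

// Hypothetical lookup: the getModel(...) call and model name are illustrative, not part of this file.
JlamaModel model = registry.getModel("tjake/TinyLlama-1.1B-Chat-v1.0-Jlama-Q4");

// Fetch the weights into the registry's cache if they are not present locally.
if (!model.isLocal()) {
    model.download(Optional.empty()); // pass Optional.of(token) for gated models
}

// Load with runtime Q4 quantization and a bounded thread count.
AbstractModel inference = model.loader()
        .quantized()
        .workingQuantizationType(DType.I8) // the default, shown explicitly
        .threadCount(8)
        .load();

// A langchain4j ToolSpecification can be mapped onto Jlama's prompt-side Tool type:
Tool tool = JlamaModel.toTool(toolSpecification);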