All downloads are free. Search and download functionality uses the official Maven repository.

io.quarkiverse.langchain4j.llama3.copy.AOT Maven / Gradle / Ivy

The newest version!
package io.quarkiverse.langchain4j.llama3.copy;

import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Map;
import java.util.Objects;

/**
 * Support for AOT preloading of GGUF metadata with GraalVM's Native Image.
 *
 * 

* To preload a model at build time, pass {@code -Dllama.PreloadGGUF=/path/to/model.gguf} * to the native-image builder command. At runtime, the preloaded model will be used * iff the specified and preloaded file names (base name) match. */ public final class AOT { public record PartialModel(String modelFileName, Llama model, long tensorDataOffset, Map tensorInfos) { } private static final PartialModel PRELOADED_GGUF = preLoadGGUF(System.getProperty("llama.PreloadGGUF")); public static PartialModel preLoadGGUF(String modelPath) { if (modelPath == null || modelPath.isEmpty()) { return null; } try { Path path = Path.of(modelPath); if (!Files.exists(path) || !Files.isRegularFile(path)) { throw new IllegalArgumentException("Cannot pre-load model: " + path); } GGUF gguf = GGUF.loadModel(path); try (FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.READ)) { return new PartialModel( path.getFileName().toString(), ModelLoader.loadModel(fileChannel, gguf, Llama3.Options.DEFAULT_MAX_TOKENS, false), gguf.getTensorDataOffset(), gguf.getTensorInfos()); } } catch (IOException e) { throw new RuntimeException(e); } } /** * Tries to reuse a compatible AOT preloaded model. * The file name (base name) must match with the preloaded file name. * No checksum/hash is checked for performance reasons. */ public static Llama tryUsePreLoaded(Path modelPath, int contextLength) throws IOException { AOT.PartialModel preLoaded = AOT.PRELOADED_GGUF; if (preLoaded == null) { return null; // no pre-loaded model stored } String optionsModel = modelPath.getFileName().toString(); String preLoadedModel = preLoaded.modelFileName(); if (!Objects.equals(optionsModel, preLoadedModel)) { // Preloaded and specified model file names didn't match. return null; } Llama baseModel = preLoaded.model(); try (var timer = Timer.log("Load tensors from pre-loaded model"); var fileChannel = FileChannel.open(modelPath, StandardOpenOption.READ)) { // Load only the tensors (mmap slices). 
Map tensorEntries = GGUF.loadTensors(fileChannel, preLoaded.tensorDataOffset(), preLoaded.tensorInfos()); Llama.Weights weights = ModelLoader.loadWeights(tensorEntries, baseModel.configuration()); return new Llama(baseModel.configuration().withContextLength(contextLength), baseModel.tokenizer(), weights); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy