io.trino.plugin.ml.AbstractSvmModel Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of trino-ml Show documentation
Show all versions of trino-ml Show documentation
Trino - Machine Learning Plugin
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.ml;
import com.google.common.util.concurrent.SimpleTimeLimiter;
import com.google.common.util.concurrent.TimeLimiter;
import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
import libsvm.svm_parameter;
import libsvm.svm_problem;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.util.List;
import java.util.SortedMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import static com.google.common.base.Throwables.throwIfUnchecked;
import static io.airlift.concurrent.Threads.threadsNamed;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newCachedThreadPool;
/**
 * Base class for {@link Model} implementations that are backed by libsvm.
 *
 * <p>An instance is created either from training parameters (and later trained
 * through {@link #train(Dataset)}) or from an existing {@code svm_model}.
 * Subclasses choose the libsvm SVM type through {@link #getLibsvmType()}.
 */
public abstract class AbstractSvmModel
        implements Model
{
    /** The libsvm model; set by the model constructor or by {@link #train(Dataset)}. */
    protected svm_model model;
    /** The libsvm training parameters; only set by the parameter constructor. */
    protected svm_parameter params;

    /**
     * Creates an untrained model that will use the given libsvm parameters.
     *
     * @param params libsvm training parameters, must not be null
     * @throws NullPointerException if {@code params} is null
     */
    protected AbstractSvmModel(svm_parameter params)
    {
        this.params = requireNonNull(params, "params is null");
    }

    /**
     * Wraps an existing libsvm model. This constructor does not set
     * {@link #params}.
     *
     * @param model the libsvm model, must not be null
     * @throws NullPointerException if {@code model} is null
     */
    protected AbstractSvmModel(svm_model model)
    {
        this.model = requireNonNull(model, "model is null");
    }

    /**
     * Serializes the current libsvm model to a byte array.
     *
     * @return the bytes libsvm wrote for {@link #model}
     * @throws UncheckedIOException if the temporary file cannot be created, written or read
     */
    @Override
    public byte[] getSerializedData()
    {
        File file = null;
        try {
            // libsvm doesn't have a method to serialize the model into a buffer, so write it out to a file and then read it back in.
            // Files.createTempFile is used instead of File.createTempFile because it may create the file with more restrictive permissions.
            file = Files.createTempFile("svm", null).toFile();
            svm.svm_save_model(file.getAbsolutePath(), model);
            return Files.readAllBytes(file.toPath());
        }
        catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        finally {
            if (file != null) {
                // Best-effort cleanup; a failed delete must not mask the result or the original exception
                //noinspection ResultOfMethodCallIgnored
                file.delete();
            }
        }
    }

    /**
     * Trains the libsvm model on the given dataset and stores the result in {@link #model}.
     *
     * <p>Training runs on a dedicated executor under a one hour time limit. The executor is
     * always shut down before this method returns or throws.
     *
     * @param dataset labels and datapoints to train on
     * @throws IllegalArgumentException if the dataset has a different number of labels and datapoints
     * @throws RuntimeException if training fails, times out, or the calling thread is interrupted
     *         (the interrupt flag is restored in that case); unchecked failures are rethrown as-is
     */
    @Override
    public void train(Dataset dataset)
    {
        params.svm_type = getLibsvmType();

        svm_problem problem = toSvmProblem(dataset);

        ExecutorService service = newCachedThreadPool(threadsNamed("libsvm-trainer-" + System.identityHashCode(this) + "-%s"));
        try {
            TimeLimiter limiter = SimpleTimeLimiter.create(service);
            //TODO: this time limit should be configurable
            model = limiter.callWithTimeout(getTrainingFunction(problem, params), 1, TimeUnit.HOURS);
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause == null) {
                // Never swallow the failure: without a cause, report the wrapper itself
                throw new RuntimeException(e);
            }
            throwIfUnchecked(cause);
            throw new RuntimeException(cause);
        }
        catch (Exception e) {
            throwIfUnchecked(e);
            throw new RuntimeException(e);
        }
        finally {
            service.shutdownNow();
        }
    }

    /**
     * Builds the task that performs the actual libsvm training.
     *
     * @param problem the training problem
     * @param param the libsvm parameters
     * @return a callable that invokes {@code svm.svm_train} and yields the trained model
     */
    private static Callable<svm_model> getTrainingFunction(svm_problem problem, svm_parameter param)
    {
        return () -> svm.svm_train(problem, param);
    }

    /**
     * Returns the value assigned to {@code svm_parameter.svm_type} before training.
     *
     * @return the libsvm SVM type used by this model
     */
    protected abstract int getLibsvmType();

    /**
     * Converts a dataset into the array-based representation libsvm trains on.
     *
     * @param dataset labels and datapoints to convert
     * @return a problem with one label and one node array per datapoint
     * @throws IllegalArgumentException if the number of labels and datapoints differ
     */
    private static svm_problem toSvmProblem(Dataset dataset)
    {
        List<Double> labels = dataset.getLabels();
        List<FeatureVector> datapoints = dataset.getDatapoints();
        // problem.y and problem.x are parallel arrays, so a size mismatch would either
        // overflow problem.x or leave null rows in it
        if (labels.size() != datapoints.size()) {
            throw new IllegalArgumentException("labels and datapoints have different sizes: " + labels.size() + " vs " + datapoints.size());
        }

        svm_problem problem = new svm_problem();
        problem.l = labels.size();

        problem.y = new double[labels.size()];
        for (int i = 0; i < labels.size(); i++) {
            problem.y[i] = labels.get(i);
        }

        problem.x = new svm_node[labels.size()][];
        for (int i = 0; i < datapoints.size(); i++) {
            problem.x[i] = toSvmNodes(datapoints.get(i));
        }
        return problem;
    }

    /**
     * Converts a feature vector into an array of libsvm nodes, one node per feature,
     * in the iteration order of the feature map.
     *
     * @param features the feature vector to convert
     * @return an array of {@code features.size()} nodes holding each feature's index and value
     */
    protected static svm_node[] toSvmNodes(FeatureVector features)
    {
        svm_node[] nodes = new svm_node[features.size()];
        int i = 0;
        // Features map is sorted, so we can just flatten it to a list for libsvm
        for (SortedMap.Entry<Integer, Double> feature : features.getFeatures().entrySet()) {
            nodes[i] = new svm_node();
            nodes[i].index = feature.getKey();
            nodes[i].value = feature.getValue();
            i++;
        }
        return nodes;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy