com.feedzai.fos.impl.weka.WekaManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fos-impl-weka Show documentation
Show all versions of fos-impl-weka Show documentation
Feedzai Open Scoring Server - Weka Implementation
/*
* $#
* FOS Weka
*
* Copyright (C) 2013 Feedzai SA
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #$
*/
package com.feedzai.fos.impl.weka;
import au.com.bytecode.opencsv.CSVReader;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.esotericsoftware.kryo.serializers.CollectionSerializer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.feedzai.fos.api.*;
import com.feedzai.fos.common.kryo.CustomUUIDSerializer;
import com.feedzai.fos.common.kryo.ScoringRequestEnvelope;
import com.feedzai.fos.common.validation.NotBlank;
import com.feedzai.fos.common.validation.NotNull;
import com.feedzai.fos.impl.weka.config.WekaManagerConfig;
import com.feedzai.fos.impl.weka.config.WekaModelConfig;
import com.feedzai.fos.impl.weka.utils.WekaUtils;
import com.feedzai.fos.impl.weka.utils.setter.InstanceSetter;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import weka.classifiers.Classifier;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static com.google.common.base.Preconditions.checkNotNull;
/**
* This class implements a manager that is able to train and score
* using Weka classifiers.
*
* Additionally, it implements a Kryo endpoint for scoring, to be used along
* with KryoScorer.
*
* @author Marco Jorge ([email protected])
* @author Miguel Duarte ([email protected])
*/
public class WekaManager implements Manager {
/** Logger shared by all instances of this class. */
private final static Logger logger = LoggerFactory.getLogger(WekaManager.class);
/** Thread that runs the accept loop of the scoring socket; created and started by the constructor. */
private Thread acceptThread;
/** Listening socket on which scoring clients connect; opened by the constructor. */
private ServerSocket serverSocket;
/** JSON mapper used to read and write model configuration header files. */
ObjectMapper mapper = new ObjectMapper();
/** Configurations of all known models, keyed by model id. */
private Map<UUID, WekaModelConfig> modelConfigs = new HashMap<>();
/** Configuration this manager was created with. */
private WekaManagerConfig wekaManagerConfig;
/** Scorer that is kept in sync with {@link #modelConfigs}. */
private WekaScorer wekaScorer;
/** Endpoint created for the most recently accepted scoring connection. */
private KryoScoringEndpoint scorerHandler;
/** Flag read by the accept loop on every iteration; set to {@code true} when the accept thread starts running. */
private volatile boolean acceptThreadRunning = false;
/**
 * Writes every dirty model configuration that is flagged for storage to its header file.
 * <p>
 * The configuration is serialized as JSON. A model that has no header file yet gets a
 * newly created one in the configured header location. When writing a configuration
 * fails, the error is logged and the remaining configurations are still processed;
 * no exception leaves this method.
 */
private synchronized void saveConfiguration() {
    for (WekaModelConfig candidate : modelConfigs.values()) {
        // skip anything that is unchanged or not meant to be persisted
        if (!(candidate.isDirty() && candidate.getModelConfig().isStoreModel())) {
            continue;
        }
        try {
            final String json = mapper.writeValueAsString(candidate.getModelConfig());
            if (candidate.getHeader() == null) {
                // never written before: allocate a fresh header file for this model
                final File created = File.createTempFile(
                        candidate.getId().toString(),
                        "." + WekaManagerConfig.HEADER_EXTENSION,
                        wekaManagerConfig.getHeaderLocation());
                candidate.setHeader(created);
            }
            FileUtils.write(candidate.getHeader(), json);
            // disk now matches memory, so the configuration is clean again
            candidate.setDirty(false);
        } catch (IOException e) {
            logger.error("Could not store configuration for model '{}' (will continue to save others)", candidate.getId(), e);
        }
    }
}
/**
 * Creates a new manager from the given configuration.
 * <p>
 * Looks up every header file below the configured header location and tries to
 * instantiate the model it describes. If a model fails to load, the failure is logged
 * and loading continues with the remaining headers (no exception is thrown). A header
 * whose model id is already registered is discarded with an error log.
 * <p>
 * Afterwards a server socket is opened on the configured scoring port and a thread is
 * started that accepts connections, handing each one to a {@code KryoScoringEndpoint}
 * executed on a fixed-size thread pool. Failure to open the socket is logged, not thrown.
 *
 * @param wekaManagerConfig the manager configuration
 */
public WekaManager(WekaManagerConfig wekaManagerConfig) {
    checkNotNull(wekaManagerConfig, "Manager config cannot be null");
    this.wekaManagerConfig = wekaManagerConfig;
    Collection<File> headers = FileUtils.listFiles(wekaManagerConfig.getHeaderLocation(), new String[]{WekaManagerConfig.HEADER_EXTENSION}, true);
    for (File header : headers) {
        logger.trace("Reading model file '{}'", header);
        FileInputStream fileInputStream = null;
        try {
            fileInputStream = new FileInputStream(header);
            String modelConfigJson = IOUtils.toString(fileInputStream);
            ModelConfig modelConfig = mapper.readValue(modelConfigJson, ModelConfig.class);
            WekaModelConfig wekaModelConfig = new WekaModelConfig(modelConfig, wekaManagerConfig);
            wekaModelConfig.setHeader(header);
            wekaModelConfig.setDirty(false /* not changed so far */);
            if (modelConfigs.containsKey(wekaModelConfig.getId())) {
                logger.error("Model with ID '{}' is duplicated in the configuration (the configuration from '{}' is discarded)", wekaModelConfig.getId(), header.getAbsolutePath());
            } else {
                modelConfigs.put(wekaModelConfig.getId(), wekaModelConfig);
            }
        } catch (Exception e) {
            // deliberate catch-all: one broken header must not prevent the others from loading
            logger.error("Could not load from '{}' (continuing to load others)", header, e);
        } finally {
            IOUtils.closeQuietly(fileInputStream);
        }
    }
    this.wekaScorer = new WekaScorer(modelConfigs, wekaManagerConfig);
    try {
        int port = wekaManagerConfig.getScoringPort();
        // Build the address first so an invalid port fails before any socket is created.
        java.net.InetSocketAddress bindAddress = new java.net.InetSocketAddress(port);
        // SO_REUSEADDR only has defined behaviour when set before the socket is bound,
        // so create the socket unbound, configure it, and bind explicitly.
        ServerSocket unboundSocket = new ServerSocket();
        try {
            unboundSocket.setReuseAddress(true);
            unboundSocket.bind(bindAddress);
        } catch (IOException e) {
            try {
                unboundSocket.close();
            } catch (IOException ignored) {
                // the bind failure below is the error worth reporting
            }
            throw e;
        }
        this.serverSocket = unboundSocket;
        final int max_threads = wekaManagerConfig.getMaxSimultaneousScoringThreads();
        Runnable acceptRunnable = new Runnable() {
            ExecutorService executor = Executors.newFixedThreadPool(max_threads);
            @Override
            public void run() {
                acceptThreadRunning = true;
                try {
                    while (acceptThreadRunning && !Thread.currentThread().isInterrupted()) {
                        Socket client = serverSocket.accept();
                        client.setTcpNoDelay(true);
                        scorerHandler = new KryoScoringEndpoint(client, wekaScorer);
                        executor.submit(scorerHandler);
                    }
                } catch (IOException e) {
                    logger.error(e.getMessage(), e);
                } finally {
                    // Nothing else holds a reference to the pool: release its threads once
                    // no more connections will be accepted. Handlers already running finish normally.
                    executor.shutdown();
                }
            }
        };
        acceptThread = new Thread(acceptRunnable);
        acceptThread.start();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
}
/**
 * Stores a serialized model in a newly created {@code .model} file inside the
 * configured header location.
 *
 * @param id    the id of the model, used as prefix of the file name
 * @param model the serialized classifier
 * @return the file the model was written to
 * @throws IOException if the file could not be created or written
 */
private File createModelFile(UUID id, byte[] model) throws IOException {
    final String namePrefix = id.toString();
    final File targetDirectory = wekaManagerConfig.getHeaderLocation();
    final File modelFile = File.createTempFile(namePrefix, ".model", targetDirectory);
    FileUtils.writeByteArrayToFile(modelFile, model);
    return modelFile;
}
/**
 * Persists a classifier to disk using Java serialization.
 * <p>
 * The classifier is written to a newly created {@code .model} file inside the
 * configured header location.
 *
 * @param id         the id of the model, used as prefix of the file name
 * @param classifier the classifier to serialize
 * @return the file the classifier was written to
 * @throws IOException if the file could not be created or written
 */
private File createModelFileFromClassifier(UUID id, Classifier classifier) throws IOException {
    File file = File.createTempFile(id.toString(), ".model", wekaManagerConfig.getHeaderLocation());
    // try-with-resources closes both streams even when serialization fails,
    // so the file handle is never leaked
    try (FileOutputStream fos = new FileOutputStream(file);
         ObjectOutputStream oos = new ObjectOutputStream(fos)) {
        oos.writeObject(classifier);
    }
    return file;
}
/**
 * Adds a model given its serialized form.
 * <p>
 * The bytes are written to a new model file and the model is then registered from
 * that file. The model id is resolved exactly once, so the id used to name the file
 * is the same id the model is registered under and that is returned to the caller.
 *
 * @param config the model configuration
 * @param model  the serialized classifier
 * @return the id of the added model
 * @throws FOSException if the model file cannot be created or the model cannot be added
 */
@Override
public synchronized UUID addModel(ModelConfig config, byte[] model) throws FOSException {
    UUID uuid = getUuid(config);
    File file;
    try {
        file = createModelFile(uuid, model);
    } catch (IOException e) {
        throw new FOSException("Unable to create model file", e);
    }
    // Register with the already resolved uuid instead of resolving it a second time.
    // The absolute form of the file is kept, as callers previously got via getAbsolutePath().
    addModelFromFile(config, uuid, file.getAbsoluteFile());
    return uuid;
}
/**
 * Serializes a classifier to a model file and registers it as a model.
 *
 * @param config     the model configuration
 * @param classifier the classifier to persist and register
 * @return the id of the added model
 * @throws FOSException if the model file cannot be created or the model cannot be added
 */
private UUID addModelFromClassifier(ModelConfig config, Classifier classifier) throws FOSException {
    final UUID modelId = getUuid(config);
    final File serialized;
    try {
        serialized = createModelFileFromClassifier(modelId, classifier);
    } catch (IOException e) {
        throw new FOSException("Unable to create model file", e);
    }
    addModelFromFile(config, modelId, serialized);
    return modelId;
}
/**
 * Adds a model whose serialized form is already available in a local file.
 *
 * @param config        the model configuration
 * @param localFileName path of the file that holds the model
 * @return the id of the added model
 * @throws FOSException if the model cannot be added
 */
@Override
public synchronized UUID addModel(ModelConfig config, @NotBlank String localFileName) throws FOSException {
    final UUID modelId = getUuid(config);
    final File modelFile = new File(localFileName);
    addModelFromFile(config, modelId, modelFile);
    return modelId;
}
/**
 * Registers a model whose serialized form lives in the given file.
 * <p>
 * A Weka-specific configuration is built for the model, stored under {@code uuid},
 * handed to the scorer, and finally the dirty configurations are saved.
 *
 * @param config the model configuration
 * @param uuid   the id under which the model is registered
 * @param file   the file that holds the model
 * @throws FOSException if the model cannot be added
 */
private void addModelFromFile(ModelConfig config, UUID uuid, File file) throws FOSException {
    final WekaModelConfig registration = new WekaModelConfig(config, wekaManagerConfig);
    registration.setId(uuid);
    registration.setModel(file);

    modelConfigs.put(uuid, registration);
    wekaScorer.addOrUpdate(registration);
    saveConfiguration();

    logger.debug("Model {} added", uuid);
}
/**
 * Removes a model from this manager and from the scorer.
 * <p>
 * An unknown id only produces a warning. For models flagged for storage, the header
 * file is deleted so it is not picked up on the next restart, and the model file is
 * deleted only when it lives inside the configured header location. A missing header
 * or model file reference is tolerated, and a failed delete is logged as a warning.
 *
 * @param modelId the id of the model to remove
 * @throws FOSException if the scorer fails to remove the model
 */
@Override
public synchronized void removeModel(UUID modelId) throws FOSException {
    WekaModelConfig wekaModelConfig = modelConfigs.remove(modelId);
    if (wekaModelConfig == null) {
        logger.warn("Could not remove model with id {} because it does not exists", modelId);
        return;
    }
    wekaScorer.removeModel(modelId);
    if (wekaModelConfig.getModelConfig().isStoreModel()) {
        // delete the header & model file (or else it will be picked up on the next restart)
        File header = wekaModelConfig.getHeader();
        // the header is null when saving the configuration failed before a header file was created
        if (header != null && header.exists() && !header.delete()) {
            logger.warn("Could not delete header file '{}' of model {}", header, modelId);
        }
        File model = wekaModelConfig.getModel();
        // only delete if is in our header location: relativize() yields a relative URI
        // exactly when the model file is located below that directory
        if (model != null
                && !wekaManagerConfig.getHeaderLocation().toURI().relativize(model.toURI()).isAbsolute()
                && model.exists() && !model.delete()) {
            logger.warn("Could not delete model file '{}' of model {}", model, modelId);
        }
    }
    logger.debug("Model {} removed", modelId);
}
/**
 * Replaces the configuration of an existing model, keeping its current model file.
 *
 * @param modelId     the id of the model to reconfigure
 * @param modelConfig the new configuration
 * @throws FOSException if no model with the given id exists or the scorer cannot be updated
 */
@Override
public synchronized void reconfigureModel(UUID modelId, ModelConfig modelConfig) throws FOSException {
    WekaModelConfig wekaModelConfig = this.modelConfigs.get(modelId);
    if (wekaModelConfig == null) {
        // report an unknown id through the declared exception instead of a NullPointerException
        throw new FOSException(new IllegalArgumentException("Cannot reconfigure model " + modelId + " because it does not exist"));
    }
    wekaModelConfig.update(modelConfig);
    wekaScorer.addOrUpdate(wekaModelConfig);
    saveConfiguration();
    logger.debug("Model {} reconfigured", modelId);
}
/**
 * Replaces both the configuration and the serialized classifier of an existing model.
 * <p>
 * The model is looked up before anything is written, so an unknown id does not leave
 * an orphaned model file behind.
 *
 * @param modelId     the id of the model to reconfigure
 * @param modelConfig the new configuration
 * @param model       the new serialized classifier
 * @throws FOSException if no model with the given id exists, the model file cannot be
 *                      written, or the scorer cannot be updated
 */
@Override
public synchronized void reconfigureModel(UUID modelId, ModelConfig modelConfig, byte[] model) throws FOSException {
    WekaModelConfig wekaModelConfig = this.modelConfigs.get(modelId);
    if (wekaModelConfig == null) {
        // report an unknown id through the declared exception instead of a NullPointerException
        throw new FOSException(new IllegalArgumentException("Cannot reconfigure model " + modelId + " because it does not exist"));
    }
    File file;
    try {
        file = createModelFile(modelId, model);
    } catch (IOException e) {
        throw new FOSException(e);
    }
    wekaModelConfig.update(modelConfig);
    wekaModelConfig.setModel(file);
    wekaScorer.addOrUpdate(wekaModelConfig);
    saveConfiguration();
    logger.debug("Model {} reconfigured", modelId);
}
/**
 * Replaces the configuration of an existing model and points it at a model file that
 * is already available locally.
 *
 * @param modelId       the id of the model to reconfigure
 * @param modelConfig   the new configuration
 * @param localFileName path of the file that holds the new model
 * @throws FOSException if no model with the given id exists or the scorer cannot be updated
 */
@Override
public synchronized void reconfigureModel(UUID modelId, ModelConfig modelConfig, @NotBlank String localFileName) throws FOSException {
    File file = new File(localFileName);
    WekaModelConfig wekaModelConfig = this.modelConfigs.get(modelId);
    if (wekaModelConfig == null) {
        // report an unknown id through the declared exception instead of a NullPointerException
        throw new FOSException(new IllegalArgumentException("Cannot reconfigure model " + modelId + " because it does not exist"));
    }
    wekaModelConfig.update(modelConfig);
    wekaModelConfig.setModel(file);
    wekaScorer.addOrUpdate(wekaModelConfig);
    saveConfiguration();
    // same trace as the other reconfigureModel overloads
    logger.debug("Model {} reconfigured", modelId);
}
/**
 * Lists the configuration of every model known to this manager.
 * <p>
 * The returned map is a new copy; changing it does not affect the manager.
 *
 * @return a map from model id to that model's configuration
 */
@Override
@NotNull
public synchronized Map<UUID, ModelConfig> listModels() {
    Map<UUID, ModelConfig> result = new HashMap<>(modelConfigs.size());
    for (Map.Entry<UUID, WekaModelConfig> entry : modelConfigs.entrySet()) {
        result.put(entry.getKey(), entry.getValue().getModelConfig());
    }
    return result;
}
/**
 * Gives access to the scorer that this manager keeps in sync with its models.
 *
 * @return the scorer created by this manager's constructor
 */
@Override
@NotNull
public WekaScorer getScorer() {
    return this.wekaScorer;
}
@Override
public synchronized UUID trainAndAdd(ModelConfig config, List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy