// Please wait. This can take a few minutes ...
// Downloading a project requires significant resources. Please understand that we have to cover our server costs. Thank you in advance.
// Project price: only $1
// You can buy this project and download/modify it as often as you want.
// net.myrrix.online.eval.ParameterOptimizer Maven / Gradle / Ivy
/*
* Copyright Myrrix Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.myrrix.online.eval;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.math3.util.Pair;
import org.apache.mahout.cf.taste.common.TasteException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This is an experimental utility class which can find a nearly-optimal set of learning algorithm parameters,
* for a given set of parameters and range of values, and a given metric to optimize. It simply performs an
* exhaustive search.
*
* It can be run on the command line with:
*
* {@code java net.myrrix.online.eval.ParameterOptimizer dataDirectory numSteps evaluationPercentage
* property=min:max [property2=min2:max2 ...]}
*
*
* {@code dataDirectory}: directory containing test data
* {@code numSteps}: number of different values of each parameter to try. Generally use 3-5.
* {@code evaluationPercentage}: fraction of all data to use in the test. Lower this to down-sample
* a very large data set. Must be in (0,1].
* {@code property=min:max}: repeated argument specifying a system property and the range of values
* to try, inclusive
*
*
* @author Sean Owen
* @since 1.0
*/
public final class ParameterOptimizer implements Callable> {
private static final Logger log = LoggerFactory.getLogger(ParameterOptimizer.class);
private static final Pattern EQUALS = Pattern.compile("=");
private static final Pattern COLON = Pattern.compile(":");
private final Map parameterRanges;
private final int numSteps;
private final Callable extends Number> evaluator;
private final boolean minimize;
public ParameterOptimizer(Map parameterRanges,
Callable extends Number> evaluator) {
this(parameterRanges, 4, evaluator, false);
}
/**
* @param parameterRanges mapping between names of {@link System} properties whose parameters will be optimized
* (e.g. {@code model.als.lambda}), and a {@link ParameterRange} describing a range of parameter values to try
* @param numSteps number of different values of each parameter to try. Note that with m parameters, and
* n steps, running time will scale proportionally to nm
* @param evaluator the objective to maximize (or minimize). This typically wraps a call to something like
* {@link PrecisionRecallEvaluator}
* @param minimize if {@code true}, find values that maximize {@code evaluator}'s value, otherwise minimize
*/
public ParameterOptimizer(Map parameterRanges,
int numSteps,
Callable extends Number> evaluator,
boolean minimize) {
Preconditions.checkNotNull(parameterRanges);
Preconditions.checkArgument(!parameterRanges.isEmpty(), "parameterRanges is empty");
Preconditions.checkArgument(numSteps >= 2);
Preconditions.checkNotNull(evaluator);
this.parameterRanges = parameterRanges;
this.numSteps = numSteps;
this.evaluator = evaluator;
this.minimize = minimize;
}
/**
* @return {@link #findGoodParameterValues()}
*/
@Override
public Map call() throws ExecutionException {
return findGoodParameterValues();
}
/**
* @return a {@link Map} between the values of the given {@link System} properties and the best value found
* during search
* @throws ExecutionException if an error occurs while calling {@code evaluator}; the cause is the
* underlying exception
*/
public Map findGoodParameterValues() throws ExecutionException {
int numProperties = parameterRanges.size();
String[] propertyNames = new String[numProperties];
Number[][] parameterValuesToTry = new Number[numProperties][];
int index = 0;
for (Map.Entry entry : parameterRanges.entrySet()) {
propertyNames[index] = entry.getKey();
parameterValuesToTry[index] = entry.getValue().buildSteps(numSteps);
index++;
}
int numTests = 1;
for (Number[] toTry : parameterValuesToTry) {
numTests *= toTry.length;
}
List> testResultLinesByValue = Lists.newArrayListWithCapacity(numTests);
Map bestParameterValues = Maps.newHashMap();
double bestValue = minimize ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
for (int test = 0; test < numTests; test++) {
StringBuilder testResultLine = new StringBuilder();
for (int prop = 0; prop < numProperties; prop++) {
String property = propertyNames[prop];
Number parameterValue = getParameterValueToTry(parameterValuesToTry, test, prop);
String propertyString = parameterValue.toString();
log.info("Setting {}={}", property, propertyString);
System.setProperty(property, propertyString);
testResultLine.append('[').append(property).append('=').append(propertyString).append("] ");
}
double testValue;
try {
testValue = evaluator.call().doubleValue();
} catch (Exception e) {
throw new ExecutionException(e);
}
testResultLine.append("= ").append(testValue);
testResultLinesByValue.add(new Pair(testValue, testResultLine.toString()));
log.info("{}", testResultLine);
if (minimize ? testValue < bestValue : testValue > bestValue) {
log.info("New best value {}", testValue);
bestValue = testValue;
for (int prop = 0; prop < numProperties; prop++) {
String property = propertyNames[prop];
Number parameterValue = getParameterValueToTry(parameterValuesToTry, test, prop);
bestParameterValues.put(property, parameterValue);
}
}
Collections.sort(testResultLinesByValue, new Comparator>() {
@Override
public int compare(Pair a, Pair b) {
if (a.getFirst() > b.getFirst()) {
return -1;
}
if (a.getFirst() < b.getFirst()) {
return 1;
}
return 0;
}
});
for (Pair result : testResultLinesByValue) {
log.info("{}", result.getSecond());
}
log.info("Best parameter values so far are {}", bestParameterValues);
}
log.info("Final best parameter values are {}", bestParameterValues);
return bestParameterValues;
}
private static Number getParameterValueToTry(Number[][] parameterValuesToTry, int test, int prop) {
int whichValueToTry = test;
for (int i = 0; i < prop; i++) {
whichValueToTry /= parameterValuesToTry[i].length;
}
whichValueToTry %= parameterValuesToTry[prop].length;
return parameterValuesToTry[prop][whichValueToTry];
}
public static void main(String[] args) throws Exception {
if (args.length < 4) {
System.err.println(
"Usage: dataDirectory numSteps evaluationPercentage property=min:max [property2=min2:max2 ...]");
return;
}
final File dataDir = new File(args[0]);
Preconditions.checkArgument(dataDir.exists() && dataDir.isDirectory(), "Not a directory: %s", dataDir);
Preconditions.checkArgument(dataDir.listFiles().length > 0, "No files in: %s", dataDir);
int numSteps = Integer.parseInt(args[1]);
Preconditions.checkArgument(numSteps >= 2, "# steps must be at least 2: %s", numSteps);
final double evaluationPercentage = Double.parseDouble(args[2]);
Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
"evaluationPercentage must be in (0,1]: %s", evaluationPercentage);
Map parameterRanges = Maps.newHashMapWithExpectedSize(args.length);
for (int i = 3; i < args.length; i++) {
String[] propValue = EQUALS.split(args[i]);
String systemProperty = propValue[0];
String[] minMax = COLON.split(propValue[1]);
ParameterRange range;
try {
int min = Integer.parseInt(minMax[0]);
int max = Integer.parseInt(minMax.length == 1 ? minMax[0] : minMax[1]);
range = new ParameterRange(min, max);
} catch (NumberFormatException ignored) {
double min = Double.parseDouble(minMax[0]);
double max = Double.parseDouble(minMax.length == 1 ? minMax[0] : minMax[1]);
range = new ParameterRange(min, max);
}
parameterRanges.put(systemProperty, range);
}
Callable evaluator = new Callable() {
@Override
public Number call() throws IOException, TasteException, InterruptedException {
PrecisionRecallEvaluator prEvaluator = new PrecisionRecallEvaluator();
MyrrixIRStatistics stats =
(MyrrixIRStatistics) prEvaluator.evaluate(dataDir, 0.9, evaluationPercentage, null);
return stats.getMeanAveragePrecision();
}
};
ParameterOptimizer optimizer = new ParameterOptimizer(parameterRanges, numSteps, evaluator, false);
Map optimalValues = optimizer.findGoodParameterValues();
System.out.println(optimalValues);
}
}