// Downloading a project requires significant server resources. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and then download or modify it as often as you want.
/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.ltr;
import ivory.core.ConfigurationException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
/**
* Trains models by greedily adding features from a feature pool.
*
* @author Don Metzler
*/
public class GreedyLearn {
// Convergence threshold for the greedy loop in train(): iteration continues
// only while (curMetric - prevMetric) exceeds this value.
private static final double TOLERANCE = 0.0001;
public void train(String featFile, String modelOutputFile, int numModels, String metricClassName, boolean pruneCorrelated, double correlationThreshold, boolean logFeatures, boolean productFeatures, boolean quotientFeatures, int numThreads) throws IOException, InterruptedException, ExecutionException, ConfigurationException, InstantiationException, IllegalAccessException, ClassNotFoundException {
// read training instances
Instances trainInstances = new Instances(featFile);
// get feature map (mapping from feature names to feature number)
Map featureMap = trainInstances.getFeatureMap();
// construct initial model
Model initialModel = new Model();
// initialize feature pools
Map> featurePool = new HashMap>();
featurePool.put(initialModel, new ArrayList());
// add simple features to feature pools
for(String featureName : featureMap.keySet()) {
featurePool.get(initialModel).add(new SimpleFeature(featureMap.get(featureName), featureName));
}
// eliminate document-independent features
List constantFeatures = new ArrayList();
for(int i = 0; i < featurePool.size(); i++) {
Feature f = featurePool.get(initialModel).get(i);
if(trainInstances.featureIsConstant(f)) {
System.err.println("Feature " + f.getName() + " is constant -- removing from feature pool!");
constantFeatures.add(f);
}
}
featurePool.get(initialModel).removeAll(constantFeatures);
// initialize score tables
Map scoreTable = new HashMap();
scoreTable.put(initialModel, new ScoreTable(trainInstances));
// initialize model queue
List models = new ArrayList();
models.add(initialModel);
// set up threading
ExecutorService threadPool = Executors.newFixedThreadPool(numThreads);
Map>> featureBatches = new HashMap>>();
featureBatches.put(initialModel, new ArrayList>());
for(int i = 0; i < numThreads; i++) {
featureBatches.get(initialModel).add(new ArrayList());
}
for(int i = 0; i < featurePool.get(initialModel).size(); i++) {
featureBatches.get(initialModel).get(i % numThreads).add(featurePool.get(initialModel).get(i));
}
// greedily add features
double curMetric = 0.0;
double prevMetric = Double.NEGATIVE_INFINITY;
int iter = 1;
while(curMetric - prevMetric > TOLERANCE ) {
Map modelFeaturePairMeasures = new HashMap();
// update models
for(Model model : models) {
List>> futures = new ArrayList>>();
for(int i = 0; i < numThreads; i++) {
// construct measure
Measure metric = (Measure)Class.forName(metricClassName).newInstance();
// line searcher
LineSearch search = new LineSearch(model, featureBatches.get(model).get(i), scoreTable.get(model), metric);
Future