
org.terrier.matching.JforestsModelMatching Maven / Gradle / Ivy
The newest version!
/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is JforestsModelMatching.java.
*
* The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald
*/
package org.terrier.matching;
import java.io.File;
import java.util.Arrays;
import org.terrier.learning.FeaturedResultSet;
import org.terrier.structures.Index;
import org.terrier.utility.ApplicationSetup;
import edu.uci.jforests.dataset.BitNumericArray;
import edu.uci.jforests.dataset.ByteNumericArray;
import edu.uci.jforests.dataset.Feature;
import edu.uci.jforests.dataset.NullNumericArray;
import edu.uci.jforests.dataset.NumericArray;
import edu.uci.jforests.dataset.RankingDataset;
import edu.uci.jforests.dataset.ShortNumericArray;
import edu.uci.jforests.input.FeatureAnalyzer;
import edu.uci.jforests.learning.LearningUtils;
import edu.uci.jforests.learning.trees.Ensemble;
import edu.uci.jforests.learning.trees.Tree;
import edu.uci.jforests.learning.trees.decision.DecisionTree;
import edu.uci.jforests.learning.trees.regression.RegressionTree;
import edu.uci.jforests.sample.RankingSample;
import edu.uci.jforests.sample.Sample;
import gnu.trove.TIntIntHashMap;
/** Applies a Jforests regression tree learned model to a {@link FeaturedResultSet}. Learned model files are generated by Jforests.
* If you use this class, you are implicitly using the jforests library, and the Jforests citation policy applies.
* <p><b>Properties</b></p>
* <ul>
* <li>{@code fat.matching.learned.jforest.model} - filename of the ensemble model generated by jforests</li>
* <li>{@code fat.matching.learned.jforest.statistics} - filename of the feature statistics file generated by jforests;
* defaults to the model filename with {@code .features} appended</li>
* </ul>
* @author Craig Macdonald
* @since 4.0
*/
public class JforestsModelMatching extends LearnedModelMatching {
/** Jforests tree ensemble; starts empty and is filled from the model file by loadModel. */
final Ensemble ensemble = new Ensemble();
/** Jforests feature analyzer; its feature statistics are read from file by loadModel. */
final FeatureAnalyzer featureAnalyzer = new FeatureAnalyzer();
public JforestsModelMatching(Index _index, Matching _parent) throws Exception {
super(_index, _parent);
loadModel(ApplicationSetup.getProperty("fat.matching.learned.jforest.model", null));
}
public JforestsModelMatching(Matching _parent) throws Exception {
super(_parent);
loadModel(ApplicationSetup.getProperty("fat.matching.learned.jforest.model", null));
}
/**
 * Creates a matching without an explicit index, loading the model from the
 * given file instead of consulting the model property.
 *
 * @param _parent parent matching handed to the superclass constructor
 * @param modelFilename filename of the ensemble model to load
 * @throws Exception propagated from loadModel
 */
public JforestsModelMatching(Matching _parent, String modelFilename) throws Exception {
    super(_parent);
    this.loadModel(modelFilename);
}
/**
 * Creates a matching over the given index, loading the model from the given
 * file instead of consulting the model property.
 *
 * @param _index index handed to the superclass constructor
 * @param _parent parent matching handed to the superclass constructor
 * @param modelFilename filename of the ensemble model to load
 * @throws Exception propagated from loadModel
 */
public JforestsModelMatching(Index _index, Matching _parent, String modelFilename) throws Exception {
    // Forward the index, as the (Index, Matching) constructor does; it was
    // previously accepted but silently dropped via super(_parent).
    super(_index, _parent);
    loadModel(modelFilename);
}
/**
 * Creates a matching over the given index, loading the model from the given
 * file with an explicitly chosen tree implementation.
 *
 * @param _index index handed to the superclass constructor
 * @param _parent parent matching handed to the superclass constructor
 * @param modelFilename filename of the ensemble model to load
 * @param treeClass tree type passed to the ensemble loader
 * @throws Exception propagated from loadModel
 */
public JforestsModelMatching(Index _index, Matching _parent, String modelFilename, Class<? extends Tree> treeClass) throws Exception {
    // Forward the index, as the (Index, Matching) constructor does; it was
    // previously accepted but silently dropped via super(_parent).
    super(_index, _parent);
    loadModel(modelFilename, treeClass);
}
/**
 * Loads the model from the given file as an ensemble of regression trees.
 *
 * @param model_filename filename of the ensemble model; must not be null
 * @throws Exception propagated from the two-argument loadModel
 */
protected void loadModel(String model_filename) throws Exception
{
    // The former "regression ? RegressionTree : DecisionTree" switch tested a
    // constant true, so the regression tree type was always the one selected.
    loadModel(model_filename, RegressionTree.class);
}
protected void loadModel(String model_filename, Class extends Tree> treeClass) throws Exception
{
if (model_filename == null)
throw new IllegalArgumentException("model_filename not specified, perhaps you did not specify property fat.matching.learned.jforest.model ?" );
ensemble.loadFromFile(treeClass, new File(model_filename));
final String featureStats_filename =
ApplicationSetup.getProperty("fat.matching.learned.jforest.statistics", model_filename+".features");
featureAnalyzer.loadFeaturesFromFile(featureStats_filename);
}
protected RankingDataset makeDataset(int N, int featureCount, double[][] doubleFeatures)
{
//doubleFeatures: indexed by feature then document
//intFeatures: indexed by document then feature
final int[][] intFeatures = new int[N][featureCount];
TIntIntHashMap[] _valueHashMap = new TIntIntHashMap[featureCount];
for (int f = 0; f < featureCount; f++) {
_valueHashMap[f] = new TIntIntHashMap();
}
//we are also rotating here.
for(int d=0;d
© 2015 - 2025 Weber Informatics LLC | Privacy Policy