All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.experiment.ExplicitTestsetResultProducer Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ExplicitTestsetResultProducer.java
 *    Copyright (C) 2009-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.experiment;

import java.io.File;
import java.util.Calendar;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.TimeZone;
import java.util.Vector;

import weka.core.AdditionalMeasureProducer;
import weka.core.Environment;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WekaException;
import weka.core.converters.ConverterUtils.DataSource;

/**
 *  Loads the external test set and calls the
 * appropriate SplitEvaluator to generate some results.
* The filename of the test set is constructed as follows:
* <dir> + / + <prefix> + <relation-name> + <suffix>
* The relation-name can be modified by using the regular expression to replace
* the matching sub-string with a specified replacement string. In order to get
* rid of the string that the Weka filters add to the end of the relation name,
* just use '.*-weka' as the regular expression to find.
* The suffix determines the type of file to load, i.e., one is not restricted
* to ARFF files. As long as Weka recognizes the extension specified in the
* suffix, the data will be loaded with one of Weka's converters.
*

* * * Valid options are: *

* *

 * -D
 * Save raw split evaluator output.
 * 
* *
 * -O <file/directory name/path>
 *  The filename where raw output will be stored.
 *  If a directory name is specified then individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D.
 *  (default: splitEvalutorOut.zip)
 * 
* *
 * -W <class name>
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator
 * 
* *
 * -R
 *  Set when data is to be randomized.
 * 
* *
 * -dir <directory>
 *  The directory containing the test sets.
 *  (default: current directory)
 * 
* *
 * -prefix <string>
 *  An optional prefix for the test sets (before the relation name).
 * (default: empty string)
 * 
* *
 * -suffix <string>
 *  The suffix to append to the test set.
 *  (default: _test.arff)
 * 
* *
 * -find <regular expression>
 *  The regular expression to search the relation name with.
 *  Not used if an empty string.
 *  (default: empty string)
 * 
* *
 * -replace <string>
 *  The replacement string for all the matches of '-find'.
 *  (default: empty string)
 * 
* *
 * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 * 
* *
 * -W <class name>
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes
 * 
* *
 * -C <index>
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)
 * 
* *
 * -I <index>
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. if 0
 *  no output (default 0).
 * 
* *
 * -P
 *  Add target and prediction columns to the result
 *  for each fold.
 * 
* *
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * 
* *
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
* * * * All options after -- will be passed to the split evaluator. * * @author Len Trigg ([email protected]) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 10203 $ */ public class ExplicitTestsetResultProducer implements ResultProducer, OptionHandler, AdditionalMeasureProducer, RevisionHandler { /** for serialization. */ private static final long serialVersionUID = 2613585409333652530L; /** the default suffix. */ public final static String DEFAULT_SUFFIX = "_test.arff"; /** The dataset of interest. */ protected Instances m_Instances; /** The ResultListener to send results to. */ protected ResultListener m_ResultListener = new CSVResultListener(); /** The directory containing all the test sets. */ protected File m_TestsetDir = new File(System.getProperty("user.dir")); /** The prefix for all the test sets. */ protected String m_TestsetPrefix = ""; /** The suffix for all the test sets. */ protected String m_TestsetSuffix = DEFAULT_SUFFIX; /** The regular expression to search for in the relation name. */ protected String m_RelationFind = ""; /** The string to use to replace the matches of the regular expression. */ protected String m_RelationReplace = ""; /** Whether dataset is to be randomized. */ protected boolean m_randomize = false; /** The SplitEvaluator used to generate results. */ protected SplitEvaluator m_SplitEvaluator = new ClassifierSplitEvaluator(); /** The names of any additional measures to look for in SplitEvaluators. */ protected String[] m_AdditionalMeasures = null; /** Save raw output of split evaluators --- for debugging purposes. */ protected boolean m_debugOutput = false; /** The output zipper to use for saving raw splitEvaluator output. */ protected OutputZipper m_ZipDest = null; /** The destination output file/directory for raw output. */ protected File m_OutputFile = new File(new File( System.getProperty("user.dir")), "splitEvalutorOut.zip"); /** The name of the key field containing the dataset name. 
*/ public static String DATASET_FIELD_NAME = "Dataset"; /** The name of the key field containing the run number. */ public static String RUN_FIELD_NAME = "Run"; /** The name of the result field containing the timestamp. */ public static String TIMESTAMP_FIELD_NAME = "Date_time"; protected transient Environment m_env; /** * Returns a string describing this result producer. * * @return a description of the result producer suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Loads the external test set and calls the appropriate " + "SplitEvaluator to generate some results.\n" + "The filename of the test set is constructed as follows:\n" + "
+ / + + + \n" + "The relation-name can be modified by using the regular expression " + "to replace the matching sub-string with a specified replacement " + "string. In order to get rid of the string that the Weka filters " + "add to the end of the relation name, just use '.*-weka' as the " + "regular expression to find.\n" + "The suffix determines the type of file to load, i.e., one is " + "not restricted to ARFF files. As long as Weka recognizes the " + "extension specified in the suffix, the data will be loaded with " + "one of Weka's converters."; } /** * Returns an enumeration describing the available options.. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy