All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.SimpleBatchFilter Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * SimpleBatchFilter.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters;

import weka.core.Instance;
import weka.core.Instances;

/** 
 * This filter is a superclass for simple batch filters. 
 * 

 * <b>General notes:</b>
 * <ul>
 *   <li>After adding instances to the filter via input(Instance) one always
 *   has to call batchFinished() to make them available via output().</li>
 *   <li>After the first call of batchFinished() the field m_FirstBatchDone is
 *   set to true.</li>
 * </ul>

 * <b>Example:</b><br>
 * The following code snippet uses the filter <code>SomeFilter</code> on a
 * dataset that is loaded from <code>filename</code>.

 * import weka.core.*;
 * import weka.filters.*;
 * import java.io.*;
 * ...
 * SomeFilter filter = new SomeFilter();
 * // set necessary options for the filter
 * Instances data = new Instances(
 *                    new BufferedReader(
 *                      new FileReader(filename)));
 * Instances filteredData = Filter.useFilter(data, filter);
 * 
 * <b>Implementation:</b><br>
 * Only the following abstract methods need to be implemented:
 * <ul>
 *   <li>globalInfo()</li>
 *   <li>determineOutputFormat(Instances)</li>
 *   <li>process(Instances)</li>
 * </ul>
 * And the getCapabilities() method must return what kind of
 * attributes and classes the filter can handle.

 * If more options are necessary, then the following methods need to be
 * overridden:
 * <ul>
 *   <li>listOptions()</li>
 *   <li>setOptions(String[])</li>
 *   <li>getOptions()</li>
 * </ul>

 * To make the filter available from the command line, one must add the following
 * main method for correct execution (&lt;Filtername&gt; must be replaced
 * with the actual filter classname):

 *  public static void main(String[] args) {
 *    runFilter(new <Filtername>(), args);
 *  }
 * 
*

* * Example implementation:
*

 * import weka.core.*;
 * import weka.core.Capabilities.*;
 * import weka.filters.*;
 *
 * public class SimpleBatch
 *   extends SimpleBatchFilter {
 *   
 *   public String globalInfo() {
 *     return "A simple batch filter that adds an additional attribute 'bla' at the end containing the index of the processed instance.";
 *   }
 *     
 *   public Capabilities getCapabilities() {
 *     Capabilities result = super.getCapabilities();
 *     result.enableAllAttributes();
 *     result.enableAllClasses();
 *     result.enable(Capability.NO_CLASS);  // filter doesn't need class to be set
 *     return result;
 *   }
 * 
 *   protected Instances determineOutputFormat(Instances inputFormat) {
 *     Instances result = new Instances(inputFormat, 0);
 *     result.insertAttributeAt(new Attribute("bla"), result.numAttributes());
 *     return result;
 *   }
 * 
 *   protected Instances process(Instances inst) {
 *     Instances result = new Instances(determineOutputFormat(inst), 0);
 *     for (int i = 0; i < inst.numInstances(); i++) {
 *       double[] values = new double[result.numAttributes()];
 *       for (int n = 0; n < inst.numAttributes(); n++)
 *         values[n] = inst.instance(i).value(n);
 *       values[values.length - 1] = i;
 *       result.add(new Instance(1, values));
 *     }
 *     return result;
 *   }
 * 
 *   public static void main(String[] args) {
 *     runFilter(new SimpleBatch(), args);
 *   }
 * }
 * 
*

 * <b>Options:</b><br>
 * Valid filter-specific options are:
 * <br>
 * -D <br>
 * Turns on output of debugging information.

* * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 8954 $ * @see SimpleStreamFilter * @see #input(Instance) * @see #batchFinished() * @see #m_FirstBatchDone */ public abstract class SimpleBatchFilter extends SimpleFilter { /** for serialization */ private static final long serialVersionUID = 8102908673378055114L; /** * returns true if the output format is immediately available after the * input format has been set and not only after all the data has been * seen (see batchFinished()) * * @return true if the output format is immediately available * @see #batchFinished() * @see #setInputFormat(Instances) */ protected boolean hasImmediateOutputFormat() { return false; } /** * Input an instance for filtering. Filter requires all * training instances be read before producing output (calling the method * batchFinished() makes the data available). If this instance is part of * a new batch, m_NewBatch is set to false. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input structure has been defined * @throws Exception if something goes wrong * @see #batchFinished() */ public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) throw new IllegalStateException("No input instance format defined"); if (m_NewBatch) { resetQueue(); m_NewBatch = false; } bufferInput((Instance) instance.copy()); if (isFirstBatchDone()) { Instances inst = new Instances(getInputFormat()); inst = process(inst); for (int i = 0; i < inst.numInstances(); i++) push(inst.instance(i)); flushInput(); } return m_FirstBatchDone; } /** * Signify that this batch of input to the filter is finished. If * the filter requires all instances prior to filtering, output() * may now be called to retrieve the filtered instances. 
Any * subsequent instances filtered should be filtered based on setting * obtained from the first batch (unless the setInputFormat has been * re-assigned or new options have been set). Sets m_FirstBatchDone * and m_NewBatch to true. * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. * @throws Exception if something goes wrong * @see #m_NewBatch * @see #m_FirstBatchDone */ public boolean batchFinished() throws Exception { int i; Instances inst; if (getInputFormat() == null) throw new IllegalStateException("No input instance format defined"); // get data inst = new Instances(getInputFormat()); // if output format hasn't been set yet, do it now if (!hasImmediateOutputFormat() && !isFirstBatchDone()) setOutputFormat(determineOutputFormat(new Instances(inst, 0))); // don't do anything in case there are no instances pending. // in case of second batch, they may have already been processed // directly by the input method and added to the output queue if (inst.numInstances() > 0) { // process data inst = process(inst); // clear input queue flushInput(); // move it to the output for (i = 0; i < inst.numInstances(); i++) push(inst.instance(i)); } m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy