weka.filters.SimpleBatchFilter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* SimpleBatchFilter.java
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters;
import weka.core.Instance;
import weka.core.Instances;
/**
* This filter is a superclass for simple batch filters.
*
*
* General notes:
*
* - After adding instances to the filter via input(Instance) one always
* has to call batchFinished() to make them available via output().
* - After the first call of batchFinished() the field m_FirstBatchDone is
* set to <code>true</code>.
*
*
*
* Example:
* The following code snippet uses the filter <code>SomeFilter</code> on a
* dataset that is loaded from <code>filename</code>.
*
* import weka.core.*;
* import weka.filters.*;
* import java.io.*;
* ...
* SomeFilter filter = new SomeFilter();
* // set necessary options for the filter
* Instances data = new Instances(
* new BufferedReader(
* new FileReader(filename)));
* Instances filteredData = Filter.useFilter(data, filter);
*
*
* Implementation:
* Only the following abstract methods need to be implemented:
*
* - globalInfo()
* - determineOutputFormat(Instances)
* - process(Instances)
*
*
* And the getCapabilities() method must return what kind of
* attributes and classes the filter can handle.
*
*
* If more options are necessary, then the following methods need to be
* overridden:
*
* - listOptions()
* - setOptions(String[])
* - getOptions()
*
*
*
* To make the filter available from commandline one must add the following
* main method for correct execution (<Filtername> must be replaced
* with the actual filter classname):
*
* public static void main(String[] args) {
* runFilter(new <Filtername>(), args);
* }
*
*
*
* Example implementation:
*
* import weka.core.*;
* import weka.core.Capabilities.*;
* import weka.filters.*;
*
* public class SimpleBatch
* extends SimpleBatchFilter {
*
* public String globalInfo() {
* return "A simple batch filter that adds an additional attribute 'bla' at the end containing the index of the processed instance.";
* }
*
* public Capabilities getCapabilities() {
* Capabilities result = super.getCapabilities();
* result.enableAllAttributes();
* result.enableAllClasses();
* result.enable(Capability.NO_CLASS); // filter doesn't need class to be set
* return result;
* }
*
* protected Instances determineOutputFormat(Instances inputFormat) {
* Instances result = new Instances(inputFormat, 0);
* result.insertAttributeAt(new Attribute("bla"), result.numAttributes());
* return result;
* }
*
* protected Instances process(Instances inst) {
* Instances result = new Instances(determineOutputFormat(inst), 0);
* for (int i = 0; i < inst.numInstances(); i++) {
* double[] values = new double[result.numAttributes()];
* for (int n = 0; n < inst.numAttributes(); n++)
* values[n] = inst.instance(i).value(n);
* values[values.length - 1] = i;
* result.add(new Instance(1, values));
* }
* return result;
* }
*
* public static void main(String[] args) {
* runFilter(new SimpleBatch(), args);
* }
* }
*
*
*
* Options:
* Valid filter-specific options are:
*
* -D
* Turns on output of debugging information.
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 8954 $
* @see SimpleStreamFilter
* @see #input(Instance)
* @see #batchFinished()
* @see #m_FirstBatchDone
*/
public abstract class SimpleBatchFilter
  extends SimpleFilter {

  /** Version identifier used for serialization. */
  private static final long serialVersionUID = 8102908673378055114L;

  /**
   * Tells whether the output format is already available right after the
   * input format has been set, rather than only once all the data has been
   * seen (see batchFinished()). This implementation always answers false.
   *
   * @return true if the output format is immediately available; always
   *         false here
   * @see #batchFinished()
   * @see #setInputFormat(Instances)
   */
  protected boolean hasImmediateOutputFormat() {
    return false;
  }

  /**
   * Hands one instance to the filter. A copy of the instance is buffered.
   * While the first batch is still being collected, nothing is processed
   * here; batchFinished() has to be called to make the data available.
   * Once the first batch is done, the buffered data is passed through
   * process(Instances) right away, the result is pushed to the output and
   * the input buffer is flushed. If m_NewBatch is set when this method is
   * entered, the queue is reset and m_NewBatch is set to false.
   *
   * @param instance	the input instance
   * @return		the value of m_FirstBatchDone, i.e. true if the
   * 			filtered instance may now be collected with output()
   * @throws IllegalStateException if no input structure has been defined
   * @throws Exception	if something goes wrong
   * @see #batchFinished()
   */
  public boolean input(Instance instance) throws Exception {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // first instance of a new batch: reset the queue and clear the flag
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // the caller's instance is never buffered directly, only a copy of it
    Instance duplicate = (Instance) instance.copy();
    bufferInput(duplicate);

    // first batch still being collected: processing is left to batchFinished()
    if (!isFirstBatchDone()) {
      return m_FirstBatchDone;
    }

    Instances processed = process(new Instances(getInputFormat()));
    pushAll(processed);
    flushInput();

    return m_FirstBatchDone;
  }

  /**
   * Marks the end of the current batch of input. On the first batch, and
   * only if the output format is not immediately available, the output
   * format is determined from the header of the input data. Any instances
   * that are still pending are processed, the input queue is cleared and
   * the results are moved to the output, so that output() may be used to
   * retrieve them. Finally m_NewBatch and m_FirstBatchDone are both set
   * to true.
   *
   * @return		true if there are instances pending output
   * @throws IllegalStateException if no input format has been set.
   * @throws Exception	if something goes wrong
   * @see #m_NewBatch
   * @see #m_FirstBatchDone
   */
  public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // copy of the data currently held by the input format
    Instances pending = new Instances(getInputFormat());

    // output format not set yet (first batch, nothing immediate): do it now
    if (!(hasImmediateOutputFormat() || isFirstBatchDone())) {
      Instances header = new Instances(pending, 0);
      setOutputFormat(determineOutputFormat(header));
    }

    // nothing to do without pending instances; for a second batch they may
    // already have been processed by input(Instance) and put on the output
    // queue
    if (pending.numInstances() > 0) {
      Instances processed = process(pending);
      // the input queue is cleared before the results go to the output
      flushInput();
      pushAll(processed);
    }

    m_NewBatch = true;
    m_FirstBatchDone = true;

    return !(numPendingOutput() == 0);
  }

  /**
   * Pushes every instance of the given dataset, in index order, via
   * push(Instance).
   *
   * @param data	the instances to push
   * @throws Exception	if something goes wrong
   */
  private void pushAll(Instances data) throws Exception {
    int index = 0;
    while (index < data.numInstances()) {
      push(data.instance(index));
      index++;
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy