All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.SimpleStreamFilter Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * SimpleStreamFilter.java
 * Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters;

import weka.core.Instance;
import weka.core.Instances;

/**
 * This filter is a superclass for simple stream filters.
 * 

*

* * General notes:
*

    *
  • After the first call of batchFinished() the field m_FirstBatchDone is set * to true.
  • *
*

* * Example:
* The following code snippet uses the filter SomeFilter on a * dataset that is loaded from filename. * *

 * import weka.core.*;
 * import weka.filters.*;
 * import java.io.*;
 * ...
 * SomeFilter filter = new SomeFilter();
 * // set necessary options for the filter
 * Instances data = new Instances(
 *                    new BufferedReader(
 *                      new FileReader(filename)));
 * Instances filteredData = Filter.useFilter(data, filter);
 * 
* * Implementation:
* Only the following abstract methods need to be implemented: *
    *
  • globalInfo()
  • *
  • determineOutputFormat(Instances)
  • *
  • process(Instance)
  • *
*
* And the getCapabilities() method must return what kind of attributes * and classes the filter can handle. *

* * If more options are necessary, then the following methods need to be * overriden: *

    *
  • listOptions()
  • *
  • setOptions(String[])
  • *
  • getOptions()
  • *
*

* * To make the filter available from commandline one must add the following main * method for correct execution (<Filtername> must be replaced with the * actual filter classname): * *

 *  public static void main(String[] args) {
 *    runFilter(new <Filtername>(), args);
 *  }
 * 
*

* * Example implementation:
* *

 * import weka.core.*;
 * import weka.core.Capabilities.*;
 * import weka.filters.*;
 * 
 * import java.util.Random;
 * 
 * public class SimpleStream extends SimpleStreamFilter {
 * 
 *   public String globalInfo() {
 *     return "A simple stream filter that adds an attribute 'bla' at the end containing a random number.";
 *   }
 * 
 *   public Capabilities getCapabilities() {
 *     Capabilities result = super.getCapabilities();
 *     result.enableAllAttributes();
 *     result.enableAllClasses();
 *     result.enable(Capability.NO_CLASS); // filter doesn't need class to be set
 *     return result;
 *   }
 * 
 *   protected Instances determineOutputFormat(Instances inputFormat) {
 *     Instances result = new Instances(inputFormat, 0);
 *     result.insertAttributeAt(new Attribute("bla"), result.numAttributes());
 *     return result;
 *   }
 * 
 *   protected Instance process(Instance inst) {
 *     double[] values = new double[inst.numAttributes() + 1];
 *     for (int n = 0; n < inst.numAttributes(); n++)
 *       values[n] = inst.value(n);
 *     values[values.length - 1] = new Random().nextInt();
 *     Instance result = new DenseInstance(1, values);
 *     return result;
 *   }
 * 
 *   public static void main(String[] args) {
 *     runFilter(new SimpleStream(), args);
 *   }
 * }
 * 
 * 
*

* * Options:
* Valid filter-specific options are: *

* * -D
* Turns on output of debugging information. *

* * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 12037 $ * @see SimpleBatchFilter * @see #input(Instance) * @see #batchFinished() * @see #m_FirstBatchDone */ public abstract class SimpleStreamFilter extends SimpleFilter implements StreamableFilter { /** for serialization */ private static final long serialVersionUID = 2754882676192747091L; /** * Returns true if the output format is immediately available after the input * format has been set and not only after all the data has been seen (see * batchFinished()). This method should normally return true for a stream * filter, since the data will be processed in a batch manner instead (or at * least for the second batch of files, see m_FirstBatchDone). * * @return true if the output format is immediately available * @see #batchFinished() * @see #setInputFormat(Instances) * @see #m_FirstBatchDone */ @Override protected boolean hasImmediateOutputFormat() { return true; } /** * Determines the output format based on the input format and returns this. In * case the output format cannot be returned immediately, i.e., * hasImmediateOutputFormat() returns false, then this method will called from * batchFinished() after the call of preprocess(Instances), in which, e.g., * statistics for the actual processing step can be gathered. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() * @see #preprocess(Instances) */ @Override protected abstract Instances determineOutputFormat(Instances inputFormat) throws Exception; /** * processes the given instance (may change the provided instance) and returns * the modified version. 
* * @param instance the instance to process * @return the modified data * @throws Exception in case the processing goes wrong */ protected abstract Instance process(Instance instance) throws Exception; /** * Processes the given data (may change the provided dataset) and returns the * modified version. This method is called in batchFinished(). This * implementation only calls process(Instance) for each instance in the given * dataset. * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong * @see #batchFinished() * @see #process(Instance) */ @Override protected Instances process(Instances instances) throws Exception { Instances result; int i; result = new Instances(getOutputFormat(), 0); for (i = 0; i < instances.numInstances(); i++) { result.add(process(instances.instance(i))); } return result; } /** * In case the output format cannot be returned immediately, this method is * called before the actual processing of the instances. Derived classes can * implement specific behavior here. * * @param instances the instances to work on * @see #hasImmediateOutputFormat() * @see #determineOutputFormat(Instances) */ protected void preprocess(Instances instances) { } /** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). 
* @throws IllegalStateException if no input structure has been defined * @throws Exception if something goes wrong */ @Override public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } try { if (hasImmediateOutputFormat() || isFirstBatchDone()) { Instance processed = process((Instance) instance.copy()); if (processed != null) { push(processed, false); // No need to copy instance return true; } return false; } else { bufferInput(instance); return false; } } catch (Exception e) { return false; } } /** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. Any subsequent instances filtered should * be filtered based on setting obtained from the first batch (unless the * setInputFormat has been re-assigned or new options have been set). * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. */ @Override public boolean batchFinished() throws Exception { int i; Instances inst; if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } inst = new Instances(getInputFormat()); flushInput(); if (!hasImmediateOutputFormat()) { preprocess(inst); } // process data inst = process(inst); // if output format hasn't been set yet, do it now if (!hasImmediateOutputFormat() && !isFirstBatchDone()) { setOutputFormat(inst); } // move data to the output for (i = 0; i < inst.numInstances(); i++) { push(inst.instance(i), false); // No need to copy instance } m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy