// weka.filters.Filter Maven / Gradle / Ivy
// Show all versions of weka-stable Show documentation
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Filter.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.CapabilitiesIgnorer;
import weka.core.CommandlineRunnable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Queue;
import weka.core.RelationalLocator;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.StringLocator;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.Version;
import weka.core.converters.ConverterUtils.DataSource;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Vector;
/**
* An abstract class for instance filters: objects that take instances as input,
* carry out some transformation on the instance and then output the instance.
* The method implementations in this class assume that most of the work will be
* done in the methods overridden by subclasses.
*
*
* A simple example of filter use. This example doesn't remove instances from
* the output queue until all instances have been input, so has higher memory
* consumption than an approach that uses output instances as they are made
* available:
*
* <pre>
* Filter filter = ..some type of filter..
* Instances data = ..some instances..
* filter.setInputFormat(data);
* for (int i = 0; i &lt; data.numInstances(); i++) {
*   filter.input(data.instance(i));
* }
* filter.batchFinished();
* Instances newData = filter.getOutputFormat();
* Instance processed;
* while ((processed = filter.output()) != null) {
*   newData.add(processed);
* }
* ..do something with newData..
* </pre>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 14805 $
*/
public abstract class Filter implements Serializable, CapabilitiesHandler,
RevisionHandler, OptionHandler, CapabilitiesIgnorer, CommandlineRunnable {
/** for serialization */
private static final long serialVersionUID = -8835063755891851218L;
/** The output format for instances; null until setOutputFormat() is called */
private Instances m_OutputFormat = null;
/**
* The output instance queue; replaced by a fresh Queue whenever the input or
* output format is (re)set, the queue is reset or a new batch starts
*/
private Queue m_OutputQueue = null;
/** Indices of string attributes in the output format */
protected StringLocator m_OutputStringAtts = null;
/** Indices of string attributes in the input format */
protected StringLocator m_InputStringAtts = null;
/** Indices of relational attributes in the output format */
protected RelationalLocator m_OutputRelAtts = null;
/** Indices of relational attributes in the input format */
protected RelationalLocator m_InputRelAtts = null;
/**
* The input format for instances; also holds the instances buffered via
* bufferInput()
*/
private Instances m_InputFormat = null;
/** Record whether the filter is at the start of a batch */
protected boolean m_NewBatch = true;
/** True if the first batch has been done */
protected boolean m_FirstBatchDone = false;
/** Whether the filter is run in debug mode. */
protected boolean m_Debug = false;
/** Whether capability checking should be skipped for this filter. */
protected boolean m_DoNotCheckCapabilities = false;
/**
 * Tells whether the filter is currently at the start of a batch. This is the
 * case for a freshly created filter, after the input format has been set and
 * after each call of batchFinished().
 *
 * @return true if a new batch has been initiated
 * @see #m_NewBatch
 * @see #batchFinished()
 */
public boolean isNewBatch() {
  return this.m_NewBatch;
}
/**
 * Tells whether the first batch of instances has already been processed.
 * Supervised filters need this: they "learn" from the first batch and must
 * not be updated by subsequent calls of batchFinished().
 *
 * @return true if the first batch has been processed
 * @see #m_FirstBatchDone
 * @see #batchFinished()
 */
public boolean isFirstBatchDone() {
  return this.m_FirstBatchDone;
}
/**
 * Indicates whether this filter may drop instances once the first batch has
 * been completed, i.e. whether it might not produce an output instance for
 * every instance that is input after the first batch. Filters for which this
 * is the case should override this method and return true.
 *
 * @return false in this default implementation
 */
public boolean mayRemoveInstanceAfterFirstBatchDone() {
  return false;
}
/**
 * Returns the Capabilities of this filter. This default implementation
 * enables all capabilities and sets the minimum number of instances to 0;
 * derived filters are expected to override it.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  Capabilities caps = new Capabilities(this);
  caps.enableAll();
  caps.setMinimumNumberInstances(0);
  return caps;
}
/**
 * Returns the revision string of this class.
 *
 * @return the revision, as extracted by RevisionUtils
 */
@Override
public String getRevision() {
  final String revisionTag = "$Revision: 14805 $";
  return RevisionUtils.extract(revisionTag);
}
/**
 * Returns the Capabilities of this filter, customized based on the data.
 * I.e., it removes all class capabilities (except NO_CLASS) in case there is
 * no class attribute present, or removes the NO_CLASS capability in case
 * there is a class attribute present.
 *
 * @param data the data to use for customization
 * @return the capabilities of this object, based on the data
 * @see #getCapabilities()
 */
public Capabilities getCapabilities(Instances data) {
  Capabilities result = getCapabilities();

  if (data.classIndex() == -1) {
    // no class? -> remove all class capabilities apart from NO_CLASS
    Capabilities classes = result.getClassCapabilities();
    // typed iterator: a raw Iterator would hand back Object from next()
    Iterator<Capability> iter = classes.capabilities();
    while (iter.hasNext()) {
      Capability cap = iter.next();
      if (cap != Capability.NO_CLASS) {
        result.disable(cap);
        result.disableDependency(cap);
      }
    }
  } else {
    // class? -> remove NO_CLASS
    result.disable(Capability.NO_CLASS);
    result.disableDependency(Capability.NO_CLASS);
  }

  return result;
}
/**
 * Sets the format of output instances. The derived class should use this
 * method once it has determined the output format. For a non-null format
 * the output locators are re-initialized and the relation name is extended
 * with the filter's class name and its (trimmed) options. The output queue
 * is replaced by an empty one in either case.
 *
 * @param outputFormat the new output format, or null to clear it
 */
protected void setOutputFormat(Instances outputFormat) {
  if (outputFormat == null) {
    m_OutputFormat = null;
  } else {
    m_OutputFormat = outputFormat.stringFreeStructure();
    initOutputLocators(m_OutputFormat, null);

    // Rename the relation: <old name>-<filter class><options...>
    StringBuilder renamed = new StringBuilder();
    renamed.append(outputFormat.relationName());
    renamed.append("-");
    renamed.append(this.getClass().getName());
    if (this instanceof OptionHandler) {
      String[] currentOptions = ((OptionHandler) this).getOptions();
      for (int i = 0; i < currentOptions.length; i++) {
        renamed.append(currentOptions[i].trim());
      }
    }
    m_OutputFormat.setRelationName(renamed.toString());
  }

  m_OutputQueue = new Queue();
}
/**
 * Gives access to the currently set input format dataset. Since input
 * instances are buffered in this very dataset, it may contain instances.
 *
 * @return the input Instances (not a copy), or null if none has been set
 */
protected Instances getInputFormat() {
  return this.m_InputFormat;
}
/**
 * Gets a copy of just the structure of the input format instances.
 *
 * @return a copy of the structure (attribute information) of the input
 *         format instances, or null if no input format has been set
 */
public Instances getCopyOfInputFormat() {
  if (getInputFormat() == null) {
    return null;
  }
  return new Instances(getInputFormat(), 0);
}
/**
 * Hands out the current input format itself rather than a copy of it.
 *
 * @return a reference to the current input format (may be null)
 */
protected Instances inputFormatPeek() {
  return this.m_InputFormat;
}
/**
 * Hands out the current output format itself rather than a copy of it.
 *
 * @return a reference to the current output format (may be null)
 */
protected Instances outputFormatPeek() {
  return this.m_OutputFormat;
}
/**
 * Adds an output instance to the queue. The derived class should use this
 * method for each output instance it makes available. This is a shortcut
 * for push(instance, true): the instance is copied before it is queued, but
 * only if it has a reference to a dataset.
 *
 * @param instance the instance to be added to the queue.
 */
protected void push(Instance instance) {
  final boolean copyBeforeQueueing = true;
  push(instance, copyBeforeQueueing);
}
/**
 * Adds an output instance to the queue. The derived class should use this
 * method for each output instance it makes available. A null instance is
 * ignored. The instance is only copied before it is added to the output
 * queue if copyInstance is true and the instance has a reference to a
 * dataset. The queued instance is always assigned to the output format.
 *
 * @param instance the instance to be added to the queue.
 * @param copyInstance whether instance is to be copied
 */
protected void push(Instance instance, boolean copyInstance) {
  if (instance == null) {
    return;
  }

  Instance queued = instance;
  if (queued.dataset() != null) {
    if (copyInstance) {
      queued = (Instance) queued.copy();
    }
    // transfer string/relational values over to the output format
    copyValues(queued, false);
  }

  queued.setDataset(m_OutputFormat);
  m_OutputQueue.push(queued);
}
/**
 * Clears the output queue by replacing it with a new, empty one.
 */
protected void resetQueue() {
  this.m_OutputQueue = new Queue();
}
/**
 * Adds the supplied input instance to the input format dataset for later
 * processing. Use this method rather than getInputFormat().add(instance).
 * The provided instance is copied before it is buffered; a null instance is
 * ignored.
 *
 * @param instance the Instance to buffer.
 */
protected void bufferInput(Instance instance) {
  if (instance == null) {
    return;
  }

  // The copyValues() method *does* modify the instance, so work on a copy
  Instance buffered = (Instance) instance.copy();
  copyValues(buffered, true);
  m_InputFormat.add(buffered);
}
/**
 * Initializes the input attribute locators (string and relational). With
 * indices given, locating is restricted to those attributes; with null, all
 * attributes of the data are considered.
 *
 * @param data the data to initialize the locators with
 * @param indices if not null, the indices to which to restrict the locating
 */
protected void initInputLocators(Instances data, int[] indices) {
  if (indices != null) {
    m_InputStringAtts = new StringLocator(data, indices);
    m_InputRelAtts = new RelationalLocator(data, indices);
    return;
  }

  m_InputStringAtts = new StringLocator(data);
  m_InputRelAtts = new RelationalLocator(data);
}
/**
 * Initializes the output attribute locators (string and relational). With
 * indices given, locating is restricted to those attributes; with null, all
 * attributes of the data are considered.
 *
 * @param data the data to initialize the locators with
 * @param indices if not null, the indices to which to restrict the locating
 */
protected void initOutputLocators(Instances data, int[] indices) {
  if (indices != null) {
    m_OutputStringAtts = new StringLocator(data, indices);
    m_OutputRelAtts = new RelationalLocator(data, indices);
    return;
  }

  m_OutputStringAtts = new StringLocator(data);
  m_OutputRelAtts = new RelationalLocator(data);
}
/**
 * Copies string/relational values contained in the instance copied to a new
 * dataset. The Instance must already be assigned to a dataset. This dataset
 * and the destination dataset must have the same structure.
 *
 * @param instance the Instance containing the string/relational values to
 *          copy.
 * @param isInput if true the input format and input attribute locators are
 *          used, otherwise the output format and output locators
 */
protected void copyValues(Instance instance, boolean isInput) {
  final Instances destination;
  final RelationalLocator relationalAtts;
  final StringLocator stringAtts;

  if (isInput) {
    destination = m_InputFormat;
    relationalAtts = m_InputRelAtts;
    stringAtts = m_InputStringAtts;
  } else {
    destination = m_OutputFormat;
    relationalAtts = m_OutputRelAtts;
    stringAtts = m_OutputStringAtts;
  }

  RelationalLocator.copyRelationalValues(instance, destination, relationalAtts);
  StringLocator.copyStringValues(instance, destination, stringAtts);
}
/**
 * Takes string/relational values referenced by an Instance and copies them
 * from a source dataset to a destination dataset, updating the instance
 * references so that they are valid for the destination dataset. The
 * instance may have the structure (i.e. number and attribute position) of
 * either dataset; this affects where references are obtained from. Only
 * works if the number of string/relational attributes is the same in both
 * sets of indices (implicitly these attributes should be semantically the
 * same, just with shifted positions). The input locators are used for the
 * source dataset and the output locators for the destination dataset.
 *
 * @param instance the instance containing references to strings/relational
 *          values in the source dataset that will have references updated
 *          to be valid for the destination dataset.
 * @param instSrcCompat true if the instance structure is the same as the
 *          source, or false if it is the same as the destination (i.e.
 *          which of the string/relational attribute indices contains the
 *          correct locations for this instance).
 * @param srcDataset the dataset for which the current instance
 *          string/relational value references are valid (after any position
 *          mapping if needed)
 * @param destDataset the dataset for which the current instance
 *          string/relational value references need to be inserted (after
 *          any position mapping if needed)
 */
protected void copyValues(Instance instance, boolean instSrcCompat,
  Instances srcDataset, Instances destDataset) {

  // relational values first, then string values
  RelationalLocator.copyRelationalValues(
    instance, instSrcCompat,
    srcDataset, m_InputRelAtts,
    destDataset, m_OutputRelAtts);

  StringLocator.copyStringValues(
    instance, instSrcCompat,
    srcDataset, m_InputStringAtts,
    destDataset, m_OutputStringAtts);
}
/**
 * Removes all buffered instances from the input format dataset. Use this
 * method rather than getInputFormat().delete(). If the input locators found
 * any string or relational attributes, the input format is replaced by its
 * string-free structure and both input locators are rebuilt on it;
 * otherwise the buffered instances are simply deleted.
 */
protected void flushInput() {
  boolean hasStringOrRelationalAtts =
    (m_InputStringAtts.getAttributeIndices().length > 0)
      || (m_InputRelAtts.getAttributeIndices().length > 0);

  if (!hasStringOrRelationalAtts) {
    // This more efficient than new Instances(m_InputFormat, 0);
    m_InputFormat.delete();
    return;
  }

  m_InputFormat = m_InputFormat.stringFreeStructure();
  m_InputStringAtts = new StringLocator(m_InputFormat,
    m_InputStringAtts.getAllowedIndices());
  m_InputRelAtts = new RelationalLocator(m_InputFormat,
    m_InputRelAtts.getAllowedIndices());
}
/**
 * Tests whether the filter can actually handle the given data, using the
 * capabilities customized for that data.
 *
 * @param instanceInfo the data to test
 * @throws Exception if the test fails
 */
protected void testInputFormat(Instances instanceInfo) throws Exception {
  Capabilities dataSpecificCaps = getCapabilities(instanceInfo);
  dataSpecificCaps.testWithFail(instanceInfo);
}
/**
 * Sets the format of the input instances. If the filter is able to determine
 * the output format before seeing any input instances, it does so here. This
 * default implementation tests the data against the filter's capabilities,
 * clears the output format and output queue, marks the start of a new
 * (first) batch and initializes the input locators. Overriders should call
 * super.setInputFormat(Instances).
 *
 * @param instanceInfo an Instances object containing the input instance
 *          structure (any instances contained in the object are ignored -
 *          only the structure is required).
 * @return true if the outputFormat may be collected immediately; always
 *         false in this default implementation
 * @throws Exception if the inputFormat can't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  testInputFormat(instanceInfo);

  m_InputFormat = instanceInfo.stringFreeStructure();

  // back to the state of a filter that has not seen any data yet
  m_NewBatch = true;
  m_FirstBatchDone = false;
  m_OutputFormat = null;
  m_OutputQueue = new Queue();

  initInputLocators(m_InputFormat, null);
  return false;
}
/**
 * Gets the format of the output instances. This should only be called after
 * input() or batchFinished() has returned true. The relation name of the
 * output instances should be changed to reflect the action of the filter
 * (eg: add the filter name and options).
 *
 * @return an Instances object containing the output instance structure only.
 * @throws NullPointerException if no input structure has been defined (or
 *           the output format hasn't been determined yet)
 */
public Instances getOutputFormat() {
  if (m_OutputFormat != null) {
    return new Instances(m_OutputFormat, 0);
  }
  throw new NullPointerException("No output format defined.");
}
/**
 * Input an instance for filtering. Ordinarily the instance is processed and
 * made available for output immediately. Some filters require all instances
 * be read before producing output, in which case output instances should be
 * collected after calling batchFinished(). If the input marks the start of
 * a new batch, the output queue is cleared. This default implementation
 * merely buffers the instance, assuming that all instance conversion will
 * occur when batchFinished() is called.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be collected with output();
 *         always false in this default implementation
 * @throws NullPointerException if the input format has not been defined.
 * @throws Exception if the input instance was not of the correct format or
 *           if there was a problem with the filtering.
 */
public boolean input(Instance instance) throws Exception {
  if (m_InputFormat == null) {
    throw new NullPointerException("No input instance format defined");
  }

  final boolean startsNewBatch = m_NewBatch;
  if (startsNewBatch) {
    // first instance of a batch: start with an empty output queue
    m_NewBatch = false;
    m_OutputQueue = new Queue();
  }

  bufferInput(instance);
  return false;
}
/**
 * Signify that this batch of input to the filter is finished. If the filter
 * requires all instances prior to filtering, output() may now be called to
 * retrieve the filtered instances. Any subsequent instances filtered should
 * be filtered based on setting obtained from the first batch (unless the
 * input format has been re-assigned or new options have been set). This
 * default implementation assumes all instance processing occurs during
 * setInputFormat() and input(): it flushes the buffered input, marks the
 * start of a new batch and records that the first batch is done.
 *
 * @return true if there are instances pending output
 * @throws NullPointerException if no input structure has been defined, or
 *           if no output format has been defined
 * @throws Exception if there was a problem finishing the batch.
 */
public boolean batchFinished() throws Exception {
  if (m_InputFormat == null) {
    throw new NullPointerException("No input instance format defined");
  }

  flushInput();
  m_NewBatch = true;
  m_FirstBatchDone = true;

  if (m_OutputQueue.empty()) {
    // Clear out references to old strings/relationals occasionally
    if ((m_OutputStringAtts.getAttributeIndices().length > 0)
      || (m_OutputRelAtts.getAttributeIndices().length > 0)) {
      m_OutputFormat = m_OutputFormat.stringFreeStructure();
      // Rebuild BOTH locators on the fresh header (as flushInput() does for
      // the input side); otherwise the relational locator keeps referring to
      // the header that was just replaced.
      m_OutputStringAtts = new StringLocator(m_OutputFormat,
        m_OutputStringAtts.getAllowedIndices());
      m_OutputRelAtts = new RelationalLocator(m_OutputFormat,
        m_OutputRelAtts.getAllowedIndices());
    }
  }

  return (numPendingOutput() != 0);
}
/**
 * Output an instance after filtering and remove it from the output queue.
 *
 * @return the next instance taken from the output queue (or null if the
 *         queue is empty).
 * @throws NullPointerException if no output structure has been defined
 */
public Instance output() {
  if (m_OutputFormat == null) {
    throw new NullPointerException("No output instance format defined");
  }
  return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.pop();
}
/**
 * Output an instance after filtering but do not remove it from the output
 * queue.
 *
 * @return the instance the output queue offers next (or null if the queue
 *         is empty).
 * @throws NullPointerException if no output structure has been defined
 */
public Instance outputPeek() {
  if (m_OutputFormat == null) {
    throw new NullPointerException("No output instance format defined");
  }
  return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.peek();
}
/**
 * Returns the number of instances pending output, i.e. the current size of
 * the output queue.
 *
 * @return the number of instances pending output
 * @throws NullPointerException if no output structure has been defined
 */
public int numPendingOutput() {
  if (m_OutputFormat != null) {
    return m_OutputQueue.size();
  }
  throw new NullPointerException("No output instance format defined");
}
/**
 * Returns whether the output format is ready to be collected.
 *
 * @return true if the output format is set
 */
public boolean isOutputFormatDefined() {
  final boolean defined = m_OutputFormat != null;
  return defined;
}
/**
 * Creates a deep copy of the given filter using serialization.
 *
 * @param model the filter to copy
 * @return a deep copy of the filter
 * @throws Exception if an error occurs
 */
public static Filter makeCopy(Filter model) throws Exception {
  SerializedObject serialized = new SerializedObject(model);
  return (Filter) serialized.getObject();
}
/**
 * Creates a given number of deep copies of the given filter using
 * serialization. The model is serialized once and every copy is obtained
 * from that serialized form.
 *
 * @param model the filter to copy
 * @param num the number of filter copies to create.
 * @return an array of filters.
 * @throws Exception if the model is null or an error occurs
 */
public static Filter[] makeCopies(Filter model, int num) throws Exception {
  if (model == null) {
    throw new Exception("No model filter set");
  }

  Filter[] copies = new Filter[num];
  SerializedObject serialized = new SerializedObject(model);
  int slot = 0;
  while (slot < copies.length) {
    copies[slot] = (Filter) serialized.getObject();
    slot++;
  }
  return copies;
}
/**
 * Filters an entire set of instances through a filter and returns the new
 * set. A SimpleBatchFilter receives the whole dataset in one call; any other
 * filter is fed instance by instance. Afterwards the batch is finished and
 * all pending output instances are collected. The filter's input format
 * must have been set beforehand.
 *
 * @param data the data to be filtered
 * @param filter the filter to be used
 * @return the filtered set of data
 * @throws Exception if the filter can't be used successfully
 */
public static Instances useFilter(Instances data, Filter filter)
  throws Exception {

  if (!(filter instanceof SimpleBatchFilter)) {
    for (int row = 0; row < data.numInstances(); row++) {
      filter.input(data.instance(row));
    }
  } else {
    ((SimpleBatchFilter) filter).input(data);
  }
  filter.batchFinished();

  // drain the output queue into a dataset with the filter's output format
  Instances filtered = filter.getOutputFormat();
  for (Instance next = filter.output(); next != null; next = filter.output()) {
    filtered.add(next);
  }
  return filtered;
}
/**
 * Returns a description of the filter; by default this is just the name of
 * its class.
 *
 * @return a string describing the filter
 */
@Override
public String toString() {
  final Class<?> runtimeClass = getClass();
  return runtimeClass.getName();
}
/**
* Generates source code from the filter. The generated text consists of a
* class named WekaWrapper that extends Filter and forwards the actual
* filtering to static filter(...) methods of the class className, followed by
* the source code that filter.toSource(className, input) returns.
*
* @param filter the filter to output as source; it is cast to Filter in order
* to obtain its capabilities
* @param className the name of the generated class
* @param input the input data the header is generated for
* @param output the output data the header is generated for; only numeric and
* nominal attributes are supported
* @return the generated source code
* @throws UnsupportedAttributeTypeException if the output data contains an
* attribute that is neither numeric nor nominal
* @throws Exception if source code cannot be generated
*/
public static String wekaStaticWrapper(Sourcable filter, String className,
Instances input, Instances output) throws Exception {
StringBuffer result;
int i;
int n;
result = new StringBuffer();
// file header comment, package declaration and imports of the generated file
result.append("// Generated with Weka " + Version.VERSION + "\n");
result.append("//\n");
result
.append("// This code is public domain and comes with no warranty.\n");
result.append("//\n");
result.append("// Timestamp: " + new Date() + "\n");
result.append("// Relation: " + input.relationName() + "\n");
result.append("\n");
result.append("package weka.filters;\n");
result.append("\n");
result.append("import weka.core.Attribute;\n");
result.append("import weka.core.Capabilities;\n");
result.append("import weka.core.Capabilities.Capability;\n");
result.append("import weka.core.DenseInstance;\n");
result.append("import weka.core.Instance;\n");
result.append("import weka.core.Instances;\n");
result.append("import weka.core.Utils;\n");
result.append("import weka.filters.Filter;\n");
result.append("import java.util.ArrayList;\n");
result.append("\n");
// the wrapper class name is fixed; className is only used for the delegate
result.append("public class WekaWrapper\n");
result.append(" extends Filter {\n");
// generated method: globalInfo
result.append("\n");
result.append(" /**\n");
result.append(" * Returns only the toString() method.\n");
result.append(" *\n");
result.append(" * @return a string describing the filter\n");
result.append(" */\n");
result.append(" public String globalInfo() {\n");
result.append(" return toString();\n");
result.append(" }\n");
// generated method: getCapabilities (body comes from the filter's
// capabilities rendered as source)
result.append("\n");
result.append(" /**\n");
result.append(" * Returns the capabilities of this filter.\n");
result.append(" *\n");
result.append(" * @return the capabilities\n");
result.append(" */\n");
result.append(" public Capabilities getCapabilities() {\n");
result.append(((Filter) filter).getCapabilities().toSource("result", 4));
result.append(" return result;\n");
result.append(" }\n");
// generated method: objectsToInstance
result.append("\n");
result.append(" /**\n");
result.append(" * turns array of Objects into an Instance object\n");
result.append(" *\n");
result
.append(" * @param obj the Object array to turn into an Instance\n");
result.append(" * @param format the data format to use\n");
result.append(" * @return the generated Instance object\n");
result.append(" */\n");
result.append(
" protected Instance objectsToInstance(Object[] obj, Instances format) {\n");
result.append(" Instance result;\n");
result.append(" double[] values;\n");
result.append(" int i;\n");
result.append("\n");
result.append(" values = new double[obj.length];\n");
result.append("\n");
result.append(" for (i = 0 ; i < obj.length; i++) {\n");
result.append(" if (obj[i] == null)\n");
result.append(" values[i] = Utils.missingValue();\n");
result.append(" else if (format.attribute(i).isNumeric())\n");
result.append(" values[i] = (Double) obj[i];\n");
result.append(" else if (format.attribute(i).isNominal())\n");
result.append(
" values[i] = format.attribute(i).indexOfValue((String) obj[i]);\n");
result.append(" }\n");
result.append("\n");
result.append(" // create new instance\n");
result.append(" result = new DenseInstance(1.0, values);\n");
result.append(" result.setDataset(format);\n");
result.append("\n");
result.append(" return result;\n");
result.append(" }\n");
// generated method: instanceToObjects
result.append("\n");
result.append(" /**\n");
result.append(" * turns the Instance object into an array of Objects\n");
result.append(" *\n");
result.append(" * @param inst the instance to turn into an array\n");
result.append(
" * @return the Object array representing the instance\n");
result.append(" */\n");
result.append(" protected Object[] instanceToObjects(Instance inst) {\n");
result.append(" Object[] result;\n");
result.append(" int i;\n");
result.append("\n");
result.append(" result = new Object[inst.numAttributes()];\n");
result.append("\n");
result.append(" for (i = 0 ; i < inst.numAttributes(); i++) {\n");
result.append(" if (inst.isMissing(i))\n");
result.append(" result[i] = null;\n");
result.append(" else if (inst.attribute(i).isNumeric())\n");
result.append(" result[i] = inst.value(i);\n");
result.append(" else\n");
result.append(" result[i] = inst.stringValue(i);\n");
result.append(" }\n");
result.append("\n");
result.append(" return result;\n");
result.append(" }\n");
// generated method: instancesToObjects
result.append("\n");
result.append(" /**\n");
result.append(" * turns the Instances object into an array of Objects\n");
result.append(" *\n");
result.append(" * @param data the instances to turn into an array\n");
result.append(
" * @return the Object array representing the instances\n");
result.append(" */\n");
result
.append(" protected Object[][] instancesToObjects(Instances data) {\n");
result.append(" Object[][] result;\n");
result.append(" int i;\n");
result.append("\n");
result.append(" result = new Object[data.numInstances()][];\n");
result.append("\n");
result.append(" for (i = 0; i < data.numInstances(); i++)\n");
result.append(" result[i] = instanceToObjects(data.instance(i));\n");
result.append("\n");
result.append(" return result;\n");
result.append(" }\n");
// generated method: setInputFormat (rebuilds the header of 'output' attribute
// by attribute)
result.append("\n");
result.append(" /**\n");
result.append(" * Only tests the input data.\n");
result.append(" *\n");
result
.append(" * @param instanceInfo the format of the data to convert\n");
result.append(
" * @return always true, to indicate that the output format can \n");
result.append(" * be collected immediately.\n");
result.append(" */\n");
result.append(
" public boolean setInputFormat(Instances instanceInfo) throws Exception {\n");
result.append(" super.setInputFormat(instanceInfo);\n");
result.append(" \n");
result.append(" // generate output format\n");
// NOTE(review): the emitted declarations below use raw ArrayList types -
// presumably type parameters were intended here; confirm against upstream.
result
.append(" ArrayList atts = new ArrayList();\n");
result.append(" ArrayList attValues;\n");
// one emitted statement group per attribute of the output header
for (i = 0; i < output.numAttributes(); i++) {
result.append(" // " + output.attribute(i).name() + "\n");
if (output.attribute(i).isNumeric()) {
result.append(" atts.add(new Attribute(\""
+ output.attribute(i).name() + "\"));\n");
} else if (output.attribute(i).isNominal()) {
result.append(" attValues = new ArrayList();\n");
// NOTE(review): names and nominal values are written into the generated
// string literals as-is (no escaping is applied here).
for (n = 0; n < output.attribute(i).numValues(); n++) {
result.append(
" attValues.add(\"" + output.attribute(i).value(n) + "\");\n");
}
result.append(" atts.add(new Attribute(\""
+ output.attribute(i).name() + "\", attValues));\n");
} else {
// any other attribute type cannot be expressed in the generated header
throw new UnsupportedAttributeTypeException(
"Attribute type '" + output.attribute(i).type() + "' (position "
+ (i + 1) + ") is not supported!");
}
}
result.append(" \n");
result.append(" Instances format = new Instances(\""
+ output.relationName() + "\", atts, 0);\n");
result.append(" format.setClassIndex(" + output.classIndex() + ");\n");
result.append(" setOutputFormat(format);\n");
result.append(" \n");
result.append(" return true;\n");
result.append(" }\n");
// generated method: input (delegates to <className>.filter(Object[]))
result.append("\n");
result.append(" /**\n");
result.append(" * Directly filters the instance.\n");
result.append(" *\n");
result.append(" * @param instance the instance to convert\n");
result
.append(" * @return always true, to indicate that the output can \n");
result.append(" * be collected immediately.\n");
result.append(" */\n");
result
.append(" public boolean input(Instance instance) throws Exception {\n");
result.append(" Object[] filtered = " + className
+ ".filter(instanceToObjects(instance));\n");
result
.append(" push(objectsToInstance(filtered, getOutputFormat()));\n");
result.append(" return true;\n");
result.append(" }\n");
// generated method: batchFinished (delegates to <className>.filter(Object[][]))
result.append("\n");
result.append(" /**\n");
result.append(
" * Performs a batch filtering of the buffered data, if any available.\n");
result.append(" *\n");
result
.append(" * @return true if instances were filtered otherwise false\n");
result.append(" */\n");
result.append(" public boolean batchFinished() throws Exception {\n");
result.append(" if (getInputFormat() == null)\n");
result.append(
" throw new NullPointerException(\"No input instance format defined\");;\n");
result.append("\n");
result.append(" Instances inst = getInputFormat();\n");
result.append(" if (inst.numInstances() > 0) {\n");
result.append(" Object[][] filtered = " + className
+ ".filter(instancesToObjects(inst));\n");
result.append(" for (int i = 0; i < filtered.length; i++) {\n");
result.append(
" push(objectsToInstance(filtered[i], getOutputFormat()));\n");
result.append(" }\n");
result.append(" }\n");
result.append("\n");
result.append(" flushInput();\n");
result.append(" m_NewBatch = true;\n");
result.append(" m_FirstBatchDone = true;\n");
result.append("\n");
result.append(" return (inst.numInstances() > 0);\n");
result.append(" }\n");
// generated method: toString (names the filter class the wrapper is based on)
result.append("\n");
result.append(" /**\n");
result.append(
" * Returns only the classnames and what filter it is based on.\n");
result.append(" *\n");
result.append(" * @return a short description\n");
result.append(" */\n");
result.append(" public String toString() {\n");
result.append(" return \"Auto-generated filter wrapper, based on "
+ filter.getClass().getName() + " (generated with Weka " + Version.VERSION
+ ").\\n" + "\" + this.getClass().getName() + \"/" + className + "\";\n");
result.append(" }\n");
// generated method: main
result.append("\n");
result.append(" /**\n");
result.append(" * Runs the filter from commandline.\n");
result.append(" *\n");
result.append(" * @param args the commandline arguments\n");
result.append(" */\n");
result.append(" public static void main(String args[]) {\n");
result.append(" runFilter(new WekaWrapper(), args);\n");
result.append(" }\n");
result.append("}\n");
// actual filter code, as produced by the Sourcable filter itself
result.append("\n");
result.append(filter.toSource(className, input));
return result.toString();
}
/**
* Method for testing filters.
*
* @param filter the filter to use
* @param options should contain the following arguments:
* -i input_file
* -o output_file
* -c class_index
* -z classname (for filters implementing weka.filters.Sourcable)
*
* -decimal num (the number of decimal places to use in the output;
* default = 6)
* or -h for help on options
* @throws Exception if something goes wrong or the user requests help on
* command options
*/
public static void filterFile(Filter filter, String[] options)
throws Exception {
boolean debug = false;
Instances data = null;
DataSource input = null;
PrintWriter output = null;
boolean helpRequest;
String sourceCode = "";
int maxDecimalPlaces = 6;
try {
helpRequest = Utils.getFlag('h', options);
if (Utils.getFlag('d', options)) {
debug = true;
}
String infileName = Utils.getOption('i', options);
String outfileName = Utils.getOption('o', options);
String classIndex = Utils.getOption('c', options);
if (filter instanceof Sourcable) {
sourceCode = Utils.getOption('z', options);
}
String tmpStr = Utils.getOption("decimal", options);
if (tmpStr.length() > 0) {
maxDecimalPlaces = Integer.parseInt(tmpStr);
}
if (filter instanceof OptionHandler) {
((OptionHandler) filter).setOptions(options);
}
Utils.checkForRemainingOptions(options);
if (helpRequest) {
throw new Exception("Help requested.\n");
}
if (infileName.length() != 0) {
input = new DataSource(infileName);
} else {
input = new DataSource(System.in);
}
if (outfileName.length() != 0) {
output = new PrintWriter(new FileOutputStream(outfileName));
} else {
output = new PrintWriter(System.out);
}
data = input.getStructure();
if (classIndex.length() != 0) {
if (classIndex.equals("first")) {
data.setClassIndex(0);
} else if (classIndex.equals("last")) {
data.setClassIndex(data.numAttributes() - 1);
} else {
data.setClassIndex(Integer.parseInt(classIndex) - 1);
}
}
} catch (Exception ex) {
String filterOptions = "";
// Output the error and also the valid options
if (filter instanceof OptionHandler) {
filterOptions += "\nFilter options:\n\n";
Enumeration