weka.filters.unsupervised.attribute.StringToNominal Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* StringToNominal.java
* Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters.unsupervised.attribute;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;
/**
* Converts a range of string attributes (unspecified
* number of values) to nominal (set number of values). You should ensure that
* all string values that will appear are represented in the first batch of the
* data.
* <p>
*
* Valid options are:
* <p>
*
* <pre>
* -R &lt;col&gt;
*  Sets the range of attribute indices ("first" and "last" are valid values
*  and ranges and lists can also be used) (default "last").
* </pre>
*
* <pre>
* -V &lt;col&gt;
*  Invert the range specified by -R.
* </pre>
*
* @author Len Trigg ([email protected])
* @version $Revision: 14508 $
*/
public class StringToNominal extends Filter implements UnsupervisedFilter,
OptionHandler, WeightedAttributesHandler, WeightedInstancesHandler {
/** Serialization version identifier for this class. */
private static final long serialVersionUID = 4864084427902797605L;
/**
 * The range of attribute indices this filter operates on. It starts out as
 * "last"; its upper bound is set in setInputFormat from the number of
 * attributes of the incoming data.
 */
private final Range m_AttIndices = new Range("last");
/**
 * Provides the human-readable description of this filter.
 *
 * @return the description text, suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  final StringBuilder description = new StringBuilder();
  description.append("Converts a range of string attributes (unspecified number of values) to nominal ");
  description.append("(set number of values). You should ensure that all string values that ");
  description.append("will appear are represented in the first batch of the data.");
  return description.toString();
}
/**
 * Builds the Capabilities of this filter: everything is switched off first,
 * then all attribute types (plus missing values) and all class types (plus
 * missing class values and the no-class case) are switched on.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = super.getCapabilities();
  caps.disableAll();

  // attribute side
  caps.enableAllAttributes();
  caps.enable(Capability.MISSING_VALUES);

  // class side
  caps.enableAllClasses();
  final Capability[] classExtras = {Capability.MISSING_CLASS_VALUES, Capability.NO_CLASS};
  for (Capability extra : classExtras) {
    caps.enable(extra);
  }

  return caps;
}
/**
 * Registers the structure of the incoming data and adjusts the upper bound of
 * the attribute range to the index of the last attribute.
 *
 * @param instanceInfo an Instances object containing the input instance
 *          structure (any instances contained in the object are ignored -
 *          only the structure is required).
 * @return always false: this method does not set the output format, so
 *         nothing can be collected immediately.
 * @throws Exception if the input format can't be set successfully.
 */
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // the range needs to know the highest valid attribute index
  final int lastAttributeIndex = instanceInfo.numAttributes() - 1;
  m_AttIndices.setUpper(lastAttributeIndex);

  return false;
}
/**
 * Feeds one instance into the filter. While no output format has been
 * defined the instance is only buffered. Once it is defined, a copy of the
 * instance is converted and queued for output straight away.
 *
 * @param instance the input instance.
 * @return true if the filtered instance may now be collected with output(),
 *         false if it was merely buffered.
 * @throws IllegalStateException if no input structure has been defined.
 */
@Override
public boolean input(Instance instance) {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  // without an output format there is nothing to convert against yet
  if (!isOutputFormatDefined()) {
    bufferInput(instance);
    return false;
  }

  final Instance converted = (Instance) instance.copy();
  // re-map every selected string value to the index it has in the
  // corresponding attribute of the output format
  for (int attIdx = 0; attIdx < converted.numAttributes(); attIdx++) {
    if (!converted.attribute(attIdx).isString()) {
      continue;
    }
    if (converted.isMissing(attIdx)) {
      continue;
    }
    if (!m_AttIndices.isInRange(attIdx)) {
      continue;
    }
    final Attribute targetAtt = outputFormatPeek().attribute(attIdx);
    final String label = converted.stringValue(attIdx);
    final int labelIndex = targetAtt.indexOfValue(label);
    if (labelIndex >= 0) {
      converted.setValue(attIdx, labelIndex);
    } else {
      // value not known to the output attribute: treat it as missing
      converted.setMissing(attIdx);
    }
  }
  push(converted, false); // already a private copy, no second copy needed
  return true;
}
/**
 * Marks the end of the current input batch. If the output format has not been
 * defined yet, it is set now and a copy of every instance held by the input
 * format is queued for output. The input is then flushed and the filter is
 * flagged as starting a new batch.
 *
 * @return true if there are instances pending output.
 * @throws IllegalStateException if no input structure has been defined.
 */
@Override
public boolean batchFinished() {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (!isOutputFormatDefined()) {
    setOutputFormat();

    // hand over everything that was buffered while the format was unknown
    int pos = 0;
    while (pos < getInputFormat().numInstances()) {
      final Instance duplicate = (Instance) getInputFormat().instance(pos).copy();
      push(duplicate, false); // already a copy, no second copy needed
      pos++;
    }
  }

  flushInput();
  m_NewBatch = true;

  final boolean outputWaiting = numPendingOutput() != 0;
  return outputWaiting;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration