weka.filters.unsupervised.attribute.MergeTwoValues Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* MergeTwoValues.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters.unsupervised.attribute;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;
/**
* Merges two values of a nominal attribute into one
* value.
*
*
*
* Valid options are:
* <p>
*
* <pre>
* -C &lt;col&gt;
*  Sets the attribute index (default last).
* </pre>
*
* <pre>
* -F &lt;value index&gt;
*  Sets the first value's index (default first).
* </pre>
*
* <pre>
* -S &lt;value index&gt;
*  Sets the second value's index (default last).
* </pre>
*
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 10215 $
*/
public class MergeTwoValues extends Filter implements UnsupervisedFilter,
StreamableFilter, OptionHandler {
/** Version identifier used for serialization of this class. */
static final long serialVersionUID = 2925048980504034018L;
/**
* The attribute's index setting, constructed with the specification "last".
* Its upper bound is assigned in {@code setInputFormat} from the number of
* attributes in the input format.
*/
private final SingleIndex m_AttIndex = new SingleIndex("last");
/**
* The first value's index setting, constructed with the specification
* "first". Instances whose value equals the second index are rewritten to
* this index in {@code input}. Its upper bound is assigned in
* {@code setInputFormat} from the chosen attribute's number of values.
*/
private final SingleIndex m_FirstIndex = new SingleIndex("first");
/**
* The second value's index setting, constructed with the specification
* "last". {@code setInputFormat} rejects configurations in which this index
* is not strictly greater than the first index.
*/
private final SingleIndex m_SecondIndex = new SingleIndex("last");
/**
 * Provides a one-sentence, human-readable summary of what this filter does.
 *
 * @return the description text, suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  final String description =
      "Merges two values of a nominal attribute into one value.";
  return description;
}
/**
 * Reports the capabilities of this filter.
 * <p>
 * The superclass capabilities are taken as the starting point and completely
 * disabled; the attribute-related and class-related capabilities are then
 * switched back on one group after the other.
 *
 * @return the capabilities of this filter
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = super.getCapabilities();

  // start from a clean slate before enabling anything
  caps.disableAll();

  // attribute side
  caps.enableAllAttributes();
  caps.enable(Capability.MISSING_VALUES);

  // class side (a class attribute is optional)
  caps.enableAllClasses();
  caps.enable(Capability.MISSING_CLASS_VALUES);
  caps.enable(Capability.NO_CLASS);

  return caps;
}
/**
 * Sets the format of the input instances and validates the configured
 * attribute and value indices against it.
 *
 * @param instanceInfo an Instances object containing the input instance
 *          structure (any instances contained in the object are ignored -
 *          only the structure is required).
 * @return true, since the output format is set up before returning and may
 *         be collected immediately
 * @throws UnsupportedAttributeTypeException if the chosen attribute is not
 *           nominal or has fewer than two values
 * @throws Exception if the chosen attribute is the class attribute, if the
 *           second value index is not greater than the first, or if the
 *           input format can't be set successfully
 */
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // The attribute index can only be resolved once its upper bound is known.
  m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
  final int attPos = m_AttIndex.getIndex();

  // Both value indices range over the values of the chosen attribute.
  final int valueCount = instanceInfo.attribute(attPos).numValues();
  m_FirstIndex.setUpper(valueCount - 1);
  m_SecondIndex.setUpper(valueCount - 1);

  final int classPos = instanceInfo.classIndex();
  if (classPos > -1 && classPos == attPos) {
    throw new Exception("Cannot process class attribute.");
  }

  if (!instanceInfo.attribute(attPos).isNominal()) {
    throw new UnsupportedAttributeTypeException(
      "Chosen attribute not nominal.");
  }

  if (valueCount < 2) {
    throw new UnsupportedAttributeTypeException(
      "Chosen attribute has less than two values.");
  }

  final int firstPos = m_FirstIndex.getIndex();
  final int secondPos = m_SecondIndex.getIndex();
  if (secondPos <= firstPos) {
    // XXX Maybe we should just swap the values??
    throw new Exception("The second index has to be greater than the first.");
  }

  setOutputFormat();
  return true;
}
/**
 * Input an instance for filtering. A copy of the instance is adjusted and
 * pushed onto the output queue right away.
 * <p>
 * On the chosen attribute, a value equal to the second index is replaced by
 * the first index, and any value above the second index is decremented by
 * one; all other values are left as they are.
 *
 * @param instance the input instance
 * @return true, as the filtered instance may now be collected with output().
 * @throws IllegalStateException if no input format has been set.
 */
@Override
public boolean input(Instance instance) {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  final Instance merged = (Instance) instance.copy();
  final int attPos = m_AttIndex.getIndex();
  final double rawValue = merged.value(attPos);
  final int valuePos = (int) rawValue;
  final int secondPos = m_SecondIndex.getIndex();

  if (valuePos == secondPos) {
    // the second value is folded into the first one
    merged.setValue(attPos, m_FirstIndex.getIndex());
  } else if (valuePos > secondPos) {
    // values after the second one move down by one position
    merged.setValue(attPos, rawValue - 1);
  }

  push(merged);
  return true;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration