All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.unsupervised.attribute.StringToNominal Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    StringToNominal.java
 *    Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/**
 *  Converts a range of string attributes (unspecified
 * number of values) to nominal (set number of values). You should ensure that
 * all string values that will appear are represented in the first batch of the
 * data.
 * 

* * * Valid options are: *

* *

 * -R <col>
 *  Sets the range of attribute indices ("first" and "last" are valid values
 *  and ranges and lists can also be used) (default "last").
 * 
* *
 * -V <col>
 *  Invert the range specified by -R.
 * 
* * * * @author Len Trigg ([email protected]) * @version $Revision: 14508 $ */ public class StringToNominal extends Filter implements UnsupervisedFilter, OptionHandler, WeightedAttributesHandler, WeightedInstancesHandler { /** for serialization */ private static final long serialVersionUID = 4864084427902797605L; /** The attribute's range indices setting. */ private final Range m_AttIndices = new Range("last"); /** * Returns a string describing this filter * * @return a description of the filter suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Converts a range of string attributes (unspecified number of values) to nominal " + "(set number of values). You should ensure that all string values that " + "will appear are represented in the first batch of the data."; } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enableAllAttributes(); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - * only the structure is required). * @return true if the outputFormat may be collected immediately. * @throws UnsupportedAttributeTypeException if the selected attribute a * string attribute. * @throws Exception if the input format can't be set successfully. */ @Override public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_AttIndices.setUpper(instanceInfo.numAttributes() - 1); return false; } /** * Input an instance for filtering. 
The instance is processed and made * available for output immediately. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isOutputFormatDefined()) { Instance newInstance = (Instance) instance.copy(); // make sure that we get the right indexes set for the converted // string attributes when operating on a second batch of instances for (int i = 0; i < newInstance.numAttributes(); i++) { if (newInstance.attribute(i).isString() && !newInstance.isMissing(i) && m_AttIndices.isInRange(i)) { Attribute outAtt = outputFormatPeek().attribute(i); String inVal = newInstance.stringValue(i); int outIndex = outAtt.indexOfValue(inVal); if (outIndex < 0) { newInstance.setMissing(i); } else { newInstance.setValue(i, outIndex); } } } push(newInstance, false); // No need to copy return true; } bufferInput(instance); return false; } /** * Signifies that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. * * @return true if there are instances pending output. * @throws IllegalStateException if no input structure has been defined. 
*/ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isOutputFormatDefined()) { setOutputFormat(); // Convert pending input instances for (int i = 0; i < getInputFormat().numInstances(); i++) { push((Instance) getInputFormat().instance(i).copy(), false); // No need to copy } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy