weka.filters.unsupervised.attribute.AddUserFields Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* AddUserFields.java
* Copyright (C) 2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters.unsupervised.attribute;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;
/**
* A filter that adds new attributes with user
* specified type and constant value. Numeric, nominal, string and date
* attributes can be created. Attribute name, and value can be set with
* environment variables. Date attributes can also specify a formatting string
* by which to parse the supplied date value. Alternatively, a current time
* stamp can be specified by supplying the special string "now" as the value for
* a date attribute.
*
*
*
* Valid options are:
*
*
*
* -A &lt;name@type@value&gt;
* New field specification (name@type@value).
* Environment variables may be used for any/all parts of the
* specification. Type can be one of (numeric, nominal, string or date).
* The value for a date can be a specific date string or the special string
* "now" to indicate the current date-time. A specific date format
* string for parsing specific date values can be specified by suffixing
* the type specification - e.g. "myTime@date:MM-dd-yyyy@08-23-2009".
* This option may be specified multiple times.
*
*
*
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 14508 $
*/
public class AddUserFields extends Filter implements OptionHandler,
EnvironmentHandler, UnsupervisedFilter, WeightedInstancesHandler, WeightedAttributesHandler {
/** For serialization */
private static final long serialVersionUID = -2761427344847891585L;
/** The new attributes to create, one specification per attribute */
protected List<AttributeSpec> m_attributeSpecs;
/**
 * Environment held by this filter; transient, so it is not written out when
 * the filter is serialized. NOTE(review): presumably this is what gets
 * passed to {@link AttributeSpec#init(Environment)} to resolve variables -
 * the code that uses it is outside this excerpt, confirm there.
 */
protected transient Environment m_env;
/**
 * Inner class encapsulating a new user-specified attribute to create.
 * <p>
 * An instance stores the name, type and value exactly as supplied by the user
 * (these may contain environment variable references) and, once
 * {@link #init(Environment)} has been called, the resolved versions of those
 * three strings plus - for date attributes that declare a format - the date
 * format and the parsed date.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 */
public static class AttributeSpec implements Serializable {

  /** For serialization */
  private static final long serialVersionUID = -617328946241474608L;

  /** The name of the new attribute */
  protected String m_name = "";

  /** The constant value it should assume */
  protected String m_value = "";

  /** The type of the new attribute */
  protected String m_type = "";

  /** The name after resolving any environment variables */
  protected String m_nameS;

  /** The value after resolving any environment variables */
  protected String m_valueS;

  /** The type after resolving any environment variables */
  protected String m_typeS;

  /** The date format to use (if the new attribute is a date) */
  protected SimpleDateFormat m_dateFormat;

  /** Holds the parsed date value */
  protected Date m_parsedDate;

  /**
   * Default constructor. Name, type and value start out as empty strings.
   */
  public AttributeSpec() {
  }

  /**
   * Constructor that takes an attribute specification in internal format
   * (name@type@value).
   *
   * @param spec the attribute spec to use
   */
  public AttributeSpec(String spec) {
    parseFromInternal(spec);
  }

  /**
   * Set the name of the new attribute
   *
   * @param name the name of the new attribute
   */
  public void setName(String name) {
    m_name = name;
  }

  /**
   * Get the name of the new attribute
   *
   * @return the name of the new attribute
   */
  public String getName() {
    return m_name;
  }

  /**
   * Set the type of the new attribute
   *
   * @param type the type of the new attribute
   */
  public void setType(String type) {
    m_type = type;
  }

  /**
   * Get the type of the new attribute
   *
   * @return the type of the new attribute
   */
  public String getType() {
    return m_type;
  }

  /**
   * Set the value of the new attribute. Date attributes can assume a supplied
   * date value (parsed with the format given in the type specification) or
   * the current time stamp if the user specifies the special string "now".
   *
   * @param value the value of the new attribute
   */
  public void setValue(String value) {
    m_value = value;
  }

  /**
   * Get the value of the new attribute exactly as it was supplied (no
   * environment variable substitution applied).
   *
   * @return the value of the new attribute
   */
  public String getValue() {
    return m_value;
  }

  /**
   * Get the name of the attribute after substituting any environment
   * variables
   *
   * @return the name of the attribute after environment variables have been
   *         substituted, or null if {@link #init(Environment)} has not been
   *         called
   */
  public String getResolvedName() {
    return m_nameS;
  }

  /**
   * Get the value of the attribute after substituting any environment
   * variables
   *
   * @return the value of the attribute after environment variables have been
   *         substituted, or null if {@link #init(Environment)} has not been
   *         called
   */
  public String getResolvedValue() {
    return m_valueS;
  }

  /**
   * Get the type of the attribute after substituting any environment
   * variables
   *
   * @return the type of the attribute after environment variables have been
   *         substituted, or null if {@link #init(Environment)} has not been
   *         called
   */
  public String getResolvedType() {
    return m_typeS;
  }

  /**
   * Get the date formatting string (if any)
   *
   * @return the date formatting pattern, or null if no date format has been
   *         set up
   */
  public String getDateFormat() {
    return (m_dateFormat != null) ? m_dateFormat.toPattern() : null;
  }

  /**
   * Get the value of the attribute as a date. If a date was parsed during
   * {@link #init(Environment)} that date is returned. Otherwise, when the
   * resolved type is a date type, a fresh time stamp for the current moment is
   * returned - this covers the special value "now" and also date types that
   * were declared without a format string (their value is never parsed).
   *
   * @return the value as a date, or null if the attribute isn't of type date
   *         (or has not been initialized yet)
   */
  public Date getDateValue() {
    if (m_parsedDate != null) {
      return m_parsedDate;
    }
    if (typeStartsWith(getResolvedType(), "date")) {
      return new Date(); // now
    }
    return null; // not a date attribute
  }

  /**
   * Get the value of the attribute as a number or Utils.missingValue() if the
   * attribute is not numeric.
   *
   * @return the value of the attribute as a number
   * @throws NumberFormatException if the attribute is numeric but its
   *           resolved value cannot be parsed as a double
   */
  public double getNumericValue() {
    if (typeStartsWith(getResolvedType(), "numeric")) {
      return Double.parseDouble(getResolvedValue());
    }
    return Utils.missingValue(); // not a numeric attribute
  }

  /**
   * Get the value of the attribute as a string (nominal and string attribute)
   * or null if the attribute is not nominal or string
   *
   * @return the value of the attribute as a string
   */
  public String getNominalOrStringValue() {
    String resolvedType = getResolvedType();
    if (typeStartsWith(resolvedType, "nominal")
      || typeStartsWith(resolvedType, "string")) {
      return getResolvedValue();
    }
    return null; // not a nominal or string attribute
  }

  /**
   * Case-insensitive "starts with" test for type strings. Uses
   * regionMatches rather than toLowerCase() so that the outcome does not
   * depend on the JVM's default locale (under a Turkish locale
   * "NUMERIC".toLowerCase() does not start with "numeric").
   *
   * @param type the type string to test (may be null)
   * @param prefix the lower-case prefix to look for
   * @return true if type is non-null and begins with prefix, ignoring case
   */
  private static boolean typeStartsWith(String type, String prefix) {
    return type != null
      && type.regionMatches(true, 0, prefix, 0, prefix.length());
  }

  /**
   * Populate name, type and value from a specification in internal format
   * (name@type@value). Each part is trimmed; parts that are absent from the
   * string leave the corresponding field unchanged.
   *
   * @param spec the specification to parse
   */
  protected void parseFromInternal(String spec) {
    // Limit the split to three parts so that a value which itself contains
    // '@' is kept whole and round-trips through toStringInternal().
    String[] parts = spec.split("@", 3);
    if (parts.length > 0) {
      m_name = parts[0].trim();
    }
    if (parts.length > 1) {
      m_type = parts[1].trim();
    }
    if (parts.length > 2) {
      m_value = parts[2].trim();
    }
  }

  /**
   * Initialize this attribute spec by resolving any environment variables and
   * setting up the date format (if necessary)
   *
   * @param env environment variables to use
   * @throws IllegalArgumentException if the type is a date with a format
   *           string and the value is neither "now" nor parseable with that
   *           format
   */
  public void init(Environment env) {
    m_nameS = m_name;
    m_typeS = m_type;
    m_valueS = m_value;
    // Forget date state from any earlier init so that changing the type or
    // value and re-initializing cannot leave a stale format/date behind.
    m_dateFormat = null;
    m_parsedDate = null;

    try {
      m_nameS = env.substitute(m_nameS);
      m_typeS = env.substitute(m_typeS);
      m_valueS = env.substitute(m_valueS);
    } catch (Exception ignored) {
      // Substitution is deliberately best effort: on any failure (including a
      // null env) the fields not yet substituted keep their raw values.
    }

    int formatSep = m_typeS.indexOf(':');
    if (typeStartsWith(m_typeS, "date") && formatSep > 0) {
      String format = m_typeS.substring(formatSep + 1);
      m_dateFormat = new SimpleDateFormat(format);
      // "now" is resolved lazily in getDateValue(); anything else must parse
      if (!"now".equalsIgnoreCase(m_valueS)) {
        try {
          m_parsedDate = m_dateFormat.parse(m_valueS);
        } catch (ParseException e) {
          throw new IllegalArgumentException("Date value \"" + m_valueS
            + "\" can't be parsed with formatting string \"" + format + "\"",
            e);
        }
      }
    }
  }

  /**
   * Return a nicely formatted string for display. For date types that carry a
   * format ("date:&lt;format&gt;") the format is shown in square brackets
   * after the type.
   *
   * @return a textual description
   */
  @Override
  public String toString() {
    StringBuilder buff = new StringBuilder();
    buff.append("Name: ").append(m_name).append(" ");

    int formatSep = (m_type == null) ? -1 : m_type.indexOf(':');
    if (typeStartsWith(m_type, "date") && formatSep > 0) {
      String type = m_type.substring(0, formatSep);
      // everything after the first ':' is the date format
      String format = m_type.substring(formatSep + 1);
      buff.append("Type: ").append(type).append(" [").append(format)
        .append("] ");
    } else {
      buff.append("Type: ").append(m_type).append(" ");
    }

    buff.append("Value: ").append(m_value);
    return buff.toString();
  }

  /**
   * Return this specification in internal format (name@type@value), using the
   * unresolved name, type and value.
   *
   * @return the specification in internal format
   */
  public String toStringInternal() {
    StringBuilder buff = new StringBuilder();
    buff.append(m_name).append("@").append(m_type).append("@")
      .append(m_value);
    return buff.toString();
  }
}
/**
 * Constructs a new AddUserFields with an empty list of attribute
 * specifications.
 */
public AddUserFields() {
  // typed list rather than a raw ArrayList so element types are checked
  m_attributeSpecs = new ArrayList<AttributeSpec>();
}
/**
 * Supplies the human-readable description of this filter.
 *
 * @return a description of the filter suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  StringBuilder description = new StringBuilder();
  description
    .append("A filter that adds new attributes with user specified type and constant value. ");
  description
    .append("Numeric, nominal, string and date attributes can be created. ");
  description
    .append("Attribute name, and value can be set with environment variables. Date ");
  description
    .append("attributes can also specify a formatting string by which to parse ");
  description
    .append("the supplied date value. Alternatively, a current time stamp can ");
  description
    .append("be specified by supplying the special string \"now\" as the value ");
  description.append("for a date attribute.");
  return description.toString();
}
/**
 * Describes what kind of data this filter accepts. Starting from the
 * superclass capabilities, everything is switched off and then all attribute
 * types, missing values, all class types and the no-class case are switched
 * back on.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  caps.disableAll();

  // attribute side: every attribute type, missing values allowed
  caps.enableAllAttributes();
  caps.enable(Capability.MISSING_VALUES);

  // class side: every class type, or no class attribute at all
  caps.enableAllClasses();
  caps.enable(Capability.NO_CLASS);

  return caps;
}
/**
 * Clear the list of attribute specifications. If the list has not been
 * created yet, a new empty list is created instead.
 */
public void clearAttributeSpecs() {
  if (m_attributeSpecs == null) {
    // nothing to clear - just make sure an (empty, typed) list exists
    m_attributeSpecs = new ArrayList<AttributeSpec>();
    return;
  }
  m_attributeSpecs.clear();
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration