weka.core.Attribute Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Attribute.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core;
import java.io.IOException;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Properties;
/**
* Class for handling an attribute. Once an attribute has been created,
* it can't be changed.
*
* The following attribute types are supported:
*
* - numeric:
* This type of attribute represents a floating-point number.
*
* - nominal:
* This type of attribute represents a fixed set of nominal values.
*
* - string:
* This type of attribute represents a dynamically expanding set of
* nominal values. Usually used in text classification.
*
* - date:
* This type of attribute represents a date, internally represented as
* floating-point number storing the milliseconds since January 1,
* 1970, 00:00:00 GMT. The string representation of the date must be
*
* ISO-8601 compliant, the default is yyyy-MM-dd'T'HH:mm:ss
.
*
* - relational:
* This type of attribute can contain other attributes and is, e.g.,
* used for representing Multi-Instance data. (Multi-Instance data
* consists of a nominal attribute containing the bag-id, then a
* relational attribute with all the attributes of the bag, and
* finally the class attribute.)
*
*
*
* Typical usage (code from the main() method of this class):
*
*
* ...
*
* // Create numeric attributes "length" and "weight"
* Attribute length = new Attribute("length");
* Attribute weight = new Attribute("weight");
*
* // Create vector to hold nominal values "first", "second", "third"
* FastVector my_nominal_values = new FastVector(3);
* my_nominal_values.addElement("first");
* my_nominal_values.addElement("second");
* my_nominal_values.addElement("third");
*
* // Create nominal attribute "position"
* Attribute position = new Attribute("position", my_nominal_values);
*
* ...
*
*
* @author Eibe Frank ([email protected])
* @version $Revision: 9518 $
*/
public class Attribute
implements Copyable, Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = -742180568732916383L;
/** Constant set for numeric attributes. */
public static final int NUMERIC = 0;
/** Constant set for nominal attributes. */
public static final int NOMINAL = 1;
/** Constant set for attributes with string values. */
public static final int STRING = 2;
/** Constant set for attributes with date values. */
public static final int DATE = 3;
/** Constant set for relation-valued attributes. */
public static final int RELATIONAL = 4;
/** Constant set for symbolic attributes. */
public static final int ORDERING_SYMBOLIC = 0;
/** Constant set for ordered attributes. */
public static final int ORDERING_ORDERED = 1;
/** Constant set for modulo-ordered attributes. */
public static final int ORDERING_MODULO = 2;
/** The keyword used to denote the start of an arff attribute declaration */
public final static String ARFF_ATTRIBUTE = "@attribute";
/** A keyword used to denote a numeric attribute */
public final static String ARFF_ATTRIBUTE_INTEGER = "integer";
/** A keyword used to denote a numeric attribute */
public final static String ARFF_ATTRIBUTE_REAL = "real";
/** A keyword used to denote a numeric attribute */
public final static String ARFF_ATTRIBUTE_NUMERIC = "numeric";
/** The keyword used to denote a string attribute */
public final static String ARFF_ATTRIBUTE_STRING = "string";
/** The keyword used to denote a date attribute */
public final static String ARFF_ATTRIBUTE_DATE = "date";
/** The keyword used to denote a relation-valued attribute */
public final static String ARFF_ATTRIBUTE_RELATIONAL = "relational";
/** The keyword used to denote the end of the declaration of a subrelation */
public final static String ARFF_END_SUBRELATION = "@end";
/** Dummy first value for String attributes (useful for sparse instances) */
public final static String DUMMY_STRING_VAL = "*WEKA*DUMMY*STRING*FOR*STRING*ATTRIBUTES*";
/** Strings longer than this will be stored compressed. */
private static final int STRING_COMPRESS_THRESHOLD = 200;
/** The attribute's name. */
private /*@ spec_public non_null @*/ String m_Name;
/** The attribute's type. */
private /*@ spec_public @*/ int m_Type;
/*@ invariant m_Type == NUMERIC ||
m_Type == DATE ||
m_Type == STRING ||
m_Type == NOMINAL ||
m_Type == RELATIONAL;
*/
/** The attribute's values (if nominal or string). */
private /*@ spec_public @*/ FastVector m_Values;
/** Mapping of values to indices (if nominal or string). */
private Hashtable m_Hashtable;
/** The header information for a relation-valued attribute. */
private Instances m_Header;
/** Date format specification for date attributes */
private SimpleDateFormat m_DateFormat;
/** The attribute's index. */
private /*@ spec_public @*/ int m_Index;
/** The attribute's metadata. */
private ProtectedProperties m_Metadata;
/** The attribute's ordering. */
private int m_Ordering;
/** Whether the attribute is regular. */
private boolean m_IsRegular;
/** Whether the attribute is averagable. */
private boolean m_IsAveragable;
/** Whether the attribute has a zeropoint. */
private boolean m_HasZeropoint;
/** The attribute's weight. */
private double m_Weight;
/** The attribute's lower numeric bound. */
private double m_LowerBound;
/** Whether the lower bound is open. */
private boolean m_LowerBoundIsOpen;
/** The attribute's upper numeric bound. */
private double m_UpperBound;
/** Whether the upper bound is open */
private boolean m_UpperBoundIsOpen;
/**
* Constructor for a numeric attribute.
*
* @param attributeName the name for the attribute
*/
//@ requires attributeName != null;
//@ ensures m_Name == attributeName;
public Attribute(String attributeName) {
this(attributeName, new ProtectedProperties(new Properties()));
}
/**
* Constructor for a numeric attribute, where metadata is supplied.
*
* @param attributeName the name for the attribute
* @param metadata the attribute's properties
*/
//@ requires attributeName != null;
//@ requires metadata != null;
//@ ensures m_Name == attributeName;
public Attribute(String attributeName, ProtectedProperties metadata) {
m_Name = attributeName;
m_Index = -1;
m_Values = null;
m_Hashtable = null;
m_Header = null;
m_Type = NUMERIC;
setMetadata(metadata);
}
/**
* Constructor for a date attribute.
*
* @param attributeName the name for the attribute
* @param dateFormat a string suitable for use with
* SimpleDateFormatter for parsing dates.
*/
//@ requires attributeName != null;
//@ requires dateFormat != null;
//@ ensures m_Name == attributeName;
public Attribute(String attributeName, String dateFormat) {
this(attributeName, dateFormat,
new ProtectedProperties(new Properties()));
}
/**
* Constructor for a date attribute, where metadata is supplied.
*
* @param attributeName the name for the attribute
* @param dateFormat a string suitable for use with
* SimpleDateFormatter for parsing dates.
* @param metadata the attribute's properties
*/
//@ requires attributeName != null;
//@ requires dateFormat != null;
//@ requires metadata != null;
//@ ensures m_Name == attributeName;
public Attribute(String attributeName, String dateFormat,
ProtectedProperties metadata) {
m_Name = attributeName;
m_Index = -1;
m_Values = null;
m_Hashtable = null;
m_Header = null;
m_Type = DATE;
if (dateFormat != null) {
m_DateFormat = new SimpleDateFormat(dateFormat);
} else {
m_DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
}
m_DateFormat.setLenient(false);
setMetadata(metadata);
}
/**
* Constructor for nominal attributes and string attributes.
* If a null vector of attribute values is passed to the method,
* the attribute is assumed to be a string.
*
* @param attributeName the name for the attribute
* @param attributeValues a vector of strings denoting the
* attribute values. Null if the attribute is a string attribute.
*/
//@ requires attributeName != null;
//@ ensures m_Name == attributeName;
public Attribute(String attributeName,
FastVector attributeValues) {
this(attributeName, attributeValues,
new ProtectedProperties(new Properties()));
}
/**
* Constructor for nominal attributes and string attributes, where
* metadata is supplied. If a null vector of attribute values is passed
* to the method, the attribute is assumed to be a string.
*
* @param attributeName the name for the attribute
* @param attributeValues a vector of strings denoting the
* attribute values. Null if the attribute is a string attribute.
* @param metadata the attribute's properties
*/
//@ requires attributeName != null;
//@ requires metadata != null;
/*@ ensures m_Name == attributeName;
ensures m_Index == -1;
ensures attributeValues == null && m_Type == STRING
|| attributeValues != null && m_Type == NOMINAL
&& m_Values.size() == attributeValues.size();
signals (IllegalArgumentException ex)
(* if duplicate strings in attributeValues *);
*/
public Attribute(String attributeName,
FastVector attributeValues,
ProtectedProperties metadata) {
m_Name = attributeName;
m_Index = -1;
if (attributeValues == null) {
m_Values = new FastVector();
m_Hashtable = new Hashtable();
m_Header = null;
m_Type = STRING;
// Make sure there is at least one value so that string attribute
// values are always represented when output as part of a sparse instance.
addStringValue(DUMMY_STRING_VAL);
} else {
m_Values = new FastVector(attributeValues.size());
m_Hashtable = new Hashtable(attributeValues.size());
m_Header = null;
for (int i = 0; i < attributeValues.size(); i++) {
Object store = attributeValues.elementAt(i);
if (((String)store).length() > STRING_COMPRESS_THRESHOLD) {
try {
store = new SerializedObject(attributeValues.elementAt(i), true);
} catch (Exception ex) {
System.err.println("Couldn't compress nominal attribute value -"
+ " storing uncompressed.");
}
}
if (m_Hashtable.containsKey(store)) {
throw new IllegalArgumentException("A nominal attribute (" +
attributeName + ") cannot"
+ " have duplicate labels (" + store + ").");
}
m_Values.addElement(store);
m_Hashtable.put(store, new Integer(i));
}
m_Type = NOMINAL;
}
setMetadata(metadata);
}
/**
* Constructor for relation-valued attributes.
*
* @param attributeName the name for the attribute
* @param header an Instances object specifying the header of the relation.
*/
public Attribute(String attributeName, Instances header) {
this(attributeName, header,
new ProtectedProperties(new Properties()));
}
/**
* Constructor for relation-valued attributes.
*
* @param attributeName the name for the attribute
* @param header an Instances object specifying the header of the relation.
* @param metadata the attribute's properties
*/
public Attribute(String attributeName,
Instances header,
ProtectedProperties metadata) {
if (header.numInstances() > 0) {
throw new IllegalArgumentException("Header for relation-valued " +
"attribute should not contain " +
"any instances");
}
m_Name = attributeName;
m_Index = -1;
m_Values = new FastVector();
m_Hashtable = new Hashtable();
m_Header = header;
m_Type = RELATIONAL;
setMetadata(metadata);
}
/**
* Produces a shallow copy of this attribute.
*
* @return a copy of this attribute with the same index
*/
//@ also ensures \result instanceof Attribute;
public /*@ pure non_null @*/ Object copy() {
Attribute copy = new Attribute(m_Name);
copy.m_Index = m_Index;
copy.m_Type = m_Type;
copy.m_Values = m_Values;
copy.m_Hashtable = m_Hashtable;
copy.m_DateFormat = m_DateFormat;
copy.m_Header = m_Header;
copy.setMetadata(m_Metadata);
return copy;
}
/**
* Returns an enumeration of all the attribute's values if the
* attribute is nominal, string, or relation-valued, null otherwise.
*
* @return enumeration of all the attribute's values
*/
public final /*@ pure @*/ Enumeration enumerateValues() {
if (isNominal() || isString()) {
final Enumeration ee = m_Values.elements();
return new Enumeration () {
public boolean hasMoreElements() {
return ee.hasMoreElements();
}
public Object nextElement() {
Object oo = ee.nextElement();
if (oo instanceof SerializedObject) {
return ((SerializedObject)oo).getObject();
} else {
return oo;
}
}
};
}
return null;
}
/**
* Tests if given attribute is equal to this attribute.
*
* @param other the Object to be compared to this attribute
* @return true if the given attribute is equal to this attribute
*/
public final /*@ pure @*/ boolean equals(Object other) {
if ((other == null) || !(other.getClass().equals(this.getClass()))) {
return false;
}
Attribute att = (Attribute) other;
if (!m_Name.equals(att.m_Name)) {
return false;
}
if (isNominal() && att.isNominal()) {
if (m_Values.size() != att.m_Values.size()) {
return false;
}
for (int i = 0; i < m_Values.size(); i++) {
if (!m_Values.elementAt(i).equals(att.m_Values.elementAt(i))) {
return false;
}
}
return true;
}
if (isRelationValued() && att.isRelationValued()) {
if (!m_Header.equalHeaders(att.m_Header)) {
return false;
}
return true;
}
return (type() == att.type());
}
/**
* Returns the index of this attribute.
*
* @return the index of this attribute
*/
//@ ensures \result == m_Index;
public final /*@ pure @*/ int index() {
return m_Index;
}
/**
* Returns the index of a given attribute value. (The index of
* the first occurence of this value.)
*
* @param value the value for which the index is to be returned
* @return the index of the given attribute value if attribute
* is nominal or a string, -1 if it is not or the value
* can't be found
*/
public final int indexOfValue(String value) {
if (!isNominal() && !isString())
return -1;
Object store = value;
if (value.length() > STRING_COMPRESS_THRESHOLD) {
try {
store = new SerializedObject(value, true);
} catch (Exception ex) {
System.err.println("Couldn't compress string attribute value -"
+ " searching uncompressed.");
}
}
Integer val = (Integer)m_Hashtable.get(store);
if (val == null) return -1;
else return val.intValue();
}
/**
* Test if the attribute is nominal.
*
* @return true if the attribute is nominal
*/
//@ ensures \result <==> (m_Type == NOMINAL);
public final /*@ pure @*/ boolean isNominal() {
return (m_Type == NOMINAL);
}
/**
* Tests if the attribute is numeric.
*
* @return true if the attribute is numeric
*/
//@ ensures \result <==> ((m_Type == NUMERIC) || (m_Type == DATE));
public final /*@ pure @*/ boolean isNumeric() {
return ((m_Type == NUMERIC) || (m_Type == DATE));
}
/**
* Tests if the attribute is relation valued.
*
* @return true if the attribute is relation valued
*/
//@ ensures \result <==> (m_Type == RELATIONAL);
public final /*@ pure @*/ boolean isRelationValued() {
return (m_Type == RELATIONAL);
}
/**
* Tests if the attribute is a string.
*
* @return true if the attribute is a string
*/
//@ ensures \result <==> (m_Type == STRING);
public final /*@ pure @*/ boolean isString() {
return (m_Type == STRING);
}
/**
* Tests if the attribute is a date type.
*
* @return true if the attribute is a date type
*/
//@ ensures \result <==> (m_Type == DATE);
public final /*@ pure @*/ boolean isDate() {
return (m_Type == DATE);
}
/**
* Returns the attribute's name.
*
* @return the attribute's name as a string
*/
//@ ensures \result == m_Name;
public final /*@ pure @*/ String name() {
return m_Name;
}
/**
* Returns the number of attribute values. Returns 0 for
* attributes that are not either nominal, string, or
* relation-valued.
*
* @return the number of attribute values
*/
public final /*@ pure @*/ int numValues() {
if (!isNominal() && !isString() && !isRelationValued()) {
return 0;
} else {
return m_Values.size();
}
}
/**
* Returns a description of this attribute in ARFF format. Quotes
* strings if they contain whitespace characters, or if they
* are a question mark.
*
* @return a description of this attribute as a string
*/
public final String toString() {
StringBuffer text = new StringBuffer();
text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(m_Name)).append(" ");
switch (m_Type) {
case NOMINAL:
text.append('{');
Enumeration enu = enumerateValues();
while (enu.hasMoreElements()) {
text.append(Utils.quote((String) enu.nextElement()));
if (enu.hasMoreElements())
text.append(',');
}
text.append('}');
break;
case NUMERIC:
text.append(ARFF_ATTRIBUTE_NUMERIC);
break;
case STRING:
text.append(ARFF_ATTRIBUTE_STRING);
break;
case DATE:
text.append(ARFF_ATTRIBUTE_DATE).append(" ").append(Utils.quote(m_DateFormat.toPattern()));
break;
case RELATIONAL:
text.append(ARFF_ATTRIBUTE_RELATIONAL).append("\n");
Enumeration enm = m_Header.enumerateAttributes();
while (enm.hasMoreElements()) {
text.append(enm.nextElement()).append("\n");
}
text.append(ARFF_END_SUBRELATION).append(" ").append(Utils.quote(m_Name));
break;
default:
text.append("UNKNOWN");
break;
}
return text.toString();
}
/**
* Returns the attribute's type as an integer.
*
* @return the attribute's type.
*/
//@ ensures \result == m_Type;
public final /*@ pure @*/ int type() {
return m_Type;
}
/**
* Returns the Date format pattern in case this attribute is of type DATE,
* otherwise an empty string.
*
* @return the date format pattern
* @see SimpleDateFormat
*/
public final String getDateFormat() {
if (isDate())
return m_DateFormat.toPattern();
else
return "";
}
/**
* Returns a value of a nominal or string attribute. Returns an
* empty string if the attribute is neither a string nor a nominal
* attribute.
*
* @param valIndex the value's index
* @return the attribute's value as a string
*/
public final /*@ non_null pure @*/ String value(int valIndex) {
if (!isNominal() && !isString()) {
return "";
} else {
Object val = m_Values.elementAt(valIndex);
// If we're storing strings compressed, uncompress it.
if (val instanceof SerializedObject) {
val = ((SerializedObject)val).getObject();
}
return (String) val;
}
}
/**
* Returns the header info for a relation-valued attribute,
* null if the attribute is not relation-valued.
*
* @return the attribute's value as an Instances object
*/
public final /*@ non_null pure @*/ Instances relation() {
if (!isRelationValued()) {
return null;
} else {
return m_Header;
}
}
/**
* Returns a value of a relation-valued attribute. Returns
* null if the attribute is not relation-valued.
*
* @param valIndex the value's index
* @return the attribute's value as an Instances object
*/
public final /*@ non_null pure @*/ Instances relation(int valIndex) {
if (!isRelationValued()) {
return null;
} else {
return (Instances) m_Values.elementAt(valIndex);
}
}
/**
* Constructor for a numeric attribute with a particular index.
*
* @param attributeName the name for the attribute
* @param index the attribute's index
*/
//@ requires attributeName != null;
//@ requires index >= 0;
//@ ensures m_Name == attributeName;
//@ ensures m_Index == index;
public Attribute(String attributeName, int index) {
this(attributeName);
m_Index = index;
}
/**
* Constructor for date attributes with a particular index.
*
* @param attributeName the name for the attribute
* @param dateFormat a string suitable for use with
* SimpleDateFormatter for parsing dates. Null for a default format
* string.
* @param index the attribute's index
*/
//@ requires attributeName != null;
//@ requires index >= 0;
//@ ensures m_Name == attributeName;
//@ ensures m_Index == index;
public Attribute(String attributeName, String dateFormat,
int index) {
this(attributeName, dateFormat);
m_Index = index;
}
/**
* Constructor for nominal attributes and string attributes with
* a particular index.
* If a null vector of attribute values is passed to the method,
* the attribute is assumed to be a string.
*
* @param attributeName the name for the attribute
* @param attributeValues a vector of strings denoting the attribute values.
* Null if the attribute is a string attribute.
* @param index the attribute's index
*/
//@ requires attributeName != null;
//@ requires index >= 0;
//@ ensures m_Name == attributeName;
//@ ensures m_Index == index;
public Attribute(String attributeName, FastVector attributeValues,
int index) {
this(attributeName, attributeValues);
m_Index = index;
}
/**
* Constructor for a relation-valued attribute with a particular index.
*
* @param attributeName the name for the attribute
* @param header the header information for this attribute
* @param index the attribute's index
*/
//@ requires attributeName != null;
//@ requires index >= 0;
//@ ensures m_Name == attributeName;
//@ ensures m_Index == index;
public Attribute(String attributeName, Instances header,
int index) {
this(attributeName, header);
m_Index = index;
}
/**
* Adds a string value to the list of valid strings for attributes
* of type STRING and returns the index of the string.
*
* @param value The string value to add
* @return the index assigned to the string, or -1 if the attribute is not
* of type Attribute.STRING
*/
/*@ requires value != null;
ensures isString() && 0 <= \result && \result < m_Values.size() ||
! isString() && \result == -1;
*/
public int addStringValue(String value) {
if (!isString()) {
return -1;
}
Object store = value;
if (value.length() > STRING_COMPRESS_THRESHOLD) {
try {
store = new SerializedObject(value, true);
} catch (Exception ex) {
System.err.println("Couldn't compress string attribute value -"
+ " storing uncompressed.");
}
}
Integer index = (Integer)m_Hashtable.get(store);
if (index != null) {
return index.intValue();
} else {
int intIndex = m_Values.size();
m_Values.addElement(store);
m_Hashtable.put(store, new Integer(intIndex));
return intIndex;
}
}
/**
* Adds a string value to the list of valid strings for attributes
* of type STRING and returns the index of the string. This method is
* more efficient than addStringValue(String) for long strings.
*
* @param src The Attribute containing the string value to add.
* @param index the index of the string value in the source attribute.
* @return the index assigned to the string, or -1 if the attribute is not
* of type Attribute.STRING
*/
/*@ requires src != null;
requires 0 <= index && index < src.m_Values.size();
ensures isString() && 0 <= \result && \result < m_Values.size() ||
! isString() && \result == -1;
*/
public int addStringValue(Attribute src, int index) {
if (!isString()) {
return -1;
}
Object store = src.m_Values.elementAt(index);
Integer oldIndex = (Integer)m_Hashtable.get(store);
if (oldIndex != null) {
return oldIndex.intValue();
} else {
int intIndex = m_Values.size();
m_Values.addElement(store);
m_Hashtable.put(store, new Integer(intIndex));
return intIndex;
}
}
/**
* Adds a relation to a relation-valued attribute.
*
* @param value The value to add
* @return the index assigned to the value, or -1 if the attribute is not
* of type Attribute.RELATIONAL
*/
public int addRelation(Instances value) {
if (!isRelationValued()) {
return -1;
}
if (!m_Header.equalHeaders(value)) {
throw new IllegalArgumentException("Incompatible value for " +
"relation-valued attribute.");
}
Integer index = (Integer)m_Hashtable.get(value);
if (index != null) {
return index.intValue();
} else {
int intIndex = m_Values.size();
m_Values.addElement(value);
m_Hashtable.put(value, new Integer(intIndex));
return intIndex;
}
}
/**
* Adds an attribute value. Creates a fresh list of attribute
* values before adding it.
*
* @param value the attribute value
*/
final void addValue(String value) {
m_Values = (FastVector)m_Values.copy();
m_Hashtable = (Hashtable)m_Hashtable.clone();
forceAddValue(value);
}
/**
* Produces a shallow copy of this attribute with a new name.
*
* @param newName the name of the new attribute
* @return a copy of this attribute with the same index
*/
//@ requires newName != null;
//@ ensures \result.m_Name == newName;
//@ ensures \result.m_Index == m_Index;
//@ ensures \result.m_Type == m_Type;
public final /*@ pure non_null @*/ Attribute copy(String newName) {
Attribute copy = new Attribute(newName);
copy.m_Index = m_Index;
copy.m_DateFormat = m_DateFormat;
copy.m_Type = m_Type;
copy.m_Values = m_Values;
copy.m_Hashtable = m_Hashtable;
copy.m_Header = m_Header;
copy.setMetadata(m_Metadata);
return copy;
}
/**
* Removes a value of a nominal, string, or relation-valued
* attribute. Creates a fresh list of attribute values before
* removing it.
*
* @param index the value's index
* @throws IllegalArgumentException if the attribute is not
* of the correct type
*/
//@ requires isNominal() || isString() || isRelationValued();
//@ requires 0 <= index && index < m_Values.size();
final void delete(int index) {
if (!isNominal() && !isString() && !isRelationValued())
throw new IllegalArgumentException("Can only remove value of " +
"nominal, string or relation-" +
" valued attribute!");
else {
m_Values = (FastVector)m_Values.copy();
m_Values.removeElementAt(index);
if (!isRelationValued()) {
Hashtable hash = new Hashtable(m_Hashtable.size());
Enumeration enu = m_Hashtable.keys();
while (enu.hasMoreElements()) {
Object string = enu.nextElement();
Integer valIndexObject = (Integer)m_Hashtable.get(string);
int valIndex = valIndexObject.intValue();
if (valIndex > index) {
hash.put(string, new Integer(valIndex - 1));
} else if (valIndex < index) {
hash.put(string, valIndexObject);
}
}
m_Hashtable = hash;
}
}
}
/**
* Adds an attribute value.
*
* @param value the attribute value
*/
//@ requires value != null;
//@ ensures m_Values.size() == \old(m_Values.size()) + 1;
final void forceAddValue(String value) {
Object store = value;
if (value.length() > STRING_COMPRESS_THRESHOLD) {
try {
store = new SerializedObject(value, true);
} catch (Exception ex) {
System.err.println("Couldn't compress string attribute value -"
+ " storing uncompressed.");
}
}
m_Values.addElement(store);
m_Hashtable.put(store, new Integer(m_Values.size() - 1));
}
/**
* Sets the index of this attribute.
*
* @param index the index of this attribute
*/
//@ requires 0 <= index;
//@ assignable m_Index;
//@ ensures m_Index == index;
final void setIndex(int index) {
m_Index = index;
}
/**
* Sets a value of a nominal attribute or string attribute.
* Creates a fresh list of attribute values before it is set.
*
* @param index the value's index
* @param string the value
* @throws IllegalArgumentException if the attribute is not nominal or
* string.
*/
//@ requires string != null;
//@ requires isNominal() || isString();
//@ requires 0 <= index && index < m_Values.size();
final void setValue(int index, String string) {
switch (m_Type) {
case NOMINAL:
case STRING:
m_Values = (FastVector)m_Values.copy();
m_Hashtable = (Hashtable)m_Hashtable.clone();
Object store = string;
if (string.length() > STRING_COMPRESS_THRESHOLD) {
try {
store = new SerializedObject(string, true);
} catch (Exception ex) {
System.err.println("Couldn't compress string attribute value -"
+ " storing uncompressed.");
}
}
m_Hashtable.remove(m_Values.elementAt(index));
m_Values.setElementAt(store, index);
m_Hashtable.put(store, new Integer(index));
break;
default:
throw new IllegalArgumentException("Can only set values for nominal"
+ " or string attributes!");
}
}
/**
* Sets a value of a relation-valued attribute.
* Creates a fresh list of attribute values before it is set.
*
* @param index the value's index
* @param data the value
* @throws IllegalArgumentException if the attribute is not
* relation-valued.
*/
final void setValue(int index, Instances data) {
if (isRelationValued()) {
if (!data.equalHeaders(m_Header)) {
throw new IllegalArgumentException("Can't set relational value. " +
"Headers not compatible.");
}
m_Values = (FastVector)m_Values.copy();
m_Values.setElementAt(data, index);
} else {
throw new IllegalArgumentException("Can only set value for"
+ " relation-valued attributes!");
}
}
/**
* Returns the given amount of milliseconds formatted according to the
* current Date format.
*
* @param date the date, represented in milliseconds since
* January 1, 1970, 00:00:00 GMT, to return as string
* @return the formatted date
*/
//@ requires isDate();
public /*@pure@*/ String formatDate(double date) {
switch (m_Type) {
case DATE:
return m_DateFormat.format(new Date((long)date));
default:
throw new IllegalArgumentException("Can only format date values for date"
+ " attributes!");
}
}
/**
* Parses the given String as Date, according to the current format and
* returns the corresponding amount of milliseconds.
*
* @param string the date to parse
* @return the date in milliseconds since January 1, 1970, 00:00:00 GMT
* @throws ParseException if parsing fails
*/
//@ requires isDate();
//@ requires string != null;
public double parseDate(String string) throws ParseException {
switch (m_Type) {
case DATE:
long time = m_DateFormat.parse(string).getTime();
// TODO put in a safety check here if we can't store the value in a double.
return (double)time;
default:
throw new IllegalArgumentException("Can only parse date values for date"
+ " attributes!");
}
}
/**
* Returns the properties supplied for this attribute.
*
* @return metadata for this attribute
*/
public final /*@ pure @*/ ProtectedProperties getMetadata() {
return m_Metadata;
}
/**
* Returns the ordering of the attribute. One of the following:
*
* ORDERING_SYMBOLIC - attribute values should be treated as symbols.
* ORDERING_ORDERED - attribute values have a global ordering.
* ORDERING_MODULO - attribute values have an ordering which wraps.
*
* @return the ordering type of the attribute
*/
public final /*@ pure @*/ int ordering() {
return m_Ordering;
}
/**
* Returns whether the attribute values are equally spaced.
*
* @return whether the attribute is regular or not
*/
public final /*@ pure @*/ boolean isRegular() {
return m_IsRegular;
}
/**
* Returns whether the attribute can be averaged meaningfully.
*
* @return whether the attribute can be averaged or not
*/
public final /*@ pure @*/ boolean isAveragable() {
return m_IsAveragable;
}
/**
* Returns whether the attribute has a zeropoint and may be
* added meaningfully.
*
* @return whether the attribute has a zeropoint or not
*/
public final /*@ pure @*/ boolean hasZeropoint() {
return m_HasZeropoint;
}
/**
* Returns the attribute's weight.
*
* @return the attribute's weight as a double
*/
public final /*@ pure @*/ double weight() {
return m_Weight;
}
/**
* Sets the new attribute's weight
*
* @param value the new weight
*/
public void setWeight(double value) {
Properties props;
Enumeration names;
String name;
m_Weight = value;
// generate new metadata object
props = new Properties();
names = m_Metadata.propertyNames();
while (names.hasMoreElements()) {
name = (String) names.nextElement();
if (!name.equals("weight"))
props.setProperty(name, m_Metadata.getProperty(name));
}
props.setProperty("weight", "" + m_Weight);
m_Metadata = new ProtectedProperties(props);
}
/**
* Returns the lower bound of a numeric attribute.
*
* @return the lower bound of the specified numeric range
*/
public final /*@ pure @*/ double getLowerNumericBound() {
return m_LowerBound;
}
/**
* Returns whether the lower numeric bound of the attribute is open.
*
* @return whether the lower numeric bound is open or not (closed)
*/
public final /*@ pure @*/ boolean lowerNumericBoundIsOpen() {
return m_LowerBoundIsOpen;
}
/**
* Returns the upper bound of a numeric attribute.
*
* @return the upper bound of the specified numeric range
*/
public final /*@ pure @*/ double getUpperNumericBound() {
return m_UpperBound;
}
/**
* Returns whether the upper numeric bound of the attribute is open.
*
* @return whether the upper numeric bound is open or not (closed)
*/
public final /*@ pure @*/ boolean upperNumericBoundIsOpen() {
return m_UpperBoundIsOpen;
}
/**
* Determines whether a value lies within the bounds of the attribute.
*
* @param value the value to check
* @return whether the value is in range
*/
public final /*@ pure @*/ boolean isInRange(double value) {
// dates and missing values are a special case
if (m_Type == DATE || Instance.isMissingValue(value)) return true;
if (m_Type != NUMERIC) {
// do label range check
int intVal = (int) value;
if (intVal < 0 || intVal >= m_Hashtable.size()) return false;
} else {
// do numeric bounds check
if (m_LowerBoundIsOpen) {
if (value <= m_LowerBound) return false;
} else {
if (value < m_LowerBound) return false;
}
if (m_UpperBoundIsOpen) {
if (value >= m_UpperBound) return false;
} else {
if (value > m_UpperBound) return false;
}
}
return true;
}
/**
* Sets the metadata for the attribute. Processes the strings stored in the
* metadata of the attribute so that the properties can be set up for the
* easy-access metadata methods. Any strings sought that are omitted will
* cause default values to be set.
*
* The following properties are recognised:
* ordering, averageable, zeropoint, regular, weight, and range.
*
* All other properties can be queried and handled appropriately by classes
* calling the getMetadata() method.
*
* @param metadata the metadata
* @throws IllegalArgumentException if the properties are not consistent
*/
//@ requires metadata != null;
private void setMetadata(ProtectedProperties metadata) {
m_Metadata = metadata;
if (m_Type == DATE) {
m_Ordering = ORDERING_ORDERED;
m_IsRegular = true;
m_IsAveragable = false;
m_HasZeropoint = false;
} else {
// get ordering
String orderString = m_Metadata.getProperty("ordering","");
// numeric ordered attributes are averagable and zeropoint by default
String def;
if (m_Type == NUMERIC
&& orderString.compareTo("modulo") != 0
&& orderString.compareTo("symbolic") != 0)
def = "true";
else def = "false";
// determine boolean states
m_IsAveragable =
(m_Metadata.getProperty("averageable",def).compareTo("true") == 0);
m_HasZeropoint =
(m_Metadata.getProperty("zeropoint",def).compareTo("true") == 0);
// averagable or zeropoint implies regular
if (m_IsAveragable || m_HasZeropoint) def = "true";
m_IsRegular =
(m_Metadata.getProperty("regular",def).compareTo("true") == 0);
// determine ordering
if (orderString.compareTo("symbolic") == 0)
m_Ordering = ORDERING_SYMBOLIC;
else if (orderString.compareTo("ordered") == 0)
m_Ordering = ORDERING_ORDERED;
else if (orderString.compareTo("modulo") == 0)
m_Ordering = ORDERING_MODULO;
else {
if (m_Type == NUMERIC || m_IsAveragable || m_HasZeropoint)
m_Ordering = ORDERING_ORDERED;
else m_Ordering = ORDERING_SYMBOLIC;
}
}
// consistency checks
if (m_IsAveragable && !m_IsRegular)
throw new IllegalArgumentException("An averagable attribute must be"
+ " regular");
if (m_HasZeropoint && !m_IsRegular)
throw new IllegalArgumentException("A zeropoint attribute must be"
+ " regular");
if (m_IsRegular && m_Ordering == ORDERING_SYMBOLIC)
throw new IllegalArgumentException("A symbolic attribute cannot be"
+ " regular");
if (m_IsAveragable && m_Ordering != ORDERING_ORDERED)
throw new IllegalArgumentException("An averagable attribute must be"
+ " ordered");
if (m_HasZeropoint && m_Ordering != ORDERING_ORDERED)
throw new IllegalArgumentException("A zeropoint attribute must be"
+ " ordered");
// determine weight
m_Weight = 1.0;
String weightString = m_Metadata.getProperty("weight");
if (weightString != null) {
try{
m_Weight = Double.valueOf(weightString).doubleValue();
} catch (NumberFormatException e) {
// Check if value is really a number
throw new IllegalArgumentException("Not a valid attribute weight: '"
+ weightString + "'");
}
}
// determine numeric range
if (m_Type == NUMERIC) setNumericRange(m_Metadata.getProperty("range"));
}
/**
* Sets the numeric range based on a string. If the string is null the range
* will default to [-inf,+inf]. A square brace represents a closed interval, a
* curved brace represents an open interval, and 'inf' represents infinity.
* Examples of valid range strings: "[-inf,20)","(-13.5,-5.2)","(5,inf]"
*
* @param rangeString the string to parse as the attribute's numeric range
* @throws IllegalArgumentException if the range is not valid
*/
//@ requires rangeString != null;
private void setNumericRange(String rangeString)
{
// set defaults
m_LowerBound = Double.NEGATIVE_INFINITY;
m_LowerBoundIsOpen = false;
m_UpperBound = Double.POSITIVE_INFINITY;
m_UpperBoundIsOpen = false;
if (rangeString == null) return;
// set up a tokenzier to parse the string
StreamTokenizer tokenizer =
new StreamTokenizer(new StringReader(rangeString));
tokenizer.resetSyntax();
tokenizer.whitespaceChars(0, ' ');
tokenizer.wordChars(' '+1,'\u00FF');
tokenizer.ordinaryChar('[');
tokenizer.ordinaryChar('(');
tokenizer.ordinaryChar(',');
tokenizer.ordinaryChar(']');
tokenizer.ordinaryChar(')');
try {
// get opening brace
tokenizer.nextToken();
if (tokenizer.ttype == '[') m_LowerBoundIsOpen = false;
else if (tokenizer.ttype == '(') m_LowerBoundIsOpen = true;
else throw new IllegalArgumentException("Expected opening brace on range,"
+ " found: "
+ tokenizer.toString());
// get lower bound
tokenizer.nextToken();
if (tokenizer.ttype != tokenizer.TT_WORD)
throw new IllegalArgumentException("Expected lower bound in range,"
+ " found: "
+ tokenizer.toString());
if (tokenizer.sval.compareToIgnoreCase("-inf") == 0)
m_LowerBound = Double.NEGATIVE_INFINITY;
else if (tokenizer.sval.compareToIgnoreCase("+inf") == 0)
m_LowerBound = Double.POSITIVE_INFINITY;
else if (tokenizer.sval.compareToIgnoreCase("inf") == 0)
m_LowerBound = Double.NEGATIVE_INFINITY;
else try {
m_LowerBound = Double.valueOf(tokenizer.sval).doubleValue();
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Expected lower bound in range,"
+ " found: '" + tokenizer.sval + "'");
}
// get separating comma
if (tokenizer.nextToken() != ',')
throw new IllegalArgumentException("Expected comma in range,"
+ " found: "
+ tokenizer.toString());
// get upper bound
tokenizer.nextToken();
if (tokenizer.ttype != tokenizer.TT_WORD)
throw new IllegalArgumentException("Expected upper bound in range,"
+ " found: "
+ tokenizer.toString());
if (tokenizer.sval.compareToIgnoreCase("-inf") == 0)
m_UpperBound = Double.NEGATIVE_INFINITY;
else if (tokenizer.sval.compareToIgnoreCase("+inf") == 0)
m_UpperBound = Double.POSITIVE_INFINITY;
else if (tokenizer.sval.compareToIgnoreCase("inf") == 0)
m_UpperBound = Double.POSITIVE_INFINITY;
else try {
m_UpperBound = Double.valueOf(tokenizer.sval).doubleValue();
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Expected upper bound in range,"
+ " found: '" + tokenizer.sval + "'");
}
// get closing brace
tokenizer.nextToken();
if (tokenizer.ttype == ']') m_UpperBoundIsOpen = false;
else if (tokenizer.ttype == ')') m_UpperBoundIsOpen = true;
else throw new IllegalArgumentException("Expected closing brace on range,"
+ " found: "
+ tokenizer.toString());
// check for rubbish on end
if (tokenizer.nextToken() != tokenizer.TT_EOF)
throw new IllegalArgumentException("Expected end of range string,"
+ " found: "
+ tokenizer.toString());
} catch (IOException e) {
throw new IllegalArgumentException("IOException reading attribute range"
+ " string: " + e.getMessage());
}
if (m_UpperBound < m_LowerBound)
throw new IllegalArgumentException("Upper bound (" + m_UpperBound
+ ") on numeric range is"
+ " less than lower bound ("
+ m_LowerBound + ")!");
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 9518 $");
}
/**
* Simple main method for testing this class.
*
* @param ops the commandline options
*/
//@ requires ops != null;
//@ requires \nonnullelements(ops);
public static void main(String[] ops) {
try {
// Create numeric attributes "length" and "weight"
Attribute length = new Attribute("length");
Attribute weight = new Attribute("weight");
// Create date attribute "date"
Attribute date = new Attribute("date", "yyyy-MM-dd HH:mm:ss");
System.out.println(date);
double dd = date.parseDate("2001-04-04 14:13:55");
System.out.println("Test date = " + dd);
System.out.println(date.formatDate(dd));
dd = new Date().getTime();
System.out.println("Date now = " + dd);
System.out.println(date.formatDate(dd));
// Create vector to hold nominal values "first", "second", "third"
FastVector my_nominal_values = new FastVector(3);
my_nominal_values.addElement("first");
my_nominal_values.addElement("second");
my_nominal_values.addElement("third");
// Create nominal attribute "position"
Attribute position = new Attribute("position", my_nominal_values);
// Print the name of "position"
System.out.println("Name of \"position\": " + position.name());
// Print the values of "position"
Enumeration attValues = position.enumerateValues();
while (attValues.hasMoreElements()) {
String string = (String)attValues.nextElement();
System.out.println("Value of \"position\": " + string);
}
// Shallow copy attribute "position"
Attribute copy = (Attribute) position.copy();
// Test if attributes are the same
System.out.println("Copy is the same as original: " + copy.equals(position));
// Print index of attribute "weight" (should be unset: -1)
System.out.println("Index of attribute \"weight\" (should be -1): " +
weight.index());
// Print index of value "first" of attribute "position"
System.out.println("Index of value \"first\" of \"position\" (should be 0): " +
position.indexOfValue("first"));
// Tests type of attribute "position"
System.out.println("\"position\" is numeric: " + position.isNumeric());
System.out.println("\"position\" is nominal: " + position.isNominal());
System.out.println("\"position\" is string: " + position.isString());
// Prints name of attribute "position"
System.out.println("Name of \"position\": " + position.name());
// Prints number of values of attribute "position"
System.out.println("Number of values for \"position\": " + position.numValues());
// Prints the values (againg)
for (int i = 0; i < position.numValues(); i++) {
System.out.println("Value " + i + ": " + position.value(i));
}
// Prints the attribute "position" in ARFF format
System.out.println(position);
// Checks type of attribute "position" using constants
switch (position.type()) {
case Attribute.NUMERIC:
System.out.println("\"position\" is numeric");
break;
case Attribute.NOMINAL:
System.out.println("\"position\" is nominal");
break;
case Attribute.STRING:
System.out.println("\"position\" is string");
break;
case Attribute.DATE:
System.out.println("\"position\" is date");
break;
case Attribute.RELATIONAL:
System.out.println("\"position\" is relation-valued");
break;
default:
System.out.println("\"position\" has unknown type");
}
FastVector atts = new FastVector(1);
atts.addElement(position);
Instances relation = new Instances("Test", atts, 0);
Attribute relationValuedAtt = new Attribute("test", relation);
System.out.println(relationValuedAtt);
} catch (Exception e) {
e.printStackTrace();
}
}
}