// weka.core.Instance (source listing from the weka-dev artifact)
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Instance.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core;
import java.util.Enumeration;
/**
 * Interface representing an instance. All values (numeric, date, nominal,
 * string or relational) are internally stored as floating-point numbers in the
 * original concrete class implementations (now called DenseInstance.java and
 * SparseInstance.java), and the methods in this interface reflect this. If an
 * attribute is nominal (or a string or relational), the stored value is the
 * index of the corresponding nominal (or string or relational) value in the
 * attribute's definition. We have chosen this approach in favor of a more
 * elegant object-oriented approach because it is much faster.
 * <p>
 * Typical usage (code from the main() method of this class):
 *
 * <pre>
 * ...
 *
 * // Create empty instance with three attribute values
 * Instance inst = new DenseInstance(3);
 *
 * // Set instance's values for the attributes "length", "weight", and "position"
 * inst.setValue(length, 5.3);
 * inst.setValue(weight, 300);
 * inst.setValue(position, "first");
 *
 * // Set instance's dataset to be the dataset "race"
 * inst.setDataset(race);
 *
 * // Print the instance
 * System.out.println("The instance: " + inst);
 *
 * ...
 * </pre>
 *
 * <p>
 * All methods that change an instance's attribute values must be safe, i.e. a
 * change of an instance's attribute values must not affect any other
 * instances.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 10141 $
 */
public interface Instance extends Copyable {

  /**
   * Returns the attribute with the given index.
   *
   * @param index the attribute's index
   * @return the attribute at the given position
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public Attribute attribute(int index);

  /**
   * Returns the attribute with the given index in the sparse representation.
   * Same as {@code attribute(int)} for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return the attribute at the given position
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public Attribute attributeSparse(int indexOfIndex);

  /**
   * Returns the class attribute.
   *
   * @return the class attribute
   * @throws UnassignedDatasetException if the class is not set or the
   *           instance doesn't have access to a dataset
   */
  public Attribute classAttribute();

  /**
   * Returns the class attribute's index.
   *
   * @return the class index as an integer
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public int classIndex();

  /**
   * Tests if an instance's class is missing.
   *
   * @return true if the instance's class is missing
   * @throws UnassignedClassException if the class is not set or the instance
   *           doesn't have access to a dataset
   */
  public boolean classIsMissing();

  /**
   * Returns an instance's class value as a floating-point number.
   *
   * @return the corresponding value as a double (if the corresponding
   *         attribute is nominal (or a string) then the value's index is
   *         returned as a double)
   * @throws UnassignedClassException if the class is not set or the instance
   *           doesn't have access to a dataset
   */
  public double classValue();

  /**
   * Returns the dataset this instance has access to, i.e. the dataset it
   * obtains information about attribute types from. Null if the instance
   * doesn't have access to a dataset.
   *
   * @return the dataset the instance has access to
   */
  public Instances dataset();

  /**
   * Deletes an attribute at the given position (0 to numAttributes() - 1).
   * Only succeeds if the instance does not have access to any dataset because
   * otherwise inconsistencies could be introduced.
   *
   * @param position the attribute's position
   * @throws RuntimeException if the instance has access to a dataset
   */
  public void deleteAttributeAt(int position);

  /**
   * Returns an enumeration of all the attributes.
   *
   * @return enumeration of all the attributes
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public Enumeration<Attribute> enumerateAttributes();

  /**
   * Tests if the headers of two instances are equivalent.
   *
   * @param inst another instance
   * @return true if the header of the given instance is equivalent to this
   *         instance's header
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           any dataset
   */
  public boolean equalHeaders(Instance inst);

  /**
   * Checks if the headers of two instances are equivalent. If not, then
   * returns a message explaining why they differ.
   *
   * @param inst another instance
   * @return null if the header of the given instance is equivalent to this
   *         instance's header, otherwise a message with details on why they
   *         differ
   */
  public String equalHeadersMsg(Instance inst);

  /**
   * Tests whether an instance has a missing value. Skips the class attribute
   * if set.
   *
   * @return true if the instance has a missing value
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           any dataset
   */
  public boolean hasMissingValue();

  /**
   * Returns the index of the attribute stored at the given position in the
   * sparse representation. Identity function for an instance of type
   * DenseInstance.
   *
   * @param position the position
   * @return the index of the attribute stored at the given position
   */
  public int index(int position);

  /**
   * Inserts an attribute at the given position (0 to numAttributes()). Only
   * succeeds if the instance does not have access to any dataset because
   * otherwise inconsistencies could be introduced.
   *
   * @param position the attribute's position
   * @throws RuntimeException if the instance has access to a dataset
   * @throws IllegalArgumentException if the position is out of range
   */
  public void insertAttributeAt(int position);

  /**
   * Tests if a specific value is "missing".
   *
   * @param attIndex the attribute's index
   * @return true if the value is "missing"
   */
  public boolean isMissing(int attIndex);

  /**
   * Tests if a specific value is "missing" in the sparse representation. Same
   * as {@code isMissing(int)} for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return true if the value is "missing"
   */
  public boolean isMissingSparse(int indexOfIndex);

  /**
   * Tests if a specific value is "missing". The given attribute has to belong
   * to a dataset.
   *
   * @param att the attribute
   * @return true if the value is "missing"
   */
  public boolean isMissing(Attribute att);

  /**
   * Merges this instance with the given instance and returns the result.
   * Dataset is set to null. The returned instance is of the same type as this
   * instance.
   *
   * @param inst the instance to be merged with this one
   * @return the merged instances
   */
  public Instance mergeInstance(Instance inst);

  /**
   * Returns the number of attributes.
   *
   * @return the number of attributes as an integer
   */
  public int numAttributes();

  /**
   * Returns the number of class labels.
   *
   * @return the number of class labels as an integer if the class attribute
   *         is nominal, 1 otherwise
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           any dataset
   */
  public int numClasses();

  /**
   * Returns the number of values present in a sparse representation.
   *
   * @return the number of values
   */
  public int numValues();

  /**
   * Replaces all missing values in the instance with the values contained in
   * the given array. A deep copy of the vector of attribute values is
   * performed before the values are replaced.
   *
   * @param array the array containing the means and modes
   * @throws IllegalArgumentException if numbers of attributes are unequal
   */
  public void replaceMissingValues(double[] array);

  /**
   * Sets the class value of an instance to be "missing". A deep copy of the
   * vector of attribute values is performed before the value is set to be
   * missing.
   *
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public void setClassMissing();

  /**
   * Sets the class value of an instance to the given value (internal
   * floating-point format). A deep copy of the vector of attribute values is
   * performed before the value is set.
   *
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the instance doesn't have access to
   *           a dataset
   */
  public void setClassValue(double value);

  /**
   * Sets the class value of an instance to the given value. A deep copy of the
   * vector of attribute values is performed before the value is set.
   *
   * @param value the new class value (if the class is a string attribute and
   *          the value can't be found, the value is added to the attribute)
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the dataset is not set
   * @throws IllegalArgumentException if the attribute is not nominal or a
   *           string, or the value couldn't be found for a nominal attribute
   */
  public void setClassValue(String value);

  /**
   * Sets the reference to the dataset. Does not check if the instance is
   * compatible with the dataset. Note: the dataset does not know about this
   * instance. If the structure of the dataset's header gets changed, this
   * instance will not be adjusted automatically.
   *
   * @param instances the reference to the dataset
   */
  public void setDataset(Instances instances);

  /**
   * Sets a specific value to be "missing". Performs a deep copy of the vector
   * of attribute values before the value is set to be missing.
   *
   * @param attIndex the attribute's index
   */
  public void setMissing(int attIndex);

  /**
   * Sets a specific value to be "missing". Performs a deep copy of the vector
   * of attribute values before the value is set to be missing. The given
   * attribute has to belong to a dataset.
   *
   * @param att the attribute
   */
  public void setMissing(Attribute att);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format). Performs a deep copy of the vector of attribute
   * values before the value is set.
   *
   * @param attIndex the attribute's index
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   */
  public void setValue(int attIndex, double value);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format), given an index into the sparse representation.
   * Performs a deep copy of the vector of attribute values before the value is
   * set. Same as {@code setValue(int, double)} for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   */
  public void setValueSparse(int indexOfIndex, double value);

  /**
   * Sets a value of a nominal or string attribute to the given value.
   * Performs a deep copy of the vector of attribute values before the value is
   * set.
   *
   * @param attIndex the attribute's index
   * @param value the new attribute value (if the attribute is a string
   *          attribute and the value can't be found, the value is added to
   *          the attribute)
   * @throws UnassignedDatasetException if the dataset is not set
   * @throws IllegalArgumentException if the selected attribute is not nominal
   *           or a string, or the supplied value couldn't be found for a
   *           nominal attribute
   */
  public void setValue(int attIndex, String value);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format). Performs a deep copy of the vector of attribute
   * values before the value is set, so if you are planning on calling setValue
   * many times it may be faster to create a new instance using toDoubleArray.
   * The given attribute has to belong to a dataset.
   *
   * @param att the attribute
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   */
  public void setValue(Attribute att, double value);

  /**
   * Sets a value of a nominal or string attribute to the given value.
   * Performs a deep copy of the vector of attribute values before the value is
   * set, so if you are planning on calling setValue many times it may be
   * faster to create a new instance using toDoubleArray. The given attribute
   * has to belong to a dataset.
   *
   * @param att the attribute
   * @param value the new attribute value (if the attribute is a string
   *          attribute and the value can't be found, the value is added to
   *          the attribute)
   * @throws IllegalArgumentException if the attribute is not nominal or a
   *           string, or the value couldn't be found for a nominal attribute
   */
  public void setValue(Attribute att, String value);

  /**
   * Sets the weight of an instance.
   *
   * @param weight the weight
   */
  public void setWeight(double weight);

  /**
   * Returns the relational value of a relational attribute.
   *
   * @param attIndex the attribute's index
   * @return the corresponding relation as an Instances object
   * @throws IllegalArgumentException if the attribute is not a
   *           relation-valued attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset
   */
  public Instances relationalValue(int attIndex);

  /**
   * Returns the relational value of a relational attribute.
   *
   * @param att the attribute
   * @return the corresponding relation as an Instances object
   * @throws IllegalArgumentException if the attribute is not a
   *           relation-valued attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset
   */
  public Instances relationalValue(Attribute att);

  /**
   * Returns the value of a nominal, string, date, or relational attribute for
   * the instance as a string.
   *
   * @param attIndex the attribute's index
   * @return the value as a string
   * @throws IllegalArgumentException if the attribute is not a nominal,
   *           string, date, or relation-valued attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset
   */
  public String stringValue(int attIndex);

  /**
   * Returns the value of a nominal, string, date, or relational attribute for
   * the instance as a string.
   *
   * @param att the attribute
   * @return the value as a string
   * @throws IllegalArgumentException if the attribute is not a nominal,
   *           string, date, or relation-valued attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset
   */
  public String stringValue(Attribute att);

  /**
   * Returns the values of each attribute as an array of doubles.
   *
   * @return an array containing all the instance attribute values
   */
  public double[] toDoubleArray();

  /**
   * Returns the description of one instance (without weight appended). If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point values. Quotes string values that contain whitespace
   * characters.
   * <p>
   * This method is used by getRandomNumberGenerator() in Instances.java in
   * order to maintain backwards compatibility with weka 3.4.
   *
   * @param afterDecimalPoint maximum number of digits after the decimal point
   *          for numeric values
   * @return the instance's description as a string
   */
  public String toStringNoWeight(int afterDecimalPoint);

  /**
   * Returns the description of one instance (without weight appended). If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point values. Quotes string values that contain whitespace
   * characters.
   * <p>
   * This method is used by getRandomNumberGenerator() in Instances.java in
   * order to maintain backwards compatibility with weka 3.4.
   *
   * @return the instance's description as a string
   */
  public String toStringNoWeight();

  /**
   * Returns the description of one instance with any numeric values printed
   * at the supplied maximum number of decimal places. If the instance doesn't
   * have access to a dataset, it returns the internal floating-point values.
   * Quotes string values that contain whitespace characters.
   *
   * @param afterDecimalPoint the maximum number of digits permitted after the
   *          decimal point for a numeric value
   * @return the instance's description as a string
   */
  public String toStringMaxDecimalDigits(int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark.
   *
   * @param attIndex the attribute's index
   * @param afterDecimalPoint the maximum number of digits permitted after the
   *          decimal point for numeric values
   * @return the value's description as a string
   */
  public String toString(int attIndex, int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark.
   *
   * @param attIndex the attribute's index
   * @return the value's description as a string
   */
  public String toString(int attIndex);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark. The given attribute has to
   * belong to a dataset.
   *
   * @param att the attribute
   * @param afterDecimalPoint the maximum number of decimal places to print
   * @return the value's description as a string
   */
  public String toString(Attribute att, int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark. The given attribute has to
   * belong to a dataset.
   *
   * @param att the attribute
   * @return the value's description as a string
   */
  public String toString(Attribute att);

  /**
   * Returns an instance's attribute value in internal format.
   *
   * @param attIndex the attribute's index
   * @return the specified value as a double (if the corresponding attribute
   *         is nominal (or a string) then the value's index is returned as a
   *         double)
   */
  public double value(int attIndex);

  /**
   * Returns an instance's attribute value in internal format, given an index
   * in the sparse representation. Same as {@code value(int)} for a
   * DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return the specified value as a double (if the corresponding attribute
   *         is nominal (or a string) then the value's index is returned as a
   *         double)
   */
  public double valueSparse(int indexOfIndex);

  /**
   * Returns an instance's attribute value in internal format. The given
   * attribute has to belong to a dataset.
   *
   * @param att the attribute
   * @return the specified value as a double (if the corresponding attribute
   *         is nominal (or a string) then the value's index is returned as a
   *         double)
   */
  public double value(Attribute att);

  /**
   * Returns the instance's weight.
   *
   * @return the instance's weight as a double
   */
  public double weight();
}