All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.BinarySparseInstance Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

The newest version!
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    BinarySparseInstance.java
 *    Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.util.ArrayList;
import java.util.Enumeration;

/**
 * Class for storing a binary-data-only instance as a sparse vector. A sparse
 * instance only requires storage for those attribute values that are non-zero.
 * Since the objective is to reduce storage requirements for datasets with large
 * numbers of default values, this also includes nominal attributes -- the first
 * nominal value (i.e. that which has index 0) will not require explicit
 * storage, so rearrange your nominal attribute value orderings if necessary.
 * Missing values are not supported, and will be treated as 1 (true).
 * 
 * @version $Revision: 12472 $
 */
public class BinarySparseInstance extends SparseInstance {

  /** for serialization */
  private static final long serialVersionUID = -5297388762342528737L;

  /**
   * Builds a binary sparse instance from an arbitrary instance. The weight and
   * the number of attributes are taken from the argument, and every attribute
   * with a non-zero value is recorded as set. The dataset reference is null,
   * so the new instance has no access to attribute type information.
   * 
   * @param instance the instance supplying the weight and the attribute values
   */
  public BinarySparseInstance(Instance instance) {

    m_Weight = instance.weight();
    m_Dataset = null;
    m_NumAttributes = instance.numAttributes();
    m_AttValues = null;

    if (instance instanceof SparseInstance) {
      // The source's index array is reused as-is; no copy is made here.
      m_Indices = ((SparseInstance) instance).m_Indices;
      return;
    }

    // Dense source: collect the positions of all non-zero values.
    final int total = instance.numAttributes();
    int[] found = new int[total];
    int count = 0;
    for (int att = 0; att < total; att++) {
      if (instance.value(att) != 0) {
        found[count++] = att;
      }
    }

    // Trim the scratch array down to the number of positions actually found.
    m_Indices = new int[count];
    System.arraycopy(found, 0, m_Indices, 0, count);
  }

  /**
   * Builds a binary sparse instance from a sparse instance by taking over its
   * index array, weight and attribute count. The index array is shared with
   * the argument rather than copied. The dataset reference is null, so the new
   * instance has no access to attribute type information.
   * 
   * @param instance the sparse instance whose information is taken over
   */
  public BinarySparseInstance(SparseInstance instance) {

    m_Weight = instance.m_Weight;
    m_NumAttributes = instance.m_NumAttributes;
    m_Indices = instance.m_Indices;
    // Attribute values are never stored: a listed index simply means "1".
    m_AttValues = null;
    m_Dataset = null;
  }

  /**
   * Builds a binary sparse instance from a weight and a dense value vector.
   * Every position holding a value different from zero is recorded as set.
   * The dataset reference is null, so the new instance has no access to
   * attribute type information.
   * 
   * @param weight the instance's weight
   * @param attValues the dense vector of attribute values
   */
  public BinarySparseInstance(double weight, double[] attValues) {

    m_Weight = weight;
    m_Dataset = null;
    m_NumAttributes = attValues.length;
    m_AttValues = null;

    // First pass: count the non-zero entries so the index array is sized
    // exactly.
    int nonZero = 0;
    for (int pos = 0; pos < m_NumAttributes; pos++) {
      if (attValues[pos] != 0) {
        nonZero++;
      }
    }

    // Second pass: record their positions in ascending order.
    m_Indices = new int[nonZero];
    int next = 0;
    for (int pos = 0; pos < m_NumAttributes; pos++) {
      if (attValues[pos] != 0) {
        m_Indices[next++] = pos;
      }
    }
  }

  /**
   * Builds a binary sparse instance directly from the given fields. The index
   * array is stored as passed in (it is not copied). The dataset reference is
   * null, so the new instance has no access to attribute type information.
   * 
   * @param weight the instance's weight
   * @param indices the indices, in the full vector, of the attributes that are
   *          set
   * @param maxNumValues the maximum number of values that can be stored, i.e.
   *          the number of attributes of the instance
   */
  public BinarySparseInstance(double weight, int[] indices, int maxNumValues) {

    m_Weight = weight;
    m_NumAttributes = maxNumValues;
    m_Indices = indices;
    m_AttValues = null;
    m_Dataset = null;
  }

  /**
   * Constructor of an instance that sets weight to one, all values to 1, and
   * the reference to the dataset to null. (ie. the instance doesn't have access
   * to information about the attribute types)
   * 
   * @param numAttributes the size of the instance
   */
  public BinarySparseInstance(int numAttributes) {

    m_AttValues = null;
    m_NumAttributes = numAttributes;
    m_Indices = new int[numAttributes];
    for (int i = 0; i < m_Indices.length; i++) {
      m_Indices[i] = i;
    }
    m_Weight = 1;
    m_Dataset = null;
  }

  /**
   * Creates a shallow copy of this instance that keeps the reference to the
   * same dataset. (To obtain a copy without dataset access, use
   * {@code new BinarySparseInstance(instance)} instead.)
   *
   * @return the shallow copy
   */
  @Override
  public Object copy() {

    // The copy constructor clears the dataset reference, so restore it.
    BinarySparseInstance duplicate = new BinarySparseInstance(this);
    duplicate.m_Dataset = m_Dataset;
    return duplicate;
  }

  /**
   * Creates a new instance with this instance's weight and dataset reference,
   * but with its attribute values taken from the given array of doubles
   * (non-zero entries become set attributes).
   *
   * @param values the array with the new values
   * @return the new instance
   */
  public Instance copy(double[] values) {

    BinarySparseInstance fresh = new BinarySparseInstance(m_Weight, values);
    // The constructor clears the dataset reference, so carry ours over.
    fresh.m_Dataset = m_Dataset;
    return fresh;
  }

  /**
   * Appends the attributes of the given instance to those of this one and
   * returns the combined instance. The result has weight 1.0 and a null
   * dataset reference.
   * 
   * @param inst the instance to be merged with this one
   * @return the merged instance
   */
  @Override
  public Instance mergeInstance(Instance inst) {

    final int ownCount = numValues();
    final int otherCount = inst.numValues();
    // Indices of the second instance are shifted past this instance's
    // attributes.
    final int offset = numAttributes();

    int[] merged = new int[ownCount + otherCount];
    int filled = 0;

    for (int k = 0; k < ownCount; k++) {
      merged[filled++] = index(k);
    }
    for (int k = 0; k < otherCount; k++) {
      // Stored zeros of the other instance are not carried over.
      if (inst.valueSparse(k) != 0) {
        merged[filled++] = offset + inst.index(k);
      }
    }

    if (filled != merged.length) {
      // Some entries were skipped: shrink the array to the used prefix.
      int[] exact = new int[filled];
      System.arraycopy(merged, 0, exact, 0, filled);
      merged = exact;
    }

    return new BinarySparseInstance(1.0, merged, offset + inst.numAttributes());
  }

  /**
   * Intentionally a no-op: this class does not store missing values, so there
   * is nothing to replace.
   * 
   * @param array the replacement values (means and modes); ignored
   */
  @Override
  public void replaceMissingValues(double[] array) {

    // Does nothing, since we don't store missing values.
  }

  /**
   * Sets the value of the given attribute (internal floating-point format).
   * Because only set/unset is stored, a zero value removes the attribute's
   * index and any other value adds it; the index array is replaced by a new
   * array whenever it changes.
   * 
   * @param attIndex the attribute's index
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   */
  @Override
  public void setValue(int attIndex, double value) {

    // NOTE(review): locateIndex is inherited; from its use here it presumably
    // returns the slot of attIndex, or of the closest smaller stored index
    // (-1 if there is none) -- confirm against SparseInstance.
    final int slot = locateIndex(attIndex);
    final boolean stored = (slot >= 0) && (m_Indices[slot] == attIndex);

    if (stored) {
      if (value != 0) {
        // Already set; nothing to change.
        return;
      }
      // Drop the entry at 'slot', keeping the remaining order.
      int[] shrunk = new int[m_Indices.length - 1];
      System.arraycopy(m_Indices, 0, shrunk, 0, slot);
      System.arraycopy(m_Indices, slot + 1, shrunk, slot, m_Indices.length
        - slot - 1);
      m_Indices = shrunk;
      return;
    }

    if (value == 0) {
      // Already unset; nothing to change.
      return;
    }

    // Insert attIndex right after 'slot' so the indices stay sorted.
    final int insertAt = slot + 1;
    int[] grown = new int[m_Indices.length + 1];
    System.arraycopy(m_Indices, 0, grown, 0, insertAt);
    grown[insertAt] = attIndex;
    System.arraycopy(m_Indices, insertAt, grown, insertAt + 1,
      m_Indices.length - insertAt);
    m_Indices = grown;
  }

  /**
   * Sets the value stored at the given position of the sparse index array
   * (internal floating-point format). A zero value removes that entry, in
   * which case the index array is replaced by a new, shorter one. Any other
   * value leaves the instance untouched, since a stored entry already stands
   * for 1.
   * 
   * @param indexOfIndex the index of the attribute's index
   * @param value the new attribute value (if the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double)
   */
  @Override
  public void setValueSparse(int indexOfIndex, double value) {

    if (value != 0) {
      return;
    }

    final int tail = m_Indices.length - indexOfIndex - 1;
    int[] shrunk = new int[m_Indices.length - 1];
    System.arraycopy(m_Indices, 0, shrunk, 0, indexOfIndex);
    System.arraycopy(m_Indices, indexOfIndex + 1, shrunk, indexOfIndex, tail);
    m_Indices = shrunk;
  }

  /**
   * Expands the instance into a dense array: 1.0 at every stored index, 0.0
   * everywhere else.
   * 
   * @return an array containing all the instance attribute values
   */
  @Override
  public double[] toDoubleArray() {

    double[] dense = new double[m_NumAttributes];
    for (int attIdx : m_Indices) {
      dense[attIdx] = 1.0;
    }
    return dense;
  }

  /**
   * Returns the description of one instance in sparse format. Without a
   * dataset every stored attribute is printed as "index 1". With a dataset,
   * nominal and string attributes are printed using the (quoted) label at
   * position 1 of the attribute. A weight different from 1.0 is appended in
   * braces.
   * 
   * @return the instance's description as a string
   */
  @Override
  public String toString() {

    StringBuilder out = new StringBuilder();
    out.append('{');

    for (int i = 0; i < m_Indices.length; i++) {
      final int attIdx = m_Indices[i];
      if (i > 0) {
        out.append(",");
      }

      // Only nominal/string attributes of a known dataset print a label;
      // everything else prints the literal value 1.
      boolean printLabel = false;
      if (m_Dataset != null) {
        printLabel = m_Dataset.attribute(attIdx).isNominal()
          || m_Dataset.attribute(attIdx).isString();
      }

      if (printLabel) {
        out.append(attIdx + " "
          + Utils.quote(m_Dataset.attribute(attIdx).value(1)));
      } else {
        out.append(attIdx + " 1");
      }
    }

    out.append('}');
    if (m_Weight != 1.0) {
      out.append(",{"
        + Utils.doubleToString(m_Weight,
          AbstractInstance.s_numericAfterDecimalPoint) + "}");
    }
    return out.toString();
  }

  /**
   * Returns an attribute's value in internal format: 1.0 if the attribute's
   * index is stored in this instance, 0.0 otherwise.
   * 
   * @param attIndex the attribute's index
   * @return the specified value as a double (if the corresponding attribute is
   *         nominal (or a string) then this is the value's index as a double)
   */
  @Override
  public double value(int attIndex) {

    final int slot = locateIndex(attIndex);
    final boolean stored = (slot >= 0) && (m_Indices[slot] == attIndex);
    return stored ? 1.0 : 0.0;
  }

  /**
   * Returns the value stored at the given position of the sparse index array.
   * Every stored entry stands for the value 1, so the argument is not
   * inspected.
   * 
   * @param indexOfIndex the index of the attribute's index
   * @return always 1 (if the corresponding attribute is nominal (or a string)
   *         then this is the value's index as a double)
   */
  @Override
  public final double valueSparse(int indexOfIndex) {

    return 1.0;
  }

  /**
   * Deletes an attribute at the given position (0 to numAttributes() - 1).
   * The attribute count is decremented, the deleted attribute's index (if it
   * was stored) is removed, and every stored index above the deleted position
   * is shifted down by one.
   * 
   * @param position the attribute's position
   */
  @Override
  protected void forceDeleteAttributeAt(int position) {

    // NOTE(review): locateIndex is inherited; from its use here it presumably
    // returns the slot of 'position', or of the closest smaller stored index
    // (-1 if there is none) -- confirm against SparseInstance.
    int index = locateIndex(position);

    m_NumAttributes--;
    if ((index >= 0) && (m_Indices[index] == position)) {
      // The deleted attribute was set: drop its entry and shift the rest.
      int[] tempIndices = new int[m_Indices.length - 1];
      System.arraycopy(m_Indices, 0, tempIndices, 0, index);
      for (int i = index; i < m_Indices.length - 1; i++) {
        tempIndices[i] = m_Indices[i + 1] - 1;
      }
      m_Indices = tempIndices;
    } else {
      // The deleted attribute was not set: the number of stored entries stays
      // the same, but ALL entries after 'index' must be shifted down by one.
      // (The loop previously stopped one element early, leaving the last
      // stored index as 0.)
      int[] tempIndices = new int[m_Indices.length];
      System.arraycopy(m_Indices, 0, tempIndices, 0, index + 1);
      for (int i = index + 1; i < m_Indices.length; i++) {
        tempIndices[i] = m_Indices[i] - 1;
      }
      m_Indices = tempIndices;
    }
  }

  /**
   * Inserts an attribute at the given position (0 to numAttributes()) and sets
   * its value to 1. The attribute count is incremented and every stored index
   * at or above the position is shifted up by one.
   * 
   * @param position the attribute's position
   */
  @Override
  protected void forceInsertAttributeAt(int position) {

    final int located = locateIndex(position);
    final boolean occupied = (located >= 0) && (m_Indices[located] == position);

    // The new entry goes where the existing one is (pushing it up), or right
    // after the closest smaller stored index.
    final int slot = occupied ? located : located + 1;

    m_NumAttributes++;

    int[] grown = new int[m_Indices.length + 1];
    System.arraycopy(m_Indices, 0, grown, 0, slot);
    grown[slot] = position;
    for (int src = slot; src < m_Indices.length; src++) {
      grown[src + 1] = m_Indices[src] + 1;
    }
    m_Indices = grown;
  }

  /**
   * Main method for testing this class. Builds a small dataset with two
   * numeric attributes and one nominal class attribute, then exercises the
   * instance API on a {@code BinarySparseInstance} and on a copy of it,
   * printing each result to standard output.
   * 
   * @param options the command line options - ignored
   */
  public static void main(String[] options) {

    try {

      // Create numeric attributes "length" and "weight"
      Attribute length = new Attribute("length");
      Attribute weight = new Attribute("weight");

      // Create vector to hold nominal values "first" and "second"
      ArrayList<String> my_nominal_values = new ArrayList<String>(3);
      my_nominal_values.add("first");
      my_nominal_values.add("second");

      // Create nominal attribute "position"
      Attribute position = new Attribute("position", my_nominal_values);

      // Create vector of the above attributes
      ArrayList<Attribute> attributes = new ArrayList<Attribute>(3);
      attributes.add(length);
      attributes.add(weight);
      attributes.add(position);

      // Create the empty dataset "race" with above attributes
      Instances race = new Instances("race", attributes, 0);

      // Make position the class attribute
      race.setClassIndex(position.index());

      // Create instance with three attributes (all initially set to 1)
      BinarySparseInstance inst = new BinarySparseInstance(3);

      // Set instance's values for the attributes "length", "weight", and
      // "position"
      inst.setValue(length, 5.3);
      inst.setValue(weight, 300);
      inst.setValue(position, "first");

      // Set instance's dataset to be the dataset "race"
      inst.setDataset(race);

      // Print the instance
      System.out.println("The instance: " + inst);

      // Print the first attribute
      System.out.println("First attribute: " + inst.attribute(0));

      // Print the class attribute
      System.out.println("Class attribute: " + inst.classAttribute());

      // Print the class index
      System.out.println("Class index: " + inst.classIndex());

      // Say if class is missing
      System.out.println("Class is missing: " + inst.classIsMissing());

      // Print the instance's class value in internal format
      System.out.println("Class value (internal format): " + inst.classValue());

      // Print a shallow copy of this instance
      SparseInstance copy = (SparseInstance) inst.copy();
      System.out.println("Shallow copy: " + copy);

      // Set dataset for shallow copy
      copy.setDataset(inst.dataset());
      System.out.println("Shallow copy with dataset set: " + copy);

      // Print out all values in internal format
      System.out.print("All stored values in internal format: ");
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to zero
      System.out.print("All values set to zero: ");
      while (inst.numValues() > 0) {
        inst.setValueSparse(0, 0);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to one
      System.out.print("All values set to one: ");
      for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, 1);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Unset dataset for copy, delete first attribute, and insert it again
      copy.setDataset(null);
      copy.deleteAttributeAt(0);
      copy.insertAttributeAt(0);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with first attribute deleted and inserted: "
        + copy);

      // Same for second attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(1);
      copy.insertAttributeAt(1);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with second attribute deleted and inserted: "
        + copy);

      // Same for last attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(2);
      copy.insertAttributeAt(2);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with third attribute deleted and inserted: "
        + copy);

      // Enumerate attributes (leaving out the class attribute)
      System.out.println("Enumerating attributes (leaving out class):");
      // The element type must be declared so nextElement() yields an Attribute
      // without a cast.
      Enumeration<Attribute> enu = inst.enumerateAttributes();
      while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();
        System.out.println(att);
      }

      // Headers are equivalent?
      System.out.println("Header of original and copy equivalent: "
        + inst.equalHeaders(copy));

      // Test for missing values
      System.out.println("Length of copy missing: " + copy.isMissing(length));
      System.out.println("Weight of copy missing: "
        + copy.isMissing(weight.index()));
      System.out.println("Length of copy missing: "
        + Utils.isMissingValue(copy.value(length)));

      // Prints number of attributes and classes
      System.out.println("Number of attributes: " + copy.numAttributes());
      System.out.println("Number of classes: " + copy.numClasses());

      // Replace missing values
      double[] meansAndModes = { 2, 3, 0 };
      copy.replaceMissingValues(meansAndModes);
      System.out.println("Copy with missing value replaced: " + copy);

      // Setting and getting values and weights
      copy.setClassMissing();
      System.out.println("Copy with missing class: " + copy);
      copy.setClassValue(0);
      System.out.println("Copy with class value set to first value: " + copy);
      copy.setClassValue("second");
      System.out.println("Copy with class value set to \"second\": " + copy);
      copy.setMissing(1);
      System.out.println("Copy with second attribute set to be missing: "
        + copy);
      copy.setMissing(length);
      System.out.println("Copy with length set to be missing: " + copy);
      copy.setValue(0, 0);
      System.out.println("Copy with first attribute set to 0: " + copy);
      copy.setValue(weight, 1);
      System.out.println("Copy with weight attribute set to 1: " + copy);
      copy.setValue(position, "second");
      System.out.println("Copy with position set to \"second\": " + copy);
      copy.setValue(2, "first");
      System.out.println("Copy with last attribute set to \"first\": " + copy);
      System.out.println("Current weight of instance copy: " + copy.weight());
      copy.setWeight(2);
      System.out.println("Current weight of instance copy (set to 2): "
        + copy.weight());
      System.out.println("Last value of copy: " + copy.toString(2));
      System.out.println("Value of position for copy: "
        + copy.toString(position));
      System.out.println("Last value of copy (internal format): "
        + copy.value(2));
      System.out.println("Value of position for copy (internal format): "
        + copy.value(position));
    } catch (Exception e) {
      // Demo entry point: report any failure and fall through to exit.
      e.printStackTrace();
    }
  }

  /**
   * Returns the revision string of this class, as extracted from the embedded
   * revision keyword.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    final String revisionKeyword = "$Revision: 12472 $";
    return RevisionUtils.extract(revisionKeyword);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy