All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.yahoo.labs.samoa.instances.Instances Maven / Gradle / Ivy

Go to download

Massive On-line Analysis is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project, which is also written in Java, while scaling to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
/*
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * 	        http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.  
 */
package com.yahoo.labs.samoa.instances;

import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import moa.core.Utils;

/**
 * An in-memory dataset: a header ({@link InstanceInformation}) describing the
 * relation and its attributes, plus an ordered list of {@link Instance} rows.
 *
 * <p>Besides plain storage the class offers ARFF rendering
 * ({@link #toString()}), shuffling, stratification and cross-validation
 * splits, and keeps a name-to-position lookup of the attributes.
 *
 * <p>The class is not synchronized.
 *
 * @author abifet
 */
public class Instances implements Serializable {

    /**
     * The keyword used to denote the start of an arff header
     */
    public final static String ARFF_RELATION = "@relation";

    /**
     * The keyword used to denote the start of the arff data section
     */
    public final static String ARFF_DATA = "@data";

    private static final long serialVersionUID = 8110510475535581577L;

    /**
     * The header: relation name, attributes and class index. May be
     * {@code null} for objects created with {@link #Instances()} or
     * {@link #Instances(StringReader, int)} until attributes are set.
     */
    protected InstanceInformation instanceInformation;

    /**
     * The stored rows. {@code null} only for objects created with
     * {@link #Instances()} before {@link #delete()} is called.
     */
    protected List<Instance> instances;

    /**
     * The loader used by {@link #readInstance(Reader)}; only set by the
     * reader-based constructors.
     */
    protected ArffLoader arff;

    /**
     * Maps an attribute name to its position in the header.
     */
    protected HashMap<String, Integer> hsAttributesIndices;

    /**
     * Indices of relevant features.
     */
    protected int[] indicesRelevants;

    /**
     * Indices of irrelevant features.
     */
    protected int[] indicesIrrelevants;

    /**
     * Creates a full copy of another dataset: header, feature-relevance
     * indices and a copy of every row.
     *
     * @param chunk the dataset to copy
     */
    public Instances(Instances chunk) {
        // The delegate already copies the header, builds the attribute index
        // and clones the relevance indices; only the rows are left to copy.
        this(chunk, chunk.numInstances());
        chunk.copyInstances(0, this, chunk.numInstances());
    }

    /**
     * Creates an object with no header and no row storage. Attributes can be
     * supplied later through {@link #setAttributes(Attribute[])} or
     * {@link #insertAttributeAt(Attribute, int)}.
     */
    public Instances() {
    }

    /**
     * Creates an empty dataset whose header is read from an ARFF source.
     * Rows are pulled in later, one at a time, with
     * {@link #readInstance(Reader)}.
     *
     * @param reader the ARFF source
     * @param size currently ignored; the loader is always created with 0
     * @param classAttribute the class attribute index handed to the loader
     */
    public Instances(Reader reader, int size, int classAttribute) {
        arff = new ArffLoader(reader, 0, classAttribute);
        this.instanceInformation = arff.getStructure();
        this.instances = new ArrayList<Instance>();
        this.computeAttributesIndices();
    }

    /**
     * Creates an empty dataset whose header is read from an ARFF source using
     * a multi-target loader.
     *
     * @param reader the ARFF source
     * @param range the range handed to the {@link MultiTargetArffLoader}
     */
    public Instances(Reader reader, Range range) {
        this.arff = new MultiTargetArffLoader(reader, range);
        this.instanceInformation = arff.getStructure();
        this.instances = new ArrayList<Instance>();
        this.computeAttributesIndices();
    }

    /**
     * Creates an empty dataset that shares the structure (a copy of the
     * header and of the feature-relevance indices) of another dataset.
     *
     * @param chunk the dataset whose structure is copied
     * @param capacity initial capacity of the row list; negative values are
     * treated as 0
     */
    public Instances(Instances chunk, int capacity) {
        this.instanceInformation = new InstanceInformation(chunk.instanceInformation());
        this.instances = new ArrayList<Instance>(Math.max(capacity, 0));
        this.computeAttributesIndices();
        this.copyFeatureIndices(chunk);
    }

    /**
     * Creates an empty dataset from a relation name and an attribute array.
     *
     * @param st the relation name
     * @param v the attributes
     * @param capacity initial capacity of the row list
     */
    public Instances(String st, Attribute[] v, int capacity) {
        this.instanceInformation = new InstanceInformation(st, v);
        this.instances = new ArrayList<Instance>(capacity);
        this.computeAttributesIndices();
    }

    /**
     * Creates an empty dataset from a relation name and an attribute list.
     *
     * @param st the relation name
     * @param v the attributes
     * @param capacity initial capacity of the row list
     */
    public Instances(String st, List<Attribute> v, int capacity) {
        Attribute[] attributes = v.toArray(new Attribute[v.size()]);
        this.instanceInformation = new InstanceInformation(st, attributes);
        this.instances = new ArrayList<Instance>(capacity);
        this.computeAttributesIndices();
    }

    /**
     * Creates a dataset holding copies of a contiguous run of rows of another
     * dataset, together with a copy of its structure.
     *
     * @param chunk the source dataset
     * @param first index of the first row to copy
     * @param toCopy number of rows to copy
     * @throws IllegalArgumentException if the requested run does not lie
     * inside {@code chunk}
     */
    public Instances(Instances chunk, int first, int toCopy) {

        this(chunk, toCopy);

        // Written as a subtraction so that a very large first/toCopy pair
        // cannot overflow int and slip past the check.
        if ((first < 0) || (toCopy > chunk.numInstances() - first)) {
            throw new IllegalArgumentException("Parameters first and/or toCopy out "
                    + "of range");
        }
        chunk.copyInstances(first, this, toCopy);
    }

    /**
     * Creates an empty dataset without a header.
     *
     * @param st not used by this constructor
     * @param capacity initial capacity of the row list
     */
    public Instances(StringReader st, int capacity) {
        this.instances = new ArrayList<Instance>(capacity);
        // No header yet: this leaves an empty attribute index behind.
        this.computeAttributesIndices();
    }

    //Information Instances
    /**
     * Sets the relation name.
     *
     * @param string the new relation name
     */
    public void setRelationName(String string) {
        this.instanceInformation.setRelationName(string);
    }

    /**
     * Gets the relation name.
     *
     * @return the relation name
     */
    public String getRelationName() {
        return this.instanceInformation.getRelationName();
    }

    /**
     * Returns the class index stored in the header.
     *
     * @return the class index
     */
    public int classIndex() {
        return this.instanceInformation.classIndex();
    }

    /**
     * Sets the class index.
     *
     * @param classIndex the new class index
     */
    public void setClassIndex(int classIndex) {
        this.instanceInformation.setClassIndex(classIndex);
    }

    /**
     * Returns the class attribute stored in the header.
     *
     * @return the class attribute
     */
    public Attribute classAttribute() {
        return this.instanceInformation.classAttribute();
    }

    /**
     * Returns the number of attributes in the header.
     *
     * @return the number of attributes
     */
    public int numAttributes() {
        return this.instanceInformation.numAttributes();
    }

    /**
     * Returns the attribute at a given position.
     *
     * @param w the attribute position
     * @return the attribute
     */
    public Attribute attribute(int w) {
        return this.instanceInformation.attribute(w);
    }

    /**
     * Returns the number of classes reported by the header.
     *
     * @return the number of classes
     */
    public int numClasses() {
        return this.instanceInformation.numClasses();
    }

    /**
     * Removes an attribute from the header and from every stored row.
     *
     * @param integer position of the attribute to remove
     */
    public void deleteAttributeAt(Integer integer) {
        this.instanceInformation.deleteAttributeAt(integer);
        for (int i = 0; i < numInstances(); i++) {
            Instance row = instance(i);
            // The row is detached from the dataset while it is modified and
            // re-attached afterwards.
            row.setDataset(null);
            row.deleteAttributeAt(integer);
            row.setDataset(this);
        }
        // Positions shifted, so the name lookup has to be rebuilt.
        this.computeAttributesIndices();
    }

    /**
     * Inserts an attribute into the header and into every stored row. A
     * header is created on demand if none exists yet.
     *
     * @param attribute the attribute to insert
     * @param position the position at which it is inserted
     */
    public void insertAttributeAt(Attribute attribute, int position) {
        if (this.instanceInformation == null) {
            this.instanceInformation = new InstanceInformation();
        }
        this.instanceInformation.insertAttributeAt(attribute, position);
        for (int i = 0; i < numInstances(); i++) {
            Instance row = instance(i);
            row.setDataset(null);
            // Every row gets the new slot at the same position as the header.
            row.insertAttributeAt(position);
            row.setDataset(this);
        }
        // Positions shifted, so the name lookup has to be rebuilt.
        this.computeAttributesIndices();
    }

    //List of Instances
    /**
     * Returns the row at a given position.
     *
     * @param num the row index
     * @return the instance stored at that index
     */
    public Instance instance(int num) {
        return this.instances.get(num);
    }

    /**
     * Returns the number of stored rows.
     *
     * @return the number of instances
     */
    public int numInstances() {
        return this.instances.size();
    }

    /**
     * Appends a copy of the given instance.
     *
     * @param inst the instance to copy and append
     */
    public void add(Instance inst) {
        this.instances.add(inst.copy());
    }

    /**
     * Shuffles the rows in place by swapping each position, from the last
     * down to the second, with a randomly chosen position at or before it.
     *
     * @param random the source of randomness
     */
    public void randomize(Random random) {
        for (int j = numInstances() - 1; j > 0; j--) {
            swap(j, random.nextInt(j + 1));
        }
    }

    /**
     * Reorders the rows for stratified cross-validation. Rows are first
     * grouped by class value (rows with a missing class form one group) and
     * then interleaved by {@link #stratStep(int)}. Does nothing when the
     * class attribute is not nominal.
     *
     * @param numFolds the number of folds
     * @throws IllegalArgumentException if the class is nominal and
     * {@code numFolds} is not positive
     */
    public void stratify(int numFolds) {

        if (classAttribute().isNominal()) {

            // sort by class
            int index = 1;
            while (index < numInstances()) {
                Instance instance1 = instance(index - 1);
                for (int j = index; j < numInstances(); j++) {
                    Instance instance2 = instance(j);
                    if ((instance1.classValue() == instance2.classValue())
                            || (instance1.classIsMissing()
                            && instance2.classIsMissing())) {
                        swap(index, j);
                        index++;
                    }
                }
                index++;
            }
            stratStep(numFolds);
        }
    }

    /**
     * Rebuilds the row list by taking every {@code numFolds}-th row starting
     * at offset 0, then at offset 1, and so on, until all rows are placed.
     *
     * @param numFolds the stride between picked rows
     * @throws IllegalArgumentException if {@code numFolds} is not positive
     */
    protected void stratStep(int numFolds) {
        // A zero stride would never advance and the loop below would not end.
        if (numFolds <= 0) {
            throw new IllegalArgumentException(
                    "Number of folds must be positive: " + numFolds);
        }
        ArrayList<Instance> newVec = new ArrayList<Instance>(this.instances.size());
        int start = 0, j;

        // create stratified batch
        while (newVec.size() < numInstances()) {
            j = start;
            while (j < numInstances()) {
                newVec.add(instance(j));
                j = j + numFolds;
            }
            start++;
        }
        this.instances = newVec;
    }

    /**
     * Builds the training set of one cross-validation fold and shuffles it.
     *
     * @param numFolds the number of folds
     * @param numFold the index of the fold that is left out
     * @param random the source of randomness for the shuffle
     * @return the shuffled training set
     */
    public Instances trainCV(int numFolds, int numFold, Random random) {
        Instances train = trainCV(numFolds, numFold);
        train.randomize(random);
        return train;
    }

    /**
     * Builds the training set of one cross-validation fold: all rows except
     * those that {@link #testCV(int, int)} returns for the same arguments.
     *
     * @param numFolds the number of folds
     * @param numFold the index of the fold that is left out
     * @return the training set
     */
    public Instances trainCV(int numFolds, int numFold) {
        int numInstForFold, first, offset;
        Instances train;

        // The first (numInstances % numFolds) folds hold one extra row.
        numInstForFold = numInstances() / numFolds;
        if (numFold < numInstances() % numFolds) {
            numInstForFold++;
            offset = numFold;
        } else {
            offset = numInstances() % numFolds;
        }
        train = new Instances(this, numInstances() - numInstForFold);
        first = numFold * (numInstances() / numFolds) + offset;
        copyInstances(0, train, first);
        copyInstances(first + numInstForFold, train,
                numInstances() - first - numInstForFold);
        return train;
    }

    /**
     * Appends copies of a contiguous run of this dataset's rows to another
     * dataset.
     *
     * @param from index of the first row to copy
     * @param dest the dataset receiving the rows
     * @param num the number of rows to copy
     */
    protected void copyInstances(int from, Instances dest, int num) {
        for (int i = 0; i < num; i++) {
            dest.add(instance(from + i));
        }
    }

    /**
     * Builds the test set of one cross-validation fold.
     *
     * @param numFolds the number of folds
     * @param numFold the index of the fold to return
     * @return the test set
     */
    public Instances testCV(int numFolds, int numFold) {

        int numInstForFold, first, offset;
        Instances test;

        // The first (numInstances % numFolds) folds hold one extra row.
        numInstForFold = numInstances() / numFolds;
        if (numFold < numInstances() % numFolds) {
            numInstForFold++;
            offset = numFold;
        } else {
            offset = numInstances() % numFolds;
        }
        test = new Instances(this, numInstForFold);
        first = numFold * (numInstances() / numFolds) + offset;
        copyInstances(first, test, numInstForFold);
        return test;
    }

    /**
     * Mean or mode of an attribute. Not implemented.
     *
     * @param j the attribute index
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public double meanOrMode(int j) {
        throw new UnsupportedOperationException("Not yet implemented"); //CobWeb
    }

    /**
     * Reads the next instance from the loader this dataset was created with
     * and appends a copy of it.
     *
     * @param fileReader not used; the loader created in the constructor is
     * read instead
     * @return true if an instance was read and added, false if the loader
     * returned no instance
     */
    public boolean readInstance(Reader fileReader) {
        Instance inst = arff.readInstance();
        if (inst == null) {
            return false;
        }
        inst.setDataset(this);
        add(inst);
        return true;
    }

    /**
     * Removes all rows by replacing the row list with a new empty one.
     */
    public void delete() {
        this.instances = new ArrayList<Instance>();
    }

    /**
     * Removes the row at the given position.
     *
     * @param index the index of the row to remove
     */
    public void delete(int index) {
        this.instances.remove(index);
    }

    /**
     * Exchanges two rows.
     *
     * @param i index of the first row
     * @param j index of the second row
     */
    public void swap(int i, int j) {
        Instance in = instances.get(i);
        instances.set(i, instances.get(j));
        instances.set(j, in);
    }

    /**
     * Returns the header object.
     *
     * @return the instance information
     */
    private InstanceInformation instanceInformation() {
        return this.instanceInformation;
    }

    /**
     * Looks an attribute up by name with a linear scan over the header.
     *
     * @param name the attribute name
     * @return the first attribute with that name, or {@code null} if there is
     * none
     */
    public Attribute attribute(String name) {

        for (int i = 0; i < numAttributes(); i++) {
            if (attribute(i).name().equals(name)) {
                return attribute(i);
            }
        }
        return null;
    }

    /**
     * Returns the number of stored rows; same as {@link #numInstances()}.
     *
     * @return the number of instances
     */
    public int size() {
        return this.numInstances();
    }

    /**
     * Replaces the row at a given position. Unlike {@link #add(Instance)},
     * the instance is stored as given, not copied.
     *
     * @param i the row index
     * @param inst the instance to store
     */
    public void set(int i, Instance inst) {
        this.instances.set(i, inst);
    }

    /**
     * Returns the row at a given position; same as {@link #instance(int)}.
     *
     * @param k the row index
     * @return the instance stored at that index
     */
    public Instance get(int k) {
        return this.instance(k);
    }

    /**
     * Forwards the range of output indices to the header.
     *
     * @param range the output range
     */
    public void setRangeOutputIndices(Range range) {
        this.instanceInformation.setRangeOutputIndices(range);

    }

    /**
     * Replaces the attributes of the header, creating the header on demand.
     *
     * @param v the new attributes
     */
    public void setAttributes(Attribute[] v) {
        if (this.instanceInformation == null) {
            this.instanceInformation = new InstanceInformation();
        }
        this.instanceInformation.setAttributes(v);
        this.computeAttributesIndices();
    }

    /**
     * Replaces the attributes of the header, creating the header on demand.
     *
     * @param v the new attributes
     * @param indexValues index values handed to the header together with
     * the attributes
     */
    public void setAttributes(Attribute[] v, int[] indexValues) {
        if (this.instanceInformation == null) {
            this.instanceInformation = new InstanceInformation();
        }
        this.instanceInformation.setAttributes(v, indexValues);
        this.computeAttributesIndices();
    }

    /**
     * List-based variant of {@link #setAttributes(Attribute[], int[])}.
     *
     * @param v the new attributes
     * @param indexValues index values handed to the header together with
     * the attributes
     */
    public void setAttributes(List<Attribute> v, List<Integer> indexValues) {
        int[] ret = new int[indexValues.size()];
        for (int i = 0; i < ret.length; i++) {
            ret[i] = indexValues.get(i);
        }
        Attribute[] attributes = v.toArray(new Attribute[v.size()]);
        setAttributes(attributes, ret);
    }

    /**
     * Returns the dataset as a string in ARFF format. Strings are quoted if
     * they contain whitespace characters, or if they are a question mark.
     *
     * @return the dataset in ARFF format as a string
     */
    @Override
    public String toString() {

        StringBuilder text = new StringBuilder();

        text.append(ARFF_RELATION).append(" ").
                append(Utils.quote(this.instanceInformation.getRelationName())).append("\n\n");
        for (int i = 0; i < numAttributes(); i++) {
            text.append(attribute(i).toString()).append("\n");
        }
        text.append("\n").append(ARFF_DATA).append("\n");

        text.append(stringWithoutHeader());
        return text.toString();
    }

    /**
     * Returns the instances in the dataset as a string in ARFF format. Strings
     * are quoted if they contain whitespace characters, or if they are a
     * question mark.
     *
     * @return the rows, one per line, without a trailing line break
     */
    protected String stringWithoutHeader() {

        StringBuilder text = new StringBuilder();

        for (int i = 0; i < numInstances(); i++) {
            text.append(instance(i));
            if (i < numInstances() - 1) {
                text.append('\n');
            }
        }
        return text.toString();

    }

    /**
     * Returns the index of an Attribute, looked up by its name.
     *
     * @param att the attribute
     * @return the position recorded for the attribute's name, or -1 if the
     * name is unknown
     */
    protected int indexOf(Attribute att) {
        // Objects built with the no-arg constructor have no lookup yet.
        if (this.hsAttributesIndices == null) {
            this.computeAttributesIndices();
        }
        Integer index = this.hsAttributesIndices.get(att.name());
        return (index == null) ? -1 : index;
    }

    /**
     * Rebuilds the attribute-name lookup from the current header. Leaves an
     * empty lookup when there is no header.
     */
    private void computeAttributesIndices() {
        this.hsAttributesIndices = new HashMap<String, Integer>();
        if (this.instanceInformation == null) {
            return;
        }
        // iterates through all existing attributes
        // and records the position of each one under its name
        for (int i = 0; i < this.numAttributes(); i++) {
            Attribute att = this.attribute(i);
            if (att != null) {
                hsAttributesIndices.put(att.name(), i);
            }
        }
    }

    /**
     * Clones the relevant/irrelevant feature indices of another dataset into
     * this one. Nothing is copied when the other dataset has no relevant
     * indices set.
     *
     * @param chunk the dataset to copy the indices from
     */
    private void copyFeatureIndices(Instances chunk) {
        if (chunk.indicesRelevants != null) {
            this.indicesRelevants = chunk.indicesRelevants.clone();
            this.indicesIrrelevants = (chunk.indicesIrrelevants == null)
                    ? null : chunk.indicesIrrelevants.clone();
        }
    }

    /**
     * Returns the indices of the relevant features indicesRelevants.
     * @return indicesRelevants (the internal array, not a copy)
     */
    public int[] getIndicesRelevants() {
        return indicesRelevants;
    }

    /**
     * Returns the indices of the irrelevant features indicesIrrelevants.
     * @return indicesIrrelevants (the internal array, not a copy)
     */
    public int[] getIndicesIrrelevants() {
        return indicesIrrelevants;
    }

    /**
     * Sets the indices of relevant features.
     * This method also sets the irrelevant ones since
     * it is the set complement: every attribute that is neither listed as
     * relevant nor the class attribute, in ascending order.
     *
     * @param indicesRelevants the relevant feature indices; stored as given.
     * May be empty; entries outside the attribute range are ignored when the
     * complement is computed
     * @throws NullPointerException if {@code indicesRelevants} is null
     */
    public void setIndicesRelevants(int[] indicesRelevants) {
        if (indicesRelevants == null) {
            throw new NullPointerException("indicesRelevants must not be null");
        }
        this.indicesRelevants = indicesRelevants;

        int numAtts = this.numAttributes();
        int classIdx = this.classIndex();

        // Mark the relevant positions; a mask does not depend on the input
        // being sorted, duplicate-free or non-empty.
        boolean[] relevant = new boolean[numAtts];
        for (int idx : indicesRelevants) {
            if (idx >= 0 && idx < numAtts) {
                relevant[idx] = true;
            }
        }

        // Count first so the result array has exactly the right size even
        // when the class index is unset or listed among the relevant ones.
        int numIrrelevantFeatures = 0;
        for (int i = 0; i < numAtts; i++) {
            if (i != classIdx && !relevant[i]) {
                numIrrelevantFeatures++;
            }
        }

        int[] irrelevant = new int[numIrrelevantFeatures];
        int next = 0;
        for (int i = 0; i < numAtts; i++) {
            if (i != classIdx && !relevant[i]) {
                irrelevant[next++] = i;
            }
        }
        this.indicesIrrelevants = irrelevant;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy