All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.learning.data.feature.RandomSubspace Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:            RandomSubspace.java
 * Authors:         Justin Basilico
 * Project:         Cognitive Foundry
 * 
 * Copyright 2011 Cognitive Foundry. All rights reserved.
 */

package gov.sandia.cognition.learning.data.feature;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.BatchLearner;
import gov.sandia.cognition.learning.data.DatasetUtil;
import gov.sandia.cognition.learning.function.vector.SubVectorEvaluator;
import gov.sandia.cognition.math.Permutation;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.math.matrix.VectorFactoryContainer;
import gov.sandia.cognition.math.matrix.Vectorizable;
import gov.sandia.cognition.util.AbstractRandomized;
import gov.sandia.cognition.util.ArgumentChecker;
import gov.sandia.cognition.util.Randomized;
import java.util.Arrays;
import java.util.Collection;
import java.util.Random;

/**
 * Selects a random subspace from the given vector, which is a random set of
 * indices. It is typically used in the context of ensemble learning to adapt
 * a base learner to increase the variance, similar to the method used in
 * bagging or random forests. Thus, it is also known as attribute bagging.
 *
 * @author  Justin Basilico
 * @since   3.3.3
 */
@PublicationReference(
    title="Random Subspace Method",
    author="Wikipedia",
    year=2011,
    type=PublicationType.WebPage,
    url="http://en.wikipedia.org/wiki/Random_subspace_method")
public class RandomSubspace
    extends AbstractRandomized
    implements BatchLearner, SubVectorEvaluator>,
        Randomized, VectorFactoryContainer
{
    /** The default size is {@value}. */
    public static final int DEFAULT_SIZE = 10;

    /** The size of the random subspace to create, which is the number of
     *  dimensions that are chosen. */
    protected int size;

    /** The vector factory for the sub vector evaluator to use. */
    protected VectorFactory vectorFactory;

    /**
     * Creates a new {@code RandomSubspace} with the default size.
     */
    public RandomSubspace()
    {
        this(DEFAULT_SIZE);
    }

    /**
     * Creates a new {@code RandomSubspace} with the given size.
     *
     * @param   size
     *      The size of the subspace to create. Must be positive.
     */
    public RandomSubspace(
        final int size)
    {
        this(size, new Random());
    }

    /**
     * Creates a new {@code RandomSubspace} with the given parameters.
     *
     * @param   size
     *      The size of the subspace to create. Must be positive.
     * @param   random
     *      The random number generator to use.
     */
    public RandomSubspace(
        final int size,
        final Random random)
    {
        this(size, random, VectorFactory.getDefault());
    }

    /**
     * Creates a new {@code RandomSubspace} with the given parameters.
     *
     * @param   size
     *      The size of the subspace to create. Must be positive.
     * @param   random
     *      The random number generator to use.
     * @param   vectorFactory
     *      The vector factory to use in the result to create
     */
    public RandomSubspace(
        final int size,
        final Random random,
        final VectorFactory vectorFactory)
    {
        super(random);

        this.setSize(size);
        this.setVectorFactory(vectorFactory);
    }

    @Override
    public SubVectorEvaluator learn(
        final Collection data)
    {
        // Figure out the dimensionality of the data.
        final int dimensionality = DatasetUtil.getDimensionality(data);

        // Can't learn with a bad dimensionality.
        if (dimensionality < 0)
        {
            return null;
        }

// TODO: If we had a method for creating a partial permutation, we could avoid
// this code. -- jbasilico (2011-10-18)

        // Sample the indices we want to use without replacement.
        // We do this by creating a permutation and then only taking the first
        // part of it, containing the indices we will use.
        final int[] permutation = Permutation.createPermutation(
            dimensionality, this.getRandom());
        final int[] indices = new int[Math.min(size, dimensionality)];
        System.arraycopy(permutation, 0, indices, 0, indices.length);

        // We sort the indices just to make their interpretation easier.
        Arrays.sort(indices);

        // Create the result.
        return new SubVectorEvaluator(dimensionality, indices,
            this.getVectorFactory());
    }

    /**
     * Gets the size of the subspace that will be created.
     *
     * @return
     *      The size of the subspace that will be created. Must be positive.
     */
    public int getSize()
    {
        return this.size;
    }

    /**
     * Sets the size of the subspace that will be created.
     *
     * @param   size
     *      The size of the subspace that will be created. Must be positive.
     */
    public void setSize(
        final int size)
    {
        ArgumentChecker.assertIsPositive("size", size);
        this.size = size;
    }

    @Override
    public VectorFactory getVectorFactory()
    {
        return this.vectorFactory;
    }

    /**
     * Sets the vector factory to use.
     *
     * @param   vectorFactory
     *      The vector factory to use.
     */
    public void setVectorFactory(
        final VectorFactory vectorFactory)
    {
        this.vectorFactory = vectorFactory;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy