All downloads are free. Search and download functionality uses the official Maven repository.

org.openimaj.ml.clustering.random.RandomSetShortClusterer Maven / Gradle / Ivy

Go to download

Various clustering algorithm implementations for all primitive types including random, random forest, K-Means (Exact, Hierarchical and Approximate), ...

There is a newer version: 1.3.10
Show newest version
/*
	AUTOMATICALLY GENERATED BY jTemp FROM
	/Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/RandomSet#T#Clusterer.jtemp
*/
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.ml.clustering.random;

import java.util.Arrays;
import java.util.Random;

import org.openimaj.data.DataSource;
import org.openimaj.data.RandomData;
import org.openimaj.ml.clustering.ShortCentroidsResult;

/**
 * Clusterer whose centroids are vectors sampled directly from the training
 * data. It follows a similar strategy to {@link RandomShortClusterer} (its
 * superclass), however it is guaranteed that the same training vector will
 * not be sampled more than once.
 * 
 * @author Jonathon Hare ([email protected])
 * @author Sina Samangooei ([email protected])
 */
public class RandomSetShortClusterer extends RandomShortClusterer {
	/**
	 * Creates a new random short clusterer for data containing M elements per
	 * vector. No cluster count is supplied; it is left to the superclass
	 * default (expected to be -1, meaning one centroid per training point -
	 * confirm against {@link RandomShortClusterer}).
	 *
	 * @param M number of elements in each data vector 
	 */
	public RandomSetShortClusterer(int M) {
		super(M);
	}
	
	/**
	 * Creates a new random short clusterer used to create K centroids with
	 * data containing M elements per vector.
	 *
	 * @param M number of elements in each data vector
	 * @param K number of centroids to be created
	 */
	public RandomSetShortClusterer(int M, int K) {
		super(M, K);
	}
	
	/**
	 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
	 * data points will be selected. It is guaranteed that the same data point will not be selected
	 * many times, though this is not the case if two separate entries provided are identical.
	 * <p>
	 * When K is -1 the returned centroids are the <em>same array</em> that was passed in (no copy is
	 * made); otherwise each selected row is copied. When the seed is non-negative the indices are
	 * drawn from a {@link Random} constructed with that seed.
	 * 
	 * @param data source of centroids
	 * @return the selected centroids
	 * @throws IllegalArgumentException if K is not -1 and fewer than K data points are provided
	 */
	@Override
	public ShortCentroidsResult cluster(short[][] data) {
		final ShortCentroidsResult result = new ShortCentroidsResult();
		
		if (K == -1) {
			// Every point becomes a centroid; the caller's array is used as-is, not copied.
			result.centroids = data;
			return result;
		}
		
		if (data.length < K) {
			throw new IllegalArgumentException("Not enough data");
		}
		
		// K distinct row indices in [0, data.length); seeded only when a non-negative seed is set.
		final int[] indices;
		if (this.seed >= 0) {
			indices = RandomData.getUniqueRandomInts(this.K, 0, data.length, new Random(this.seed));
		} else {
			indices = RandomData.getUniqueRandomInts(this.K, 0, data.length);
		}
		
		result.centroids = new short[K][];
		for (int i = 0; i < indices.length; i++) {
			final short[] row = data[indices[i]];
			
			// Copy so that later changes to the training data do not alter the centroids.
			result.centroids[i] = Arrays.copyOf(row, row.length);
		}
		
		return result;
	}
	
	/**
	 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 
	 * If K is -1 all provided data points will be selected. Row selection is delegated to the
	 * data source's <code>getRandomRows</code>, so the guarantee that no data point is selected
	 * more than once relies on that implementation.
	 * <p>
	 * NOTE(review): <code>DataSource</code> is used here as a raw type; if the project's
	 * <code>DataSource</code> is generic, parameterise this signature to match the superclass.
	 * 
	 * @param data a data source object
	 * @return the selected centroids
	 * @throws IllegalArgumentException if K is not -1 and the data source holds fewer than K items
	 */
	@Override
	public ShortCentroidsResult cluster(DataSource data) {
		final ShortCentroidsResult result = new ShortCentroidsResult();
		
		final int numCentroids;
		if (K == -1) {
			numCentroids = data.size();
		} else {
			// Mirror the array overload: K distinct rows cannot be drawn from fewer than K items.
			if (data.size() < K) {
				throw new IllegalArgumentException("Not enough data");
			}
			numCentroids = K;
		}
		
		// Pre-allocate the rows; the data source fills them in place.
		result.centroids = new short[numCentroids][data.numDimensions()];
		data.getRandomRows(result.centroids);
		
		return result;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy