All downloads are free. Search and download functionality uses the official Maven repository.

org.openimaj.ml.clustering.kdtree.ClusterTestDataLoader Maven / Gradle / Ivy

Go to download

A project for various tests that don't quite constitute demos but might be useful to look at.

There is a newer version: 1.3.8
Show newest version
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.ml.clustering.kdtree;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

/**
 * Load clusters from http://people.cs.nctu.edu.tw/~rsliang/dbscan/testdatagen.html
 * @author Sina Samangooei ([email protected])
 *
 */
public class ClusterTestDataLoader{
	/**
	 * Test details
	 * @author Sina Samangooei ([email protected])
	 *
	 */
	public static class TestStats{
		/**
		 * EPS variable
		 */
		public double eps;
		/**
		 * minpts variable
		 */
		public int minpts;
		/**
		 * nclusters variable
		 */
		public int ncluster;
		/**
		 * noutliers variable
		 */
		public int noutliers;
		/**
		 * mineps variable
		 */
		public double mineps;
	}
	private int percluster = -1;
	private boolean outliers = true;
	
	
	/**
	 * 
	 */
	public ClusterTestDataLoader() {
		this.percluster = -1;
	}
	
	/**
	 * @param percluster 
	 * @param outliers 
	 * 
	 */
	public ClusterTestDataLoader(int percluster, boolean outliers) {
		this.percluster = percluster;
		this.outliers = outliers;
	}

	private Logger logger = Logger.getLogger(ClusterTestDataLoader.class);
	private TestStats testStats;
	private int[][] testClusters;
	private double[][] testData;
	/**
	 * @param data
	 * @return read {@link TestStats}
	 */
	private TestStats readTestStats(String[] data) {
		ClusterTestDataLoader.TestStats ret = new TestStats();
		int i = 0;
		ret.eps = Double.parseDouble(data[i++].split("=")[1].trim());
		ret.minpts = Integer.parseInt(data[i++].split("=")[1].trim());
		ret.ncluster = Integer.parseInt(data[i++].split("=")[1].trim());
		ret.noutliers = Integer.parseInt(data[i++].split("=")[1].trim());
		ret.mineps = Double.parseDouble(data[i++].split("=")[1].trim());
		return ret;
	}


	/**
	 * @param data
	 * @return read the correct clusters
	 */
	private int[][] readTestClusters(String[] data) {
		int i = 0;
		for (;data[i].length()!=0; i++);
		for (i=i+1;data[i].length()!=0; i++);
		List clusters = new ArrayList();
		int count = 0;
		for (i=i+1;i", "").trim();
			arr[i++] = Integer.parseInt(s)-1;

		}
		return arr;
	}
	/**
	 * @param data
	 * @return read the test data
	 */
	private double[][] readTestData(String[] data) {
		
		int i = 0;
		for (;data[i].length()!=0; i++);
		List dataL = new ArrayList();
		int start = i+1;
		for (i=start;data[i].length()!=0; i++){
			dataL.add(readDataLine(data[i]));
		}
		logger.debug(String.format("Loading %d data items\n",dataL.size()));
		return dataL.toArray(new double[dataL.size()][]);
	}
	private Set existing(int[][] correct) {
		Set exist = new HashSet();
		for (int[] is : correct) {
			for (int i : is) {
				exist.add(i);
			}
		}
		return exist;
	}

	private double[] readDataLine(String string) {
		String[] split = string.split(" ");
		double[] arr = new double[]{
				Double.parseDouble(split[1]),
				Double.parseDouble(split[2])
		};
		return arr;
	}

	public void prepare(String[] data) {
		this.testStats = this.readTestStats(data);
		this.testClusters = this.readTestClusters(data);
		this.testData = this.readTestData(data);
		correctClusters();
	}

	private void correctClusters() {
		
		if(this.percluster != -1){
			double[][] correctedData = null;
			int[][] correctedClusters = new int[this.testClusters.length][this.percluster];	
			int seen ;
			if(this.outliers){
				seen = this.testStats.noutliers;
				correctedData= new double[this.percluster * this.testClusters.length + seen][];
				for (int i = 0; i < seen; i++) {
					correctedData[i] = this.testData[i];
				}
				
			}
			else{
				seen = 0;
				correctedData = new double[this.percluster * this.testClusters.length][];
			}
			for (int i = 0; i < this.testClusters.length; i++) {
				int[] clust = this.testClusters[i];
				for (int j = 0; j < this.percluster; j++) {
					int d = clust[j];
					correctedData[seen] = this.testData[d];
					correctedClusters[i][j] = seen;
					seen++;
				}
			}
			
			this.testClusters = correctedClusters;
			this.testData = correctedData;
		}
	}

	public TestStats getTestStats() {
		return this.testStats;
	}

	public double[][] getTestData() {
		return this.testData;
	}

	public int[][] getTestClusters() {
		return this.testClusters;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy