gov.sandia.cognition.learning.algorithm.clustering.initializer.GreedyClusterInitializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: FixedClusterInitializer.java
* Authors: Justin Basilico and Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright February 21, 2006, Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
* or on behalf of the U.S. Government. Export of this program may require a
* license from the United States Government. See CopyrightHistory.txt for
* complete details.
*
*/
package gov.sandia.cognition.learning.algorithm.clustering.initializer;
import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.Cluster;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.ClusterCreator;
import gov.sandia.cognition.math.DivergenceFunction;
import java.util.Random;
/**
* Implements a FixedClusterInitializer that greedily attempts to create the
* initial clusters. The algorithm works by first selecting a random member to
* be the first cluster and then successively finding the member that is
* furthest from the selected clusters. This method of initialization can work
* well with the K-means algorithm.
*
* @param
* Type of {@code Cluster} used in theaceous {@code learn()}
* method.
* @param
* The algorithm operates on a {@code Collection}, so
* {@code DataType} will be something like Vector or String.
* @author Justin Basilico
* @author Kevin R. Dixon
* @since 1.0
*/
@CodeReview(
reviewer="Kevin R. Dixon",
date="2008-07-23",
changesNeeded=false,
comments={
"Now extends AbstractRandomized",
"Cleaned up javadoc a little bit with code annotations.",
"Otherwise, looks fine."
}
)
public class GreedyClusterInitializer, DataType>
extends AbstractMinDistanceFixedClusterInitializer
{
/**
* Creates a new, empty instance of {@code GreedyClusterInitializer}.
*/
public GreedyClusterInitializer()
{
this(null, null, new Random());
}
/**
* Creates a new instance of {@code GreedyClusterInitializer}.
*
* @param divergenceFunction
* The divergence function to use.
* @param creator
* The cluster creator to use.
* @param random
* The random number generator to use.
*/
public GreedyClusterInitializer(
final DivergenceFunction super DataType, ? super DataType> divergenceFunction,
final ClusterCreator creator,
final Random random)
{
super(divergenceFunction, creator, random);
}
@SuppressWarnings("unchecked")
@Override
public GreedyClusterInitializer clone()
{
return (GreedyClusterInitializer) super.clone();
}
@Override
protected int selectNextClusterIndex(
final double[] minDistances,
final boolean[] selected)
{
// We want to find the maximum minimum distance.
double maxMinDistance = Double.MAX_VALUE;
int maxIndex = 0;
final int elementCount = minDistances.length;
for (int i = 0; i < elementCount; i++)
{
if (selected[i])
{
// Skip clusters we've already selected.
continue;
}
// Get the current minimum distance for the element.
double minDistance = minDistances[i];
if (maxIndex == 0 || minDistance > maxMinDistance)
{
// This is the minium seen so far, so keep track of
// it.
maxIndex = i;
maxMinDistance = minDistance;
}
}
if (maxMinDistance <= 0.0)
{
// There are not enough points to prevent selecting two
// that are on top of each other.
return -1;
}
else
{
return maxIndex;
}
}
}