
// prerna.algorithm.learning.util.CategoricalCluster (Maven / Gradle / Ivy artifact listing, newest version)
package prerna.algorithm.learning.util;
import java.util.Collection;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class CategoricalCluster extends Hashtable> {
private static final Logger LOGGER = LogManager.getLogger(CategoricalCluster.class.getName());
private Map weights = new HashMap();
/**
* serialization id
*/
private static final long serialVersionUID = -3495117301034986814L;
/**
* Default constructor
*/
public CategoricalCluster(Map categoricalWeights) {
weights = categoricalWeights;
}
public void addToCluster(List attributeNames, List attributeInstances, List values) {
for(int i = 0; i < attributeNames.size(); i++) {
this.addToCluster(attributeNames.get(i), attributeInstances.get(i), values.get(i));
}
}
public void addToCluster(String attributeName, String attributeInstance, Double value) {
Hashtable valCount = null;
if(this.containsKey(attributeName))
{
valCount = this.get(attributeName);
if(valCount.containsKey(attributeInstance)) { // old instance value for property
double currValue = valCount.get(attributeInstance);
currValue += value;
valCount.put(attributeInstance, currValue);
} else { // new instance value for property
valCount.put(attributeInstance, value);
}
}
// new property to consider
else
{
valCount = new Hashtable();
valCount.put(attributeInstance, value);
this.put(attributeName, valCount);
}
}
public void removeFromCluster(List attributeNames, List attributeInstances, List values) {
for(int i = 0; i < attributeNames.size(); i++) {
this.removeFromCluster(attributeNames.get(i), attributeInstances.get(i), values.get(i));
}
}
public void removeFromCluster(String attributeName, String attributeInstance, Double value) {
Hashtable valCount = null;
if(this.containsKey(attributeName)) {
valCount = this.get(attributeName);
if(valCount.containsKey(attributeInstance)) { // reduce count by value
double currValue = valCount.get(attributeInstance);
currValue -= value;
// remove if value is 0
if(currValue == 0) {
valCount.remove(attributeInstance);
} else {
valCount.put(attributeInstance, currValue);
}
if(currValue < 0) {
LOGGER.error("WARNING!!! Attribute " + attributeName + " with value " + attributeInstance + " is now a negative value...");
}
}
// instance value cannot be found
else {
throw new NullPointerException("Attribute " + attributeName + " with value " + attributeInstance + " cannot be found in cluster to remove...");
}
}
// property not found
else {
throw new NullPointerException("Attribute " + attributeName + " cannot be found in cluster to remove...");
}
}
public Double getSimilarity(String attributeName, String attributeInstance) {
return 0.0;
}
public Double getSimilarity(List attributeNames, List attributeInstances, int indexToSkip) {
double similarity = 0.0;
// loop through all the categorical properties (each weight corresponds to one categorical property)
for(int i = 0; i < attributeNames.size(); i++) {
if(i==indexToSkip) {
continue;
}
// sumProperties contains the total number of instances for the property
double sumProperties = 0;
Hashtable propertyHash = this.get(attributeNames.get(i));//categoryClusterInfo.get(i);
Collection valueCollection = propertyHash.values();
for(Double val : valueCollection) {
sumProperties += val;
}
// numOccuranceInCluster contains the number of instances in the cluster that contain the same prop value as the instance
double numOccuranceInCluster = 0;
if(propertyHash.containsKey(attributeInstances.get(i))) {
numOccuranceInCluster = propertyHash.get(attributeInstances.get(i));
}
double weight = weights.get(attributeNames.get(i));
if(sumProperties == 0) {
throw new IllegalArgumentException("sumProperties can not be 0");
}
similarity += weight * numOccuranceInCluster / sumProperties;
}
return similarity;
}
public void reset() {
for(String key: this.keySet()) {
Hashtable table = this.get(key);
for(String key2: table.keySet()) {
table.put(key2, 0.0);
}
}
}
public double getClusterSimilarity(CategoricalCluster c2, String instanceType) {
double similarity = 0;
for(String attributeType : this.keySet()) {
if(attributeType.equals(instanceType)) {
continue;
}
Hashtable thisTypeHash = this.get(attributeType);
Hashtable typeHash = ((CategoricalCluster) c2).get(attributeType);
if(thisTypeHash.isEmpty() || typeHash.isEmpty()) {
continue;
}
double normalizationCount1 = 0;
for(String propInstance : thisTypeHash.keySet()) {
normalizationCount1 += thisTypeHash.get(propInstance);
}
double normalizationCount2 = 0;
for(String propInstance : typeHash.keySet()) {
normalizationCount2 += typeHash.get(propInstance);
}
int possibleValues = 0;
double sumClusterDiff = 0;
for(String propInstance : thisTypeHash.keySet()) {
double count1 = thisTypeHash.get(propInstance);
if(typeHash.containsKey(propInstance)) {
possibleValues++;
// calculate difference between counts
double count2 = typeHash.get(propInstance);
sumClusterDiff += Math.abs( count1/normalizationCount1 - count2/normalizationCount2);
} else {
possibleValues++;
//include values that 1st cluster has and 2nd cluster doesn't have
sumClusterDiff += count1/normalizationCount1;
}
}
//now include values that 2nd cluster has that 1st cluster doesn't have
for(String propInstance: typeHash.keySet()) {
if(!thisTypeHash.containsKey(propInstance)) {
possibleValues++;
double count2 = typeHash.get(propInstance);
sumClusterDiff += count2/normalizationCount2;
}
}
if(possibleValues == 0) {
throw new IllegalArgumentException("possibleValues can not be 0");
}
similarity += weights.get(attributeType) * (1 - sumClusterDiff/possibleValues);
}
return similarity;
}
public Map getWeights() {
return this.weights;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy