org.bigml.binding.LocalCentroid Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bigml-binding Show documentation
Show all versions of bigml-binding Show documentation
An open source Java client that gives you a simple binding to interact with BigML. You can use it to
easily create, retrieve, list, update, and delete BigML resources.
package org.bigml.binding;
import org.bigml.binding.utils.Utils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* Centroid structure for the BigML local Cluster
*
* This module defines an auxiliary Centroid predicate structure that is
* used in the cluster.
*/
public class LocalCentroid implements Serializable {
private static final long serialVersionUID = 1L;
protected static final String[] STATISTIC_MEASURES = {
"Minimum", "Mean", "Median", "Maximum",
"Standard deviation", "Sum", "Sum squares", "Variance" };
private JSONObject centroid;
private JSONObject center;
private int count;
private String centroidId;
private String name;
private JSONObject distance;
public LocalCentroid(JSONObject centroidData) {
centroid = centroidData;
center = (JSONObject) Utils.getJSONObject(centroidData, "center", new JSONObject());
count = ((Number) Utils.getJSONObject(centroidData, "count", new Integer(0))).intValue();
centroidId = (String) Utils.getJSONObject(centroidData, "id", null);
name = (String) Utils.getJSONObject(centroidData, "name", null);
distance = (JSONObject) Utils.getJSONObject(centroidData, "distance", new JSONObject());
}
public JSONObject getCentroid() {
return centroid;
}
public String getCentroidId() {
return centroidId;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
public JSONObject getCenter() {
return center;
}
public JSONObject getDistance() {
return distance;
}
/**
* Squared Distance from the given input data to the centroid
*/
public Double distance2(Map inputData, Map termSets,
JSONObject scales, Double stopDistance2) {
double distance2 = 0.0;
for (Object centerKey : center.keySet()) {
String fieldId = (String) centerKey;
Object value = center.get(fieldId);
if (value instanceof JSONArray) {
// We are talking about a TEXT field (list of terms)
List terms = termSets.containsKey(fieldId) ?
(ArrayList) termSets.get(fieldId.toString()) : new ArrayList();
distance2 += cosineDistance2(terms, (JSONArray) value,
((Number) scales.get(fieldId)).doubleValue());
} else if (value instanceof String) {
if (!inputData.containsKey(fieldId) || !value.equals(inputData.get(fieldId))) {
distance2 += 1 * Math.pow(((Number) scales.get(fieldId)).doubleValue(), 2);
}
} else {
// Delta Value = (InputData Value - Centroid Value) * Scale of the Field
// Delta Value ^ 2
distance2 += Math.pow((((Number) inputData.get(fieldId)).doubleValue() - ((Number) value).doubleValue())
* ((Number) scales.get(fieldId)).doubleValue(), 2);
}
if (stopDistance2 != null && distance2 >= stopDistance2) {
return null;
}
}
return distance2;
}
/**
* Print the statistics for the training data clustered around the
* centroid
*/
public StringBuilder printStatistics() {
StringBuilder text = new StringBuilder(
String.format("%s:\n", name));
for (String measureTitle : STATISTIC_MEASURES) {
String measure = measureTitle.toLowerCase().replace(' ', '_');
text.append(String.format("\t%s: %s\n", measureTitle,
distance.get(measure).toString()));
}
text.append("\n");
return text;
}
/**
* Returns the distance defined by cosine similarity
*/
protected double cosineDistance2(
List terms, JSONArray centroidTerms, double scale) {
// Centroid values for the field can be an empty list.
// Then the distance for an empty input is 1
// (before applying the scale factor).
if( terms == null) {
terms = new ArrayList();
}
if( centroidTerms == null) {
centroidTerms = new JSONArray();
}
if( centroidTerms.isEmpty() && terms.isEmpty() ) {
return 0;
}
if( centroidTerms.isEmpty() || terms.isEmpty() ) {
return Math.pow(scale, 2);
}
int inputCount = 0;
Iterator centroidTermsItr = centroidTerms.iterator();
while(centroidTermsItr.hasNext()) {
String centroidTerm = (String) centroidTermsItr.next();
if( terms.contains(centroidTerm) ) {
inputCount++;
}
}
double cosineSimilarity = inputCount /
Math.sqrt(terms.size() * centroidTerms.size());
double similarityDistance = scale * (1 - cosineSimilarity);
return Math.pow(similarityDistance, 2);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy