org.apache.mahout.utils.clustering.GraphMLClusterWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-integration Show documentation
Optional components of Mahout which generally support interaction with third party systems,
formats, APIs, etc.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.utils.clustering;
import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.StringUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
/**
* GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
*/
public class GraphMLClusterWriter extends AbstractClusterWriter {
private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
private final Map colors = new HashMap();
private Color lastClusterColor;
private float lastX;
private float lastY;
private Random random;
private int posStep;
private final String[] dictionary;
private final int numTopFeatures;
private final int subString;
public GraphMLClusterWriter(Writer writer, Map> clusterIdToPoints,
DistanceMeasure measure, int numTopFeatures, String[] dictionary, int subString)
throws IOException {
super(writer, clusterIdToPoints, measure);
this.dictionary = dictionary;
this.numTopFeatures = numTopFeatures;
this.subString = subString;
init(writer);
}
private void init(Writer writer) throws IOException {
writer.append("");
writer.append("");
//support rgb
writer.append("\n"
+ "\n"
+ ""
+ ""
+ ""
+ ""
+ "");
writer.append("");
lastClusterColor = new Color();
posStep = (int) (0.1 * clusterIdToPoints.size()) + 100;
random = RandomUtils.getRandom();
}
/*
*/
@Override
public void write(ClusterWritable clusterWritable) throws IOException {
StringBuilder line = new StringBuilder();
Cluster cluster = clusterWritable.getValue();
Color rgb = getColor(cluster.getId());
String topTerms = "";
if (dictionary != null) {
topTerms = getTopTerms(cluster.getCenter(), dictionary, numTopFeatures);
}
String clusterLabel = String.valueOf(cluster.getId()) + '_' + topTerms;
//do some positioning so that items are visible and grouped together
//TODO: put in a real layout algorithm
float x = lastX + 1000;
float y = lastY;
if (x > (1000 + posStep)) {
y = lastY + 1000;
x = 0;
}
line.append(createNode(clusterLabel, rgb, x, y));
List points = clusterIdToPoints.get(cluster.getId());
if (points != null) {
for (WeightedVectorWritable point : points) {
Vector theVec = point.getVector();
double distance = 1;
if (measure != null) {
//scale the distance
distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500;
}
String vecStr;
int angle = random.nextInt(360); //pick an angle at random and then scale along that angle
double angleRads = Math.toRadians(angle);
float targetX = x + (float) (distance * Math.cos(angleRads));
float targetY = y + (float) (distance * Math.sin(angleRads));
if (theVec instanceof NamedVector) {
vecStr = ((NamedVector) theVec).getName();
} else {
vecStr = theVec.asFormatString();
//do some basic manipulations for display
vecStr = VEC_PATTERN.matcher(vecStr).replaceAll("_");
}
if (subString > 0 && vecStr.length() > subString) {
vecStr = vecStr.substring(0, subString);
}
line.append(createNode(vecStr, rgb, targetX, targetY));
line.append(createEdge(clusterLabel, vecStr, distance));
}
}
lastClusterColor = rgb;
lastX = x;
lastY = y;
getWriter().append(line).append("\n");
}
private Color getColor(int clusterId) {
Color result = colors.get(clusterId);
if (result == null) {
result = new Color();
//there is probably some better way to color a graph
int incR = 0;
int incG = 0;
int incB = 0;
if (lastClusterColor.r + 20 < 256 && lastClusterColor.g + 20 < 256 && lastClusterColor.b + 20 < 256) {
incR = 20;
incG = 0;
incB = 0;
} else if (lastClusterColor.r + 20 >= 256 && lastClusterColor.g + 20 < 256 && lastClusterColor.b + 20 < 256) {
incG = 20;
incB = 0;
} else if (lastClusterColor.r + 20 >= 256 && lastClusterColor.g + 20 >= 256 && lastClusterColor.b + 20 < 256) {
incB = 20;
} else {
incR += 3;
incG += 3;
incR += 3;
}
result.r = (lastClusterColor.r + incR) % 256;
result.g = (lastClusterColor.g + incG) % 256;
result.b = (lastClusterColor.b + incB) % 256;
colors.put(clusterId, result);
}
return result;
}
private static String createEdge(String left, String right, double distance) {
left = StringUtils.escapeXML(left);
right = StringUtils.escapeXML(right);
return ""
+ "" + distance + " ";
}
private static String createNode(String s, Color rgb, float x, float y) {
return "" + rgb.r
+ ""
+ "" + rgb.g
+ ""
+ "" + rgb.b
+ ""
+ "" + x
+ ""
+ "" + y
+ ""
+ " ";
}
@Override
public void close() throws IOException {
getWriter().append(" ").append(" ");
super.close();
}
private static class Color {
int r;
int g;
int b;
}
}