All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.mahout.utils.clustering.GraphMLClusterWriter Maven / Gradle / Ivy

Go to download

Optional components of Mahout which generally support interaction with third party systems, formats, APIs, etc.

There is a newer version: 0.13.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.utils.clustering;

import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;

import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.StringUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;

/**
 * GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
 */
public class GraphMLClusterWriter extends AbstractClusterWriter {

  private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
  private final Map colors = new HashMap();
  private Color lastClusterColor;
  private float lastX;
  private float lastY;
  private Random random;
  private int posStep;
  private final String[] dictionary;
  private final int numTopFeatures;
  private final int subString;

  public GraphMLClusterWriter(Writer writer, Map> clusterIdToPoints,
                              DistanceMeasure measure, int numTopFeatures, String[] dictionary, int subString)
    throws IOException {
    super(writer, clusterIdToPoints, measure);
    this.dictionary = dictionary;
    this.numTopFeatures = numTopFeatures;
    this.subString = subString;
    init(writer);
  }

  private void init(Writer writer) throws IOException {
    writer.append("");
    writer.append("");
    //support rgb
    writer.append("\n"
                + "\n"
                + ""
                + ""
                + ""
                + ""
                + "");
    writer.append("");
    lastClusterColor = new Color();
    posStep = (int) (0.1 * clusterIdToPoints.size()) + 100;
    random = RandomUtils.getRandom();
  }

  /*
    
    
    
    
    
    
    
    
   */

  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    StringBuilder line = new StringBuilder();
    Cluster cluster = clusterWritable.getValue();
    Color rgb = getColor(cluster.getId());

    String topTerms = "";
    if (dictionary != null) {
      topTerms = getTopTerms(cluster.getCenter(), dictionary, numTopFeatures);
    }
    String clusterLabel = String.valueOf(cluster.getId()) + '_' + topTerms;
    //do some positioning so that items are visible and grouped together
    //TODO: put in a real layout algorithm
    float x = lastX + 1000;
    float y = lastY;
    if (x > (1000 + posStep)) {
      y = lastY + 1000;
      x = 0;
    }

    line.append(createNode(clusterLabel, rgb, x, y));
    List points = clusterIdToPoints.get(cluster.getId());
    if (points != null) {
      for (WeightedVectorWritable point : points) {
        Vector theVec = point.getVector();
        double distance = 1;
        if (measure != null) {
          //scale the distance
          distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500;
        }
        String vecStr;
        int angle = random.nextInt(360); //pick an angle at random and then scale along that angle
        double angleRads = Math.toRadians(angle);

        float targetX = x + (float) (distance * Math.cos(angleRads));
        float targetY = y + (float) (distance * Math.sin(angleRads));
        if (theVec instanceof NamedVector) {
          vecStr = ((NamedVector) theVec).getName();
        } else {
          vecStr = theVec.asFormatString();
          //do some basic manipulations for display
          vecStr = VEC_PATTERN.matcher(vecStr).replaceAll("_");
        }
        if (subString > 0 && vecStr.length() > subString) {
          vecStr = vecStr.substring(0, subString);
        }
        line.append(createNode(vecStr, rgb, targetX, targetY));
        line.append(createEdge(clusterLabel, vecStr, distance));
      }
    }
    lastClusterColor = rgb;
    lastX = x;
    lastY = y;
    getWriter().append(line).append("\n");
  }

  private Color getColor(int clusterId) {
    Color result = colors.get(clusterId);
    if (result == null) {
      result = new Color();
      //there is probably some better way to color a graph
      int incR = 0;
      int incG = 0;
      int incB = 0;
      if (lastClusterColor.r + 20 < 256 && lastClusterColor.g + 20 < 256 && lastClusterColor.b + 20 < 256) {
        incR = 20;
        incG = 0;
        incB = 0;
      } else if (lastClusterColor.r + 20 >= 256 && lastClusterColor.g + 20 < 256 && lastClusterColor.b + 20 < 256) {
        incG = 20;
        incB = 0;
      } else if (lastClusterColor.r + 20 >= 256 && lastClusterColor.g + 20 >= 256 && lastClusterColor.b + 20 < 256) {
        incB = 20;
      } else {
        incR += 3;
        incG += 3;
        incR += 3;
      }
      result.r = (lastClusterColor.r + incR) % 256;
      result.g = (lastClusterColor.g + incG) % 256;
      result.b = (lastClusterColor.b + incB) % 256;
      colors.put(clusterId, result);
    }
    return result;
  }

  private static String createEdge(String left, String right, double distance) {
    left = StringUtils.escapeXML(left);
    right = StringUtils.escapeXML(right);
    return "" 
            + "" + distance + "";
  }

  private static String createNode(String s, Color rgb, float x, float y) {
    return "" + rgb.r 
            + ""
            + "" + rgb.g
            + ""
            + "" + rgb.b
            + ""
            + "" + x
            + ""
            + "" + y
            + ""
            + "";
  }

  @Override
  public void close() throws IOException {
    getWriter().append("").append("");
    super.close();
  }

  private static class Color {
    int r;
    int g;
    int b;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy