All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.analysis.ja.GraphvizFormatter Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ja;


import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.ja.JapaneseTokenizer.Position;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.WrappedPositionArray;
import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.analysis.ja.dict.Dictionary;


// TODO: would be nice to show 2nd best path in a diff't
// color...

/**
 * Outputs the dot (graphviz) string for the viterbi lattice.
 */
public class GraphvizFormatter {
  
  private final static String BOS_LABEL = "BOS";
  
  private final static String EOS_LABEL = "EOS";
  
  private final static String FONT_NAME = "Helvetica";
  
  private final ConnectionCosts costs;
  
  private final Map bestPathMap;
  
  private final StringBuilder sb = new StringBuilder();
  
  public GraphvizFormatter(ConnectionCosts costs) {
    this.costs = costs;
    this.bestPathMap = new HashMap<>();
    sb.append(formatHeader());
    sb.append("  init [style=invis]\n");
    sb.append("  init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
  }

  public String finish() {
    sb.append(formatTrailer());
    return sb.toString();
  }

  // Backtraces another incremental fragment:
  void onBacktrace(JapaneseTokenizer tok, WrappedPositionArray positions, int lastBackTracePos, Position endPosData, int fromIDX, char[] fragment, boolean isEnd) {
    setBestPathMap(positions, lastBackTracePos, endPosData, fromIDX);
    sb.append(formatNodes(tok, positions, lastBackTracePos, endPosData, fragment));
    if (isEnd) {
      sb.append("  fini [style=invis]\n");
      sb.append("  ");
      sb.append(getNodeID(endPosData.pos, fromIDX));
      sb.append(" -> fini [label=\"" + EOS_LABEL + "\"]");
    }
  }

  // Records which arcs make up the best bath:
  private void setBestPathMap(WrappedPositionArray positions, int startPos, Position endPosData, int fromIDX) {
    bestPathMap.clear();

    int pos = endPosData.pos;
    int bestIDX = fromIDX;
    while (pos > startPos) {
      final Position posData = positions.get(pos);

      final int backPos = posData.backPos[bestIDX];
      final int backIDX = posData.backIndex[bestIDX];

      final String toNodeID = getNodeID(pos, bestIDX);
      final String fromNodeID = getNodeID(backPos, backIDX);
      
      assert !bestPathMap.containsKey(fromNodeID);
      assert !bestPathMap.containsValue(toNodeID);
      bestPathMap.put(fromNodeID, toNodeID);
      pos = backPos;
      bestIDX = backIDX;
    }
  }
  
  private String formatNodes(JapaneseTokenizer tok, WrappedPositionArray positions, int startPos, Position endPosData, char[] fragment) {

    StringBuilder sb = new StringBuilder();
    // Output nodes
    for (int pos = startPos+1; pos <= endPosData.pos; pos++) {
      final Position posData = positions.get(pos);
      for(int idx=0;idx startPos; pos--) {
      final Position posData = positions.get(pos);
      for(int idx=0;idx ");
        sb.append(toNodeID);

        final String attrs;
        if (toNodeID.equals(bestPathMap.get(fromNodeID))) {
          // This arc is on best path
          attrs = " color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20";
        } else {
          attrs = "";
        }

        final Dictionary dict = tok.getDict(posData.backType[idx]);
        final int wordCost = dict.getWordCost(posData.backID[idx]);
        final int bgCost = costs.get(backPosData.lastRightID[posData.backIndex[idx]],
                                     dict.getLeftId(posData.backID[idx]));

        final String surfaceForm = new String(fragment,
                                              posData.backPos[idx] - startPos,
                                              pos - posData.backPos[idx]);
        
        sb.append(" [label=\"");
        sb.append(surfaceForm);
        sb.append(' ');
        sb.append(wordCost);
        if (bgCost >= 0) {
          sb.append('+');
        }
        sb.append(bgCost);
        sb.append("\"");
        sb.append(attrs);
        sb.append("]\n");
      }
    }
    return sb.toString();
  }
  
  private String formatHeader() {
    StringBuilder sb = new StringBuilder();
    sb.append("digraph viterbi {\n");
    sb.append("  graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n");
    //sb.append("  // A2 paper size\n");
    //sb.append("  size = \"34.4,16.5\";\n");
    //sb.append("  // try to fill paper\n");
    //sb.append("  ratio = fill;\n");
    sb.append("  edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
    sb.append("  node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n");
    
    return sb.toString();
  }
  
  private String formatTrailer() {
    return "}";
  }
  
  private String getNodeID(int pos, int idx) {
    return pos + "." + idx;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy