All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.atilika.kuromoji.viterbi.ViterbiFormatter Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/*-*
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.viterbi;

import com.atilika.kuromoji.dict.ConnectionCosts;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ViterbiFormatter {

    private final static String BOS_LABEL = "BOS";
    private final static String EOS_LABEL = "EOS";
    private final static String FONT_NAME = "Helvetica";

    private ConnectionCosts costs;
    private Map nodeMap;
    private Map bestPathMap;

    private boolean foundBOS;

    public ViterbiFormatter(ConnectionCosts costs) {
        this.costs = costs;
        this.nodeMap = new HashMap<>();
        this.bestPathMap = new HashMap<>();
    }

    public String format(ViterbiLattice lattice) {
        return format(lattice, null);
    }

    public String format(ViterbiLattice lattice, List bestPath) {

        initBestPathMap(bestPath);

        StringBuilder builder = new StringBuilder();
        builder.append(formatHeader());
        builder.append(formatNodes(lattice));
        builder.append(formatTrailer());
        return builder.toString();

    }

    private void initBestPathMap(List bestPath) {
        this.bestPathMap.clear();

        if (bestPath == null) {
            return;
        }
        for (int i = 0; i < bestPath.size() - 1; i++) {
            ViterbiNode from = bestPath.get(i);
            ViterbiNode to = bestPath.get(i + 1);

            String fromId = getNodeId(from);
            String toId = getNodeId(to);

            assert this.bestPathMap.containsKey(fromId) == false;
            assert this.bestPathMap.containsValue(toId) == false;
            this.bestPathMap.put(fromId, toId);
        }
    }

    private String formatNodes(ViterbiLattice lattice) {
        ViterbiNode[][] startsArray = lattice.getStartIndexArr();
        ViterbiNode[][] endsArray = lattice.getEndIndexArr();
        this.nodeMap.clear();
        this.foundBOS = false;

        StringBuilder builder = new StringBuilder();
        for (int i = 1; i < endsArray.length; i++) {
            if (endsArray[i] == null || startsArray[i] == null) {
                continue;
            }
            for (int j = 0; j < endsArray[i].length; j++) {
                ViterbiNode from = endsArray[i][j];
                if (from == null) {
                    continue;
                }
                builder.append(formatNodeIfNew(from));
                for (int k = 0; k < startsArray[i].length; k++) {
                    ViterbiNode to = startsArray[i][k];
                    if (to == null) {
                        break;
                    }
                    builder.append(formatNodeIfNew(to));
                    builder.append(formatEdge(from, to));
                }
            }
        }
        return builder.toString();
    }

    private String formatNodeIfNew(ViterbiNode node) {
        String nodeId = getNodeId(node);
        if (!this.nodeMap.containsKey(nodeId)) {
            this.nodeMap.put(nodeId, node);
            return formatNode(node);
        } else {
            return "";
        }
    }

    private String formatHeader() {
        StringBuilder builder = new StringBuilder();
        builder.append("digraph viterbi {\n");
        builder.append("graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\" ];\n");
        builder.append("# A2 paper size\n");
        builder.append("size = \"34.4,16.5\";\n");
        builder.append("# try to fill paper\n");
        builder.append("ratio = fill;\n");
        builder.append("edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
        builder.append("node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME
                        + "\" ]\n");

        return builder.toString();
    }

    private String formatTrailer() {
        return "}";
    }


    private String formatEdge(ViterbiNode from, ViterbiNode to) {
        if (this.bestPathMap.containsKey(getNodeId(from))
                        && this.bestPathMap.get(getNodeId(from)).equals(getNodeId(to))) {
            return formatEdge(from, to, "color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20 ");

        } else {
            return formatEdge(from, to, "");
        }
    }


    private String formatEdge(ViterbiNode from, ViterbiNode to, String attributes) {
        StringBuilder builder = new StringBuilder();
        builder.append(getNodeId(from));
        builder.append(" -> ");
        builder.append(getNodeId(to));
        builder.append(" [ ");
        builder.append("label=\"");
        builder.append(getCost(from, to));
        builder.append("\"");
        builder.append(" ");
        builder.append(attributes);
        builder.append(" ");
        builder.append(" ]");
        builder.append("\n");
        return builder.toString();
    }

    private String formatNode(ViterbiNode node) {
        StringBuilder builder = new StringBuilder();
        builder.append("\"");
        builder.append(getNodeId(node));
        builder.append("\"");
        builder.append(" [ ");
        builder.append("label=");
        builder.append(formatNodeLabel(node));
        if (node.getType() == ViterbiNode.Type.USER) {
            builder.append(" fillcolor=\"#e8f8e8\"");
        } else if (node.getType() == ViterbiNode.Type.UNKNOWN) {
            builder.append(" fillcolor=\"#f8e8f8\"");
        } else if (node.getType() == ViterbiNode.Type.INSERTED) {
            builder.append(" fillcolor=\"#ffe8e8\"");
        }
        builder.append(" ]");
        return builder.toString();
    }

    private String formatNodeLabel(ViterbiNode node) {
        StringBuilder builder = new StringBuilder();
        builder.append("<");
        builder.append("");
        builder.append("");
        builder.append("
"); builder.append(getNodeLabel(node)); builder.append("
"); builder.append(""); builder.append(node.getWordCost()); builder.append(""); builder.append("
>"); return builder.toString(); } private String getNodeId(ViterbiNode node) { return String.valueOf(node.hashCode()); } private String getNodeLabel(ViterbiNode node) { if (node.getType() == ViterbiNode.Type.KNOWN && node.getWordId() == 0) { if (this.foundBOS) { return EOS_LABEL; } else { this.foundBOS = true; return BOS_LABEL; } } else { return node.getSurface(); } } private int getCost(ViterbiNode from, ViterbiNode to) { return this.costs.get(from.getLeftId(), to.getRightId()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy