annis.visualizers.component.rst.RSTImpl Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of annis-visualizers Show documentation
There is a newer version: 4.0.0-beta.4
/*
 * Copyright 2012 Corpuslinguistic working group Humboldt University Berlin.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.visualizers.component.rst;

import annis.CommonHelper;
import annis.gui.components.CssRenderInfo;
import annis.gui.widgets.JITWrapper;
import annis.gui.widgets.gwt.client.ui.VJITWrapper;
import annis.libgui.MatchedNodeColors;
import annis.libgui.visualizers.VisualizerInput;
import static annis.model.AnnisConstants.ANNIS_NS;
import static annis.model.AnnisConstants.FEAT_RELANNIS_NODE;
import annis.model.Edge;
import annis.model.RelannisNodeFeature;
import com.vaadin.ui.Panel;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import java.util.UUID;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SStructure;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.core.GraphTraverseHandler;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.core.SGraph.GRAPH_TRAVERSE_TYPE;
import org.corpus_tools.salt.core.SNode;
import org.corpus_tools.salt.core.SProcessingAnnotation;
import org.corpus_tools.salt.core.SRelation;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.corpus_tools.salt.SALT_TYPE;
import org.eclipse.emf.common.util.EList;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The Visualizer Plugin for RST-Visualization.
 *
 * This Visualization transforms the salt graph to a json object, which is sent
 * to the {@link VJITWrapper}.
 *
 * A node with an incoming rst edge will be moved one level up, so that it
 * becomes a sibling of its original parent. This is done, because in typical
 * rst visualizations these nodes are drawn as siblings of their parent, so they
 * are in one horizontal line with their parent, but they are actually modeled
 * as children, which is confusing, when we want to render these nodes. The
 * json, which is generated, looks nearly like this:
 *
 * <pre>
 * {
 *  "id" : "root",
 *  "children" : [
 *  {
 *    "id" : "rst_0_1",
 *    "name" : "1",
 *    "edges" : [{"sType" : "rst", "from" : "rst_0_2", "to" : "rst_0_1"}]
 *  },
 *  {
 *    "id" : "rst_0_2",
 *    "name" : "2"
 *  }]
 * }
 * </pre>
 *
 * The example above shows the two nodes, which are connected by a rst edge.
 * They are on the same level in the json tree. The *natural* tree would have
 * looked like this:
 *
 * <pre>
 * {
 *  "id" : "root",
 *  "children" : [
 *  {
 *    "id" : "rst_0_1",
 *    "name" : "1",
 *    "edges" : [{"sType" : "rst", "from" : "rst_0_2", "to" : "rst_0_1"}],
 *    "children" : [{
 *      "id" : "rst_0_2",
 *      "name" : "2"
 *    }]
 *  }]
 * }
 * </pre>
 *
 * @author Benjamin Weißenfels 
 */
public class RSTImpl extends Panel implements GraphTraverseHandler {

  // implements the AbstractComponent and talks to the VJITWrapperWidget
  private final JITWrapper jit;

  // traversal stack for building the json tree: nodeReached() pushes the
  // entry of the current node, nodeLeft() pops it again
  private Stack<JSONObject> st = new Stack<>();

  // result of transform operation salt -> json
  private JSONObject result = new JSONObject();

  // filter root nodes with this annotation key
  private final String ANNOTATION_KEY = "cat";

  // sType for the rst relation
  private final String RST_RELATION = "rst";

  private final String RST_LAYER = "rst";

  /**
   * Create a unique id, for every RSTImpl instance, for building an unique html
   * id, in the DOM.
   */
  private final UUID uniqueID = UUID.randomUUID();

  // unique id for every instance of RSTImpl ("rst_" + uniqueID)
  private final String visId;

  // result graph
  private SDocumentGraph graph;

  // name of the SProcessingAnnotation which holds the sentence index; the same
  // string is used as namespace and as name
  static private final String SENTENCE_INDEX = "sentence_index";

  // json keys for the leftmost/rightmost sentence index covered by a node
  static private final String SENTENCE_LEFT = "sentence_left";

  static private final String SENTENCE_RIGHT = "sentence_right";

  // contains all nodes which are marked as matches and child nodes of matches;
  // the value is the match number which selects the highlighting color
  private final Map<SNode, Long> markedAndCovered;

  private Properties mappings;

  private String namespace;

  /**
   * Sorted set of all SStructures which overlap a sentence. It is used for
   * mapping the sentence to a number by the order of the SStructures in the
   * set.
   *
   * The order is defined by the left token index of the first directly
   * dominated token (or of the structure itself, if it dominates no token).
   * Two structures with the same start position compare as equal, so the set
   * keeps only the first one that was added.
   */
  private TreeSet<SStructure> sentences = new TreeSet<SStructure>(
          new Comparator<SStructure>() {
    /**
     * Returns the left token index of the first token which is a direct
     * target of an outgoing relation of {@code s}; falls back to the left
     * token index stored on {@code s} itself.
     */
    private int getStartPosition(SStructure s) {
      List<? extends SRelation<?, ?>> out = s.getGraph().getOutRelations(s.getId());

      if (out != null) {
        for (SRelation<?, ?> e : out) {
          if (e.getTarget() instanceof SToken) {
            return leftTokenOf(e.getTarget());
          }
        }
      }

      return leftTokenOf(s);
    }

    /**
     * Reads the left token index from the relANNIS node feature of a node.
     */
    private int leftTokenOf(SNode node) {
      RelannisNodeFeature feat =
        (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();

      return (int) feat.getLeftToken();
    }

    @Override
    public int compare(SStructure t1, SStructure t2) {
      return Integer.compare(getStartPosition(t1), getStartPosition(t2));
    }
  });

  // one shared logger for the class instead of one logger field per instance
  private static final Logger log = LoggerFactory.getLogger(RSTImpl.class);

  public RSTImpl(VisualizerInput visInput) {

    markedAndCovered = visInput.getMarkedAndCovered();

    mappings = visInput.getMappings();

    namespace = visInput.getNamespace();

    visId = "rst_" + uniqueID.toString();

    jit = new JITWrapper();
    jit.setWidth("100%");
    jit.setHeight("-1px");
    setContent(jit);

    // send the json to the widget
    jit.setVisData(transformSaltToJSON(visInput));
    jit.setProperties(visInput.getMappings());
    jit.requestRepaint();

    addScrollbar();

  }

  /**
   * Public entry point which forwards the given render info to the
   * {@code addExtension} implementation of the super class.
   *
   * @param renderInfo the extension to attach to this panel
   */
  public void addExtension(CssRenderInfo renderInfo) {
    super.addExtension(renderInfo);
  }

  /**
   * Gives the panel the full width and an undefined size to its content.
   */
  private void addScrollbar() {
    setWidth("100%");
    getContent().setSizeUndefined();
  }

  /**
   * Transforms the document graph of the result into the json string which is
   * sent to the widget.
   *
   * Only root nodes accepted by {@link CommonHelper#checkSLayer} for the
   * configured namespace are used as start nodes. The graph is traversed
   * twice: the first pass collects all segments and numbers them in sorted
   * order, the second pass (handled by this class) builds the json tree.
   *
   * @param visInput provides the result graph
   * @return the serialized {@link #result} object; it has no children when no
   * matching root node exists
   */
  private String transformSaltToJSON(VisualizerInput visInput) {
    graph = visInput.getSResult().getDocumentGraph();

    List<SNode> rstRoots = new ArrayList<>();
    List<SNode> rootSNodes = graph.getRoots();

    if (rootSNodes != null) {
      for (SNode sNode : rootSNodes) {
        if (CommonHelper.checkSLayer(namespace, sNode)) {
          rstRoots.add(sNode);
        }
      }
    }

    // Only rstRoots is traversed, so it (and not the unfiltered root list)
    // decides whether there is anything to do. Without start nodes a traversal
    // cannot reach any node, so it is skipped.
    if (rstRoots.isEmpty()) {
      log.debug("does not find an annotation which matched {}",
              ANNOTATION_KEY);
      return result.toString();
    }

    // collect all sentence and sort them.
    graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST,
            "getSentences", new GraphTraverseHandler() {
      @Override
      public void nodeReached(GRAPH_TRAVERSE_TYPE traversalType,
              String traversalId, SNode currNode, SRelation sRelation,
              SNode fromNode, long order) {
        if (currNode instanceof SStructure
                && isSegment(currNode)) {
          sentences.add((SStructure) currNode);
        }
      }

      @Override
      public void nodeLeft(GRAPH_TRAVERSE_TYPE traversalType,
              String traversalId, SNode currNode, SRelation edge,
              SNode fromNode,
              long order) {
        // nothing to do when leaving a node
      }

      @Override
      public boolean checkConstraint(GRAPH_TRAVERSE_TYPE traversalType,
              String traversalId, SRelation edge, SNode currNode, long order) {
        // token are not needed
        return !(currNode instanceof SToken);
      }
    });

    // decorate segments with sentence number, starting with 1
    int i = 1;
    for (SStructure sentence : sentences) {
      sentence.createProcessingAnnotation(
              SENTENCE_INDEX, SENTENCE_INDEX, Integer.toString(i));
      i++;
    }

    graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST,
            "jsonBuild", this);

    return result.toString();
  }

  private JSONObject createJsonEntry(SNode currNode) {
    JSONObject jsonData = new JSONObject();
    StringBuilder sb = new StringBuilder();
    // use a hash set so we don't get any duplicate entries
    LinkedHashSet token = new LinkedHashSet<>();
    List> edges;

    if (currNode instanceof SStructure) {

      edges = currNode.getGraph().getOutRelations(currNode.getId());

      // get all tokens directly dominated tokens and build a string
      for (SRelation sedge : edges) {
        if (sedge.getTarget() instanceof SToken) 
        {
          token.add((SToken) sedge.getTarget());
        }
      }

      // build strings
      Iterator tokIterator = token.iterator();
      while(tokIterator.hasNext())
      {
        SToken tok = tokIterator.next();
        String text = getText(tok);
        String color = getHTMLColor(tok);

        if (color != null) {
          sb.append("");
        } else {
          sb.append("");
        }

        if (tokIterator.hasNext()) {
          sb.append(text).append(" ");
        } else {
          sb.append(text);
        }

        sb.append("");
      }
    }

    try {
      // build unique id, cause is used for an unique html element id.
      jsonData.put("id", getUniStrId(currNode));
      jsonData.put("name", currNode.getName());

      /**
       * additional data oject for edge labels and rendering sentences
       */
      JSONObject data = new JSONObject();
      JSONArray edgesJSON = getOutGoingEdgeTypeAnnotation(currNode);


      // since we have found some tokens, it must be a sentence in RST.
      if (token.size() > 0) {
        data.put("sentence", sb.toString());
      }


      if (edgesJSON != null) {
        data.put("edges", edgesJSON);
      }

      if (currNode instanceof SStructure && isSegment(currNode)) {
        SProcessingAnnotation sentence_idx = currNode.
                getProcessingAnnotation(SENTENCE_INDEX + "::" + SENTENCE_INDEX);
        int index = sentence_idx == null ? -1 : Integer.parseInt(sentence_idx.
                getValue_STEXT());

        data.put(SENTENCE_LEFT, index);
        data.put(SENTENCE_RIGHT, index);
      }

      jsonData.put("data", data);
    } catch (JSONException ex) {
      log.error("problems create entry for {}", currNode, ex);
    }

    return jsonData;
  }

  /**
   * Appends the json entry of a node to the tree.
   *
   * If the node has an incoming rst relation, it is not appended to
   * {@code root} but one level higher: to the entry below the top of the
   * stack, or to {@link #result} if the stack holds only one entry. In every
   * case the sentence span of the entry on top of the stack (respectively
   * {@code root}) is widened by the span of the node.
   *
   * @param root the json entry of the parent node
   * @param node the json entry which is appended
   * @param currSnode the salt node which belongs to {@code node}
   * @return {@code node}
   */
  private JSONObject appendChild(JSONObject root, JSONObject node,
          SNode currSnode) {
    try {
      // is set to true, when currNode is reached by an rst edge
      boolean isAppendedToParent = false;

      List<? extends SRelation<?, ?>> in =
              currSnode.getGraph().getInRelations(currSnode.getId());

      if (in != null) {

        // the loop variable is left unparameterized as in the original,
        // because it is handed to hasRSTType()
        for (SRelation e : in) {
          if (hasRSTType(e)) {

            if (st.size() > 1) {
              // temporarily remove the parent to get access to the entry below
              JSONObject tmp = st.pop();
              getOrCreateArray(st.peek(), "children").put(node);
              sortChildren(st.peek());
              st.push(tmp);
            } else {
              getOrCreateArray(result, "children").put(node);
            }

            setSentenceSpan(node, st.peek());
            isAppendedToParent = true;
            break;
          }
        }
      }

      if (!isAppendedToParent) {
        getOrCreateArray(root, "children").put(node);
        setSentenceSpan(node, root);
        sortChildren(root);
      }
    } catch (JSONException ex) {
      log.error("cannot append {}", node, ex);
    }

    return node;
  }

  /**
   * Traversal callback of the "jsonBuild" pass: creates the json entry of the
   * reached node and pushes it onto the traversal stack.
   */
  @Override
  public void nodeReached(GRAPH_TRAVERSE_TYPE traversalType,
          String traversalId,
          SNode currNode, SRelation sRelation, SNode fromNode, long order) {
    
    st.push(createJsonEntry(currNode));

  }

  /**
   * Traversal callback: takes the finished entry of the node from the stack.
   * If it was the only entry on the stack, it becomes a child of
   * {@link #result}; otherwise it is appended via
   * {@link #appendChild(JSONObject, JSONObject, SNode)} with the new top of
   * the stack as parent.
   */
  @Override
  public void nodeLeft(GRAPH_TRAVERSE_TYPE traversalType, String traversalId,
          SNode currNode, SRelation edge, SNode fromNode, long order) {
    assert st.size() > 0;

    if (st.size() != 1) {
      // inner node: the entry below it on the stack is its parent
      JSONObject finished = st.pop();
      appendChild(st.peek(), finished, currNode);
      return;
    }

    // last entry on the stack: it is a root of the tree
    try {
      JSONArray topLevel = getOrCreateArray(result, "children");
      topLevel.put(st.pop());
      sortChildren(result);
    } catch (JSONException ex) {
      log.error("Problems with adding roots", ex);
    }
  }

  /**
   * Traversal callback: a node is only visited when it is not a token and
   * {@link CommonHelper#checkSLayer} accepts it for the configured namespace.
   */
  @Override
  public boolean checkConstraint(GRAPH_TRAVERSE_TYPE traversalType,
          String traversalId, SRelation incomingEdge, SNode currNode, long order) {
    // token data structures are not needed
    boolean isToken = currNode instanceof SToken;
    return !isToken && CommonHelper.checkSLayer(namespace, currNode);
  }
  
  /**
   * Returns the array which is stored under {@code key} in {@code parent}. If
   * there is none, a new empty array is stored under the key and returned.
   *
   * @throws JSONException if the lookup or the insertion fails
   */
  private JSONArray getOrCreateArray(JSONObject parent, String key) throws JSONException
  {
    if (parent.has(key))
    {
      JSONArray existing = parent.getJSONArray(key);
      if (existing != null)
      {
        return existing;
      }
    }

    JSONArray created = new JSONArray();
    parent.put(key, created);
    return created;
  }

  /**
   * Gets the text which is overlapped by a token.
   *
   * @param currNode the token
   * @return the covered substring of the text; is null, if the token does not
   * overlap exactly one data source sequence or if the data source is not a
   * {@link STextualDS}
   */
  private String getText(SToken currNode) {

    List<DataSourceSequence> sSequences = currNode.getGraph().
            getOverlappedDataSourceSequence(currNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);

    // only support one text for spanns
    if (sSequences == null || sSequences.size() != 1) {
      log.error("rst supports only one text and only text level");
      return null;
    }

    // the list is only rendered to a string when debug logging is enabled
    log.debug("sSequences {}", sSequences);

    DataSourceSequence sequence = sSequences.get(0);

    /**
     * Check if it is a text data structure. As described in the salt manual in
     * chapter "5.8 More specific nodes and relations" the start and end point
     * of a range of token is stored in superordinate node of type SSequentialDS
     */
    if (sequence.getDataSource() instanceof STextualDS) {
      STextualDS text = (STextualDS) sequence.getDataSource();
      int start = sequence.getStart().intValue();
      int end = sequence.getEnd().intValue();
      return text.getText().substring(start, end);
    }

    // something fundamentally goes wrong
    log.error("{} instead of {}",
            sequence.getDataSource().getClass().getName(),
            STextualDS.class.getName());

    return null;
  }

  /**
   * Builds the json description of all outgoing relations of a node which do
   * not target a token.
   *
   * Every entry holds the relation type ("sType", "edge" if the relation has
   * no type), the unique ids of both ends ("from" and "to") and, if the
   * relation is annotated, the annotation values ("annotation"). For
   * relations of the rst type the direction is inverted.
   *
   * @param node the source node of the relations
   * @return the edge descriptions; empty if the node has no outgoing relations
   * @throws JSONException if a relation has no target node or an entry could
   * not be created
   */
  private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws
          JSONException {
    List<? extends SRelation<?, ?>> out = node.getGraph().getOutRelations(node.getId());
    JSONArray edgeData = new JSONArray();

    // check if there is a pointing relation
    if (out == null) {
      return edgeData;
    }

    for (SRelation<?, ?> edge : out) {
      SNode target = edge.getTarget();

      if (target instanceof SToken) {
        continue;
      }

      String type = edge.getType();
      String sTypeAsString = (type == null || type.isEmpty()) ? "edge" : type;

      JSONObject jsonEdge = new JSONObject();
      edgeData.put(jsonEdge);

      jsonEdge.put("sType", sTypeAsString);

      if (target == null) {
        throw new JSONException("could not cast to SNode");
      }

      if (getRSTType().equals(sTypeAsString)) {
        // Invert the direction of the RST-edge.
        jsonEdge.put("to", getUniStrId(node));
        jsonEdge.put("from", getUniStrId(target));
      } else {
        jsonEdge.put("from", getUniStrId(node));
        jsonEdge.put("to", getUniStrId(target));
      }

      Set<SAnnotation> annos = edge.getAnnotations();

      if (annos != null) {
        for (SAnnotation anno : annos) {
          getOrCreateArray(jsonEdge, "annotation").put(anno.getValue_STEXT());
        }
      }
    }

    return edgeData;
  }

  /**
   * Build a unique HTML id: the id of this visualizer instance, an underscore
   * and the id of the node.
   *
   * @param node the node the id is built for
   * @return the concatenated id
   */
  private String getUniStrId(SNode node) {
    return visId + "_" + node.getId();
  }

  /**
   * Looks up the HTML color string of a token which is marked as match or is
   * covered by a match.
   *
   * @param token the token to look up
   * @return the color string; is null when token is not marked
   */
  private String getHTMLColor(SToken token) {

    if (markedAndCovered.containsKey(token)) {
      int mark = (int) (long) markedAndCovered.get(token);
      MatchedNodeColors[] palette = MatchedNodeColors.values();

      /**
       * Since the range in markedAndCovered is from 1 up to 8, we have to
       * decrease the value, for matching the colors in KWIC. Values beyond
       * the palette are mapped to its last color.
       */
      int shifted = mark > 0 ? mark - 1 : mark;
      int colorIdx = Math.min(shifted, palette.length - 1);

      return palette[colorIdx].getHTMLColor();
    }

    return null;
  }

  /**
   * Checks, if there exists an outgoing SRelation of the node which targets a
   * SToken.
   *
   * @param currNode the node to check
   * @return true, if at least one outgoing relation ends at a token
   */
  private boolean isSegment(SNode currNode) {

    List<? extends SRelation<?, ?>> edges =
            currNode.getGraph().getOutRelations(currNode.getId());

    if (edges == null) {
      return false;
    }

    for (SRelation<?, ?> edge : edges) {
      if (edge.getTarget() instanceof SToken) {
        return true;
      }
    }

    return false;
  }

  /**
   * Widens the sentence span of the parent, so that it includes the span of
   * the child: sentence_left of the parent becomes the minimum and
   * sentence_right the maximum of both values. A value which is missing at
   * the parent is taken over from the child. If the child has no span, the
   * parent is left untouched and the problem is logged on debug level.
   */
  private void setSentenceSpan(JSONObject cNode, JSONObject parent) {
    try {
      // both values of the child are read before the parent is modified
      JSONObject childData = cNode.getJSONObject("data");
      int childLeft = childData.getInt(SENTENCE_LEFT);
      int childRight = childData.getInt(SENTENCE_RIGHT);

      JSONObject parentData = parent.getJSONObject("data");

      int mergedLeft = parentData.has(SENTENCE_LEFT)
              ? Math.min(childLeft, parentData.getInt(SENTENCE_LEFT))
              : childLeft;
      parentData.put(SENTENCE_LEFT, mergedLeft);

      int mergedRight = parentData.has(SENTENCE_RIGHT)
              ? Math.max(childRight, parentData.getInt(SENTENCE_RIGHT))
              : childRight;
      parentData.put(SENTENCE_RIGHT, mergedRight);
    } catch (JSONException ex) {
      log.debug("error while setting left and right position for sentences", ex);
    }
  }

  /**
   * Sorts the children of root by the the sentence indizes. Since the sentence
   * indizes are based on the token indizes, some sentences have no sentences
   * indizes, because sometimes token nodes are out of context.
   *
   * A kind of insertion sort would be better than the used mergesort.
   *
   * And it is a pity that the {@link JSONArray} has no interface to sort the
   * underlying {@link Array}.
   *
   */
  private void sortChildren(JSONObject root) throws JSONException {
    JSONArray children = root.getJSONArray("children");
    List childrenSorted = new ArrayList(children.
            length());

    for (int i = 0; i < children.length(); i++) {
      childrenSorted.add(children.getJSONObject(i));
    }

    Collections.sort(childrenSorted, new Comparator