edu.stanford.nlp.trees.TreePrint Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
The newest version!
package edu.stanford.nlp.trees;

import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.trees.international.pennchinese.ChineseEnglishWordMap;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.XMLUtils;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.*;
import java.util.*;
import java.util.function.Function;
import java.util.function.Predicate;


/**
 * A class for customizing the print method(s) for a
 * {@code edu.stanford.nlp.trees.Tree} as the output of the
 * parser.  This class supports printing in multiple ways and altering
 * behavior via properties specified at construction.
 *
 * @author Roger Levy
 * @author Christopher Manning
 * @author Galen Andrew
 */
public class TreePrint  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(TreePrint.class);

  // TODO: Add support for makeCopulaHead as an outputFormatOption here.

  public static final String rootLabelOnlyFormat = "rootSymbolOnly";
  public static final String headMark = "=H";

  /** The legal output tree formats. */
  public static final String[] outputTreeFormats = {
    "penn",
    "oneline",
    rootLabelOnlyFormat,
    "words",
    "wordsAndTags",
    "dependencies",
    "typedDependencies",
    "typedDependenciesCollapsed",
    "latexTree",
    "xmlTree",
    "collocations",
    "semanticGraph",
    "conllStyleDependencies",
    "conll2007"
  };

  private final Properties formats;
  private final Properties options;

  private final boolean markHeadNodes; // = false;
  private final boolean lexicalize; // = false;
  private final boolean removeEmpty;
  private final boolean ptb2text;
  private final boolean transChinese; // = false;
  private final boolean basicDependencies;
  private final boolean collapsedDependencies;
  private final boolean nonCollapsedDependencies;
  private final boolean nonCollapsedDependenciesSeparated;
  private final boolean CCPropagatedDependencies;
  private final boolean treeDependencies;

  private final boolean includeTags;

  private final HeadFinder hf;
  private final TreebankLanguagePack tlp;
  private final WordStemmer stemmer;
  private final Predicate> dependencyFilter;
  private final Predicate> dependencyWordFilter;
  private final GrammaticalStructureFactory gsf;

  /** Pool use of one WordNetConnection.  I don't really know if
   *  Dan Bikel's WordNet code is thread safe, but it definitely doesn't
   *  close its files, and too much of our code makes TreePrint objects and
   *  then drops them on the floor, and so we run out of file handles.
   *  That is, if this variable isn't static, code crashes.
   *  Maybe we should change this code to use jwnl(x)?
   *  CDM July 2006.
   */
  private static WordNetConnection wnc;


  /** This PrintWriter is used iff the user doesn't pass one in to a
   *  call to printTree().  It prints to System.out.
   */
  private final PrintWriter pw = new PrintWriter(System.out, true);


  /** Construct a new TreePrint that will print the given formats.
   *  Warning! This is the anglocentric constructor: it passes a
   *  {@code PennTreebankLanguagePack} and an empty options string, so
   *  it will work correctly only for English.
   *
   *  @param formats The formats to print the tree in.
   */
  public TreePrint(String formats) {
    this(formats, "", new PennTreebankLanguagePack());
  }

  /** Make a TreePrint instance with no options specified
   *  (the options string passed on is empty).
   *
   *  @param formats The formats to print the tree in.
   *  @param tlp The TreebankLanguagePack to use
   */
  public TreePrint(String formats, TreebankLanguagePack tlp) {
    this(formats, "", tlp);
  }

  /** Make a TreePrint instance. This one uses the default tlp headFinder
   *  ({@code tlp.headFinder()}) and {@code tlp.typedDependencyHeadFinder()}.
   *
   *  @param formats The formats to print the tree in.
   *  @param options Options that additionally specify how trees are printed
   *  @param tlp The TreebankLanguagePack that supplies both head finders
   */
  public TreePrint(String formats, String options, TreebankLanguagePack tlp) {
    this(formats, options, tlp, tlp.headFinder(), tlp.typedDependencyHeadFinder());
  }

  /**
   * Make a TreePrint instance.
   *
   * @param formatString A comma separated list of ways to print each Tree.
   *                For instance, "penn" or "words,typedDependencies".
   *                Known formats are: oneline, penn, latexTree, xmlTree, words,
   *                wordsAndTags, rootSymbolOnly, dependencies,
   *                typedDependencies, typedDependenciesCollapsed,
   *                collocations, semanticGraph, conllStyleDependencies,
   *                conll2007.  The last two are both tab-separated values
   *                formats.  The latter has a lot more columns filled with
   *                underscores. All of them print a blank line after
   *                the output except for oneline.  oneline is also not
   *                meaningful in XML output (it is ignored: use penn instead).
   *                (Use of typedDependenciesCollapsed is deprecated.  It
   *                works but we recommend instead selecting a type of
   *                dependencies using the optionsString argument.  Note in
   *                particular that typedDependenciesCollapsed does not do
   *                CC propagation, which we generally recommend.)
   * @param optionsString Options that additionally specify how trees are to
   *                be printed (for instance, whether stemming should be done).
   *                Known options are: {@code stem, lexicalize, markHeadNodes,
   *                xml, removeTopBracket, transChinese, noempty, ptb2text,
   *                includePunctuationDependencies, basicDependencies, treeDependencies,
   *                CCPropagatedDependencies, collapsedDependencies, nonCollapsedDependencies,
   *                nonCollapsedDependenciesSeparated, includeTags}.
   *                {@code ptb2text} implies {@code noempty}.  If none of the
   *                dependency-type options is given, CCPropagatedDependencies
   *                is used.
   * @param tlp     The TreebankLanguagePack used to do things like delete
   *                or ignore punctuation in output
   * @param hf      The HeadFinder used in printing output
   * @param typedDependencyHF The HeadFinder passed to the tlp's
   *                GrammaticalStructureFactory; only used when a typed
   *                dependency format (or conll2007 with a tlp that supports
   *                grammatical structures) is requested
   * @throws RuntimeException if {@code formatString} names a format that is
   *                not in {@link #outputTreeFormats}
   */
  public TreePrint(String formatString, String optionsString, TreebankLanguagePack tlp, HeadFinder hf, HeadFinder typedDependencyHF) {
    formats = StringUtils.stringToProperties(formatString);
    options = StringUtils.stringToProperties(optionsString);
    List<String> okOutputs = Arrays.asList(outputTreeFormats);
    for (Object formObj : formats.keySet()) {
      String format = (String) formObj;
      if ( ! okOutputs.contains(format)) {
        throw new RuntimeException("Error: output tree format " + format + " not supported. Known formats are: " + okOutputs);
      }
    }

    this.hf = hf;
    this.tlp = tlp;
    boolean includePunctuationDependencies = propertyToBoolean(this.options,
                                                               "includePunctuationDependencies");

    boolean generateOriginalDependencies = tlp.generateOriginalDependencies();

    Predicate<String> puncFilter;
    if (includePunctuationDependencies) {
      dependencyFilter = Filters.acceptFilter();
      dependencyWordFilter = Filters.acceptFilter();
      puncFilter = Filters.acceptFilter();
    } else {
      dependencyFilter = new Dependencies.DependentPuncTagRejectFilter<>(tlp.punctuationTagRejectFilter());
      dependencyWordFilter = new Dependencies.DependentPuncWordRejectFilter<>(tlp.punctuationWordRejectFilter());
      // Universal dependencies filter punctuation by tags
      puncFilter = generateOriginalDependencies ? tlp.punctuationWordRejectFilter() : tlp.punctuationTagRejectFilter();
    }

    stemmer = propertyToBoolean(this.options, "stem") ? new WordStemmer() : null;

    // A GrammaticalStructureFactory is only built when some requested format needs it.
    boolean needsGsf = formats.containsKey("typedDependenciesCollapsed") ||
        formats.containsKey("typedDependencies") ||
        (formats.containsKey("conll2007") && tlp.supportsGrammaticalStructures());
    gsf = needsGsf ? tlp.grammaticalStructureFactory(puncFilter, typedDependencyHF) : null;

    lexicalize = propertyToBoolean(this.options, "lexicalize");
    markHeadNodes = propertyToBoolean(this.options, "markHeadNodes");
    transChinese = propertyToBoolean(this.options, "transChinese");
    ptb2text = propertyToBoolean(this.options, "ptb2text");
    removeEmpty = propertyToBoolean(this.options, "noempty") || ptb2text;

    basicDependencies =  propertyToBoolean(this.options, "basicDependencies");
    collapsedDependencies = propertyToBoolean(this.options, "collapsedDependencies");
    nonCollapsedDependencies = propertyToBoolean(this.options, "nonCollapsedDependencies");
    nonCollapsedDependenciesSeparated = propertyToBoolean(this.options, "nonCollapsedDependenciesSeparated");
    treeDependencies = propertyToBoolean(this.options, "treeDependencies");

    includeTags = propertyToBoolean(this.options, "includeTags");

    // if no option format for the dependencies is specified, CCPropagated is the default
    if ( ! basicDependencies && ! collapsedDependencies && ! nonCollapsedDependencies && ! nonCollapsedDependenciesSeparated && ! treeDependencies) {
      CCPropagatedDependencies = true;
    } else {
      CCPropagatedDependencies = propertyToBoolean(this.options, "CCPropagatedDependencies");
    }
  }


  /**
   * Reads a property as a boolean flag.
   *
   * @param prop The properties to look in
   * @param key The name of the property
   * @return true iff the property is present and its value equals "true",
   *         ignoring case; false if it is absent or has any other value
   */
  private static boolean propertyToBoolean(Properties prop, String key) {
    final String value = prop.getProperty(key);
    // Same contract as Boolean.parseBoolean: null and non-"true" values are false.
    return "true".equalsIgnoreCase(value);
  }

  /**
   * Prints the tree to the default PrintWriter (the instance's own
   * writer, which prints to System.out), with an empty ID.
   *
   * @param t The tree to display
   */
  public void printTree(Tree t) {
    printTree(t, pw);
  }

  /**
   * Prints the tree, with an empty ID (the empty string is passed as the
   * sentence name).
   *
   * @param t The tree to display
   * @param pw The PrintWriter to print it to
   */
  public void printTree(final Tree t, PrintWriter pw) {
    printTree(t, "", pw);
  }


  /**
   * Prints the tree according to the options specified for this instance.
   * If the tree {@code t} is {@code null}, then the code prints
   * a line indicating a skipped tree.  Under the XML option this is
   * an {@code s} element with the {@code skipped} attribute having
   * value {@code true}, and, otherwise, it is the token
   * {@code SENTENCE_SKIPPED_OR_UNPARSABLE}.
   * Under the XML option a non-null tree is wrapped in an {@code s}
   * element.  In both XML cases the element carries an {@code id}
   * attribute (XML-escaped) when {@code id} is neither null nor empty.
   *
   * @param t The tree to display
   * @param id A name for this sentence
   * @param pw Where to display the tree
   */
  public void printTree(final Tree t, final String id, final PrintWriter pw) {
    final boolean inXml = propertyToBoolean(options, "xml");
    final boolean hasId = id != null && ! id.isEmpty();
    if (t == null) {
      // Parsing didn't succeed.
      if (inXml) {
        pw.print("<s");
        if (hasId) {
          pw.print(" id=\"" + XMLUtils.escapeXML(id) + '\"');
        }
        pw.println(" skipped=\"true\"/>");
        pw.println();
      } else {
        pw.println("SENTENCE_SKIPPED_OR_UNPARSABLE");
      }
    } else {
      if (inXml) {
        pw.print("<s");
        if (hasId) {
          pw.print(" id=\"" + XMLUtils.escapeXML(id) + '\"');
        }
        pw.println(">");
      }
      printTreeInternal(t, pw, inXml);
      if (inXml) {
        pw.println("</s>");
        pw.println();
      }
    }
  }


  /**
   * Prints the trees according to the options specified for this instance.
   * If the tree {@code t} is {@code null}, then the code prints
   * a line indicating a skipped tree.  Under the XML option this is
   * an {@code s} element with the {@code skipped} attribute having
   * value {@code true}, and, otherwise, it is the token
   * {@code SENTENCE_SKIPPED_OR_UNPARSABLE}.
   *
   * @param trees The list of trees to display
   * @param id A name for this sentence
   * @param pw Where to dislay the tree
   */
  public void printTrees(final List> trees, final String id, final PrintWriter pw) {
    final boolean inXml = propertyToBoolean(options, "xml");
    int ii = 0;  // incremented before used, so first tree is numbered 1
    for (ScoredObject tp : trees) {
      ii++;
      Tree t = tp.object();
      double score = tp.score();

      if (t == null) {
        // Parsing didn't succeed.
        if (inXml) {
          pw.print("");
          pw.println();
        } else {
          pw.println("SENTENCE_SKIPPED_OR_UNPARSABLE Parse #" + ii + " with score " + score);
        }
      } else {
        if (inXml) {
          pw.print("");
        } else {
          pw.print("# Parse ");
          pw.print(ii);
          pw.print(" with score ");
          pw.println(score);
        }
        printTreeInternal(t, pw, inXml);
        if (inXml) {
          pw.println("");
          pw.println();
        }
      }
    }
  }


  /** Print the internal part of a tree having already identified it.
   *  The ID and outer XML element is printed wrapping this method, but none
   *  of the internal content.
   *  <p>
   *  The tree is first transformed according to the options (empty-node
   *  pruning, top bracket removal, stemming, lexicalization, collocation
   *  collapsing, head marking, Chinese translation) and then printed once
   *  for every requested format.  The writer is flushed at the end.
   *
   * @param t The tree to print. Now known to be non-null
   * @param pw Where to print it to
   * @param inXml Whether to use XML style printing
   */
  private void printTreeInternal(final Tree t, final PrintWriter pw, final boolean inXml) {
    Tree outputTree = t;

    if (formats.containsKey("conll2007") || removeEmpty) {
      outputTree = outputTree.prune(new BobChrisTreeNormalizer.EmptyFilter());
    }

    // "words" is printed before removeTopBracket/stemming/lexicalization are applied.
    if (formats.containsKey("words")) {
      if (inXml) {
        // NOTE(review): the wrapper literals below print only whitespace and
        // the counter i is never read -- element markup appears to be missing
        // from these strings; confirm against the upstream source.
        ArrayList<Label> sentUnstemmed = outputTree.yield();
        pw.println("  ");
        int i = 1;
        for (Label w : sentUnstemmed) {
          pw.println("    " + XMLUtils.escapeXML(w.value()) + "");
          i++;
        }
        pw.println("  ");
      } else {
        String sent = SentenceUtils.listToString(outputTree.yield(), false);
        if(ptb2text) {
          pw.println(PTBTokenizer.ptb2Text(sent));
        } else {
          pw.println(sent);
          pw.println();
        }
      }
    }

    if (propertyToBoolean(options, "removeTopBracket")) {
      String s = outputTree.label().value();
      if (tlp.isStartSymbol(s)) {
        if (outputTree.isUnaryRewrite()) {
          outputTree = outputTree.firstChild();
        } else {
          // It's not quite clear what to do if the tree isn't unary at the top
          // but we then don't strip the ROOT symbol, since that seems closer
          // than losing part of the tree altogether....
          log.info("TreePrint: can't remove top bracket: not unary");
        }
      }
      // Note that TreePrint is also called on dependency trees that have
      // a word as the root node, and so we don't error if there isn't
      // the root symbol at the top; rather we silently assume that this
      // is a dependency tree!!
    }
    if (stemmer != null) {
      stemmer.visitTree(outputTree);
    }
    if (lexicalize) {
      outputTree = Trees.lexicalize(outputTree, hf);
      Function<Tree, Tree> a =
        TreeFunctions.getLabeledToDescriptiveCoreLabelTreeFunction();
      outputTree = a.apply(outputTree);
    }

    if (formats.containsKey("collocations")) {
      outputTree = getCollocationProcessedTree(outputTree, hf);
    }

    if (!lexicalize) { // delexicalize the output tree
      Function<Tree, Tree> a =
        TreeFunctions.getLabeledTreeToStringLabeledTreeFunction();
      outputTree = a.apply(outputTree);
    }

    Tree outputPSTree = outputTree;  // variant with head-marking, translations

    if (markHeadNodes) {
      outputPSTree = markHeadNodes(outputPSTree);
    }

    if (transChinese) {
      // Replace every leaf label "word" by "word:translation" on a skeleton copy.
      TreeTransformer tt = t1 -> {
        t1 = t1.treeSkeletonCopy();
        for (Tree subtree : t1) {
          if (subtree.isLeaf()) {
            Label oldLabel = subtree.label();
            String translation = ChineseEnglishWordMap.getInstance().getFirstTranslation(oldLabel.value());
            if (translation == null) translation = "[UNK]";
            Label newLabel = new StringLabel(oldLabel.value() + ':' + translation);
            subtree.setLabel(newLabel);
          }
        }
        return t1;
      };
      outputPSTree = tt.transformTree(outputPSTree);
    }

    if (inXml) {
      // NOTE(review): as in the "words" branch above, the literals that wrap
      // each XML section below are empty or whitespace-only; the element
      // markup appears to be missing. Confirm against the upstream source.
      if (formats.containsKey("wordsAndTags")) {
        ArrayList<TaggedWord> sent = outputTree.taggedYield();
        pw.println("  ");
        int i = 1;
        for (TaggedWord tw : sent) {
          pw.println("    " + XMLUtils.escapeXML(tw.word()) + "");
          i++;
        }
        pw.println("  ");
      }
      if (formats.containsKey("penn")) {
        pw.println("  ");
        StringWriter sw = new StringWriter();
        PrintWriter psw = new PrintWriter(sw);
        outputPSTree.pennPrint(psw);
        pw.print(XMLUtils.escapeXML(sw.toString()));
        pw.println("  ");
      }
      if (formats.containsKey("latexTree")) {
        pw.println("    ");
        pw.println(".[");
        StringWriter sw = new StringWriter();
        PrintWriter psw = new PrintWriter(sw);
        outputTree.indentedListPrint(psw,false);
        pw.print(XMLUtils.escapeXML(sw.toString()));
        pw.println(".]");
        pw.println("  ");
      }
      if (formats.containsKey("xmlTree")) {
        pw.println("");
        outputTree.indentedXMLPrint(pw,false);
        pw.println("");
      }
      if (formats.containsKey("dependencies")) {
        Tree indexedTree = outputTree.deepCopy(outputTree.treeFactory(),
                                                 CoreLabel.factory());
        indexedTree.indexLeaves();
        Set<Dependency<Label, Label, Object>> depsSet = indexedTree.mapDependencies(dependencyWordFilter, hf);
        List<Dependency<Label, Label, Object>> sortedDeps = new ArrayList<>(depsSet);
        sortedDeps.sort(Dependencies.dependencyIndexComparator());
        pw.println("");
        for (Dependency<Label, Label, Object> d : sortedDeps) {
          pw.println(d.toString("xml"));
        }
        pw.println("");
      }
      if (formats.containsKey("conll2007") || formats.containsKey("conllStyleDependencies")) {
        log.info("The \"conll2007\" and \"conllStyleDependencies\" formats are ignored in xml.");
      }
      if (formats.containsKey("typedDependencies")) {
        GrammaticalStructure gs = gsf.newGrammaticalStructure(outputTree);
        if (basicDependencies) {
          print(gs.typedDependencies(), "xml", includeTags, pw);
        }
        if (nonCollapsedDependencies || nonCollapsedDependenciesSeparated) {
          print(gs.allTypedDependencies(), "xml", includeTags, pw);
        }
        if (collapsedDependencies) {
          print(gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL), "xml", includeTags, pw);
        }
        if (CCPropagatedDependencies) {
          print(gs.typedDependenciesCCprocessed(), "xml", includeTags, pw);
        }
        if(treeDependencies) {
          print(gs.typedDependenciesCollapsedTree(), "xml", includeTags, pw);
        }
      }
      if (formats.containsKey("typedDependenciesCollapsed")) {
        GrammaticalStructure gs = gsf.newGrammaticalStructure(outputTree);
        print(gs.typedDependenciesCCprocessed(), "xml", includeTags, pw);
      }

      // This makes parser require jgrapht.  Bad.
      // if (formats.containsKey("semanticGraph")) {
      //  SemanticGraph sg = SemanticGraph.makeFromTree(outputTree, true, false, false, null);
      //  pw.println(sg.toFormattedString());
      // }
    } else {
      // non-XML printing
      if (formats.containsKey("wordsAndTags")) {
        pw.println(SentenceUtils.listToString(outputTree.taggedYield(), false));
        pw.println();
      }
      if (formats.containsKey("oneline")) {
        pw.println(outputPSTree);
      }
      if (formats.containsKey("penn")) {
        outputPSTree.pennPrint(pw);
        pw.println();
      }
      if (formats.containsKey(rootLabelOnlyFormat)) {
        pw.println(outputTree.label().value());
      }
      if (formats.containsKey("latexTree")) {
        pw.println(".[");
        outputTree.indentedListPrint(pw,false);
        pw.println(".]");
      }
      if (formats.containsKey("xmlTree")) {
        outputTree.indentedXMLPrint(pw,false);
      }
      if (formats.containsKey("dependencies")) {
        Tree indexedTree = outputTree.deepCopy(outputTree.treeFactory());
        indexedTree.indexLeaves();
        List<Dependency<Label, Label, Object>> sortedDeps = getSortedDeps(indexedTree, dependencyWordFilter);
        for (Dependency<Label, Label, Object> d : sortedDeps) {
          pw.println(d.toString("predicate"));
        }
        pw.println();
      }
      if (formats.containsKey("conll2007")) {
        // CoNLL-X 2007 format: http://ilk.uvt.nl/conll/#dataformat
        // wsg: This code should be retained (and not subsumed into EnglishGrammaticalStructure) so
        //      that dependencies for other languages can be printed.
        // wsg2011: This code currently ignores the dependency label since the present implementation
        //          of mapDependencies() returns UnnamedDependency objects.
        // TODO: if there is a GrammaticalStructureFactory available, use that instead of mapDependencies
        Tree it = outputTree.deepCopy(outputTree.treeFactory(), CoreLabel.factory());
        it.indexLeaves();

        List<CoreLabel> tagged = it.taggedLabeledYield();
        List<Dependency<Label, Label, Object>> sortedDeps = getSortedDeps(it, Filters.acceptFilter());

        for (Dependency<Label, Label, Object> d : sortedDeps) {
          if (!dependencyFilter.test(d)) {
            continue;
          }
          if (!(d.dependent() instanceof HasIndex) || !(d.governor() instanceof HasIndex)) {
            throw new IllegalArgumentException("Expected labels to have indices");
          }
          HasIndex dep = (HasIndex) d.dependent();
          HasIndex gov = (HasIndex) d.governor();

          int depi = dep.index();
          int govi = gov.index();

          // Leaf indices are 1-based, the tagged yield is 0-based.
          CoreLabel w = tagged.get(depi - 1);

          // Used for both coarse and fine POS tag fields
          String tag = PTBTokenizer.ptbToken2Text(w.tag());

          String word = PTBTokenizer.ptbToken2Text(w.word());
          String lemma = "_";
          String feats = "_";
          String pHead = "_";
          String pDepRel = "_";
          String depRel;
          if (d.name() != null) {
            depRel = d.name().toString();
          } else {
            depRel = (govi == 0) ? "ROOT" : "NULL";
          }

          // The 2007 format has 10 fields
          pw.printf("%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s%n", depi, word, lemma, tag, tag, feats, govi, depRel, pHead, pDepRel);
        }
        pw.println();
      }
      if (formats.containsKey("conllStyleDependencies")) {
        // TODO: Rewrite this to output StanfordDependencies using EnglishGrammaticalStructure code
        BobChrisTreeNormalizer tn = new BobChrisTreeNormalizer();
        Tree indexedTree = outputTree.deepCopy(outputTree.treeFactory(),
                                                 CoreLabel.factory());
        // TODO: Can the below for-loop be deleted now?  (Now that the HeadFinder knows about NML.)
        for (Tree node : indexedTree) {
          if (node.label().value().startsWith("NML")) {
            node.label().setValue("NP");
          }
        }

        indexedTree = tn.normalizeWholeTree(indexedTree, outputTree.treeFactory());
        indexedTree.indexLeaves();
        Set<Dependency<Label, Label, Object>> depsSet = null;
        boolean failed = false;
        try {
          depsSet = indexedTree.mapDependencies(dependencyFilter, hf);
        } catch (Exception e) {
          failed = true;
        }
        if (failed) {
          log.info("failed: ");
          log.info(t);
          log.info();
        } else {
          // Maps the index of each dependent leaf to the index of its governor.
          Map<Integer, Integer> deps = Generics.newHashMap();
          for (Dependency<Label, Label, Object> dep : depsSet) {
            CoreLabel child = (CoreLabel)dep.dependent();
            CoreLabel parent = (CoreLabel)dep.governor();
            Integer childIndex =
              child.get(CoreAnnotations.IndexAnnotation.class);
            Integer parentIndex =
              parent.get(CoreAnnotations.IndexAnnotation.class);
//            log.info(childIndex+"\t"+parentIndex);
            deps.put(childIndex, parentIndex);
          }
          boolean foundRoot = false;
          int index = 1;
          for (Tree node : indexedTree.getLeaves()) {
            String word = node.label().value();
            String tag = node.parent(indexedTree).label().value();
            int parent = 0;
            if (deps.containsKey(index)) {
              parent = deps.get(index);
            } else {
              // Only one leaf may lack a governor; it is printed with parent 0.
              if (foundRoot) {
                throw new RuntimeException("conllStyleDependencies: more than one token has no governor");
              }
              foundRoot = true;
            }
            // Use String tabs: with char '\t' the leading int + char is
            // numeric addition (index + 9), not concatenation.
            pw.println(index + "\t" + word + "\t" + tag + "\t" + parent);
            index++;
          }
          pw.println();
        }
      }
      if (formats.containsKey("typedDependencies")) {
        GrammaticalStructure gs = gsf.newGrammaticalStructure(outputTree);
        if (basicDependencies) {
          print(gs.typedDependencies(), includeTags, pw);
        }
        if (nonCollapsedDependencies) {
          print(gs.allTypedDependencies(), includeTags, pw);
        }
        if (nonCollapsedDependenciesSeparated) {
          print(gs.allTypedDependencies(), "separator", includeTags, pw);
        }
        if (collapsedDependencies) {
          print(gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL), includeTags, pw);
        }
        if (CCPropagatedDependencies) {
          print(gs.typedDependenciesCCprocessed(), includeTags, pw);
        }
        if (treeDependencies) {
          print(gs.typedDependenciesCollapsedTree(), includeTags, pw);
        }
      }
      if (formats.containsKey("typedDependenciesCollapsed")) {
        GrammaticalStructure gs = gsf.newGrammaticalStructure(outputTree);
        print(gs.typedDependenciesCCprocessed(), includeTags, pw);
      }
      // This makes parser require jgrapht.  Bad
      // if (formats.containsKey("semanticGraph")) {
      //  SemanticGraph sg = SemanticGraph.makeFromTree(outputTree, true, false, false, null);
      //  pw.println(sg.toFormattedString());
      // }
    }

    // flush to make sure we see all output
    pw.flush();
  }

  private List> getSortedDeps(Tree tree, Predicate> filter) {
    if (gsf != null) {
      GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
      Collection deps = gs.typedDependencies(GrammaticalStructure.Extras.NONE);
      List> sortedDeps = new ArrayList<>();
      for (TypedDependency dep : deps) {
        sortedDeps.add(new NamedDependency(dep.gov(), dep.dep(), dep.reln().toString()));
      }
      Collections.sort(sortedDeps, Dependencies.dependencyIndexComparator());
      return sortedDeps;
    } else {
      Set> depsSet = tree.mapDependencies(filter, hf, "root");
      List> sortedDeps = new ArrayList<>(depsSet);
      Collections.sort(sortedDeps, Dependencies.dependencyIndexComparator());
      return sortedDeps;
    }
  }


  /** For the input tree, collapse any collocations in it that exist in
   *  WordNet and are contiguous in the tree into a single node.
   *  A single static Wordnet connection is used by all instances of this
   *  class.  The connection class is loaded by reflection on first use;
   *  if that fails we log the problem and return the tree unchanged.
   *  A failed load is retried on the next call.
   *
   *  @param tree The input tree.  NOTE: This tree is mangled by this method
   *  @param hf The head finder to use
   *  @return The collocation collapsed tree
   */
  private static synchronized Tree getCollocationProcessedTree(Tree tree,
                                                               HeadFinder hf) {
    if (wnc == null) {
      try {
        Class<?> cl = Class.forName("edu.stanford.nlp.trees.WordNetInstance");
        // Class.newInstance() is deprecated; go through the no-arg constructor.
        wnc = (WordNetConnection) cl.getDeclaredConstructor().newInstance();
      } catch (Exception e) {
        // wnc is still null here, which the check below reports.
        log.info("Couldn't open WordNet Connection.  Aborting collocation detection.");
        log.info(e);
      }
    }
    if (wnc == null) {
      log.error("WordNetConnection unavailable for collocations.");
      return tree;
    }
    CollocationFinder cf = new CollocationFinder(tree, wnc, hf);
    return cf.getMangledTree();
  }


  /**
   * Prints a header before any trees are printed.  Only does something
   * when the "xml" option is set, in which case two lines are printed.
   *
   * @param pw Where to print the header
   * @param charset Not read by the code as it stands (see note below)
   */
  public void printHeader(PrintWriter pw, String charset) {
    if (propertyToBoolean(options, "xml")) {
      // NOTE(review): both literals are empty and charset is unused --
      // presumably an XML declaration and an opening root element belong
      // here; confirm against the upstream source.
      pw.println("");
      pw.println("");
    }
  }


  /**
   * Prints a footer after all trees are printed.  Only does something
   * when the "xml" option is set, in which case one line is printed.
   *
   * @param pw Where to print the footer
   */
  public void printFooter(PrintWriter pw) {
    if (propertyToBoolean(options, "xml")) {
      // NOTE(review): the literal is empty -- presumably the closing tag
      // matching printHeader's root element belongs here; confirm upstream.
      pw.println("");
    }
  }


  /**
   * Returns a tree in which non-leaf nodes that are the head child of their
   * parent (according to this instance's HeadFinder) have {@link #headMark}
   * appended to their label value.  The top node itself is never marked,
   * since it is processed with a {@code null} head.
   *
   * @param t The tree to mark
   * @return The head-marked tree ({@code t} itself if it is a leaf)
   */
  public Tree markHeadNodes(Tree t) {
    return markHeadNodes(t, null);
  }

  /**
   * Recursive worker for head marking.
   *
   * @param t The subtree to process
   * @param head The head child of {@code t}'s parent, or null at the top
   * @return {@code t} itself if it is a leaf; otherwise a new node whose
   *         label is head-marked iff {@code t} is {@code head}, and whose
   *         children have been processed recursively
   */
  private Tree markHeadNodes(Tree t, Tree head) {
    // don't worry about head-marking leaves
    if (t.isLeaf()) {
      return t;
    }
    // Identity comparison: only the very node chosen as head gets the mark.
    final Label markedOrPlain = (t == head) ? headMark(t.label()) : t.label();
    final Tree headOfThisNode = hf.determineHead(t);
    final Tree[] rebuiltKids = headMarkChildren(t, headOfThisNode);
    return t.treeFactory().newTreeNode(markedOrPlain, Arrays.asList(rebuiltKids));
  }

  /**
   * Makes a head-marked copy of a label.
   *
   * @param l The label to copy; it is not modified
   * @return A new label made by {@code l}'s label factory whose value has
   *         the {@link #headMark} suffix appended
   */
  private static Label headMark(Label l) {
    final Label marked = l.labelFactory().newLabel(l);
    final String markedValue = marked.value() + headMark;
    marked.setValue(markedValue);
    return marked;
  }

  /**
   * Applies head marking to every child of a node.
   *
   * @param t The node whose children are processed
   * @param head The head child of {@code t}
   * @return A new array with the processed children, in the original order
   */
  private Tree[] headMarkChildren(Tree t, Tree head) {
    return Arrays.stream(t.children())
        .map(kid -> markHeadNodes(kid, head))
        .toArray(Tree[]::new);
  }

  /** This provides a simple main method for calling TreePrint.
   *  Flags supported are:
   *  <ul>
   *  <li> -format format (like -outputFormat of parser, default "penn")
   *  <li> -options options (like -outputFormatOptions of parser, default "")
   *  <li> -tLP class (the TreebankLanguagePack, default
   *       "edu.stanford.nlp.trees.PennTreebankLanguagePack")
   *  <li> -hf class (the HeadFinder, default, the one in the class specified by -tLP)
   *  <li> -useTLPTreeReader (use the treeReaderFactory() inside
   *       the -tLP class; otherwise a PennTreeReader with no normalization is used)
   *  </ul>
   *  The single argument should be a file containing Trees in the format that is either
   *  Penn Treebank s-expressions or as specified by -useTLPTreeReader and the -tLP class,
   *  or if there is no such argument, trees are read from stdin and the program runs as a
   *  filter.
   *  If the -tLP or -hf class cannot be instantiated, the problem is logged
   *  and the method returns without printing anything.
   *
   *  @param args Command line arguments, as above.
   */
  public static void main(String[] args) {
    String format = "penn";
    String options = "";
    String tlpName = "edu.stanford.nlp.trees.PennTreebankLanguagePack";
    String hfName = null;
    // Each of these flags takes exactly one argument.
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put("-format", 1);
    flagMap.put("-options", 1);
    flagMap.put("-tLP", 1);
    flagMap.put("-hf", 1);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    // The non-flag arguments are stored under the null key.
    args = argsMap.get(null);
    if (argsMap.containsKey("-format")) {
      format = argsMap.get("-format")[0];
    }
    if (argsMap.containsKey("-options")) {
      options = argsMap.get("-options")[0];
    }
    if (argsMap.containsKey("-tLP")) {
      tlpName = argsMap.get("-tLP")[0];
    }
    if (argsMap.containsKey("-hf")) {
      hfName = argsMap.get("-hf")[0];
    }
    TreebankLanguagePack tlp;
    try {
      tlp = (TreebankLanguagePack) Class.forName(tlpName).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      log.warning(e);
      return;
    }
    HeadFinder hf;
    if (hfName != null) {
      try {
        hf = (HeadFinder) Class.forName(hfName).getDeclaredConstructor().newInstance();
      } catch (Exception e) {
        log.warning(e);
        return;
      }
    } else {
      hf = tlp.headFinder();
    }
    // hf is either a freshly constructed instance or already tlp.headFinder(),
    // so no further null fallback is needed.
    TreePrint print = new TreePrint(format, options, tlp, hf, tlp.typedDependencyHeadFinder());
    Iterator<Tree> i; // initialized below
    if (args.length > 0) {
      TreeReaderFactory trf;
      if (argsMap.containsKey("-useTLPTreeReader")) {
        trf = tlp.treeReaderFactory();
      } else {
        trf = in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new TreeNormalizer());
      }
      Treebank trees = new DiskTreebank(trf);
      trees.loadPath(args[0]);
      i = trees.iterator();
    } else {
      i = tlp.treeTokenizerFactory().getTokenizer(new BufferedReader(new InputStreamReader(System.in)));
    }
    while (i.hasNext()) {
      print.printTree(i.next());
    }
  }

  /**
   * NO OUTSIDE USE
   * Returns a String representation of the given set of
   * typed dependencies in a user-specified format.
   * The format is dispatched as follows:
   * <dl>
   * <dt>"xml"</dt>
   * <dd>Delegates to {@code toXMLString}.  Formats the dependencies as
   * XML, as exemplified by the following:
   * <pre>
   *  &lt;dependencies&gt;
   *    &lt;dep type="nsubj"&gt;
   *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
   *      &lt;dependent idx="0"&gt;Sam&lt;/dependent&gt;
   *    &lt;/dep&gt;
   *    &lt;dep type="tmod"&gt;
   *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
   *      &lt;dependent idx="2"&gt;today&lt;/dependent&gt;
   *    &lt;/dep&gt;
   *  &lt;/dependencies&gt;
   * </pre></dd>
   * <dt>"readable"</dt>
   * <dd>Delegates to {@code toReadableString}.  Formats the dependencies
   * as a table with columns {@code dependent}, {@code relation}, and
   * {@code governor}, as exemplified by the following:
   * <pre>
   *  Sam-0               nsubj               died-1
   *  today-2             tmod                died-1
   * </pre></dd>
   * <dt>"separator"</dt>
   * <dd>The plain format below, with the separate-extras flag set.</dd>
   * <dt>anything else, including {@code null} ("plain")</dt>
   * <dd>(Default.)  Formats the dependencies as logical relations,
   * as exemplified by the following:
   * <pre>
   *  nsubj(died-1, Sam-0)
   *  tmod(died-1, today-2)
   * </pre></dd>
   * </dl>
   *
   * @param dependencies The TypedDependencies to print
   * @param format a {@code String} specifying the desired format
   * @param includeTags passed through to every formatter except "readable"
   * @return a {@code String} representation of the typed
   *         dependencies
   */
  private static String toString(Collection dependencies, String format, boolean includeTags) {
    if (format == null) {
      // No format given: plain output without separated extras.
      return toString(dependencies, false, includeTags);
    }
    switch (format) {
      case "xml":
        return toXMLString(dependencies, includeTags);
      case "readable":
        return toReadableString(dependencies);
      case "separator":
        return toString(dependencies, true, includeTags);
      default:
        return toString(dependencies, false, includeTags);
    }
  }

  /**
   * NO OUTSIDE USE
   * Returns a String representation of this set of typed dependencies,
   * one dependency per line, as exemplified by the following:
   * <pre>{@code
   *  tmod(died-6, today-9)
   *  nsubj(died-6, Sam-3)
   * }</pre>
   * When {@code extraSep} is set and at least one dependency is flagged as
   * extra, the extra dependencies follow the other ones after a line
   * consisting of {@code ======}.
   *
   * @param dependencies The TypedDependencies to print
   * @param extraSep boolean indicating whether the extra dependencies have to be printed separately, after the basic ones
   * @param includeTags if true, each dependency is rendered with
   *        {@code CoreLabel.OutputFormat.VALUE_TAG_INDEX}, otherwise with
   *        {@code CoreLabel.OutputFormat.VALUE_INDEX}
   * @return a {@code String} representation of this set of
   *         typed dependencies
   */
  private static String toString(Collection<TypedDependency> dependencies, boolean extraSep, boolean includeTags) {
    CoreLabel.OutputFormat labelFormat = includeTags ? CoreLabel.OutputFormat.VALUE_TAG_INDEX : CoreLabel.OutputFormat.VALUE_INDEX;
    StringBuilder buf = new StringBuilder();
    // Extra dependencies are only held back when the caller asked for a separate section;
    // the short-circuit keeps td.extra() from being consulted otherwise.
    List<TypedDependency> extraDeps = new ArrayList<>();
    for (TypedDependency td : dependencies) {
      if (extraSep && td.extra()) {
        extraDeps.add(td);
      } else {
        buf.append(td.toString(labelFormat)).append('\n');
      }
    }
    // now we print the separator for extra dependencies, and print these if there are some
    if (!extraDeps.isEmpty()) {
      buf.append("======\n");
      for (TypedDependency td : extraDeps) {
        buf.append(td.toString(labelFormat)).append('\n');
      }
    }
    return buf.toString();
  }

  /**
   * NO OUTSIDE USE
   * Formats the typed dependencies as a table of three left-aligned columns,
   * each padded to 20 characters: dependent, relation, governor.  The table
   * starts with a header row ({@code dep}, {@code reln}, {@code gov}) and an
   * underline row; every row ends with the platform line separator.
   *
   * @param dependencies The TypedDependencies to print
   * @return the table as a {@code String}
   */
  private static String toReadableString(Collection<TypedDependency> dependencies) {
    // One shared row layout keeps header, underline and data rows aligned.
    final String rowFormat = "%-20s%-20s%-20s%n";
    StringBuilder buf = new StringBuilder();
    buf.append(String.format(rowFormat, "dep", "reln", "gov"));
    buf.append(String.format(rowFormat, "---", "----", "---"));
    for (TypedDependency td : dependencies) {
      buf.append(String.format(rowFormat, td.dep(), td.reln(), td.gov()));
    }
    return buf.toString();
  }

  // NO OUTSIDE USE
  private static String toXMLString(Collection dependencies, boolean includeTags) {
    StringBuilder buf = new StringBuilder("\n");
    for (TypedDependency td : dependencies) {
      String reln = td.reln().toString();
      String gov = td.gov().value();
      String govTag = td.gov().tag();
      int govIdx = td.gov().index();
      String dep = td.dep().value();
      String depTag = td.dep().tag();
      int depIdx = td.dep().index();
      boolean extra = td.extra();
      // add an attribute if the node is a copy
      // (this happens in collapsing when different prepositions are conjuncts)
      String govCopy = "";
      int copyGov = td.gov().copyCount();
      if (copyGov > 0) {
        govCopy = " copy=\"" + copyGov + '\"';
      }
      String depCopy = "";
      int copyDep = td.dep().copyCount();
      if (copyDep > 0) {
        depCopy = " copy=\"" + copyDep + '\"';
      }
      String govTagAttribute = (includeTags && govTag != null) ? " tag=\"" + govTag + "\"" : "";
      String depTagAttribute = (includeTags && depTag != null) ? " tag=\"" + depTag + "\"" : "";
      // add an attribute if the typed dependency is an extra relation (do not preserve the tree structure)
      String extraAttr = "";
      if (extra) {
        extraAttr = " extra=\"yes\"";
      }
      buf.append("  \n");
      buf.append("    ').append(XMLUtils.escapeXML(gov)).append("\n");
      buf.append("    ').append(XMLUtils.escapeXML(dep)).append("\n");
      buf.append("  \n");
    }
    buf.append("");
    return buf.toString();
  }

  /**
   * USED BY TREEPRINT AND WSD.SUPWSD.PREPROCESS
   * Prints this set of typed dependencies to the specified
   * {@code PrintWriter}, one dependency per line and without setting
   * extra dependencies apart, followed by a line terminator.
   *
   * @param dependencies The collection of TypedDependency to print
   * @param includeTags Whether tags are included in the printed dependencies
   * @param pw Where to print them
   */
  public static void print(Collection<TypedDependency> dependencies, boolean includeTags, PrintWriter pw) {
    // extraSep is fixed to false here: extras are printed inline with the other dependencies.
    pw.println(toString(dependencies, false, includeTags));
  }

  /**
   * USED BY TREEPRINT
   * Prints this set of typed dependencies to the specified
   * {@code PrintWriter} in the specified format, followed by a line
   * terminator.
   *
   * @param dependencies The collection of TypedDependency to print
   * @param format "xml" or "readable" or "separator"; any other value
   *        (including {@code null}) selects the plain one-per-line format
   * @param includeTags Whether tags are included in the printed dependencies
   * @param pw Where to print them
   */
  public static void print(Collection<TypedDependency> dependencies, String format, boolean includeTags, PrintWriter pw) {
    pw.println(toString(dependencies, format, includeTags));
  }

}