All downloads are free. The search and download functionality uses the official Maven repository.

edu.berkeley.nlp.syntax.GrammaticalRelation Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.syntax;

import edu.berkeley.nlp.ling.CollinsHeadFinder;
import edu.berkeley.nlp.ling.HeadFinder;
import edu.berkeley.nlp.util.CollectionUtils;
import edu.berkeley.nlp.util.Pair;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * User: aria42
 * Date: Apr 15, 2009
 */
public class GrammaticalRelation {


  public static Pair,Tree> getSubjectObject(TreePathFinder tpf,
                                                                 Tree vpNode)
  {
    if (!vpNode.getLabel().startsWith("VP")) {
      throw new IllegalArgumentException(vpNode + " is not a VP");
    }
    List> childs = vpNode.getChildren();
    // Subject
    Tree subj = null;
    for (Tree node: tpf.getRoot().getPostOrderTraversal()) {
      if (node.getLabel().startsWith("NP")) {
        Tree lcaNode = tpf.findLowestCommonAncestor(node,vpNode);
        if (lcaNode.getLabel().startsWith("S") && tpf.findParent(node) == lcaNode) {
          subj = node;
          break;
        }
      }
    }
    // Object
    Tree obj = null;
    for (int c = 0; c < childs.size(); c++) {
      Tree child = childs.get(c);
      if (child.isPhrasal() && child.getLabel().startsWith("NP")) {
        obj = child;
        break;
      }
    }
    if (subj == null || obj == null) return null;
    return Pair.newPair(subj,obj);
  }

  private final static Set isVerbs =
    new HashSet(CollectionUtils.makeList("is","was"));
     
  public static List,Tree>> getPredicateNominativePairs(TreePathFinder tpf,
                                                                                  Tree root,
                                                                                  HeadFinder hf)
  {
    List> vpNodes = new ArrayList();
    for (Tree node : tpf.getRoot().getPostOrderTraversal()) {
      if (node.getLabel().startsWith("VP")) {
        Pair p = HeadFinder.Utils.getHeadWordAndPartOfSpeechPair(hf, node);        
        String headWord = p.getFirst();
        if (isVerbs.contains(headWord.toLowerCase())) {
          vpNodes.add(node);
        }
      }
    }
    List,Tree>> result = new ArrayList, Tree>>();
    for (Tree vpNode : vpNodes) {
      Pair, Tree> treeTreePair = getSubjectObject(tpf, vpNode);
      if (treeTreePair != null) result.add(treeTreePair);
    }
    return result;
  }

  public static void main(String[] args) {
    Tree t = Trees.PennTreeReader.parseEasy("(ROOT (S (NP (NNP John)) (VP (VBD was) (NP (DT a) (NN man)))))");
    TreePathFinder tpf = new TreePathFinder(t);
    System.out.println(getPredicateNominativePairs(tpf, t, new CollinsHeadFinder()));
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy