All downloads are free. Search and download functionality uses the official Maven repository.

opennlp.tools.coref.resolver.IsAResolver Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.coref.resolver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.mention.MentionContext;

/**
 *  Resolves coreference between appositives.
 *
 *  <p>A mention can be handled by this resolver when its head token tag starts
 *  with {@code NN} and the token directly in front of it is a comma or a double
 *  dash. Candidate antecedents are only taken from the same sentence as the
 *  mention, and only when the index spans of the two mentions line up in one of
 *  the ways tested by {@link #excluded(MentionContext, DiscourseEntity)}.
 */
public class IsAResolver extends MaxentResolver {

  /**
   * Matches the tokens that may directly precede a resolvable mention: a comma
   * or a double dash. Compiled once and shared by all instances; compiled
   * {@link Pattern} objects are immutable and safe to share.
   * An earlier revision additionally accepted the copulas
   * am/are/is/was/were; that variant is deliberately not active.
   */
  private static final Pattern PREDICATIVE_PATTERN = Pattern.compile("^(,|--)$");

  /** Pattern applied by {@link #canResolve(MentionContext)} to the token preceding a mention. */
  Pattern predicativePattern;

  /**
   * Creates a resolver backed by the {@code "/imodel"} model.
   *
   * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor.
   * @param m resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor.
   * @throws IOException if the superclass constructor fails with an I/O error.
   */
  public IsAResolver(String projectName, ResolverMode m) throws IOException {
    // NOTE(review): 20 is presumably the number of preceding entities considered - confirm in MaxentResolver.
    super(projectName, "/imodel", m, 20);
    showExclusions = false;
    predicativePattern = PREDICATIVE_PATTERN;
  }

  /**
   * Creates a resolver backed by the {@code "/imodel"} model that additionally
   * receives a non-referential resolver.
   *
   * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor.
   * @param m resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor.
   * @param nrr non-referential resolver, forwarded unchanged to the {@link MaxentResolver} constructor.
   * @throws IOException if the superclass constructor fails with an I/O error.
   */
  public IsAResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
    super(projectName, "/imodel", m, 20, nrr);
    showExclusions = false;
    predicativePattern = PREDICATIVE_PATTERN;
  }

  /**
   * Tells whether this resolver applies to the given mention.
   *
   * @param ec the mention to test.
   * @return {@code true} if the head token tag starts with {@code NN} and the
   *     previous token exists and matches {@link #predicativePattern};
   *     {@code false} otherwise.
   */
  public boolean canResolve(MentionContext ec) {
    if (!ec.getHeadTokenTag().startsWith("NN")) {
      return false;
    }
    // Read the previous token once; only its string form is needed.
    Object previous = ec.getPreviousToken();
    return previous != null && predicativePattern.matcher(previous.toString()).matches();
  }

  /**
   * Decides whether the given entity must be ruled out as antecedent of the mention.
   * The entity is kept (result {@code false}) only if its last extent is in the
   * same sentence as the mention and one of three span configurations holds.
   *
   * @param ec the mention being resolved.
   * @param de the candidate entity; its last extent is used as the antecedent.
   * @return {@code true} if the entity is excluded, {@code false} if it remains a candidate.
   */
  protected boolean excluded(MentionContext ec, DiscourseEntity de) {
    MentionContext cec = de.getLastExtent();
    if (ec.getSentenceNumber() != cec.getSentenceNumber()) {
      return true;
    }

    int antecedentEnd = cec.getIndexSpan().getEnd();
    int mentionStart = ec.getIndexSpan().getStart();
    int mentionEnd = ec.getIndexSpan().getEnd();

    // Shallow parse appositives: the antecedent ends two index positions before the mention starts.
    if (antecedentEnd == mentionStart - 2) {
      return false;
    }
    // Full parse without trailing comma: both spans share the same end.
    if (antecedentEnd == mentionEnd) {
      return false;
    }
    // Full parse with trailing comma or period: the antecedent ends at most two
    // positions past the mention and the mention is followed by "," or ".".
    if (antecedentEnd <= mentionEnd + 2 && isCommaOrPeriod(ec.getNextToken())) {
      return false;
    }
    return true;
  }

  /**
   * Tells whether the entity lies outside the search range, which for this
   * resolver means its last extent is in a different sentence than the mention.
   *
   * @param ec the mention being resolved.
   * @param de the candidate entity.
   * @return {@code true} if the sentence numbers differ.
   */
  protected boolean outOfRange(MentionContext ec, DiscourseEntity de) {
    return de.getLastExtent().getSentenceNumber() != ec.getSentenceNumber();
  }

  /**
   * Always answers {@code true}, regardless of the entity passed in.
   *
   * @param de the candidate entity (not inspected).
   * @return {@code true}.
   */
  protected boolean defaultReferent(DiscourseEntity de) {
    return true;
  }

  /**
   * Builds the feature list for a mention/entity pair. The features produced by
   * the superclass come first. When an entity is given, they are followed by
   * the context features of the entity's last extent prefixed with {@code "l"},
   * the context features of the mention prefixed with {@code "r"}, and a single
   * {@code "hts"} feature combining both head token tags.
   *
   * @param mention the mention being resolved.
   * @param entity the candidate entity, or {@code null}; with {@code null} only
   *     the superclass features are returned.
   * @return a new list holding the features.
   */
  protected List getFeatures(MentionContext mention, DiscourseEntity entity) {
    List features = new ArrayList(super.getFeatures(mention, entity));
    if (entity == null) {
      return features;
    }
    MentionContext ant = entity.getLastExtent();
    addPrefixed(features, "l", ResolverUtils.getContextFeatures(ant));
    addPrefixed(features, "r", ResolverUtils.getContextFeatures(mention));
    features.add("hts" + ant.getHeadTokenTag() + "," + mention.getHeadTokenTag());
    return features;
  }

  /** Returns whether the token is non-null and its string form is exactly "," or ".". */
  private static boolean isCommaOrPeriod(Object token) {
    if (token == null) {
      return false;
    }
    String text = token.toString();
    return ",".equals(text) || ".".equals(text);
  }

  /** Appends every element of {@code values}, in order, to {@code target} with {@code prefix} prepended. */
  private static void addPrefixed(List target, String prefix, List values) {
    for (int i = 0, n = values.size(); i < n; i++) {
      target.add(prefix + values.get(i));
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy