All downloads are free. The search and download functionality uses the official Maven repository.

resources.NE.firstname.jape Maven / Gradle / Ivy

Go to download

ANNIE is a general purpose information extraction system that provides the building blocks of many other GATE applications.

The newest version!
/*
*  firstname.jape
*
* Copyright (c) 1998-2004, The University of Sheffield.
*
*  This file is part of GATE (see http://gate.ac.uk/), and is free
*  software, licenced under the GNU Library General Public License,
*  Version 2, June 1991 (in the distribution as file licence.html,
*  and also available at http://gate.ac.uk/gate/licence.html).
*
*  Diana Maynard, 02 Aug 2001
*
*  $Id: firstname.jape 19012 2015-11-27 16:06:30Z dgmaynard $
*/

// Name of this grammar phase.
Phase:	FirstName
// Annotation types the rules in this phase are allowed to match against;
// annotations of any other type are not visible to the left-hand sides below.
Input: Token Lookup ClosedClass NumberLetter UserID Split
// Appelt-style matching: where several rules match at the same position, the
// longest match is preferred and ties are resolved using the rules' Priority.
Options: control = appelt


Rule: FirstNameTwitterName
Priority: 500
// @fred
// Matches a person_first gazetteer Lookup (not flagged kind=ambig) together
// with a UserID annotation, and emits a FirstPerson annotation marked
// twittername = "yes".

(
 {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig}
):person
-->
{
// The binding holds the UserID as well as the Lookup; only the Lookup carries
// the gazetteer features, so restrict the set to that type.
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();

//find out if the gender is unambiguous
String gender = (String)personAnn.getFeatures().get("minorType");
boolean ambig = false;
// A Lookup without a minorType gives us no gender to compare; skip the scan
// instead of dereferencing null.
if(gender != null) {
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter =  inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    //we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
      // a same-span first-name Lookup with a different minorType means the
      // gender cannot be decided
      ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
    }
  }
}
// only record a gender that is both present and unambiguous
if(gender != null && !ambig) features.put("gender", gender);

features.put("rule", "FirstNameTwitterName");
features.put("twittername", "yes");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}


Rule: FirstNameTwitterNameAmbig
Priority: 600
// @mark
// Matches a person_first gazetteer Lookup flagged kind=ambig together with a
// UserID annotation, and emits a FirstPerson annotation marked
// twittername = "yes" and kind = "ambig".

(
 {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig}
):person
-->
{
// The binding holds the UserID as well as the Lookup; only the Lookup carries
// the gazetteer features, so restrict the set to that type.
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();

//find out if the gender is unambiguous
String gender = (String)personAnn.getFeatures().get("minorType");
boolean ambig = false;
// A Lookup without a minorType gives us no gender to compare; skip the scan
// instead of dereferencing null.
if(gender != null) {
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter =  inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    //we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
      // a same-span first-name Lookup with a different minorType means the
      // gender cannot be decided
      ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
    }
  }
}
// only record a gender that is both present and unambiguous
if(gender != null && !ambig) features.put("gender", gender);

features.put("rule", "FirstNameTwitterNameAmbig");
features.put("twittername", "yes");
features.put("kind", "ambig");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}


Rule: FirstName
// Fred
// Matches a person_first gazetteer Lookup where no ClosedClass annotation is
// present, and emits a FirstPerson annotation marked twittername = "no".
// The gender feature is always set: either the Lookup's minorType or "unknown".


(
 {Lookup.majorType == person_first, !ClosedClass}
):person
-->
{
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();

//find out if the gender is unambiguous
String gender = (String)personAnn.getFeatures().get("minorType");
boolean ambig = false;
// A Lookup without a minorType gives us no gender to compare; skip the scan
// instead of dereferencing null.
if(gender != null) {
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter =  inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    //we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
      // a same-span first-name Lookup with a different minorType means the
      // gender cannot be decided
      ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
    }
  }
}
// a missing gender is reported the same way as an ambiguous one
if(gender != null && !ambig) features.put("gender", gender);
else features.put("gender", "unknown");

features.put("rule", "FirstName");
features.put("twittername", "no");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}


Rule: FirstNameAmbig
Priority: 300
/* prefer this rule if the firstname has an ambiguous feature in the gazetteer, e.g. "Christian"
   In this case, we won't use it in the main name-finding grammar if we find it on its own, 
   only as part of a longer name.
   Emits a FirstPerson annotation marked kind = "ambig" and twittername = "no".
*/

(
 {Lookup.majorType == person_first, Lookup.kind == ambig}
):person
-->
{
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();

//find out if the gender is unambiguous
String gender = (String)personAnn.getFeatures().get("minorType");
boolean ambig = false;
// A Lookup without a minorType gives us no gender to compare; skip the scan
// instead of dereferencing null.
if(gender != null) {
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter =  inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    //we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
      // a same-span first-name Lookup with a different minorType means the
      // gender cannot be decided
      ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
    }
  }
}
// only record a gender that is both present and unambiguous
if(gender != null && !ambig) features.put("gender", gender);

features.put("rule", "FirstNameAmbig");
features.put("kind", "ambig");
features.put("twittername", "no");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}


Rule: TitleGender
Priority: 50
// Mr
//guess the gender from the person's title
// Matches a title Lookup whose minorType is male or female, optionally
// followed by a "." Token, and emits a Title annotation over the whole match
// carrying that gender.

(
 ({Lookup.majorType == title, Lookup.minorType == male}|
  {Lookup.majorType == title, Lookup.minorType == female})
 ({Token.string == "."})?
)
:person
-->
{
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
// The binding can contain the optional "." Token as well as the title Lookup.
// Read the gender from a Lookup explicitly: taking whichever annotation the
// set iterator returns first could pick the Token, which has no minorType.
gate.Annotation titleAnn = (gate.Annotation)person.get("Lookup").iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", titleAnn.getFeatures().get("minorType"));
features.put("rule", "TitleGender");
// the output span still covers the full binding, including any trailing "."
outputAS.add(person.firstNode(), person.lastNode(), "Title",
features);
}

Rule: Title
// Dr
// Any title Lookup, optionally followed by a "." Token, becomes a Title
// annotation with no gender feature.

(
  {Lookup.majorType == title}
  (
    {Token.string == "."}
  )?
):titleSpan
-->
  :titleSpan.Title = {rule = "Title"}




Rule: Initials1
// A.B.
// A.
// A
// One or more single upper-case letters, each optionally followed by a ".",
// excluding tokens that are ClosedClass or NumberLetter annotations.

(
  (
    {Token.orth == upperInitial, Token.length == "1", !ClosedClass, !NumberLetter}
    ( {Token.string == "."} )?
  )+
):initials
-->
  :initials.Initials = {rule = "Initials1"}


Rule: Initials2
// AB
// ABC
// A single all-caps token of length 2 or 3 that is not a Lookup, ClosedClass
// or NumberLetter annotation; tagged as Initials with kind = "nopunct".

(
  {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass, !NumberLetter}
  |
  {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass, !NumberLetter}
):initials
-->
  :initials.Initials = {kind = "nopunct", rule = "Initials2"}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy