resources.NE.firstname.jape Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of annie Show documentation
Show all versions of annie Show documentation
ANNIE is a general purpose information extraction system that
provides the building blocks of many other GATE applications.
/*
* firstname.jape
*
* Copyright (c) 1998-2004, The University of Sheffield.
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* Diana Maynard, 02 Aug 2001
*
* $Id: firstname.jape 19012 2015-11-27 16:06:30Z dgmaynard $
*/
// Phase FirstName: the rules in this phase create FirstPerson, Title and
// Initials annotations. Patterns are matched over the annotation types listed
// on the Input line, using the Appelt control style.
Phase: FirstName
Input: Token Lookup ClosedClass NumberLetter UserID
Options: control = appelt
Rule: FirstNameTwitterName
Priority: 500
// @fred
// A person_first Lookup that is not marked kind == ambig and that is matched
// together with a UserID annotation. Produces a FirstPerson annotation with
// twittername = "yes" and, when it can be determined, a gender feature.
(
 {Lookup.majorType == person_first, UserID, Lookup.kind != ambig}
):person
-->
{
  // The binding holds the Lookup and the UserID; only the Lookup carries the
  // gazetteer features, so narrow the set to Lookup annotations.
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
  gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();

  // Find out if the gender is unambiguous: every person_first Lookup covering
  // exactly the same span must agree on minorType.
  String gender = (String)personAnn.getFeatures().get("minorType");
  Long personEnd = personAnn.getEndNode().getOffset();
  boolean ambig = false;
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    // we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personEnd)){
      Object otherGender = anAnnot.getFeatures().get("minorType");
      // Null-safe comparison: a Lookup without a minorType feature must not
      // raise a NullPointerException.
      ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
    }
  }
  // Only record a gender that is both known and unambiguous.
  if(!ambig && gender != null) features.put("gender", gender);
  features.put("rule", "FirstNameTwitterName");
  features.put("twittername", "yes");
  outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
               features);
}
Rule: FirstNameTwitterNameAmbig
Priority: 600
// @mark
// A person_first Lookup marked kind == ambig that is matched together with a
// UserID annotation. Produces a FirstPerson annotation with twittername = "yes"
// and kind = "ambig" and, when it can be determined, a gender feature.
(
 {Lookup.majorType == person_first, UserID, Lookup.kind == ambig}
):person
-->
{
  // The binding holds the Lookup and the UserID; only the Lookup carries the
  // gazetteer features, so narrow the set to Lookup annotations.
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
  gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();

  // Find out if the gender is unambiguous: every person_first Lookup covering
  // exactly the same span must agree on minorType.
  String gender = (String)personAnn.getFeatures().get("minorType");
  Long personEnd = personAnn.getEndNode().getOffset();
  boolean ambig = false;
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    // we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personEnd)){
      Object otherGender = anAnnot.getFeatures().get("minorType");
      // Null-safe comparison: a Lookup without a minorType feature must not
      // raise a NullPointerException.
      ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
    }
  }
  // Only record a gender that is both known and unambiguous.
  if(!ambig && gender != null) features.put("gender", gender);
  features.put("rule", "FirstNameTwitterNameAmbig");
  features.put("twittername", "yes");
  features.put("kind", "ambig");
  outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
               features);
}
Rule: FirstName
// Fred
// A person_first Lookup at a position where no ClosedClass annotation matches.
// Produces a FirstPerson annotation with twittername = "no" and, when it can
// be determined, a gender feature.
(
 {Lookup.majorType == person_first, !ClosedClass}
):person
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();

  // Find out if the gender is unambiguous: every person_first Lookup covering
  // exactly the same span must agree on minorType.
  String gender = (String)personAnn.getFeatures().get("minorType");
  Long personEnd = personAnn.getEndNode().getOffset();
  boolean ambig = false;
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    // we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personEnd)){
      Object otherGender = anAnnot.getFeatures().get("minorType");
      // Null-safe comparison: a Lookup without a minorType feature must not
      // raise a NullPointerException.
      ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
    }
  }
  // Only record a gender that is both known and unambiguous.
  if(!ambig && gender != null) features.put("gender", gender);
  features.put("rule", "FirstName");
  features.put("twittername", "no");
  outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
               features);
}
Rule: FirstNameAmbig
Priority: 300
/* Prefer this rule if the first name has an ambiguous feature in the
   gazetteer, e.g. "Christian".
   In this case, we won't use it in the main name-finding grammar if we find
   it on its own, only as part of a longer name.
*/
(
 {Lookup.majorType == person_first, Lookup.kind == ambig}
):person
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();

  // Find out if the gender is unambiguous: every person_first Lookup covering
  // exactly the same span must agree on minorType.
  String gender = (String)personAnn.getFeatures().get("minorType");
  Long personEnd = personAnn.getEndNode().getOffset();
  boolean ambig = false;
  gate.FeatureMap constraints = Factory.newFeatureMap();
  constraints.put("majorType", "person_first");
  Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
  while(!ambig && lookupsIter.hasNext()){
    gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
    // we're only interested in annots of the same length
    if(anAnnot.getEndNode().getOffset().equals(personEnd)){
      Object otherGender = anAnnot.getFeatures().get("minorType");
      // Null-safe comparison: a Lookup without a minorType feature must not
      // raise a NullPointerException.
      ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
    }
  }
  // Only record a gender that is both known and unambiguous.
  if(!ambig && gender != null) features.put("gender", gender);
  features.put("rule", "FirstNameAmbig");
  features.put("kind", "ambig");
  features.put("twittername", "no");
  outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
               features);
}
Rule: TitleGender
Priority: 50
// Mr
// A title Lookup whose minorType is male or female, optionally followed by a
// full stop. Produces a Title annotation over the whole match, carrying the
// Lookup's minorType as its gender feature.
(
 ({Lookup.majorType == title, Lookup.minorType == male}|
  {Lookup.majorType == title, Lookup.minorType == female})
 ({Token.string == "."})?
)
:person
-->
{
  gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
  // The binding can contain the optional "." Token as well as the Lookup, and
  // set iteration order is not guaranteed to yield the Lookup first. Read the
  // gender from a Lookup explicitly so it is never taken from the Token.
  gate.AnnotationSet titleLookups = person.get("Lookup");
  gate.Annotation personAnn = (gate.Annotation)titleLookups.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();
  features.put("gender", personAnn.getFeatures().get("minorType"));
  features.put("rule", "TitleGender");
  // Span the full binding so that a trailing "." stays inside the Title.
  outputAS.add(person.firstNode(), person.lastNode(), "Title",
               features);
}
Rule: Title
// Any title Lookup, optionally followed by a full stop, e.g.
//   Dr
//   Dr.
// The whole match becomes a Title annotation.
(
 {Lookup.majorType == title}
 (
  {Token.string == "."}
 )?
):titleSpan
-->
:titleSpan.Title = {rule = "Title"}
Rule: Initials1
// One or more single upper-case-initial tokens of length 1, each optionally
// followed by a full stop, e.g.
//   A.B.
//   A.
//   A
// Positions where a ClosedClass or NumberLetter annotation matches are excluded.
(
 (
  {Token.orth == upperInitial, Token.length == "1", !ClosedClass, !NumberLetter}
  (
   {Token.string == "."}
  )?
 )+
):initials
-->
:initials.Initials = {rule = "Initials1"}
Rule: Initials2
// A single all-caps token of length 2 or 3 without punctuation, e.g.
//   AB
//   ABC
// Positions where a Lookup, ClosedClass or NumberLetter annotation matches
// are excluded.
(
 {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass, !NumberLetter}
 |
 {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass, !NumberLetter}
):initials
-->
:initials.Initials = {kind = "nopunct", rule = "Initials2"}