resources.grammar.firstname.jape Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lang-german Show documentation
Show all versions of lang-german Show documentation
Support for processing German documents
The newest version!
/*
* firstname.jape
*
* Copyright (c) 1998-2004, The University of Sheffield.
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* Diana Maynard, 02 Aug 2001
*
* $Id: firstname.jape 19646 2016-10-06 12:35:17Z dgmaynard $
*/
// Phase header for the first-name / title / initials grammar.
Phase: FirstName
// Annotation types the rules in this phase are allowed to match against.
Input: Token Lookup ClosedClass NumberLetter UserMention
// Appelt control style is selected for rule matching in this phase.
Options: control = appelt
Rule: FirstNameWithinTwitterName
// Matches a UserMention that contains a Lookup whose majorType is
// person_first. Nothing is bound to a label and the action block is empty,
// so this rule creates no annotations when it fires.
// NOTE(review): presumably this exists to consume such mentions so that the
// embedded first name is not picked up by another rule - confirm.
(
  {UserMention contains {Lookup.majorType == person_first}}
)
-->
{}
Rule: FirstNameTwitterName
Priority: 500
// @fred
// A person_first Lookup matched together with a UserMention, where the
// Lookup is not flagged kind == ambig. Produces a FirstPerson annotation
// with twittername = "yes".
(
 {Lookup.majorType == person_first, UserMention, Lookup.kind !=ambig}
):person
-->
{
 // Keep only the Lookup part of the binding; the span of the new annotation
 // is taken from this filtered set.
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();

 // Candidate gender is the Lookup's minorType; it is null when the
 // Lookup carries no such feature.
 String gender = (String)personAnn.getFeatures().get("minorType");

 // Find out if the gender is unambiguous: look at every person_first Lookup
 // returned for the same start offset and stop at the first one with the
 // same end offset whose minorType differs from ours.
 boolean ambig = false;
 gate.FeatureMap constraints = Factory.newFeatureMap();
 constraints.put("majorType", "person_first");
 Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
 while(!ambig && lookupsIter.hasNext()){
  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
  // we're only interested in annots of the same length
  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
   Object otherGender = anAnnot.getFeatures().get("minorType");
   // Null-safe comparison: a Lookup without minorType must not raise a
   // NullPointerException here.
   ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
  }
 }

 // Only record a gender that is both known and unambiguous.
 if(!ambig && gender != null) features.put("gender", gender);
 features.put("rule", "FirstNameTwitterName");
 features.put("twittername", "yes");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
  features);
}
Rule: FirstNameTwitterNameAmbig
Priority: 600
// @mark
// A person_first Lookup flagged kind == ambig, matched together with a
// UserMention. Produces a FirstPerson annotation with twittername = "yes"
// and kind = "ambig".
(
 {Lookup.majorType == person_first, UserMention, Lookup.kind ==ambig}
):person
-->
{
 // Keep only the Lookup part of the binding; the span of the new annotation
 // is taken from this filtered set.
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();

 // Candidate gender is the Lookup's minorType; it is null when the
 // Lookup carries no such feature.
 String gender = (String)personAnn.getFeatures().get("minorType");

 // Find out if the gender is unambiguous: look at every person_first Lookup
 // returned for the same start offset and stop at the first one with the
 // same end offset whose minorType differs from ours.
 boolean ambig = false;
 gate.FeatureMap constraints = Factory.newFeatureMap();
 constraints.put("majorType", "person_first");
 Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
 while(!ambig && lookupsIter.hasNext()){
  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
  // we're only interested in annots of the same length
  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
   Object otherGender = anAnnot.getFeatures().get("minorType");
   // Null-safe comparison: a Lookup without minorType must not raise a
   // NullPointerException here.
   ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
  }
 }

 // Only record a gender that is both known and unambiguous.
 if(!ambig && gender != null) features.put("gender", gender);
 features.put("rule", "FirstNameTwitterNameAmbig");
 features.put("twittername", "yes");
 features.put("kind", "ambig");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
  features);
}
Rule: FirstName
Priority: 300
// Fred
// A person_first Lookup at a position where no ClosedClass annotation
// matches. Produces a FirstPerson annotation with twittername = "no".
(
 {Lookup.majorType == person_first, !ClosedClass}
):person
-->
{
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();

 // Candidate gender is the matched annotation's minorType; it is null when
 // the annotation carries no such feature.
 String gender = (String)personAnn.getFeatures().get("minorType");

 // Find out if the gender is unambiguous: look at every person_first Lookup
 // returned for the same start offset and stop at the first one with the
 // same end offset whose minorType differs from ours.
 boolean ambig = false;
 gate.FeatureMap constraints = Factory.newFeatureMap();
 constraints.put("majorType", "person_first");
 Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
 while(!ambig && lookupsIter.hasNext()){
  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
  // we're only interested in annots of the same length
  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
   Object otherGender = anAnnot.getFeatures().get("minorType");
   // Null-safe comparison: a Lookup without minorType must not raise a
   // NullPointerException here.
   ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
  }
 }

 // Only record a gender that is both known and unambiguous.
 if(!ambig && gender != null) features.put("gender", gender);
 features.put("rule", "FirstName");
 features.put("twittername", "no");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
  features);
}
Rule: FirstNameAmbig
Priority: 350
/* prefer this rule if the firstname has an ambiguous feature in the gazetteer, e.g. "Christian"
In this case, we won't use it in the main name-finding grammar if we find it on its own,
only as part of a longer name.

The priority is deliberately higher than FirstName (300): under appelt control
a tie on priority is resolved in favour of the rule defined earlier in the
file, so with equal priorities FirstName would win every co-extensive match
and this rule would not be preferred as intended. This mirrors the
FirstNameTwitterNameAmbig (600) / FirstNameTwitterName (500) pair.
*/
(
 {Lookup.majorType == person_first, Lookup.kind == ambig}
):person
-->
{
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();

 // Candidate gender is the matched annotation's minorType; it is null when
 // the annotation carries no such feature.
 String gender = (String)personAnn.getFeatures().get("minorType");

 // Find out if the gender is unambiguous: look at every person_first Lookup
 // returned for the same start offset and stop at the first one with the
 // same end offset whose minorType differs from ours.
 boolean ambig = false;
 gate.FeatureMap constraints = Factory.newFeatureMap();
 constraints.put("majorType", "person_first");
 Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
 while(!ambig && lookupsIter.hasNext()){
  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
  // we're only interested in annots of the same length
  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
   Object otherGender = anAnnot.getFeatures().get("minorType");
   // Null-safe comparison: a Lookup without minorType must not raise a
   // NullPointerException here.
   ambig = (gender == null) ? (otherGender != null) : !gender.equals(otherGender);
  }
 }

 // Only record a gender that is both known and unambiguous.
 if(!ambig && gender != null) features.put("gender", gender);
 features.put("rule", "FirstNameAmbig");
 features.put("kind", "ambig");
 features.put("twittername", "no");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
  features);
}
Rule: TitleGender
Priority: 50
// Mr
// A title Lookup whose minorType is male or female, optionally followed by
// a "." Token. Produces a Title annotation over the whole match carrying
// the gender taken from the Lookup.
(
 ({Lookup.majorType == title, Lookup.minorType == male}|
  {Lookup.majorType == title, Lookup.minorType == female})
 ({Token.string == "."})?
)
:person
-->
{
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
 // The binding can hold the optional "." Token as well as the title Lookup.
 // Read the gender from the Lookup explicitly: taking whichever annotation
 // the set iterator yields first could pick the Token, which has no
 // title minorType, and store a null gender.
 gate.AnnotationSet titleLookups = person.get("Lookup");
 gate.Annotation personAnn = (gate.Annotation)titleLookups.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
 features.put("gender", personAnn.getFeatures().get("minorType"));
 features.put("rule", "TitleGender");
 // Span comes from the full binding so a trailing "." stays inside the Title.
 outputAS.add(person.firstNode(), person.lastNode(), "Title",
  features);
}
Rule: Title
// Dr
// Any title Lookup, optionally followed by a "." Token, is annotated as a
// Title with rule = "Title". No gender feature is set by this rule.
(
  {Lookup.majorType == title}
  (
    {Token.string == "."}
  )?
):person
-->
  :person.Title = {rule = "Title"}
Rule: Initials1
// A.B.
// A.
// A
// One or more single-character upperInitial Tokens, each optionally
// followed by a "." Token. Tokens at positions matching ClosedClass or
// NumberLetter are excluded. The whole run is annotated as Initials.
(
  (
    {Token.orth == upperInitial, Token.length =="1", !ClosedClass, !NumberLetter}
    (
      {Token.string == "."}
    )?
  )+
):tag
-->
  :tag.Initials = {rule = "Initials1"}
Rule: Initials2
// AB
// ABC
// A single all-caps Token of length 2 or 3 at a position with no Lookup,
// ClosedClass or NumberLetter annotation. Annotated as Initials with
// kind = "nopunct".
(
  {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass, !NumberLetter}
  |
  {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass, !NumberLetter}
):tag
-->
  :tag.Initials = {kind = "nopunct", rule = "Initials2"}