Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want. Maven / Gradle / Ivy
* name.jape
* Copyright (c) 1998-2004, The University of Sheffield.
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
* Diana Maynard, 10 Sep 2001
* $Id: name.jape 19646 2016-10-06 12:35:17Z dgmaynard $
Phase: Name
Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserMention Hashtag
Options: control = appelt debug = false
// Person Rules
Macro: TITLE
({Token.string == "."})?
({FirstPerson.gender == male, FirstPerson.kind != ambig} |
{FirstPerson.gender == female, FirstPerson.kind != ambig})
{FirstPerson.kind == ambig}
({Token.string == ","})?
{Lookup.majorType == person_ending}
({Lookup.majorType == surname, Lookup.minorType == prefix}
(({Token.string == "O"}|{Token.string == "D"})
{Token.string == "'"}
// Person Rules
Rule: Pronoun
Priority: 1000
{Token.category == PP}|
{Token.category == PRP}|
{Token.category == RB}
Priority: 1000
// stops certain things being recognised as People
Rule: GazPerson
Priority: 100
// A gazetteer Lookup of majorType person_full becomes a TempPerson whose
// firstName/surname features are read from the Tokens it covers.
// NOTE(review): the grouping parentheses around the pattern elements below
// appear to have been lost from this copy -- confirm against upstream name.jape.
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
{Lookup.majorType == person_full}
:person -->
{
  // everything bound to the :person label, plus one annotation from it to
  // read the gender feature from
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
  gate.FeatureMap features = Factory.newFeatureMap();

  // find the Token annotations
  AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, personSet, "Token");
  // put them in order; the list is typed so that get() yields an Annotation
  // that the gate.Utils helpers below accept
  List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);

  if (tokenList.size() == 1) {
    // if there's only one Token, guess it's a surname
    String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
    features.put("surname", surnameContent);
  }
  else if (tokenList.size() > 0) {
    // the string under the first Token
    String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
    features.put("firstName", firstNameContent);
    // the string under the remaining Tokens if any
    // (always true on this branch: size is > 0 and not 1)
    if (tokenList.size() > 1) {
      Long lastNameStart = gate.Utils.start(tokenList.get(1));
      Long lastNameEnd = gate.Utils.end(tokenList.get(tokenList.size() - 1));
      String surnameContent = gate.Utils.stringFor(doc, lastNameStart, lastNameEnd);
      features.put("surname", surnameContent);
    }
  }

  features.put("kind", "fullName");
  features.put("rule", "GazPerson");
  features.put("gender", personAnn.getFeatures().get("gender"));

  // this method doesn't require try-catch
  gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
}
Rule: GazPersonFirst
Priority: 200
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
{FirstPerson.kind != ambig}
{Token.orth == upperInitial, Token.length == "1"}
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
features.put("kind", "firstName");
features.put("rule", "GazPersonFirst");
// get the string of the first name
String contentFirstName = gate.Utils.stringFor(doc, personAnn);
features.put("firstName", contentFirstName);
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
Rule: PersonFirstContext
Priority: 30
// Anne and Kenton
{Token.string == "and"}
({Token.orth == upperInitial, Token.length != "1"})
//first deal with person1
gate.FeatureMap features1 = Factory.newFeatureMap();
gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
String contentFirstName = gate.Utils.stringFor(doc, personAnn);
features1.put("firstName", contentFirstName);
features1.put("gender", personAnn.getFeatures().get("gender"));
features1.put("kind", "firstName");
features1.put("rule", "PersonFirstContext");
outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
//now deal with person2
gate.FeatureMap features2 = Factory.newFeatureMap();
gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
features2.put("firstName", content2FirstName);
features2.put("kind", "firstName");
features2.put("rule", "PersonFirstContext");
outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
Rule: PersonTitle
Priority: 35
// Mr. Jones
// Mr Fred Jones
// note we only allow one first and surname,
// but we add more in a final phase if we find adjacent unknowns
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
({Title.rule == "TitleGender"}):title
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
String contentTitle = gate.Utils.stringFor(doc, titleAnn);
features.put("title", contentTitle);
features.put("gender", titleAnn.getFeatures().get("gender"));
if (firstNameSet != null && firstNameSet.size()>0)
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "personName");
features.put("rule", "PersonTitle");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonTitleUnknownGender
Priority: 30
// Prof. Jones
// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc.
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
String contentTitle = gate.Utils.stringFor(doc, titleAnn);
features.put("title", contentTitle);
features.put("gender", "unknown");
if (firstNameSet != null && firstNameSet.size()>0)
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "personName");
features.put("rule", "PersonTitleGenderUnknown");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonTitleInitials
Priority: 35
// Mr J. Jones
// Builds a TempPerson carrying title, gender (copied from the Title
// annotation), optional initials and surname.
// NOTE(review): the pattern elements that bind "initials", "surname" and
// "person", and the "-->" separator, are not present in this copy although the
// Java below reads those bindings -- restore them from upstream name.jape.
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
({Title.rule == "TitleGender"}):title
({Upper, !Initials})
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");

  gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
  gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();

  gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
  gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();

  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
  features.put("title", contentTitle);
  features.put("gender", titleAnn.getFeatures().get("gender"));

  // initials are only recorded when that binding matched something
  if (initialsSet != null && initialsSet.size()>0) {
    // typed list so that get() yields an Annotation for gate.Utils.start/end
    List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
    // span from the first initial to the last one
    Long initialsStart = gate.Utils.start(initialsList.get(0));
    Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1));
    String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
    features.put("initials", initialsContent);
  }

  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
  features.put("surname", surnameContent);

  features.put("kind", "personName");
  features.put("rule", "PersonTitleInitials");
  outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
    features);
}
Rule: TitleFirstName
Priority: 55
// use this rule when we know what gender the title indicates
// Mr Fred
({Title.gender == male} | {Title.gender == female}):title
:person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
String contentTitle = gate.Utils.stringFor(doc, titleAnn);
features.put("title", contentTitle);
features.put("gender", titleAnn.getFeatures().get("gender"));
if (firstNameSet != null && firstNameSet.size()>0)
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
features.put("kind", "personName");
features.put("rule", "TitleFirstName");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonJobTitle
Priority: 20
// note we include titles but not jobtitles in markup
{Lookup.majorType == jobtitle}
:person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"},
:jobtitle.JobTitle = {rule = "PersonJobTitle"}
Rule: NotFirstPersonStop
Priority: 70
// ambig first name and surname is stop word
// e.g. Will And
{Token.category == PRP}|
{Token.category == DT}
({Lookup.majorType == stop}
:person -->
Rule: FirstPersonStop
Priority: 50
// John And
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}|
{Token.category == IN}
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
features.put("kind", "firstName");
features.put("rule", "FirstPersonStop");
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
Rule: NotPersonFull
Priority: 50
// do not allow Det + Surname
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
Rule: LocPersonAmbig1
Priority: 50
// Location + Possible Surname --> Location only (ignore Surname)
{Lookup.majorType == location}
:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig1}
Rule: LocPersonAmbig2
Priority: 50
// Location + + Possible Surname --> Location only (ignore Surname)
{Lookup.majorType == location}
:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig2}
Rule: LocPersonAmbig3
Priority: 100
// Ambiguous Location/Person + Possible Surname --> Person
{Lookup.majorType == location, Lookup.ambig == yes, FirstPerson}
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
features.put("kind", "firstName");
features.put("rule", "LocPersonAmbig3");
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
Rule: PersonFullInitialsCaps
Priority: 100
// If the initials are of type nopunct, we want to discard the Person if the surname is also in all caps, as it's too ambiguous
{Token.category == DT}
({Initials.kind == nopunct})
({Upper.kind == allCaps})
):person -->
:person.Discard = {rule = "PersonFullInitialsCaps"}
Rule: PersonFullInitials
Priority: 10
// F.W. Jones
// Builds a TempPerson carrying initials, an optional middle name (whose
// gender feature is copied when present) and a surname.
// NOTE(review): the pattern elements that bind "middleName" and "surname",
// and the grouping parentheses, are not present in this copy although the
// Java below reads those bindings -- restore them from upstream name.jape.
{Token.category == DT}
({Initials, !Lookup}):initials
):person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  // initials: span from the first Initials annotation to the last one
  gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
  // typed list so that get() yields an Annotation for gate.Utils.start/end
  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
  Long initialsStart = gate.Utils.start(initialsList.get(0));
  Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1));
  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
  features.put("initials", initialsContent);

  // middle name and gender are only recorded when that binding matched
  gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName");
  if (middleNameSet != null && middleNameSet.size()>0) {
    gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next();
    String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
    features.put("middleName", middleNameContent);
    features.put("gender", middleNameAnn.getFeatures().get("gender"));
  }

  gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
  gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
  String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
  features.put("surname", surnameContent);

  features.put("kind", "fullName");
  features.put("rule", "PersonFullInitials");
  outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
    features);
}
Rule: PersonFull
Priority: 10
// F.W. Jones
// Fred Jones
{Token.category == DT}
):person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
features.put("gender", firstNameAnn.getFeatures().get("gender"));
gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName");
if (middleNameSet != null && middleNameSet.size()>0)
gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next();
String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
features.put("middleName", middleNameContent);
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
features.put("rule", "PersonFull");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonFullDoubleBarrelled
Priority: 9
// F.W. Smith Jones
// Fred Smith Jones
):person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
features.put("gender", firstNameAnn.getFeatures().get("gender"));
gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName");
if (middleNameSet != null && middleNameSet.size()>0)
gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next();
String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
features.put("middleName", middleNameContent);
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
features.put("rule", "PersonFullDoubleBarrelled");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonMiddleInitial
Priority: 10
// Fred C. Jones
// Builds a TempPerson carrying firstName, gender (copied from the first-name
// annotation), optional initials and surname.
// NOTE(review): the pattern elements that bind "firstName", "initials" and
// "surname" are not present in this copy although the Java below reads those
// bindings -- restore them from upstream name.jape.
):person -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");

  gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
  features.put("firstName", firstNameContent);
  features.put("gender", firstNameAnn.getFeatures().get("gender"));

  // the middle initial is only recorded when that binding matched
  gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
  if (initialsSet != null && initialsSet.size()>0) {
    gate.Annotation initialsAnn = (gate.Annotation)initialsSet.iterator().next();
    String initialsContent = gate.Utils.stringFor(doc, initialsAnn);
    features.put("initials", initialsContent);
  }

  gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
  gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
  features.put("surname", surnameContent);

  features.put("kind", "fullName");
  // provenance label: this rule's own name, so its output can be told apart
  // from that of the PersonFull rule
  features.put("rule", "PersonMiddleInitial");
  outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
    features);
}
Rule: PersonFullStop
Priority: 50
// G.Wilson Fri
{Lookup.majorType == date}
:person.TempPerson = {kind = "fullName", rule = "PersonFullStop"}
Rule: NotPersonFullReverse
Priority: 20
// XYZ, I
{Token.string == ","}
{Token.category == PRP}
Rule: PersonSaint
Priority: 50
// Note: ensure that it's not a Saints Day first
({Token.string == "St"} ({Token.string == "."})? |
{Token.string == "Saint"})
:person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
if (firstPerson != null && firstPerson.size()>0)
gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
features.put("gender", personAnn.getFeatures().get("gender"));
features.put("kind", "firstName");
features.put("rule", "PersonSaint");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: PersonLocAmbig
Priority: 40
// Ken London
// Susan Hampshire
// Christian name + Location --> Person's Name
({Lookup.majorType == location}):surname
):person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
features.put("gender", firstNameAnn.getFeatures().get("gender"));
String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
features.put("firstName", firstNameContent);
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
features.put("rule", "PersonLocAmbig");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Rule: TitlePersonLocAmbig
Priority: 50
// Professor London
// title + Location --> Person's Name
({Lookup.majorType == location}):surname
):person -->
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
features.put("gender", titleAnn.getFeatures().get("gender"));
String titleContent = gate.Utils.stringFor(doc, titleAnn);
features.put("title", titleContent);
String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
features.put("rule", "TitlePersonLocAmbig");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
Priority: 50
// if the last name is an organisation ending, treat as an organisation not person
// e.g. A.B. Consulting
{Token.category == DT}
({Lookup.majorType == org_key}|
{Lookup.majorType == org_base}
:orgName -->
:orgName.TempOrganization = {kind = "unknown", rule = "PersonOrgAmbig"}
// Organisation Rules
Macro: CDG
// cdg is something like "Ltd."
({Lookup.majorType == cdg})|
({Token.string == ","}
{Lookup.majorType == cdg})
Macro: SAINT
({Token.string == "St"} ({Token.string == "."})? |
{Token.string == "Saint"})
{Token.string == "Church"}|{Token.string == "church"}|
{Token.string == "Cathedral"}|{Token.string == "cathedral"}|
{Token.string == "Chapel"}|{Token.string == "chapel"}
Rule: TheGazOrganization
Priority: 245
{Token.category == DT}|
{Token.category == RB}
{Lookup.majorType == organization}
:orgName -->
gate.FeatureMap features = Factory.newFeatureMap();
// create an annotation set consisting of all the annotations for org
gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
// create an annotation set consisting of the annotation matching Lookup
gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
// if the annotation type Lookup doesn't exist, do nothing
if (org != null && org.size()>0)
// if it does exist, take the first element in the set
gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
//propagate minorType feature (and value) from org
features.put("orgType", orgAnn.getFeatures().get("minorType"));
// create some new features
features.put("rule", "GazOrganization");
// create a TempOrg annotation and add the features we've created
outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
Rule: GazOrganization
Priority: 145
{Lookup.majorType == organization}
:orgName -->
gate.FeatureMap features = Factory.newFeatureMap();
// create an annotation set consisting of all the annotations for org
gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
// create an annotation set consisting of the annotation matching Lookup
gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
// if the annotation type Lookup doesn't exist, do nothing
if (org != null && org.size()>0)
// if it does exist, take the first element in the set
gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
//propagate minorType feature (and value) from org
features.put("orgType", orgAnn.getFeatures().get("minorType"));
// create some new features
features.put("rule", "GazOrganization");
// create a TempOrg annotation and add the features we've created
outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
Rule: LocOrganization
Priority: 50
// Ealing Police
({Lookup.majorType == location} |
{Lookup.majorType == country_adj})
{Lookup.majorType == organization}
({Lookup.majorType == organization})?
:orgName -->
:orgName.TempOrganization = {kind = "orgName", rule=LocOrganization}
Rule: NewspaperEnding
Priority: 200
// GSA Today
// Creates a TempOrganization over the "orgName" binding, copying minorType
// from a Lookup inside it (when there is one) into the orgType feature.
// NOTE(review): the pattern elements preceding the newspaper_ending Lookup,
// the "orgName" label and the "-->" separator are not present in this copy --
// restore them from upstream name.jape.
{Lookup.majorType == newspaper_ending}
{
  gate.FeatureMap features = Factory.newFeatureMap();
  gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
  // the Lookup annotations within the bound span
  gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
  // without a Lookup there is no minorType to propagate
  if (org != null && org.size()>0) {
    gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
    features.put("orgType", orgAnn.getFeatures().get("minorType"));
  }
  // create some new features
  features.put("rule", "NewspaperEnding");
  // create a TempOrg annotation and add the features we've created
  outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
    features);
}
Rule: INOrgXandY
Priority: 200
// Bradford & Bingley
// Bradford & Bingley Ltd
{Token.category == IN}
({Token.category == NNP}
{Token.string == "&"}
{Token.orth == upperInitial}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
Rule: OrgXandY
Priority: 20
// Bradford & Bingley
// Bradford & Bingley Ltd
({Token.category == NNP}
{Token.string == "&"}
{Token.orth == upperInitial}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
Priority: 25
// University of Sheffield
// Sheffield University
// A Sheffield University
{Token.string == "University"}
{Token.string == "of"}
{Token.category == NNP})+
:orgName -->
:orgName.TempOrganization = {orgType = "other", rule = "OrgDept"}
Rule: OrgDept
Priority: 25
// Department of Pure Mathematics and Physics
{Token.string == "Department"}
{Token.string == "of"}
{Token.orth == upperInitial})+
{Token.string == "and"}
{Token.orth == upperInitial})+
:orgName -->
:orgName.TempOrganization = {orgType = "department", rule = "OrgDept"}
Rule: TheOrgXKey
Priority: 500
// The Aaaa Ltd.
{Token.category == DT}
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
:org.TempOrganization = {orgType = "unknown", rule = "TheOrgXKey"}
Rule: NotOrgXKey
Priority: 150
// if all the names are org_base or org_key, it's not an organisation
// e.g. Business Consulting
({Lookup.majorType == org_key}|
{Lookup.majorType == org_base}
({Lookup.majorType == org_ending})?
Rule: NotTheKey
Priority: 200
{Token.category == DT}
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
Rule: OrgXKey
Priority: 125
// Aaaa Ltd.
({Token.category == DT})?
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
:org.TempOrganization = {orgType = "unknown", rule = "OrgXKey"}
Rule: NotOrgXEnding
Priority: 500
// Very Limited
{Token.category == DT}
{Token.category == RB}
{Lookup.majorType == cdg}
Rule: NotOrgXEnding2
Priority: 500
// The Coca Cola Co.
{Token.category == DT}
{Lookup.majorType == cdg}
:orgName -->
:orgName.TempOrganization = {orgType = "company", rule = "OrgXEnding"}
Rule: OrgXEnding
Priority: 120
// Coca Cola Co.
{Lookup.majorType == cdg}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXEnding"}
Rule: TheOrgXandYKey
Priority: 220
{Token.category == DT}
(({Token.string == "and"} |
{Token.string == "&"})
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
Rule: OrgXandYKey
Priority: 120
// Aaaa Ltd.
// Xxx Services Ltd.
// AA and BB Services Ltd.
// but NOT A XXX Services Ltd.
(({Token.string == "and"} |
{Token.string == "&"})
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
Rule: OrgXsKeyBase
Priority: 120
// Gandy's Circus
// Queen's Ware
({Token.orth == upperInitial}
{Token.string == "'"}
({Token.string == "s"})?
({Lookup.majorType == org_key}|
{Lookup.majorType == org_base})
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXsKeybase"}
Rule: NotOrgXBase
Priority: 1000
// not things like British National
// or The University
({Token.category == DT}
({Lookup.majorType == country_adj}|
{Token.orth == lowercase})
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key})
:orgName -->
:orgName.Temp = {kind = "notorgName", rule = "NotOrgXBase"}
Rule: TheOrgXBase
Priority: 230
({Token.category == DT}
{Lookup.majorType == organization}
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
{Token.string == "of"}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "TheOrgXBase"}
Rule: OrgXBase
Priority: 130
// same as OrgXKey but uses base instead of key
// includes govern_key e.g. academy
// Barclays Bank
// Royal Academy of Art
{Lookup.majorType == organization}
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
{Token.string == "of"}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgXBase"}
Rule: TheBaseofOrg
Priority: 230
{Token.category == DT}
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
{Token.string == "of"}
{Token.category == DT}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
Rule: BaseofOrg
Priority: 130
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
{Token.string == "of"}
{Token.category == DT}
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
Rule: OrgPreX
Priority: 130
// Royal Tuscan
{Lookup.majorType == org_pre}
{Token.orth == upperInitial})+
({Lookup.majorType == org_ending})?
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "OrgPreX"}
Rule: OrgChurch
Priority: 150
// St. Andrew's Church
{Token.orth == upperInitial}
{Token.string == "'"}({Token.string == "s"})?
:orgName -->
:orgName.TempOrganization = {orgType = "other", rule = "OrgChurch"}
Priority: 130
// Alexandra Pottery should be org not person
// overrides PersonFull
{Token.string == "'"}({Token.string == "s"})?
({Lookup.majorType == org_key}|
{Lookup.majorType == org_base})
({Lookup.majorType == org_ending})?
:org.TempOrganization= {orgType = "unknown", rule = "OrgPersonAmbig"}
// Location rules
Rule: Location1
Priority: 200
// Lookup = city, country, province, region, water
// Western Europe
// South China sea
// Creates a TempLocation over the "locName" binding, copying minorType from
// a Lookup inside it (when there is one) into the locType feature.
// NOTE(review): the grouping parentheses around the pattern elements below
// appear to have been lost from this copy -- confirm against upstream name.jape.
{Token.category == DT}
({Lookup.majorType == loc_key, Lookup.minorType == pre}
{Lookup.majorType == location}
{Lookup.majorType == loc_key, Lookup.minorType == post})?
:locName -->
{
  gate.FeatureMap features = Factory.newFeatureMap();
  // create an annotation set consisting of all the annotations for loc
  gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName");
  // create an annotation set consisting of the annotation matching Lookup
  gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup");
  // if the annotation type Lookup doesn't exist, do nothing
  if (loc != null && loc.size()>0) {
    // if it does exist, take the first element in the set
    gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
    //propagate minorType feature (and value) from loc
    features.put("locType", locAnn.getFeatures().get("minorType"));
  }
  // create some new features
  features.put("rule", "Location1");
  // create a TempLoc annotation and add the features we've created
  outputAS.add(locSet.firstNode(), locSet.lastNode(), "TempLocation",
    features);
}
Rule: GazLocation
Priority: 200
{Token.category == DT}
{Lookup.majorType == location}
gate.FeatureMap features = Factory.newFeatureMap();
// create an annotation set consisting of all the annotations for loc
gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName");
// create an annotation set consisting of the annotation matching Lookup
gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup");
// if the annotation type Lookup doesn't exist, do nothing
if (loc != null && loc.size()>0)
// if it does exist, take the first element in the set
gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
//propagate minorType feature (and value) from loc
features.put("locType", locAnn.getFeatures().get("minorType"));
// create some new features
features.put("rule", "GazLocation");
// create a TempLoc annotation and add the features we've created
outputAS.add(locSet.firstNode(), locSet.lastNode(), "TempLocation",
Rule: GazLocationLocation
Priority: 100
({Lookup.majorType == location}):locName1
{Token.string == ","}
({Lookup.majorType == location}):locName2
gate.FeatureMap features = Factory.newFeatureMap();
gate.FeatureMap morefeatures = Factory.newFeatureMap();
gate.AnnotationSet loc1Set = (gate.AnnotationSet)bindings.get("locName1");
gate.AnnotationSet loc1 = (gate.AnnotationSet)loc1Set.get("Lookup");
gate.AnnotationSet loc2Set = (gate.AnnotationSet)bindings.get("locName2");
gate.AnnotationSet loc2 = (gate.AnnotationSet)loc2Set.get("Lookup");
// if the annotation type Lookup doesn't exist, do nothing
if (loc1 != null && loc1.size()>0)
gate.Annotation loc1Ann = (gate.Annotation)loc1.iterator().next();
features.put("locType", loc1Ann.getFeatures().get("minorType"));
if (loc2 != null && loc2.size()>0)
gate.Annotation loc2Ann = (gate.Annotation)loc2.iterator().next();
morefeatures.put("locType", loc2Ann.getFeatures().get("minorType"));
features.put("rule", "GazLocation");
outputAS.add(loc1Set.firstNode(), loc1Set.lastNode(), "TempLocation", features);
morefeatures.put("rule", "GazLocation");
outputAS.add(loc2Set.firstNode(), loc2Set.lastNode(), "TempLocation", morefeatures);
Rule: LocationPost
Priority: 50
{Token.category == DT}
{Token.category == NNP}
{Lookup.majorType == loc_key, Lookup.minorType == post}
:locName.TempLocation = {kind = "locName", rule = LocationPost}
{Token.category == DT}
({Lookup.majorType == loc_key, Lookup.minorType == pre}
{Lookup.majorType == loc_key, Lookup.minorType == post})?
:locName -->
:locName.TempLocation = {kind = "locName", rule = LocKey}
// Context-based Rules
{Token.string == "in"}
{Lookup.majorType == location}
:locName.TempLocation = {kind = "locName", rule = InLoc1, locType = :locName.Lookup.minorType}
Priority: 30
{Lookup.majorType == loc_general_key}
{Token.string == "of"}
:loc.TempLocation = {kind = "locName", rule = LocGeneralKey}
Priority: 1
// company X
{Token.string == "company"}
:org.TempOrganization= {orgType = "company", rule = "OrgContext1"}
Rule: OrgContext2
Priority: 5
// Telstar laboratory
// Medici offices
: org
({Token.string == "offices"} |
{Token.string == "Offices"} |
{Token.string == "laboratory"} |
{Token.string == "Laboratory"} |
{Token.string == "laboratories"} |
{Token.string == "Laboratories"})
:org.TempOrganization= {orgType = "other", rule = "OrgContext2"}
Priority: 50
// Smith joined Energis
({Token.string == "joined"}|
{Token.string == "joining"}|
{Token.string == "joins"}|
{Token.string == "join"}
:org.TempOrganization= {orgType = "company", rule = "joinOrg"}