All downloads are free. Search and download functionality uses the official Maven repository.

justhalf.nlp.reader.acereader.ACEEntity Maven / Gradle / Ivy

package justhalf.nlp.reader.acereader;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
 * Represents an Entity with possibly multiple mentions
 * @author Aldrian Obaja ([email protected])
 *
 */
public class ACEEntity extends ACEObject {
	
	/**
	 * Based on:
	 * 
    *
  1. ACE2004: https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/english-edt-v4.2.6.pdf
  2. *
  3. ACE2005: https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/english-entities-guidelines-v5.6.6.pdf
  4. *
* Use {@link #toString()} method to get the actual textual form used in the annotation files. * @author Aldrian Obaja ([email protected]) * */ public static enum ACEEntityType implements ACEObjectType { /** * Person entities are limited to humans. * A person may be a single individual or a group. */ PER("Person"), /** * Organization entities are limited to corporations, agencies, and other groups of people * defined by an established organizational structure. */ ORG("Organization"), /** * Location entities are limited to geographical entities such as geographical areas and landmasses, * bodies of water, and geological formations. */ LOC("Location"), /** * Geo-political Entity
* GPE entities are geographical regions defined by political and/or social groups. * A GPE entity subsumes and does not distinguish between a nation, its region, its government, * or its people. */ GPE("Geo-Political Entity"), /** * Facility entities are limited to buildings and other permanent man-made structures and * real estate improvements. */ FAC("Facility"), /** * A vehicle entity is a physical device primarily designed to move an object from one location * to another, by (for example) carrying, pulling, or pushing the transported object. * Vehicle entities may or may not have their own power source. */ VEH("Vehicle"), /** * Weapon entities are limited to physical devices primarily used as instruments for physically * harming or destroying animals (often humans), buildings, or other constructions. */ WEA("Weapon"), ; /** A more descriptive text form of this entity type */ public final String text; private List subtypes; private ACEEntityType(String text) { this.text = text; } public List subtypes(){ if(subtypes == null){ List subtypeList = new ArrayList(); for(ACEEntitySubType subtype: ACEEntitySubType.values()){ if(subtype.type == this){ subtypeList.add(subtype); } } subtypes = subtypeList; } return subtypes; } public String toString(){ return name(); } } /** * Based on: *
    *
  1. ACE2004: https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/english-edt-v4.2.6.pdf
  2. *
  3. ACE2005: https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/english-entities-guidelines-v5.6.6.pdf
  4. *
* Use {@link #toString()} method to get the actual textual form used in the annotation files. * @author Aldrian Obaja ([email protected]) * */ public static enum ACEEntitySubType implements ACEObjectSubType { // Person // Only in 2004 PER_PER("", ACEEntityType.PER, true, false), // Only in 2005 PER_INDIVIDUAL("Individual", ACEEntityType.PER, false, true), PER_GROUP("Group", ACEEntityType.PER, false, true), PER_INDETERMINATE("Indeterminate", ACEEntityType.PER, false, true), // Organization ORG_GOVERNMENT("Government", ACEEntityType.ORG, true, true), ORG_COMMERCIAL("Commercial", ACEEntityType.ORG, true, true), ORG_EDUCATIONAL("Educational", ACEEntityType.ORG, true, true), ORG_NON_PROFIT("Non-Profit", ACEEntityType.ORG, true, true), // Only in 2004 ORG_OTHER("Other", ACEEntityType.ORG, true, false), // Only in 2005 ORG_ENTERTAINMENT("Entertainment", ACEEntityType.ORG, false, true), ORG_NON_GOVERNMENTAL("Non-Governmental", ACEEntityType.ORG, false, true), ORG_MEDIA("Media", ACEEntityType.ORG, false, true), ORG_RELIGIOUS("Religious", ACEEntityType.ORG, false, true), ORG_MEDICAL_SCIENCE("Medical-Science", ACEEntityType.ORG, false, true), ORG_SPORTS("Sports", ACEEntityType.ORG, false, true), // GPE GPE_CONTINENT("Continent", ACEEntityType.GPE, true, true), GPE_NATION("Nation", ACEEntityType.GPE, true, true), GPE_STATE_OR_PROVINCE("State-or-Province", ACEEntityType.GPE, true, true), GPE_COUNTY_OR_DISTRICT("County-or-District", ACEEntityType.GPE, true, true), // Only in 2004 GPE_CITY_OR_TOWN("City-or-Town", ACEEntityType.GPE, true, false), GPE_OTHER("Other", ACEEntityType.GPE, true, false), // Only in 2005 GPE_POPULATION_CENTER("Population-Center", ACEEntityType.GPE, false, true), GPE_GPE_CLUSTER("GPE-Cluster", ACEEntityType.GPE, false, true), GPE_SPECIAL("Special", ACEEntityType.GPE, false, true), // Location LOC_ADDRESS("Address", ACEEntityType.LOC, true, true), LOC_BOUNDARY("Boundary", ACEEntityType.LOC, true, true), LOC_CELESTIAL("Celestial", ACEEntityType.LOC, true, 
true), LOC_WATER_BODY("Water-Body", ACEEntityType.LOC, true, true), LOC_LAND_REGION_NATURAL("Land-Region-Natural", ACEEntityType.LOC, true, true), LOC_REGION_INTERNATIONAL("Region-International", ACEEntityType.LOC, true, true), // Only in 2004 LOC_REGION_LOCAL("Region-Local", ACEEntityType.LOC, true, false), LOC_REGION_SUBNATIONAL("Region-Subnational", ACEEntityType.LOC, true, false), LOC_REGION_NATIONAL("Region-National", ACEEntityType.LOC, true, false), LOC_OTHER("Other", ACEEntityType.LOC, true, false), // Only in 2005 LOC_REGION_GENERAL("Region-General", ACEEntityType.LOC, false, true), // Facility FAC_PLANT("Plant", ACEEntityType.FAC, true, true), FAC_PATH("Path", ACEEntityType.FAC, true, true), // Only in 2004 FAC_BUILDING("Building", ACEEntityType.FAC, true, false), FAC_SUBAREA_BUILDING("Subarea-Building", ACEEntityType.FAC, true, false), FAC_BOUNDED_AREA("Bounded-Area", ACEEntityType.FAC, true, false), FAC_CONDUIT("Conduit", ACEEntityType.FAC, true, false), FAC_BARRIER("Barrier", ACEEntityType.FAC, true, false), FAC_OTHER("Other", ACEEntityType.FAC, true, false), // Only in 2005 FAC_AIRPORT("Airport", ACEEntityType.FAC, false, true), FAC_BUILDING_GROUNDS("Building-Grounds", ACEEntityType.FAC, false, true), FAC_SUBAREA_FACILITY("Subarea-Facility", ACEEntityType.FAC, false, true), // Vehicle VEH_AIR("Air", ACEEntityType.VEH, true, true), VEH_LAND("Land", ACEEntityType.VEH, true, true), VEH_WATER("Water", ACEEntityType.VEH, true, true), VEH_SUBAREA_VEHICLE("Subarea-Vehicle", ACEEntityType.VEH, true, true), // Only in 2004 VEH_OTHER("Other", ACEEntityType.VEH, true, false), // Only in 2005 VEH_UNDERSPECIFIED("Underspecified", ACEEntityType.VEH, false, true), // Weapon WEA_BLUNT("Blunt", ACEEntityType.WEA, true, true), WEA_EXPLODING("Exploding", ACEEntityType.WEA, true, true), WEA_SHARP("Sharp", ACEEntityType.WEA, true, true), WEA_CHEMICAL("Chemical", ACEEntityType.WEA, true, true), WEA_BIOLOGICAL("Biological", ACEEntityType.WEA, true, true), 
WEA_SHOOTING("Shooting", ACEEntityType.WEA, true, true), WEA_PROJECTILE("Projectile", ACEEntityType.WEA, true, true), WEA_NUCLEAR("Nuclear", ACEEntityType.WEA, true, true), // Only in 2004 WEA_OTHER("Other", ACEEntityType.WEA, true, false), // Only in 2005 WEA_UNDERSPECIFIED("Underspecified", ACEEntityType.WEA, false, true), ; /** The original text specifying this subtype. Might be empty if there is no actual subtype. */ public final String text; public final ACEEntityType type; public final boolean in2004; public final boolean in2005; private final static List subtypesIn2004 = new ArrayList(); private final static List subtypesIn2005 = new ArrayList(); private ACEEntitySubType(String text, ACEEntityType type, boolean in2004, boolean in2005){ this.text = text; this.type = type; this.in2004 = in2004; this.in2005 = in2005; } public List getACE2004SubTypes(){ if(subtypesIn2004.size() == 0){ for(ACEEntitySubType subtype: values()){ if(subtype.in2004){ subtypesIn2004.add(subtype); } } } return subtypesIn2004; } public List getACE2005SubTypes(){ if(subtypesIn2005.size() == 0){ for(ACEEntitySubType subtype: values()){ if(subtype.in2005){ subtypesIn2005.add(subtype); } } } return subtypesIn2005; } public String toString(){ return this.text; } } /** * The entity class (only in ACE2005) * SPC: A particular, specific, and unique real world entity * GEN: A kind or type of entity rather than a specific entity * NEG: A negatively quantified (usually generic) entity * USP: An underspecified entity (e.g., modal/uncertain/...) */ public static enum ACEEntitySpecificity { /** A particular, specific, and unique real world entity */ SPC, /** A kind or type of entity rather than a specific entity */ GEN, /** A negatively quantified (usually generic) entity */ NEG, /** An underspecified entity (e.g., modal/uncertain/...) 
*/ USP, /** In ACE2004 there is no specificity defined */ NOT_TAGGED, } /** The type of the entity, i.e., PER, ORG, LOC, GPE, FAC, VEH, WEA **/ public ACEEntityType type; /** The subtype of the entity, e.g., Government, Address, Building, Subarea-Vehicle, Projectile **/ public ACEEntitySubType subtype; public ACEEntitySpecificity specificity; public String textualRepresentation; public List mentions; public ACEEntity(String id, String type, String subtype, String entityClass){ this(id, type, subtype, entityClass, (String)null); } public ACEEntity(String id, String type, String subtype, String specificity, String textualRepresentation){ this(id, type, subtype, specificity, textualRepresentation, new ArrayList()); } public ACEEntity(String id, String type, String subtype, String specificity, String textualRepresentation, List mentions){ super(id); this.type = ACEEntityType.valueOf(type); if(subtype == null || subtype.length() == 0){ subtype = type; } this.subtype = ACEEntitySubType.valueOf((type+"_"+subtype).toUpperCase().replace("-", "_")); if(specificity == null || specificity.length() == 0){ specificity = ACEEntitySpecificity.NOT_TAGGED.name(); } this.specificity = ACEEntitySpecificity.valueOf(specificity); this.textualRepresentation = textualRepresentation; this.mentions = mentions; } public ACEEntity(ACEEntity entity){ super(entity.id); this.type = entity.type; this.subtype = entity.subtype; this.specificity = entity.specificity; this.textualRepresentation = entity.textualRepresentation; this.mentions = new ArrayList(); Collections.copy(this.mentions, entity.mentions); } public void addMention(ACEEntityMention mention){ this.mentions.add(mention); } public String toString(){ StringBuilder result = new StringBuilder(); result.append("[ID="+id+"]"); result.append("[Type="+type+"]"); result.append("[Subtype="+subtype+"]"); result.append("[Specificity="+specificity+"]"); return result.toString(); } @SuppressWarnings("unchecked") @Override public List mentions() { 
return mentions; } @SuppressWarnings("unchecked") @Override public ACEEntityType type() { return type; } @SuppressWarnings("unchecked") @Override public ACEEntitySubType subtype() { return subtype; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy