gate.creole.gazetteer.GazetteerNode Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of annie Show documentation
Show all versions of annie Show documentation
ANNIE is a general purpose information extraction system that
provides the building blocks of many other GATE applications.
The newest version!
/*
* Copyright (c) 1995-2012, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* $Id: GazetteerNode.java 19518 2016-08-19 11:45:35Z markagreenwood $
*/
package gate.creole.gazetteer;
import gate.util.GateRuntimeException;
import java.util.*;
/**
 * A node in a gazetteer list, allowing an arbitrary number of features
 * to be added as metadata to an entry.
 * <p>
 * For example, with the separator set to '\t', if a gazetteer entry looked
 * like this (where {@code <TAB>} stands for a single tab character):
 * <pre>
 * Vodaphone&lt;TAB&gt;type=mobile phone company
 * </pre>
 * then the GazetteerNode would consist of an entry "Vodaphone", with a
 * featureMap containing the key "type", mapped to "mobile phone company".
 *
 * @author JLy
 */
public class GazetteerNode {
/** The gazetteer entry */
private String entry;
/** The features associated to the entry. If there are no features for this entry, it is null */
private Map featureMap = null;
/** The separator used in a GazetteerNode string */
private String separator;
/**
* Constructor. Uses the default separator.
*
* @param entry the gazetteer entry
* @param featureMap a map of name-value pairs
*/
public GazetteerNode(String entry, Map featureMap) {
this.entry = entry;
this.featureMap = featureMap;
}
/**
* Parses and create a gazetteer node from a string using no separator, i.e.
* the whole node is considered as the string to match, and there are no
* additional features.
*
* @param node the gazetteer node to be parsed
*/
public GazetteerNode(String node) {
this(node, (String) null, false);
}
/**
* Parses and create a gazetteer node from a string
*
* @param node the gazetteer node to be parsed
* @param separator the separator used in the gazetteer node string to delimit
* each name-value pair of features. If the separator is null, then the whole
* node will be used as the gazetteer entry
*/
public GazetteerNode(String node, String separator) {
this(node, separator, false);
}
/**
* Parses and create a gazetteer node from a string
*
* @param node the gazetteer node to be parsed
* @param separator the separator used in the gazetteer node string to delimit
* each name-value pair of features. If the separator is null, then the whole
* node will be used as the gazetteer entry
* @param isOrdered true if the feature maps used should be ordered
*/
public GazetteerNode(String node, String separator, boolean isOrdered) {
this.separator = (separator != null && separator.length() == 0)? null : separator;
int index_sep;
if(this.separator == null || (index_sep = node.indexOf(this.separator)) == -1 ) {
entry = node;
// leave featureMap null
} else {
entry = node.substring(0, index_sep);
String features = node.substring(index_sep + 1);
featureMap = getFeatures(features, isOrdered);
}
}
/**
* Given a string of name-value pairs in the format "name=value", separated
* by whatever this GazetteerNode's separator has been set to, convert it
* to the equivalent map.
*
* @param features a string in the format "name=value" separated by whatever
* the separator has been set to.
* @param isOrdered true if the map returned should be ordered
* @return a Map of the features
*/
private Map getFeatures(String features, boolean isOrdered) {
if (separator == null)
return null;
// split the string into name-value pair strings
ArrayList tempPairs = new ArrayList();
int substr_begin = 0;
int substr_end = features.indexOf(separator,substr_begin);
while (substr_end != -1) {
// if the "pair" is just an empty string, just ignore it.
// See https://github.com/GateNLP/gateplugin-ANNIE/issues/12
if(substr_end-substr_begin>0) {
tempPairs.add(features.substring(substr_begin,substr_end));
}
substr_begin = substr_end + 1;
substr_end = features.indexOf(separator,substr_begin);
}
String lastPair = features.substring(substr_begin);
if (lastPair.length() != 0) {
tempPairs.add(lastPair);
}
String[] pairs = tempPairs.toArray(new String[tempPairs.size()]);
if (pairs.length == 0) {
return null;
}
// extract the name and value from the pair strings and put in feature map
Map featureMap;
if (isOrdered) {
featureMap = new LinkedHashMap<>(pairs.length);
} else {
featureMap = new HashMap<>(pairs.length);
}
for(int i = 0; i < pairs.length; i++) {
String pair = pairs[i];
int sep = pair.indexOf('=');
if(sep == -1) {
System.err.println("Odd pair: >"+pair+"<");
throw new GateRuntimeException("Correct format for gazetteer entry" +
" features is: [entry]([separator][featureName]=[featureValue])*");
} else {
String name = pair.substring(0, sep).trim();
String value = pair.substring(sep + 1).trim();
if(name.length() > 0 && value.length() > 0) {
featureMap.put(name, value);
}
}
}
if (featureMap.size() == 0) {
return null;
}
return featureMap;
}
/**
* Converts a featureMap to separated name value pairs. Note: the string will
* begin with the separator character.
*
* @param featureMap map to be converted
* @return string of name/value pairs
*/
public String featureMapToString(Map featureMap) {
StringBuffer str = new StringBuffer();
if (featureMap instanceof LinkedHashMap) {
for (Map.Entry entry : featureMap.entrySet()) {
if (entry.getKey() != null && !entry.getKey().trim().isEmpty() &&
entry.getValue() != null && !entry.getValue().toString().isEmpty()) {
// only store fields with both key and value
// it makes no sense to store =value and storing key= gets dropped
// on reload so no point writing it out either
str.append(separator).append(entry.getKey()).append("=").append(entry.getValue());
}
}
} else {
// sort into a predictable order
List sortedKeys = new ArrayList(featureMap.keySet());
Collections.sort(sortedKeys);
for(Iterator it = sortedKeys.iterator(); it.hasNext();) {
String key = it.next();
String value = featureMap.get(key).toString().trim();
if (!key.isEmpty() && !value.isEmpty()) {
// only store fields with both key and value
// it makes no sense to store =value and storing key= gets dropped
// on reload so no point writing it out either
str.append(separator).append(key).append("=").append(value);
}
}
}
return str.toString();
}
/**
* Gets the string representation of this node
*
* @return the string representation of this node
*/
@Override
public String toString() {
if(featureMap == null || separator == null)
return entry;
else return entry + featureMapToString(featureMap);
}
/**
* Checks this node vs another one for equality.
*
* @param o another node
* @return true if the string representation of the entry and weighting match.
*/
@Override
public boolean equals(Object o) {
boolean result = false;
if(o instanceof GazetteerNode) {
result = this.toString().equals(o.toString());
}
return result;
}
@Override
public int hashCode() {
return toString().hashCode();
}
/**
* @return the entry
*/
public String getEntry() {
return entry;
}
/**
* @param entry
* the entry to set
*/
public void setEntry(String entry) {
this.entry = entry;
}
/**
* @return the featureMap
*/
public Map getFeatureMap() {
return featureMap;
}
/**
* @param featureMap the featureMap to set
*/
public void setFeatureMap(Map featureMap) {
this.featureMap = featureMap;
}
/**
* @return the separator
*/
public String getSeparator() {
return separator;
}
/**
* @param separator the separator to set
*/
public void setSeparator(String separator) {
this.separator = separator;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy