// simplenlg.framework.WordElement (Maven / Gradle / Ivy artifact listing)
// Page navigation text carried over from the listing: "Show all versions of SimpleNLG", "Show documentation"
/*
* The contents of this file are subject to the Mozilla Public License
* Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* https://www.mozilla.org/en-US/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
* License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is "Simplenlg".
*
* The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
* Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
*
* Contributor(s): Ehud Reiter, Albert Gatt, Dave Westwater, Roman Kutlak, Margaret Mitchell, and Saad Mahamood.
*/
package simplenlg.framework;
import java.util.*;
import simplenlg.features.Inflection;
import simplenlg.features.LexicalFeature;
/**
* This is the class for a lexical entry (ie, a word). Words are stored in a
* {@link simplenlg.lexicon.Lexicon}, and usually the developer retrieves a
* WordElement via a lookup method in the lexicon
*
* Words always have a base form, and usually have a
* {@link simplenlg.framework.LexicalCategory}. They may also have a Lexicon ID.
*
* Words also have features (which are retrieved from the Lexicon), these are
* held in the standard NLGElement feature map
*
* @author E. Reiter, University of Aberdeen.
* @version 4.0
*/
public class WordElement extends NLGElement {
/*
* Internal class. This maintains inflectional variants of the word, which
* may be available in the lexicon. For example, a word may have both a
* regular and an irregular variant. If the desired type is the irregular,
* it is convenient to have the set of irregular inflected forms available
* without necessitating a new call to the lexicon to get the forms.
*/
private class InflectionSet {
// the infl type
@SuppressWarnings("unused")
Inflection infl;
// the forms, mapping values of LexicalFeature to actual word forms
Map forms;
InflectionSet(Inflection infl) {
this.infl = infl;
this.forms = new HashMap();
}
/*
* set an inflectional form
*
* @param feature
*
* @param form
*/
void addForm(String feature, String form) {
this.forms.put(feature, form);
}
/*
* get an inflectional form
*/
String getForm(String feature) {
return this.forms.get(feature);
}
}
// Words have baseForm, category, id, and features
// features are inherited from NLGElement
String baseForm; // base form, eg "dog". currently also in NLG Element, but
// will be removed from there
String id; // id in lexicon (may be null);
Map inflVars; // the inflectional variants
Inflection defaultInfl; // the default inflectional variant
// LexicalCategory category; // type of word
/**********************************************************/
// constructors
/**********************************************************/
/**
* empty constructor
*/
public WordElement() {
this(null, LexicalCategory.ANY, null);
}
/**
* create a WordElement with the specified baseForm (no category or ID
* specified)
*
* @param baseForm - base form of WordElement
*/
public WordElement(String baseForm) {
this(baseForm, LexicalCategory.ANY, null);
}
/**
* create a WordElement with the specified baseForm and category
*
* @param baseForm - base form of WordElement
* @param category - category of WordElement
*/
public WordElement(String baseForm, LexicalCategory category) {
this(baseForm, category, null);
}
/**
* create a WordElement with the specified baseForm, category, ID
*
* @param baseForm - base form of WordElement
* @param category - category of WordElement
* @param id - ID of word in lexicon
*/
public WordElement(String baseForm, LexicalCategory category, String id) {
super();
this.baseForm = baseForm;
setCategory(category);
this.id = id;
this.inflVars = new HashMap();
}
/**
* creates a duplicate WordElement from an existing WordElement
*
* @param currentWord - An existing WordElement
*/
public WordElement(WordElement currentWord) {
super();
this.baseForm = currentWord.getBaseForm();
setCategory(currentWord.getCategory());
this.id = currentWord.getId();
this.inflVars = currentWord.getInflectionalVariants();
this.defaultInfl = (Inflection) currentWord.getDefaultInflectionalVariant();
setFeatures(currentWord);
}
/**********************************************************/
// getters and setters
/**********************************************************/
/**
* @return the baseForm
*/
public String getBaseForm() {
return this.baseForm;
}
/**
* @return the id
*/
public String getId() {
return this.id;
}
/**
* @param baseForm the baseForm to set
*/
public void setBaseForm(String baseForm) {
this.baseForm = baseForm;
}
/**
* @param id the id to set
*/
public void setId(String id) {
this.id = id;
}
/**
* Set the default inflectional variant of a word. This is mostly relevant
* if the word has more than one possible inflectional variant (for example,
* it can be inflected in both a regular and irregular way).
*
*
* If the default inflectional variant is set, the inflectional forms of the
* word may change as a result. This depends on whether inflectional forms
* have been specifically associated with this variant, via
* {@link #addInflectionalVariant(Inflection, String, String)}.
*
*
* The NIHDBLexicon
associates different inflectional variants
* with words, if they are so specified, and adds the correct forms.
*
* @param variant The variant
*/
public void setDefaultInflectionalVariant(Inflection variant) {
setFeature(LexicalFeature.DEFAULT_INFL, variant);
this.defaultInfl = variant;
if(this.inflVars.containsKey(variant)) {
InflectionSet set = inflVars.get(variant);
String[] forms = LexicalFeature.getInflectionalFeatures(this.getCategory());
if(forms != null) {
for(String f : forms) {
setFeature(f, set.getForm(f));
}
}
}
}
/**
* @return the default inflectional variant
*/
public Object getDefaultInflectionalVariant() {
// return getFeature(LexicalFeature.DEFAULT_INFL);
return this.defaultInfl;
}
/**
* Convenience method to get all the inflectional forms of the word.
*
* @return the HashMap of inflectional variants
*/
public Map getInflectionalVariants() {
return this.inflVars;
}
/**
* Convenience method to set the default spelling variant of a word.
* Equivalent to
* setFeature(LexicalFeature.DEFAULT_SPELL, variant)
.
*
*
* By default, the spelling variant used is the base form. If otherwise set,
* this forces the realiser to always use the spelling variant specified.
*
* @param variant The spelling variant
*/
public void setDefaultSpellingVariant(String variant) {
setFeature(LexicalFeature.DEFAULT_SPELL, variant);
}
/**
* Convenience method, equivalent to
* getFeatureAsString(LexicalFeature.DEFAULT_SPELL)
. If this
* feature is not set, the baseform is returned.
*
* @return the default inflectional variant
*/
public String getDefaultSpellingVariant() {
String defSpell = getFeatureAsString(LexicalFeature.DEFAULT_SPELL);
return defSpell == null ? this.getBaseForm() : defSpell;
}
/**
* Add an inflectional variant to this word element. This method is intended
* for use by a Lexicon
. The idea is that words which have more
* than one inflectional variant (for example, a regular and an irregular
* form of the past tense), can have a default variant (for example, the
* regular), but also store information about the other variants. This comes
* in useful in case the default inflectional variant is reset to a new one.
* In that case, the stored forms for the new variant are used to inflect
* the word.
*
*
* An example: The verb lie has both a regular form
* (lies, lied, lying) and an irregular form (lay, lain, etc).
* Assume that the Lexicon
provides this information and treats
* this as variant information of the same word (as does the
* NIHDBLexicon
, for example). Typically, the default
* inflectional variant is the Inflection.REGULAR
. This means
* that morphology proceeds to inflect the verb as lies, lying and so
* on. If the default inflectional variant is reset to
* Inflection.IRREGULAR
, the stored irregular forms will be
* used instead.
*
* @param infl the Inflection pattern with which this form is associated
* @param lexicalFeature the actual inflectional feature being set, for example
* LexicalFeature.PRESENT_3S
* @param form the actual inflected word form
*/
public void addInflectionalVariant(Inflection infl, String lexicalFeature, String form) {
if(this.inflVars.containsKey(infl)) {
this.inflVars.get(infl).addForm(lexicalFeature, form);
} else {
InflectionSet set = new InflectionSet(infl);
set.addForm(lexicalFeature, form);
this.inflVars.put(infl, set);
}
}
/**
* Specify that this word has an inflectional variant (e.g. irregular)
*
* @param infl the variant
*/
public void addInflectionalVariant(Inflection infl) {
this.inflVars.put(infl, new InflectionSet(infl));
}
/**
* Check whether this word has a particular inflectional variant
*
* @param infl the variant
* @return true
if this word has the variant
*/
public boolean hasInflectionalVariant(Inflection infl) {
return this.inflVars.containsKey(infl);
}
/**
* Sets Features from another existing WordElement into this WordElement.
*
* @param currentWord the WordElement to copy features from
*/
public void setFeatures(WordElement currentWord) {
if(null != currentWord && null != currentWord.getAllFeatures()) {
for(String feature : currentWord.getAllFeatureNames()) {
this.setFeature(feature, currentWord.getFeature(feature));
}
}
}
/**********************************************************/
// other methods
/**********************************************************/
@Override
public String toString() {
ElementCategory _category = getCategory();
StringBuffer buffer = new StringBuffer("WordElement["); //$NON-NLS-1$
buffer.append(getBaseForm()).append(':');
if(_category != null) {
buffer.append(_category.toString());
} else {
buffer.append("no category"); //$NON-NLS-1$
}
buffer.append(']');
return buffer.toString();
}
public String toXML() {
String xml = String.format("%n"); //$NON-NLS-1$
if(getBaseForm() != null)
xml = xml + String.format(" %s%n", getBaseForm()); //$NON-NLS-1$
if(getCategory() != LexicalCategory.ANY)
xml = xml + String.format(" %s %n", //$NON-NLS-1$
getCategory().toString().toLowerCase());
if(getId() != null)
xml = xml + String.format(" %s %n", getId()); //$NON-NLS-1$
SortedSet featureNames = new TreeSet(getAllFeatureNames()); // list features in alpha order
for(String feature : featureNames) {
Object value = getFeature(feature);
if(value != null) { // ignore null features
if(value instanceof Boolean) { // booleans ignored if false,
// shown as if true
boolean bvalue = ((Boolean) value).booleanValue();
if(bvalue)
xml = xml + String.format(" <%s/>%n", feature); //$NON-NLS-1$
} else { // otherwise include feature and value
xml = xml + String.format(" <%s>%s%s>%n", feature, value //$NON-NLS-1$
.toString(), feature);
}
}
}
xml = xml + String.format(" %n"); //$NON-NLS-1$
return xml;
}
/**
* This method returns an empty List
as words do not have child
* elements.
*/
@Override
public List getChildren() {
return new ArrayList();
}
@Override
public String printTree(String indent) {
StringBuffer print = new StringBuffer();
print.append("WordElement: base=").append(getBaseForm()) //$NON-NLS-1$
.append(", category=").append(getCategory().toString()) //$NON-NLS-1$
.append(", ").append(super.toString()).append('\n'); //$NON-NLS-1$
return print.toString();
}
/**
* Check if this WordElement is equal to an object.
*
* @param o the object
* @return true
iff the object is a word element with the same
* id and the same baseform and the same features.
*/
@Override
public boolean equals(Object o) {
if(o instanceof WordElement) {
WordElement we = (WordElement) o;
return (this.baseForm == we.baseForm || this.baseForm.equals(we.baseForm)) && (this.id == we.id
|| this.id.equals(we.id))
&& we.features.equals(this.features);
}
return false;
}
}