All downloads are free. Search and download functionality uses the official Maven repository.

simplenlg.morphology.english.MorphologyRules Maven / Gradle / Ivy

/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is "Simplenlg".
 *
 * The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
 * Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
 *
 * Contributor(s): Ehud Reiter, Albert Gatt, Dave Westwater, Roman Kutlak, Margaret Mitchell.
 */
package simplenlg.morphology.english;

import simplenlg.features.DiscourseFunction;
import simplenlg.features.Feature;
import simplenlg.features.Form;
import simplenlg.features.Gender;
import simplenlg.features.InternalFeature;
import simplenlg.features.LexicalFeature;
import simplenlg.features.NumberAgreement;
import simplenlg.features.Inflection;
import simplenlg.features.Person;
import simplenlg.features.Tense;
import simplenlg.framework.InflectedWordElement;
import simplenlg.framework.LexicalCategory;
import simplenlg.framework.NLGElement;
import simplenlg.framework.StringElement;
import simplenlg.framework.WordElement;

/**
 * 

* This abstract class contains a number of rules for doing simple inflection. *

* *

* As a matter of course, the processor will first use any user-defined * inflection for the world. If no inflection is provided then the lexicon, if * it exists, will be examined for the correct inflection. Failing this a set of * very basic rules will be examined to inflect the word. *

* *

* All processing modules perform realisation on a tree of * NLGElements. The modules can alter the tree in whichever way * they wish. For example, the syntax processor replaces phrase elements with * list elements consisting of inflected words while the morphology processor * replaces inflected words with string elements. *

* *

* N.B. the use of module, processing module and * processor is interchangeable. They all mean an instance of this * class. *

* * * @author D. Westwater, University of Aberdeen. * @version 4.0 16-Mar-2011 modified to use correct base form (ER) */ public abstract class MorphologyRules { /** * A triple array of Pronouns organised by singular/plural, * possessive/reflexive/subjective/objective and by gender/person. */ @SuppressWarnings("nls") private static final String[][][] PRONOUNS = { { { "I", "you", "he", "she", "it" }, { "me", "you", "him", "her", "it" }, { "myself", "yourself", "himself", "herself", "itself" }, { "mine", "yours", "his", "hers", "its" }, { "my", "your", "his", "her", "its" } }, { { "we", "you", "they", "they", "they" }, { "us", "you", "them", "them", "them" }, { "ourselves", "yourselves", "themselves", "themselves", "themselves" }, { "ours", "yours", "theirs", "theirs", "theirs" }, { "our", "your", "their", "their", "their" } } }; private static final String[] WH_PRONOUNS = { "who", "what", "which", "where", "why", "how", "how many" }; /** * This method performs the morphology for nouns. * * @param element * the InflectedWordElement. * @param baseWord * the WordElement as created from the lexicon * entry. * @return a StringElement representing the word after * inflection. 
*/ protected static StringElement doNounMorphology( InflectedWordElement element, WordElement baseWord) { StringBuffer realised = new StringBuffer(); // base form from baseWord if it exists, otherwise from element String baseForm = getBaseForm(element, baseWord); if (element.isPlural() && !element.getFeatureAsBoolean(LexicalFeature.PROPER) .booleanValue()) { String pluralForm = null; // AG changed: now check if default infl is uncount // if (element.getFeatureAsBoolean(LexicalFeature.NON_COUNT) // .booleanValue()) { // pluralForm = baseForm; Object elementDefaultInfl = element .getFeature(LexicalFeature.DEFAULT_INFL); if (elementDefaultInfl != null && Inflection.UNCOUNT.equals(elementDefaultInfl)) { pluralForm = baseForm; } else { pluralForm = element.getFeatureAsString(LexicalFeature.PLURAL); } if (pluralForm == null && baseWord != null) { // AG changed: now check if default infl is uncount // if (baseWord.getFeatureAsBoolean(LexicalFeature.NON_COUNT) // .booleanValue()) { // pluralForm = baseForm; String baseDefaultInfl = baseWord .getFeatureAsString(LexicalFeature.DEFAULT_INFL); if (baseDefaultInfl != null && baseDefaultInfl.equals("uncount")) { pluralForm = baseForm; } else { pluralForm = baseWord .getFeatureAsString(LexicalFeature.PLURAL); } } if (pluralForm == null) { Object pattern = element .getFeature(LexicalFeature.DEFAULT_INFL); if (Inflection.GRECO_LATIN_REGULAR.equals(pattern)) { pluralForm = buildGrecoLatinPluralNoun(baseForm); } else { pluralForm = buildRegularPluralNoun(baseForm); } } realised.append(pluralForm); } else { realised.append(baseForm); } checkPossessive(element, realised); StringElement realisedElement = new StringElement(realised.toString()); realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION, element .getFeature(InternalFeature.DISCOURSE_FUNCTION)); return realisedElement; } /** * Builds a plural for regular nouns. The rules are performed in this order: *
    *
  • For nouns ending -Cy, where C is any consonant, the ending * becomes -ies. For example, fly becomes flies.
  • *
  • For nouns ending -ch, -s, -sh, -x * or -z the ending becomes -es. For example, box * becomes boxes.
  • *
  • All other nouns have -s appended the other end. For example, * dog becomes dogs.
  • *
* * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildRegularPluralNoun(String baseForm) { String plural = null; if (baseForm != null) { if (baseForm.matches(".*[b-z&&[^eiou]]y\\b")) { //$NON-NLS-1$ plural = baseForm.replaceAll("y\\b", "ies"); //$NON-NLS-1$ //$NON-NLS-2$ //AG: changed regex from ".*[szx(ch)(sh)]\\b" (tip of the hat to Ian Tabolt) } else if (baseForm.matches(".*([szx]|[cs]h)\\b")) { //$NON-NLS-1$ plural = baseForm + "es"; //$NON-NLS-1$ } else { plural = baseForm + "s"; //$NON-NLS-1$ } } return plural; } /** * Builds a plural for Greco-Latin regular nouns. The rules are performed in * this order: *
    *
  • For nouns ending -us the ending becomes -i. For * example, focus becomes foci.
  • *
  • For nouns ending -ma the ending becomes -mata. For * example, trauma becomes traumata.
  • *
  • For nouns ending -a the ending becomes -ae. For * example, larva becomes larvae.
  • *
  • For nouns ending -um or -on the ending becomes * -a. For example, taxon becomes taxa.
  • *
  • For nouns ending -sis the ending becomes -ses. For * example, analysis becomes analyses.
  • *
  • For nouns ending -is the ending becomes -ides. For * example, cystis becomes cystides.
  • *
  • For nouns ending -men the ending becomes -mina. For * example, foramen becomes foramina.
  • *
  • For nouns ending -ex the ending becomes -ices. For * example, index becomes indices.
  • *
  • For nouns ending -x the ending becomes -ces. For * example, matrix becomes matrices.
  • *
* * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildGrecoLatinPluralNoun(String baseForm) { String plural = null; if (baseForm != null) { if (baseForm.endsWith("us")) { //$NON-NLS-1$ plural = baseForm.replaceAll("us\\b", "i"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("ma")) { //$NON-NLS-1$ plural = baseForm + "ta"; //$NON-NLS-1$ } else if (baseForm.endsWith("a")) { //$NON-NLS-1$ plural = baseForm + "e"; //$NON-NLS-1$ } else if (baseForm.matches(".*[(um)(on)]\\b")) { //$NON-NLS-1$ plural = baseForm.replaceAll("[(um)(on)]\\b", "a"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("sis")) { //$NON-NLS-1$ plural = baseForm.replaceAll("sis\\b", "ses"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("is")) { //$NON-NLS-1$ plural = baseForm.replaceAll("is\\b", "ides"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("men")) { //$NON-NLS-1$ plural = baseForm.replaceAll("men\\b", "mina"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("ex")) { //$NON-NLS-1$ plural = baseForm.replaceAll("ex\\b", "ices"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("x")) { //$NON-NLS-1$ plural = baseForm.replaceAll("x\\b", "ces"); //$NON-NLS-1$ //$NON-NLS-2$ } else { plural = baseForm; } } return plural; } /** * This method performs the morphology for verbs. * * @param element * the InflectedWordElement. * @param baseWord * the WordElement as created from the lexicon * entry. * @return a StringElement representing the word after * inflection. 
*/ protected static NLGElement doVerbMorphology(InflectedWordElement element, WordElement baseWord) { String realised = null; Object numberValue = element.getFeature(Feature.NUMBER); Object personValue = element.getFeature(Feature.PERSON); Object tense = element.getFeature(Feature.TENSE); Tense tenseValue; // AG: change to avoid deprecated getTense // if tense value is Tense, cast it, else default to present if (tense instanceof Tense) { tenseValue = (Tense) tense; } else { tenseValue = Tense.PRESENT; } Object formValue = element.getFeature(Feature.FORM); Object patternValue = element.getFeature(LexicalFeature.DEFAULT_INFL); // base form from baseWord if it exists, otherwise from element String baseForm = getBaseForm(element, baseWord); if (element.getFeatureAsBoolean(Feature.NEGATED) || Form.BARE_INFINITIVE.equals(formValue)) { realised = baseForm; } else if (Form.PRESENT_PARTICIPLE.equals(formValue)) { realised = element .getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE); } if (realised == null) { if (Inflection.REGULAR_DOUBLE.equals(patternValue)) { realised = buildDoublePresPartVerb(baseForm); } else { realised = buildRegularPresPartVerb(baseForm); } } } else if (Tense.PAST.equals(tenseValue) || Form.PAST_PARTICIPLE.equals(formValue)) { if (Form.PAST_PARTICIPLE.equals(formValue)) { realised = element .getFeatureAsString(LexicalFeature.PAST_PARTICIPLE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.PAST_PARTICIPLE); } if (realised == null) { if ("be".equalsIgnoreCase(baseForm)) { //$NON-NLS-1$ realised = "been"; //$NON-NLS-1$ } else if (Inflection.REGULAR_DOUBLE.equals(patternValue)) { realised = buildDoublePastVerb(baseForm); } else { realised = buildRegularPastVerb(baseForm, numberValue, personValue); } } } else { realised = element.getFeatureAsString(LexicalFeature.PAST); if (realised 
== null && baseWord != null) { realised = baseWord.getFeatureAsString(LexicalFeature.PAST); } if (realised == null) { if (Inflection.REGULAR_DOUBLE.equals(patternValue)) { realised = buildDoublePastVerb(baseForm); } else { realised = buildRegularPastVerb(baseForm, numberValue, personValue); } } } } else if ((numberValue == null || NumberAgreement.SINGULAR .equals(numberValue)) && (personValue == null || Person.THIRD.equals(personValue)) && (tenseValue == null || Tense.PRESENT.equals(tenseValue))) { realised = element.getFeatureAsString(LexicalFeature.PRESENT3S); if (realised == null && baseWord != null && !"be".equalsIgnoreCase(baseForm)) { //$NON-NLS-1$ realised = baseWord .getFeatureAsString(LexicalFeature.PRESENT3S); } if (realised == null) { realised = buildPresent3SVerb(baseForm); } } else { if ("be".equalsIgnoreCase(baseForm)) { //$NON-NLS-1$ if (Person.FIRST.equals(personValue) && (NumberAgreement.SINGULAR.equals(numberValue) || numberValue == null)) { realised = "am"; //$NON-NLS-1$ } else { realised = "are"; //$NON-NLS-1$ } } else { realised = baseForm; } } StringElement realisedElement = new StringElement(realised); realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION, element .getFeature(InternalFeature.DISCOURSE_FUNCTION)); return realisedElement; } /** * return the base form of a word * * @param element * @param baseWord * @return */ private static String getBaseForm(InflectedWordElement element, WordElement baseWord) { // unclear what the right behaviour should be // for now, prefer baseWord.getBaseForm() to element.getBaseForm() for // verbs (ie, "is" mapped to "be") // but prefer element.getBaseForm() to baseWord.getBaseForm() for other // words (ie, "children" not mapped to "child") // AG: changed this to get the default spelling variant // needed to preserve spelling changes in the VP if (LexicalCategory.VERB == element.getCategory()) { if (baseWord != null && baseWord.getDefaultSpellingVariant() != null) return 
baseWord.getDefaultSpellingVariant(); else return element.getBaseForm(); } else { if (element.getBaseForm() != null) return element.getBaseForm(); else if (baseWord == null) return null; else return baseWord.getDefaultSpellingVariant(); } // if (LexicalCategory.VERB == element.getCategory()) { // if (baseWord != null && baseWord.getBaseForm() != null) // return baseWord.getBaseForm(); // else // return element.getBaseForm(); // } else { // if (element.getBaseForm() != null) // return element.getBaseForm(); // else if (baseWord == null) // return null; // else // return baseWord.getBaseForm(); // } } /** * Checks to see if the noun is possessive. If it is then nouns in ending in * -s become -s' while every other noun has -'s appended to * the end. * * @param element * the InflectedWordElement * @param realised * the realisation of the word. */ private static void checkPossessive(InflectedWordElement element, StringBuffer realised) { if (element.getFeatureAsBoolean(Feature.POSSESSIVE).booleanValue()) { if (realised.charAt(realised.length() - 1) == 's') { realised.append('\''); } else { realised.append("'s"); //$NON-NLS-1$ } } } /** * Builds the third-person singular form for regular verbs. The rules are * performed in this order: *
    *
  • If the verb is be the realised form is is.
  • *
  • For verbs ending -ch, -s, -sh, -x * or -z the ending becomes -es. For example, * preach becomes preaches.
  • *
  • For verbs ending -y the ending becomes -ies. For * example, fly becomes flies.
  • *
  • For every other verb, -s is added to the end of the word.
  • *
* * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildPresent3SVerb(String baseForm) { String morphology = null; if (baseForm != null) { if (baseForm.equalsIgnoreCase("be")) { //$NON-NLS-1$ morphology = "is"; //$NON-NLS-1$ } else if (baseForm.matches(".*[szx(ch)(sh)]\\b")) { //$NON-NLS-1$ morphology = baseForm + "es"; //$NON-NLS-1$ } else if (baseForm.matches(".*[b-z&&[^eiou]]y\\b")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("y\\b", "ies"); //$NON-NLS-1$ //$NON-NLS-2$ } else { morphology = baseForm + "s"; //$NON-NLS-1$ } } return morphology; } /** * Builds the past-tense form for regular verbs. The rules are performed in * this order: *
    *
  • If the verb is be and the number agreement is plural then * the realised form is were.
  • *
  • If the verb is be and the number agreement is singular then * the realised form is was, unless the person is second, in which * case it's were.
  • *
  • For verbs ending -e the ending becomes -ed. For * example, chased becomes chased.
  • *
  • For verbs ending -Cy, where C is any consonant, the ending * becomes -ied. For example, dry becomes dried.
  • *
  • For every other verb, -ed is added to the end of the word.
  • *
* * @param baseForm * the base form of the word. * @param number * the number agreement for the word. * @param person * the person * @return the inflected word. */ private static String buildRegularPastVerb(String baseForm, Object number, Object person) { String morphology = null; if (baseForm != null) { if (baseForm.equalsIgnoreCase("be")) { //$NON-NLS-1$ if (NumberAgreement.PLURAL.equals(number)) { morphology = "were"; //$NON-NLS-1$ // AG - bug fix to handle second person past (courtesy of // Minh Le) } else if (Person.SECOND.equals(person)) { morphology = "were"; //$NON-NLS-1$ } else { morphology = "was"; } } else if (baseForm.endsWith("e")) { //$NON-NLS-1$ morphology = baseForm + "d"; //$NON-NLS-1$ } else if (baseForm.matches(".*[b-z&&[^eiou]]y\\b")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("y\\b", "ied"); //$NON-NLS-1$ //$NON-NLS-2$ } else { morphology = baseForm + "ed"; //$NON-NLS-1$ } } return morphology; } /** * Builds the past-tense form for verbs that follow the doubling form of the * last consonant. -ed is added to the end after the last consonant * is doubled. For example, tug becomes tugged. * * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildDoublePastVerb(String baseForm) { String morphology = null; if (baseForm != null) { morphology = baseForm + baseForm.charAt(baseForm.length() - 1) + "ed"; //$NON-NLS-1$ } return morphology; } /** * Builds the present participle form for regular verbs. The rules are * performed in this order: *
    *
  • If the verb is be then the realised form is being.
  • *
  • For verbs ending -ie the ending becomes -ying. For * example, tie becomes tying.
  • *
  • For verbs ending -ee, -oe or -ye then * -ing is added to the end. For example, canoe becomes * canoeing.
  • *
  • For other verbs ending in -e the ending becomes * -ing. For example, chase becomes chasing.
  • *
  • For all other verbs, -ing is added to the end. For example, * dry becomes drying.
  • *
* * @param baseForm * the base form of the word. * @param number * the number agreement for the word. * @return the inflected word. */ private static String buildRegularPresPartVerb(String baseForm) { String morphology = null; if (baseForm != null) { if (baseForm.equalsIgnoreCase("be")) { //$NON-NLS-1$ morphology = "being"; //$NON-NLS-1$ } else if (baseForm.endsWith("ie")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("ie\\b", "ying"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.matches(".*[^iyeo]e\\b")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("e\\b", "ing"); //$NON-NLS-1$ //$NON-NLS-2$ } else { morphology = baseForm + "ing"; //$NON-NLS-1$ } } return morphology; } /** * Builds the present participle form for verbs that follow the doubling * form of the last consonant. -ing is added to the end after the * last consonant is doubled. For example, tug becomes * tugging. * * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildDoublePresPartVerb(String baseForm) { String morphology = null; if (baseForm != null) { morphology = baseForm + baseForm.charAt(baseForm.length() - 1) + "ing"; //$NON-NLS-1$ } return morphology; } /** * This method performs the morphology for adjectives. * * @param element * the InflectedWordElement. * @param baseWord * the WordElement as created from the lexicon * entry. * @return a StringElement representing the word after * inflection. 
*/ public static NLGElement doAdjectiveMorphology( InflectedWordElement element, WordElement baseWord) { String realised = null; Object patternValue = element.getFeature(LexicalFeature.DEFAULT_INFL); // base form from baseWord if it exists, otherwise from element String baseForm = getBaseForm(element, baseWord); if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE).booleanValue()) { realised = element.getFeatureAsString(LexicalFeature.COMPARATIVE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.COMPARATIVE); } if (realised == null) { if (Inflection.REGULAR_DOUBLE.equals(patternValue)) { realised = buildDoubleCompAdjective(baseForm); } else { realised = buildRegularComparative(baseForm); } } } else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE) .booleanValue()) { realised = element.getFeatureAsString(LexicalFeature.SUPERLATIVE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.SUPERLATIVE); } if (realised == null) { if (Inflection.REGULAR_DOUBLE.equals(patternValue)) { realised = buildDoubleSuperAdjective(baseForm); } else { realised = buildRegularSuperlative(baseForm); } } } else { realised = baseForm; } StringElement realisedElement = new StringElement(realised); realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION, element .getFeature(InternalFeature.DISCOURSE_FUNCTION)); return realisedElement; } /** * Builds the comparative form for adjectives that follow the doubling form * of the last consonant. -er is added to the end after the last * consonant is doubled. For example, fat becomes fatter. * * @param baseForm * the base form of the word. * @return the inflected word. 
*/ private static String buildDoubleCompAdjective(String baseForm) { String morphology = null; if (baseForm != null) { morphology = baseForm + baseForm.charAt(baseForm.length() - 1) + "er"; //$NON-NLS-1$ } return morphology; } /** * Builds the comparative form for regular adjectives. The rules are * performed in this order: *
    *
  • For adjectives ending -Cy, where C is any consonant, the * ending becomes -ier. For example, brainy becomes * brainier.
  • *
  • For adjectives ending -e the ending becomes -er. * For example, fine becomes finer.
  • *
  • For all other adjectives, -er is added to the end. For * example, clear becomes clearer.
  • *
* * @param baseForm * the base form of the word. * @param number * the number agreement for the word. * @return the inflected word. */ private static String buildRegularComparative(String baseForm) { String morphology = null; if (baseForm != null) { if (baseForm.matches(".*[b-z&&[^eiou]]y\\b")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("y\\b", "ier"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("e")) { //$NON-NLS-1$ morphology = baseForm + "r"; //$NON-NLS-1$ } else { morphology = baseForm + "er"; //$NON-NLS-1$ } } return morphology; } /** * Builds the superlative form for adjectives that follow the doubling form * of the last consonant. -est is added to the end after the last * consonant is doubled. For example, fat becomes fattest. * * @param baseForm * the base form of the word. * @return the inflected word. */ private static String buildDoubleSuperAdjective(String baseForm) { String morphology = null; if (baseForm != null) { morphology = baseForm + baseForm.charAt(baseForm.length() - 1) + "est"; //$NON-NLS-1$ } return morphology; } /** * Builds the superlative form for regular adjectives. The rules are * performed in this order: *
    *
  • For verbs ending -Cy, where C is any consonant, the ending * becomes -iest. For example, brainy becomes * brainiest.
  • *
  • For verbs ending -e the ending becomes -est. For * example, fine becomes finest.
  • *
  • For all other verbs, -est is added to the end. For example, * clear becomes clearest.
  • *
* * @param baseForm * the base form of the word. * @param number * the number agreement for the word. * @return the inflected word. */ private static String buildRegularSuperlative(String baseForm) { String morphology = null; if (baseForm != null) { if (baseForm.matches(".*[b-z&&[^eiou]]y\\b")) { //$NON-NLS-1$ morphology = baseForm.replaceAll("y\\b", "iest"); //$NON-NLS-1$ //$NON-NLS-2$ } else if (baseForm.endsWith("e")) { //$NON-NLS-1$ morphology = baseForm + "st"; //$NON-NLS-1$ } else { morphology = baseForm + "est"; //$NON-NLS-1$ } } return morphology; } /** * This method performs the morphology for adverbs. * * @param element * the InflectedWordElement. * @param baseWord * the WordElement as created from the lexicon * entry. * @return a StringElement representing the word after * inflection. */ public static NLGElement doAdverbMorphology(InflectedWordElement element, WordElement baseWord) { String realised = null; // base form from baseWord if it exists, otherwise from element String baseForm = getBaseForm(element, baseWord); if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE).booleanValue()) { realised = element.getFeatureAsString(LexicalFeature.COMPARATIVE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.COMPARATIVE); } if (realised == null) { realised = buildRegularComparative(baseForm); } } else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE) .booleanValue()) { realised = element.getFeatureAsString(LexicalFeature.SUPERLATIVE); if (realised == null && baseWord != null) { realised = baseWord .getFeatureAsString(LexicalFeature.SUPERLATIVE); } if (realised == null) { realised = buildRegularSuperlative(baseForm); } } else { realised = baseForm; } StringElement realisedElement = new StringElement(realised); realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION, element .getFeature(InternalFeature.DISCOURSE_FUNCTION)); return realisedElement; } /** * This method performs the morphology for 
pronouns. * * @param element * the InflectedWordElement. * @return a StringElement representing the word after * inflection. */ public static NLGElement doPronounMorphology(InflectedWordElement element) { String realised = null; if (!element.getFeatureAsBoolean(InternalFeature.NON_MORPH) .booleanValue() && !isWHPronoun(element)) { Object genderValue = element.getFeature(LexicalFeature.GENDER); Object personValue = element.getFeature(Feature.PERSON); Object discourseValue = element .getFeature(InternalFeature.DISCOURSE_FUNCTION); int numberIndex = element.isPlural() ? 1 : 0; int genderIndex = (genderValue instanceof Gender) ? ((Gender) genderValue) .ordinal() : 2; int personIndex = (personValue instanceof Person) ? ((Person) personValue) .ordinal() : 2; if (personIndex == 2) { personIndex += genderIndex; } int positionIndex = 0; if (element.getFeatureAsBoolean(LexicalFeature.REFLEXIVE) .booleanValue()) { positionIndex = 2; } else if (element.getFeatureAsBoolean(Feature.POSSESSIVE) .booleanValue()) { positionIndex = 3; if (DiscourseFunction.SPECIFIER.equals(discourseValue)) { positionIndex++; } } else { positionIndex = (DiscourseFunction.SUBJECT .equals(discourseValue) && !element .getFeatureAsBoolean(Feature.PASSIVE).booleanValue()) || (DiscourseFunction.OBJECT.equals(discourseValue) && element .getFeatureAsBoolean(Feature.PASSIVE) .booleanValue()) || DiscourseFunction.SPECIFIER.equals(discourseValue) || (DiscourseFunction.COMPLEMENT.equals(discourseValue) && element .getFeatureAsBoolean(Feature.PASSIVE) .booleanValue()) ? 
0 : 1; } realised = PRONOUNS[numberIndex][positionIndex][personIndex]; } else { realised = element.getBaseForm(); } StringElement realisedElement = new StringElement(realised); realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION, element .getFeature(InternalFeature.DISCOURSE_FUNCTION)); return realisedElement; } private static boolean isWHPronoun(InflectedWordElement word) { String base = word.getBaseForm(); boolean wh = false; if (base != null) { for (int i = 0; i < WH_PRONOUNS.length && !wh; i++) { wh = WH_PRONOUNS[i].equals(base); } } return wh; } /** * This method performs the morphology for determiners. * * @param determiner * the InflectedWordElement. * @param realisation * the current realisation of the determiner. */ public static void doDeterminerMorphology(NLGElement determiner, String realisation) { if (realisation != null) { if (determiner.getRealisation().equals("a")) { //$NON-NLS-1$ if (determiner.isPlural()) { determiner.setRealisation("some"); }else if (DeterminerAgrHelper.requiresAn(realisation)) { determiner.setRealisation("an"); } } // if(determiner.getRealisation().equals("an") && !DeterminerAgrHelper.requiresAn(realisation)){ // // determiner.setRealisation("a"); // // } // } else if (realisation.matches(MorphologyRules.AN_AGREEMENT) // || realisation // .matches(MorphologyRules.AN_NUMERAL_AGREEMENT)) { // if (!isAnException(realisation)) { // determiner.setRealisation("an"); // } // } //} } } // /** // * check whether a string beginning with a vowel is an exception and // doesn't // * take "an" (e.g. "a one percent change") // * // * @return // */ // private static boolean isAnException(String string) { // for (String ex : MorphologyRules.AN_EXCEPTIONS) { // if (string.matches("^" + ex + ".*")) { // // if (string.equalsIgnoreCase(ex)) { // return true; // } // } // // return false; // } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy