All Downloads are FREE. Search and download functionalities are using the official Maven repository.

simplenlg.morphology.english.DeterminerAgrHelper Maven / Gradle / Ivy

The newest version!
/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * https://www.mozilla.org/en-US/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is "Simplenlg".
 *
 * The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
 * Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
 *
 * Contributor(s): Ehud Reiter, Albert Gatt, Dave Westwater, Roman Kutlak, Margaret Mitchell, and Saad Mahamood.
 */
package simplenlg.morphology.english;

/**
 * This class is used to parse numbers that are passed as figures, to determine
 * whether they should take "a" or "an" as determiner.
 *
 * @author bertugatt
 */
public class DeterminerAgrHelper {

	/*
	 * An array of strings which are exceptions to the rule that "an" comes
	 * before vowels
	 */
	private static final String[] AN_EXCEPTIONS = {"one", "180", "110"};

	/*
	 * Start of string involving vowels, for use of "an"
	 */
	private static final String AN_AGREEMENT = "\\A(a|e|i|o|u).*";

	/*
	 * Start of string involving numbers, for use of "an" -- courtesy of Chris
	 * Howell, Agfa healthcare corporation
	 */
	// private static final String AN_NUMERAL_AGREEMENT =
	// "^(((8((\\d+)|(\\d+(\\.|,)\\d+))?).*)|((11|18)(\\d{3,}|\\D)).*)$";

	/**
	 * Check whether this string starts with a number that needs "an" (e.g.
	 * "an 18% increase")
	 *
	 * @param string the string
	 * @return true if this string starts with 11, 18, or 8,
	 * 		excluding strings that start with 180 or 110
	 */
	public static boolean requiresAn(String string) {
		boolean req = false;

		String lowercaseInput = string.toLowerCase();

		if(lowercaseInput.matches(AN_AGREEMENT) && !isAnException(lowercaseInput)) {
			req = true;

		} else {
			String numPref = getNumericPrefix(lowercaseInput);

			if(numPref != null && numPref.length() > 0 && numPref.matches("^(8|11|18).*$")) {
				Integer num = Integer.parseInt(numPref);
				req = checkNum(num);
			}
		}

		return req;
	}

	/*
	 * check whether a string beginning with a vowel is an exception and doesn't
	 * take "an" (e.g. "a one percent change")
	 *
	 * @return
	 */
	private static boolean isAnException(String string) {
		for(String ex : AN_EXCEPTIONS) {
			if(string.matches("^" + ex + ".*")) {
				// if (string.equalsIgnoreCase(ex)) {
				return true;
			}
		}

		return false;
	}

	/*
	 * Returns true if the number starts with 8, 11 or 18 and is
	 * either less than 100 or greater than 1000, but excluding 180,000 etc.
	 */
	private static boolean checkNum(int num) {
		boolean needsAn = false;

		// eight, eleven, eighty and eighteen
		if(num == 11 || num == 18 || num == 8 || (num >= 80 && num < 90)) {
			needsAn = true;

		} else if(num > 1000) {
			num = Math.round(num / 1000);
			needsAn = checkNum(num);
		}

		return needsAn;
	}

	/*
	 * Retrieve the numeral prefix of a string.
	 */
	private static String getNumericPrefix(String string) {
		StringBuffer numeric = new StringBuffer();

		if(string != null) {
			string = string.trim();

			if(string.length() > 0) {

				StringBuffer buffer = new StringBuffer(string);
				char first = buffer.charAt(0);

				if(Character.isDigit(first)) {
					numeric.append(first);

					for(int i = 1; i < buffer.length(); i++) {
						Character next = buffer.charAt(i);

						if(Character.isDigit(next)) {
							numeric.append(next);

							// skip commas within numbers
						} else if(next.equals(',')) {
							continue;

						} else {
							break;
						}
					}
				}
			}
		}

		return numeric.length() == 0 ? null : numeric.toString();
	}

	/**
	 * Check to see if a string ends with the indefinite article "a" and it agrees with {@code np}.
	 *
	 * @return an altered version of {@code text} to use "an" if it agrees with {@code np}, the original string otherwise.
	 */
	static String checkEndsWithIndefiniteArticle(String text, String np) {

		String[] tokens = text.split(" ");

		String lastToken = tokens[tokens.length - 1];

		if(lastToken.equalsIgnoreCase("a") && DeterminerAgrHelper.requiresAn(np)) {

			tokens[tokens.length - 1] = "an";

			return stringArrayToString(tokens);

		}

		return text;

	}

	// Turns ["a","b","c"] into "a b c"
	private static String stringArrayToString(String[] sArray) {

		StringBuilder buf = new StringBuilder();

		for(int i = 0; i < sArray.length; i++) {

			buf.append(sArray[i]);

			if(i != sArray.length - 1) {

				buf.append(" ");

			}

		}

		return buf.toString();

	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy