org.usergrid.utils.Inflector Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of usergrid-core Show documentation
Core services for Usergrid system.
There is a newer version: 0.0.27.1
/*
 * ModeShape (http://www.modeshape.org)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of 
 * individual contributors. 
 *
 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * ModeShape is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

package org.usergrid.utils;

import java.util.HashSet;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Transforms words to singular, plural, humanized (human readable), underscore,
 * camel case, or ordinal form. This is inspired by the Inflector class in Ruby
 * on Rails, which is distributed under the Rails license.
 * <p>
 * Each instance owns its own mutable lists of pluralization/singularization
 * rules and its own set of uncountable words. No synchronization is performed
 * by this class, so callers that modify the rules of a shared instance (such
 * as the one returned by {@link #getInstance()}) must coordinate access
 * themselves.
 * <p>
 * All case conversions use {@link Locale#ROOT} so that results do not depend
 * on the default locale of the JVM.
 */
public class Inflector {

	// Constant patterns are compiled once. They are declared before INSTANCE so
	// that they are already initialized when the shared instance is built.
	private static final Pattern ACRONYM_BOUNDARY = Pattern
			.compile("([A-Z]+)([A-Z][a-z])");
	private static final Pattern CAMEL_BOUNDARY = Pattern
			.compile("([a-z\\d])([A-Z])");
	private static final Pattern TRAILING_ID = Pattern.compile("_id$");
	private static final Pattern UNDERSCORE_RUN = Pattern.compile("_+");
	private static final Pattern CAMEL_WORD_START = Pattern
			.compile("(^|_)(.)");
	private static final Pattern TITLE_WORD_START = Pattern
			.compile("\\b([a-z])");

	protected static final Inflector INSTANCE = new Inflector();

	/**
	 * Returns the shared, pre-initialized inflector.
	 * 
	 * @return the shared instance; never null
	 */
	public static final Inflector getInstance() {
		return INSTANCE;
	}

	/**
	 * A single case-insensitive regular-expression substitution. The class is
	 * static because a rule never needs its enclosing inflector; this lets
	 * rules be shared between an inflector and its {@link Inflector#clone()
	 * clones} without retaining the original instance.
	 */
	protected static class Rule {

		protected final String expression;
		protected final Pattern expressionPattern;
		protected final String replacement;

		/**
		 * @param expression
		 *            the regular expression, compiled case-insensitively
		 * @param replacement
		 *            the replacement string (may contain group references such
		 *            as <code>$1</code>); null is treated as the empty string
		 */
		protected Rule(String expression, String replacement) {
			this.expression = expression;
			this.replacement = replacement != null ? replacement : "";
			expressionPattern = Pattern.compile(this.expression,
					Pattern.CASE_INSENSITIVE);
		}

		/**
		 * Apply the rule against the input string, returning the modified
		 * string or null if the rule didn't apply (and no modifications were
		 * made)
		 * 
		 * @param input
		 *            the input string
		 * @return the modified string if this rule applied, or null if the
		 *         input was not modified by this rule
		 */
		protected String apply(String input) {
			Matcher matcher = expressionPattern.matcher(input);
			if (!matcher.find()) {
				return null;
			}
			return matcher.replaceAll(replacement);
		}

		/**
		 * Hashes the case-folded expression so that the result agrees with the
		 * case-insensitive {@link #equals(Object)}.
		 */
		@Override
		public int hashCode() {
			int hash = 0;
			for (int i = 0; i < expression.length(); i++) {
				// Same per-character folding that String.equalsIgnoreCase uses
				char folded = Character.toLowerCase(Character
						.toUpperCase(expression.charAt(i)));
				hash = (31 * hash) + folded;
			}
			return hash;
		}

		/**
		 * Two rules are equal when they are of the same class and their
		 * expressions match ignoring case; the replacement is not compared.
		 */
		@Override
		public boolean equals(Object obj) {
			if (obj == this) {
				return true;
			}
			if ((obj != null) && (obj.getClass() == this.getClass())) {
				final Rule that = (Rule) obj;
				if (expression.equalsIgnoreCase(that.expression)) {
					return true;
				}
			}
			return false;
		}

		@Override
		public String toString() {
			return expression + ", " + replacement;
		}
	}

	/** Pluralization rules; the first rule in the list has highest priority. */
	private final LinkedList<Rule> plurals = new LinkedList<Rule>();
	/** Singularization rules; the first rule in the list has highest priority. */
	private final LinkedList<Rule> singulars = new LinkedList<Rule>();
	/**
	 * The lowercase words that are to be excluded and not processed. This set
	 * can be modified by the users via {@link #getUncountables()}.
	 */
	private final Set<String> uncountables = new HashSet<String>();

	/**
	 * Creates an inflector populated with the default English rules (see
	 * {@link #initialize()}).
	 */
	public Inflector() {
		initialize();
	}

	/**
	 * Creates an inflector with a copy of the rule lists and uncountable words
	 * of the supplied inflector.
	 * 
	 * @param original
	 *            the inflector to copy; may not be null
	 */
	protected Inflector(Inflector original) {
		plurals.addAll(original.plurals);
		singulars.addAll(original.singulars);
		uncountables.addAll(original.uncountables);
	}

	/**
	 * Returns a new inflector with the same rules and uncountable words.
	 * Subsequent changes to either inflector do not affect the other.
	 */
	@Override
	public Inflector clone() {
		return new Inflector(this);
	}

	// ------------------------------------------------------------------------------------------------
	// Usage functions
	// ------------------------------------------------------------------------------------------------

	/**
	 * Returns the plural form of the word in the string.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.pluralize("post")               #=> "posts"
	 *   inflector.pluralize("octopus")            #=> "octopi"
	 *   inflector.pluralize("sheep")              #=> "sheep"
	 *   inflector.pluralize("words")              #=> "words"
	 *   inflector.pluralize("the blue mailman")   #=> "the blue mailmen"
	 *   inflector.pluralize("CamelOctopus")       #=> "CamelOctopi"
	 * </pre>
	 * 
	 * Note that {@link Object#toString()} is called on the supplied object, so
	 * this method works for non-strings, too. The result is trimmed of leading
	 * and trailing whitespace.
	 * 
	 * @param word
	 *            the word that is to be pluralized.
	 * @return the pluralized form of the word, or the word itself if it could
	 *         not be pluralized; null if the supplied word is null
	 * @see #singularize(Object)
	 */
	public String pluralize(Object word) {
		return inflect(word, plurals);
	}

	/**
	 * Returns the form of the word appropriate for the given count: the word
	 * as supplied (via {@link Object#toString()}) when the count is 1 or -1,
	 * and the {@link #pluralize(Object) plural} otherwise.
	 * 
	 * @param word
	 *            the word that is to be pluralized
	 * @param count
	 *            the number of things the word describes
	 * @return the word or its plural; null if the supplied word is null
	 */
	public String pluralize(Object word, int count) {
		if (word == null) {
			return null;
		}
		if ((count == 1) || (count == -1)) {
			return word.toString();
		}
		return pluralize(word);
	}

	/**
	 * Determines whether the word is already in plural form, i.e. whether
	 * pluralizing it leaves it unchanged. Leading and trailing whitespace is
	 * ignored.
	 * 
	 * @param word
	 *            the word to test
	 * @return true if pluralizing the word does not change it; false if it
	 *         does or if the word is null
	 */
	public boolean isPlural(Object word) {
		if (word == null) {
			return false;
		}
		// pluralize() trims its input, so compare against the trimmed form
		String wordStr = word.toString().trim();
		return pluralize(wordStr).equals(wordStr);
	}

	/**
	 * Returns the singular form of the word in the string.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.singularize("posts")             #=> "post"
	 *   inflector.singularize("octopi")            #=> "octopus"
	 *   inflector.singularize("sheep")             #=> "sheep"
	 *   inflector.singularize("words")             #=> "word"
	 *   inflector.singularize("the blue mailmen")  #=> "the blue mailman"
	 *   inflector.singularize("CamelOctopi")       #=> "CamelOctopus"
	 * </pre>
	 * 
	 * Note that {@link Object#toString()} is called on the supplied object, so
	 * this method works for non-strings, too. The result is trimmed of leading
	 * and trailing whitespace.
	 * 
	 * @param word
	 *            the word that is to be singularized.
	 * @return the singularized form of the word, or the word itself if it
	 *         could not be singularized; null if the supplied word is null
	 * @see #pluralize(Object)
	 */
	public String singularize(Object word) {
		return inflect(word, singulars);
	}

	/**
	 * Determines whether the word is already in singular form, i.e. whether
	 * singularizing it leaves it unchanged. Leading and trailing whitespace is
	 * ignored.
	 * 
	 * @param word
	 *            the word to test
	 * @return true if singularizing the word does not change it; false if it
	 *         does or if the word is null
	 */
	public boolean isSingular(Object word) {
		if (word == null) {
			return false;
		}
		// singularize() trims its input, so compare against the trimmed form
		String wordStr = word.toString().trim();
		return singularize(wordStr).equals(wordStr);
	}

	/**
	 * Shared implementation of {@link #pluralize(Object)} and
	 * {@link #singularize(Object)}: returns the result of the first rule that
	 * matches the trimmed word, or the trimmed word itself if it is empty,
	 * uncountable, or matched by no rule.
	 */
	private String inflect(Object word, LinkedList<Rule> rules) {
		if (word == null) {
			return null;
		}
		String wordStr = word.toString().trim();
		if ((wordStr.length() == 0) || isUncountable(wordStr)) {
			return wordStr;
		}
		for (Rule rule : rules) {
			String result = rule.apply(wordStr);
			if (result != null) {
				return result;
			}
		}
		return wordStr;
	}

	/**
	 * Converts strings to lowerCamelCase. This method will also use any extra
	 * delimiter characters to identify word boundaries.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.lowerCamelCase("active_record")       #=> "activeRecord"
	 *   inflector.lowerCamelCase("first_name")          #=> "firstName"
	 *   inflector.lowerCamelCase("name")                #=> "name"
	 *   inflector.lowerCamelCase("the-first_name",'-')  #=> "theFirstName"
	 * </pre>
	 * 
	 * @param lowerCaseAndUnderscoredWord
	 *            the word that is to be converted to camel case
	 * @param delimiterChars
	 *            optional characters that are used to delimit word boundaries
	 * @return the lower camel case version of the word
	 * @see #underscore(String, char[])
	 * @see #camelCase(String, boolean, char[])
	 * @see #upperCamelCase(String, char[])
	 */
	public String lowerCamelCase(String lowerCaseAndUnderscoredWord,
			char... delimiterChars) {
		return camelCase(lowerCaseAndUnderscoredWord, false, delimiterChars);
	}

	/**
	 * Converts strings to UpperCamelCase. This method will also use any extra
	 * delimiter characters to identify word boundaries.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.upperCamelCase("active_record")       #=> "ActiveRecord"
	 *   inflector.upperCamelCase("first_name")          #=> "FirstName"
	 *   inflector.upperCamelCase("name")                #=> "Name"
	 *   inflector.upperCamelCase("the-first_name",'-')  #=> "TheFirstName"
	 * </pre>
	 * 
	 * @param lowerCaseAndUnderscoredWord
	 *            the word that is to be converted to camel case
	 * @param delimiterChars
	 *            optional characters that are used to delimit word boundaries
	 * @return the upper camel case version of the word
	 * @see #underscore(String, char[])
	 * @see #camelCase(String, boolean, char[])
	 * @see #lowerCamelCase(String, char[])
	 */
	public String upperCamelCase(String lowerCaseAndUnderscoredWord,
			char... delimiterChars) {
		return camelCase(lowerCaseAndUnderscoredWord, true, delimiterChars);
	}

	/**
	 * Converts strings to UpperCamelCase or, if the uppercaseFirstLetter
	 * argument is false, to lowerCamelCase. This method will also use any
	 * extra delimiter characters to identify word boundaries.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.camelCase("active_record",false)    #=> "activeRecord"
	 *   inflector.camelCase("active_record",true)     #=> "ActiveRecord"
	 *   inflector.camelCase("first_name",false)       #=> "firstName"
	 *   inflector.camelCase("first_name",true)        #=> "FirstName"
	 *   inflector.camelCase("name",false)             #=> "name"
	 *   inflector.camelCase("name",true)              #=> "Name"
	 * </pre>
	 * 
	 * @param lowerCaseAndUnderscoredWord
	 *            the word that is to be converted to camel case
	 * @param uppercaseFirstLetter
	 *            true if the first character is to be uppercased, or false if
	 *            the first character is to be lowercased
	 * @param delimiterChars
	 *            optional characters that are used to delimit word boundaries
	 * @return the camel case version of the word; null if the word is null,
	 *         and the empty string if the word is empty or only whitespace
	 * @see #underscore(String, char[])
	 * @see #upperCamelCase(String, char[])
	 * @see #lowerCamelCase(String, char[])
	 */
	public String camelCase(String lowerCaseAndUnderscoredWord,
			boolean uppercaseFirstLetter, char... delimiterChars) {
		if (lowerCaseAndUnderscoredWord == null) {
			return null;
		}
		String word = lowerCaseAndUnderscoredWord.trim();
		if (word.length() == 0) {
			return "";
		}
		if (uppercaseFirstLetter) {
			String result = word;
			// Replace any extra delimiters with underscores (before the
			// underscores are converted in the next step)...
			if (delimiterChars != null) {
				for (char delimiterChar : delimiterChars) {
					result = result.replace(delimiterChar, '_');
				}
			}
			// Change the case at the beginning and after each underscore ...
			return uppercaseGroup(result, CAMEL_WORD_START, 2);
		}
		if (word.length() < 2) {
			return word;
		}
		// Build the upper camel case form, then put back a lowercased first
		// character.
		return Character.toLowerCase(word.charAt(0))
				+ camelCase(word, true, delimiterChars).substring(1);
	}

	/**
	 * Makes an underscored form from the expression in the string (the reverse
	 * of the {@link #camelCase(String, boolean, char[]) camelCase} method).
	 * Also changes hyphens and any characters that match the supplied
	 * delimiters into underscores.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.underscore("activeRecord")         #=> "active_record"
	 *   inflector.underscore("ActiveRecord")         #=> "active_record"
	 *   inflector.underscore("firstName")            #=> "first_name"
	 *   inflector.underscore("FirstName")            #=> "first_name"
	 *   inflector.underscore("name")                 #=> "name"
	 *   inflector.underscore("The.firstName",'.')    #=> "the_first_name"
	 * </pre>
	 * 
	 * @param camelCaseWord
	 *            the camel-cased word that is to be converted;
	 * @param delimiterChars
	 *            optional characters that are used to delimit word boundaries
	 *            (beyond capitalization)
	 * @return a lower-cased version of the input, with separate words delimited
	 *         by the underscore character; null if the input is null
	 */
	public String underscore(String camelCaseWord, char... delimiterChars) {
		if (camelCaseWord == null) {
			return null;
		}
		String result = camelCaseWord.trim();
		if (result.length() == 0) {
			return "";
		}
		// "HTMLParser" -> "HTML_Parser"
		result = ACRONYM_BOUNDARY.matcher(result).replaceAll("$1_$2");
		// "firstName" -> "first_Name"
		result = CAMEL_BOUNDARY.matcher(result).replaceAll("$1_$2");
		result = result.replace('-', '_');
		if (delimiterChars != null) {
			for (char delimiterChar : delimiterChars) {
				result = result.replace(delimiterChar, '_');
			}
		}
		return result.toLowerCase(Locale.ROOT);
	}

	/**
	 * Returns a copy of the input with the first character converted to
	 * uppercase and the remainder to lowercase.
	 * 
	 * @param words
	 *            the word to be capitalized
	 * @return the trimmed string with the first character capitalized and the
	 *         remaining characters lowercased; null if the input is null
	 */
	public String capitalize(String words) {
		if (words == null) {
			return null;
		}
		String result = words.trim();
		if (result.length() == 0) {
			return "";
		}
		if (result.length() == 1) {
			return result.toUpperCase(Locale.ROOT);
		}
		return Character.toUpperCase(result.charAt(0))
				+ result.substring(1).toLowerCase(Locale.ROOT);
	}

	/**
	 * Capitalizes the first word and turns underscores into spaces and strips
	 * trailing "_id" and any supplied removable tokens. Like
	 * {@link #titleCase(String, String[])}, this is meant for creating pretty
	 * output.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.humanize("employee_salary")       #=> "Employee salary"
	 *   inflector.humanize("author_id")             #=> "Author"
	 * </pre>
	 * 
	 * @param lowerCaseAndUnderscoredWords
	 *            the input to be humanized
	 * @param removableTokens
	 *            optional array of tokens that are to be removed; each token
	 *            is interpreted as a regular expression, and null tokens are
	 *            ignored
	 * @return the humanized string; null if the input is null
	 * @see #titleCase(String, String[])
	 */
	public String humanize(String lowerCaseAndUnderscoredWords,
			String... removableTokens) {
		if (lowerCaseAndUnderscoredWords == null) {
			return null;
		}
		String result = lowerCaseAndUnderscoredWords.trim();
		if (result.length() == 0) {
			return "";
		}
		// Remove a trailing "_id" token
		result = TRAILING_ID.matcher(result).replaceAll("");
		// Remove all of the tokens that should be removed
		if (removableTokens != null) {
			for (String removableToken : removableTokens) {
				if (removableToken != null) {
					result = result.replaceAll(removableToken, "");
				}
			}
		}
		// Replace each run of adjacent underscores with a single space
		result = UNDERSCORE_RUN.matcher(result).replaceAll(" ");
		return capitalize(result);
	}

	/**
	 * Capitalizes all the words and replaces some characters in the string to
	 * create a nicer looking title. Underscores are changed to spaces, a
	 * trailing "_id" is removed, and any of the supplied tokens are removed.
	 * Like {@link #humanize(String, String[])}, this is meant for creating
	 * pretty output.
	 * 
	 * Examples:
	 * 
	 * <pre>
	 *   inflector.titleCase("man from the boondocks")       #=> "Man From The Boondocks"
	 *   inflector.titleCase("x-men: the last stand")        #=> "X-Men: The Last Stand"
	 * </pre>
	 * 
	 * @param words
	 *            the input to be turned into title case
	 * @param removableTokens
	 *            optional array of tokens that are to be removed
	 * @return the title-case version of the supplied words; null if the input
	 *         is null
	 */
	public String titleCase(String words, String... removableTokens) {
		String result = humanize(words, removableTokens);
		if (result == null) {
			return null;
		}
		// Change the first character of each word to uppercase
		return uppercaseGroup(result, TITLE_WORD_START, 1);
	}

	/**
	 * Turns a non-negative number into an ordinal string used to denote the
	 * position in an ordered sequence, such as 1st, 2nd, 3rd, 4th.
	 * 
	 * @param number
	 *            the non-negative number
	 * @return the string with the number and ordinal suffix
	 */
	public String ordinalize(int number) {
		String numberStr = Integer.toString(number);
		// 11, 12 and 13 (and 111, 212, ...) take "th" rather than the suffix
		// their last digit would suggest, so look at the last two digits.
		int lastTwoDigits = number % 100;
		if ((11 <= lastTwoDigits) && (lastTwoDigits <= 13)) {
			return numberStr + "th";
		}
		switch (number % 10) {
		case 1:
			return numberStr + "st";
		case 2:
			return numberStr + "nd";
		case 3:
			return numberStr + "rd";
		default:
			return numberStr + "th";
		}
	}

	// ------------------------------------------------------------------------------------------------
	// Management methods
	// ------------------------------------------------------------------------------------------------

	/**
	 * Determine whether the supplied word is considered uncountable by the
	 * {@link #pluralize(Object) pluralize} and {@link #singularize(Object)
	 * singularize} methods. The comparison ignores case and surrounding
	 * whitespace.
	 * 
	 * @param word
	 *            the word
	 * @return true if the plural and singular forms of the word are the same
	 */
	public boolean isUncountable(String word) {
		if (word == null) {
			return false;
		}
		String trimmedLower = word.trim().toLowerCase(Locale.ROOT);
		return uncountables.contains(trimmedLower);
	}

	/**
	 * Get the set of words that are not processed by the Inflector. The
	 * resulting set is the live internal set and is directly modifiable;
	 * entries are expected to be lowercase (see
	 * {@link #addUncountable(String[])}).
	 * 
	 * @return the set of uncountable words
	 */
	public Set<String> getUncountables() {
		return uncountables;
	}

	/**
	 * Adds a pluralization rule that takes precedence over all existing ones.
	 * 
	 * @param rule
	 *            the case-insensitive regular expression to match
	 * @param replacement
	 *            the replacement, which may reference groups of the rule
	 */
	public void addPluralize(String rule, String replacement) {
		final Rule pluralizeRule = new Rule(rule, replacement);
		plurals.addFirst(pluralizeRule);
	}

	/**
	 * Adds a singularization rule that takes precedence over all existing
	 * ones.
	 * 
	 * @param rule
	 *            the case-insensitive regular expression to match
	 * @param replacement
	 *            the replacement, which may reference groups of the rule
	 */
	public void addSingularize(String rule, String replacement) {
		final Rule singularizeRule = new Rule(rule, replacement);
		singulars.addFirst(singularizeRule);
	}

	/**
	 * Registers an irregular singular/plural pair by adding one pluralization
	 * and one singularization rule. The first character of each word is
	 * captured and re-emitted so that its case in the input is preserved.
	 * 
	 * @param singular
	 *            the singular form, e.g. "person"
	 * @param plural
	 *            the plural form, e.g. "people"
	 * @throws IllegalArgumentException
	 *             if either argument is null or empty
	 */
	public void addIrregular(String singular, String plural) {
		if ((singular == null) || (singular.length() == 0)) {
			throw new IllegalArgumentException("singular rule may not be empty");
		}
		if ((plural == null) || (plural.length() == 0)) {
			throw new IllegalArgumentException("plural rule may not be empty");
		}
		String singularRemainder = singular.length() > 1 ? singular
				.substring(1) : "";
		String pluralRemainder = plural.length() > 1 ? plural.substring(1) : "";
		addPluralize("(" + singular.charAt(0) + ")" + singularRemainder + "$",
				"$1" + pluralRemainder);
		addSingularize("(" + plural.charAt(0) + ")" + pluralRemainder + "$",
				"$1" + singularRemainder);
	}

	/**
	 * Registers words whose singular and plural forms are identical. Words are
	 * stored trimmed and lowercased; null entries are ignored.
	 * 
	 * @param words
	 *            the uncountable words
	 */
	public void addUncountable(String... words) {
		if ((words == null) || (words.length == 0)) {
			return;
		}
		for (String word : words) {
			if (word != null) {
				uncountables.add(word.trim().toLowerCase(Locale.ROOT));
			}
		}
	}

	/**
	 * Utility method to replace each match of the regular expression with the
	 * uppercased text of one of its groups, dropping the rest of the match.
	 * 
	 * The Java {@link Pattern regular expression processing} does not support
	 * the case-conversion directives <code>\l</code>, <code>\u</code>,
	 * <code>\L</code> and <code>\U</code> in replacement strings, which is why
	 * the conversion is done here in code.
	 * 
	 * @param input
	 *            the string to process; may not be null
	 * @param regex
	 *            the regular expression to search for
	 * @param groupNumberToUppercase
	 *            the number of the group whose uppercased text replaces each
	 *            match
	 * @return the input string with the appropriate characters converted to
	 *         upper-case
	 */
	protected static String replaceAllWithUppercase(String input, String regex,
			int groupNumberToUppercase) {
		return uppercaseGroup(input, Pattern.compile(regex),
				groupNumberToUppercase);
	}

	/**
	 * Pattern-based implementation of
	 * {@link #replaceAllWithUppercase(String, String, int)}.
	 */
	private static String uppercaseGroup(String input, Pattern pattern,
			int groupNumberToUppercase) {
		Matcher matcher = pattern.matcher(input);
		StringBuffer sb = new StringBuffer();
		while (matcher.find()) {
			String upper = matcher.group(groupNumberToUppercase).toUpperCase(
					Locale.ROOT);
			// The matched text is literal: quote it so that '$' and '\' are
			// not interpreted as group references or escapes.
			matcher.appendReplacement(sb, Matcher.quoteReplacement(upper));
		}
		matcher.appendTail(sb);
		return sb.toString();
	}

	/**
	 * Completely remove all rules within this inflector.
	 */
	public void clear() {
		uncountables.clear();
		plurals.clear();
		singulars.clear();
	}

	/**
	 * Populates this inflector with the default English rules. Because every
	 * rule is added at the front of its list, rules registered later here take
	 * precedence over those registered earlier: the generic fallbacks come
	 * first and the most specific rules and irregular words last.
	 */
	protected void initialize() {
		Inflector inflect = this;
		inflect.addPluralize("$", "s");
		inflect.addPluralize("s$", "s");
		inflect.addPluralize("(ax|test)is$", "$1es");
		inflect.addPluralize("(octop|vir)us$", "$1i");
		inflect.addPluralize("(octop|vir)i$", "$1i"); // already plural
		inflect.addPluralize("(alias|status)$", "$1es");
		inflect.addPluralize("(bu)s$", "$1ses");
		inflect.addPluralize("(buffal|tomat)o$", "$1oes");
		inflect.addPluralize("([ti])um$", "$1a");
		inflect.addPluralize("([ti])a$", "$1a"); // already plural
		inflect.addPluralize("sis$", "ses");
		inflect.addPluralize("(?:([^f])fe|([lr])f)$", "$1$2ves");
		inflect.addPluralize("(hive)$", "$1s");
		inflect.addPluralize("([^aeiouy]|qu)y$", "$1ies");
		inflect.addPluralize("(x|ch|ss|sh)$", "$1es");
		// The suffix alternatives are grouped so that the rule only applies to
		// words ending in matrix/vertex/index (and their -ix/-ex variants)
		// rather than to every word ending in "ex".
		inflect.addPluralize("(matr|vert|ind)(?:ix|ex)$", "$1ices");
		inflect.addPluralize("([m|l])ouse$", "$1ice");
		inflect.addPluralize("([m|l])ice$", "$1ice");
		inflect.addPluralize("^(ox)$", "$1en");
		inflect.addPluralize("(quiz)$", "$1zes");
		// Need to check for the following words that are already pluralized:
		inflect.addPluralize("(people|men|children|sexes|moves|stadiums)$",
				"$1"); // irregulars
		inflect.addPluralize("(oxen|octopi|viri|aliases|quizzes)$", "$1"); // special
																			// rules

		inflect.addSingularize("s$", "");
		inflect.addSingularize("(s|si|u)s$", "$1s"); // '-us' and '-ss' are
														// already singular
		inflect.addSingularize("(n)ews$", "$1ews");
		inflect.addSingularize("([ti])a$", "$1um");
		inflect.addSingularize(
				"((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$",
				"$1$2sis");
		inflect.addSingularize("(^analy)ses$", "$1sis");
		inflect.addSingularize("(^analy)sis$", "$1sis"); // already singular,
															// but ends in 's'
		inflect.addSingularize("([^f])ves$", "$1fe");
		inflect.addSingularize("(hive)s$", "$1");
		inflect.addSingularize("(tive)s$", "$1");
		inflect.addSingularize("([lr])ves$", "$1f");
		inflect.addSingularize("([^aeiouy]|qu)ies$", "$1y");
		inflect.addSingularize("(s)eries$", "$1eries");
		inflect.addSingularize("(m)ovies$", "$1ovie");
		inflect.addSingularize("(x|ch|ss|sh)es$", "$1");
		inflect.addSingularize("([m|l])ice$", "$1ouse");
		inflect.addSingularize("(bus)es$", "$1");
		inflect.addSingularize("(o)es$", "$1");
		inflect.addSingularize("(shoe)s$", "$1");
		inflect.addSingularize("(cris|ax|test)is$", "$1is"); // already
																// singular, but
																// ends in 's'
		inflect.addSingularize("(cris|ax|test)es$", "$1is");
		inflect.addSingularize("(octop|vir)i$", "$1us");
		inflect.addSingularize("(octop|vir)us$", "$1us"); // already singular,
															// but ends in 's'
		inflect.addSingularize("(alias|status)es$", "$1");
		inflect.addSingularize("(alias|status)$", "$1"); // already singular,
															// but ends in 's'
		inflect.addSingularize("^(ox)en", "$1");
		inflect.addSingularize("(vert|ind)ices$", "$1ex");
		inflect.addSingularize("(matr)ices$", "$1ix");
		inflect.addSingularize("(quiz)zes$", "$1");

		inflect.addIrregular("person", "people");
		inflect.addIrregular("man", "men");
		inflect.addIrregular("child", "children");
		inflect.addIrregular("sex", "sexes");
		inflect.addIrregular("move", "moves");
		inflect.addIrregular("stadium", "stadiums");

		inflect.addUncountable("equipment", "information", "rice", "money",
				"species", "series", "fish", "sheep", "data", "analytics");
	}

}