org.hl7.fhir.utilities.Inflector Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.hl7.fhir.utilities Show documentation
There is a newer version: 6.4.1
Show newest version
package org.hl7.fhir.utilities;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:
    
   * Redistributions of source code must retain the above copyright notice, this 
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice, 
     this list of conditions and the following disclaimer in the documentation 
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to 
     endorse or promote products derived from this software without specific 
     prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.
  
 */



/*
 * JBoss DNA (http://www.jboss.org/dna)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of 
 * individual contributors. 
 *
 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * JBoss DNA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Transforms words to singular, plural, humanized (human readable), underscore, camel case, or ordinal form. This is inspired by
 * the Inflector class in Ruby on Rails, which is distributed under the Rails license.
 * 
 * @author Randall Hauch
 */
public class Inflector {

    protected static final Inflector INSTANCE = new Inflector();

    public static final Inflector getInstance() {
        return INSTANCE;
    }

    protected class Rule {

        protected final String expression;
        protected final Pattern expressionPattern;
        protected final String replacement;

        protected Rule( String expression,
                        String replacement ) {
            this.expression = expression;
            this.replacement = replacement != null ? replacement : "";
            this.expressionPattern = Pattern.compile(this.expression, Pattern.CASE_INSENSITIVE);
        }

        /**
         * Apply the rule against the input string, returning the modified string or null if the rule didn't apply (and no
         * modifications were made)
         * 
         * @param input the input string
         * @return the modified string if this rule applied, or null if the input was not modified by this rule
         */
        protected String apply( String input ) {
            Matcher matcher = this.expressionPattern.matcher(input);
            if (!matcher.find()) return null;
            return matcher.replaceAll(this.replacement);
        }

        @Override
        public int hashCode() {
            return expression.hashCode();
        }

        @Override
        public boolean equals( Object obj ) {
            if (obj == this) return true;
            if (obj != null && obj.getClass() == this.getClass()) {
                final Rule that = (Rule)obj;
                if (this.expression.equalsIgnoreCase(that.expression)) return true;
            }
            return false;
        }

        @Override
        public String toString() {
            return expression + ", " + replacement;
        }
    }

    private LinkedList plurals = new LinkedList();
    private LinkedList singulars = new LinkedList();
    /**
     * The lowercase words that are to be excluded and not processed. This map can be modified by the users via
     * {@link #getUncountables()}.
     */
    private final Set uncountables = new HashSet();

    public Inflector() {
        initialize();
    }

    protected Inflector( Inflector original ) {
        this.plurals.addAll(original.plurals);
        this.singulars.addAll(original.singulars);
        this.uncountables.addAll(original.uncountables);
    }

    @Override
    public Inflector clone() {
        return new Inflector(this);
    }

    // ------------------------------------------------------------------------------------------------
    // Usage functions
    // ------------------------------------------------------------------------------------------------

    /**
     * Returns the plural form of the word in the string.
     * 
     * Examples:
     * 
     *      *   inflector.pluralize("post")               #=> "posts"
     *   inflector.pluralize("octopus")            #=> "octopi"
     *   inflector.pluralize("sheep")              #=> "sheep"
     *   inflector.pluralize("words")              #=> "words"
     *   inflector.pluralize("the blue mailman")   #=> "the blue mailmen"
     *   inflector.pluralize("CamelOctopus")       #=> "CamelOctopi"
     * 
     * 
     * 
     * 
     * Note that if the {@link Object#toString()} is called on the supplied object, so this method works for non-strings, too.
     * 
     * 
     * @param word the word that is to be pluralized.
     * @return the pluralized form of the word, or the word itself if it could not be pluralized
     * @see #singularize(Object)
     */
    public String pluralize( Object word ) {
        if (word == null) return null;
        String wordStr = word.toString().trim();
        if (wordStr.length() == 0) return wordStr;
        if (isUncountable(wordStr)) return wordStr;
        for (Rule rule : this.plurals) {
            String result = rule.apply(wordStr);
            if (result != null) return result;
        }
        return wordStr;
    }

    public String pluralize( Object word,
                             int count ) {
        if (word == null) return null;
        if (count == 1 || count == -1) {
            return word.toString();
        }
        return pluralize(word);
    }

    /**
     * Returns the singular form of the word in the string.
     * 
     * Examples:
     * 
     *      *   inflector.singularize("posts")             #=> "post"
     *   inflector.singularize("octopi")            #=> "octopus"
     *   inflector.singularize("sheep")             #=> "sheep"
     *   inflector.singularize("words")             #=> "word"
     *   inflector.singularize("the blue mailmen")  #=> "the blue mailman"
     *   inflector.singularize("CamelOctopi")       #=> "CamelOctopus"
     * 
     * 
     * 
     * 
     * Note that if the {@link Object#toString()} is called on the supplied object, so this method works for non-strings, too.
     * 
     * 
     * @param word the word that is to be pluralized.
     * @return the pluralized form of the word, or the word itself if it could not be pluralized
     * @see #pluralize(Object)
     */
    public String singularize( Object word ) {
        if (word == null) return null;
        String wordStr = word.toString().trim();
        if (wordStr.length() == 0) return wordStr;
        if (isUncountable(wordStr)) return wordStr;
        for (Rule rule : this.singulars) {
            String result = rule.apply(wordStr);
            if (result != null) return result;
        }
        return wordStr;
    }

    /**
     * Converts strings to lowerCamelCase. This method will also use any extra delimiter characters to identify word boundaries.
     * 
     * Examples:
     * 
     *      *   inflector.lowerCamelCase("active_record")       #=> "activeRecord"
     *   inflector.lowerCamelCase("first_name")          #=> "firstName"
     *   inflector.lowerCamelCase("name")                #=> "name"
     *   inflector.lowerCamelCase("the-first_name",'-')  #=> "theFirstName"
     * 
     * 
     * 
     * 
     * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case
     * @param delimiterChars optional characters that are used to delimit word boundaries
     * @return the lower camel case version of the word
     * @see #underscore(String, char[])
     * @see #camelCase(String, boolean, char[])
     * @see #upperCamelCase(String, char[])
     */
    public String lowerCamelCase( String lowerCaseAndUnderscoredWord,
                                  char... delimiterChars ) {
        return camelCase(lowerCaseAndUnderscoredWord, false, delimiterChars);
    }

    /**
     * Converts strings to UpperCamelCase. This method will also use any extra delimiter characters to identify word boundaries.
     * 
     * Examples:
     * 
     *      *   inflector.upperCamelCase("active_record")       #=> "SctiveRecord"
     *   inflector.upperCamelCase("first_name")          #=> "FirstName"
     *   inflector.upperCamelCase("name")                #=> "Name"
     *   inflector.lowerCamelCase("the-first_name",'-')  #=> "TheFirstName"
     * 
     * 
     * 
     * 
     * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case
     * @param delimiterChars optional characters that are used to delimit word boundaries
     * @return the upper camel case version of the word
     * @see #underscore(String, char[])
     * @see #camelCase(String, boolean, char[])
     * @see #lowerCamelCase(String, char[])
     */
    public String upperCamelCase( String lowerCaseAndUnderscoredWord,
                                  char... delimiterChars ) {
        return camelCase(lowerCaseAndUnderscoredWord, true, delimiterChars);
    }

    /**
     * By default, this method converts strings to UpperCamelCase. If the uppercaseFirstLetter argument to false,
     * then this method produces lowerCamelCase. This method will also use any extra delimiter characters to identify word
     * boundaries.
     * 
     * Examples:
     * 
     *      *   inflector.camelCase("active_record",false)    #=> "activeRecord"
     *   inflector.camelCase("active_record",true)     #=> "ActiveRecord"
     *   inflector.camelCase("first_name",false)       #=> "firstName"
     *   inflector.camelCase("first_name",true)        #=> "FirstName"
     *   inflector.camelCase("name",false)             #=> "name"
     *   inflector.camelCase("name",true)              #=> "Name"
     * 
     * 
     * 
     * 
     * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case
     * @param uppercaseFirstLetter true if the first character is to be uppercased, or false if the first character is to be
     *        lowercased
     * @param delimiterChars optional characters that are used to delimit word boundaries
     * @return the camel case version of the word
     * @see #underscore(String, char[])
     * @see #upperCamelCase(String, char[])
     * @see #lowerCamelCase(String, char[])
     */
    public String camelCase( String lowerCaseAndUnderscoredWord,
                             boolean uppercaseFirstLetter,
                             char... delimiterChars ) {
        if (lowerCaseAndUnderscoredWord == null) return null;
        lowerCaseAndUnderscoredWord = lowerCaseAndUnderscoredWord.trim();
        if (lowerCaseAndUnderscoredWord.length() == 0) return "";
        if (uppercaseFirstLetter) {
            String result = lowerCaseAndUnderscoredWord;
            // Replace any extra delimiters with underscores (before the underscores are converted in the next step)...
            if (delimiterChars != null) {
                for (char delimiterChar : delimiterChars) {
                    result = result.replace(delimiterChar, '_');
                }
            }

            // Change the case at the beginning at after each underscore ...
            return replaceAllWithUppercase(result, "(^|_)(.)", 2);
        }
        if (lowerCaseAndUnderscoredWord.length() < 2) return lowerCaseAndUnderscoredWord;
        return "" + Character.toLowerCase(lowerCaseAndUnderscoredWord.charAt(0))
               + camelCase(lowerCaseAndUnderscoredWord, true, delimiterChars).substring(1);
    }

    /**
     * Makes an underscored form from the expression in the string (the reverse of the {@link #camelCase(String, boolean, char[])
     * camelCase} method. Also changes any characters that match the supplied delimiters into underscore.
     * 
     * Examples:
     * 
     *      *   inflector.underscore("activeRecord")     #=> "active_record"
     *   inflector.underscore("ActiveRecord")     #=> "active_record"
     *   inflector.underscore("firstName")        #=> "first_name"
     *   inflector.underscore("FirstName")        #=> "first_name"
     *   inflector.underscore("name")             #=> "name"
     *   inflector.underscore("The.firstName")    #=> "the_first_name"
     * 
     * 
     * 
     * 
     * @param camelCaseWord the camel-cased word that is to be converted;
     * @param delimiterChars optional characters that are used to delimit word boundaries (beyond capitalization)
     * @return a lower-cased version of the input, with separate words delimited by the underscore character.
     */
    public String underscore( String camelCaseWord,
                              char... delimiterChars ) {
        if (camelCaseWord == null) return null;
        String result = camelCaseWord.trim();
        if (result.length() == 0) return "";
        result = result.replaceAll("([A-Z]+)([A-Z][a-z])", "$1_$2");
        result = result.replaceAll("([a-z\\d])([A-Z])", "$1_$2");
        result = result.replace('-', '_');
        if (delimiterChars != null) {
            for (char delimiterChar : delimiterChars) {
                result = result.replace(delimiterChar, '_');
            }
        }
        return result.toLowerCase();
    }

    /**
     * Returns a copy of the input with the first character converted to uppercase and the remainder to lowercase.
     * 
     * @param words the word to be capitalized
     * @return the string with the first character capitalized and the remaining characters lowercased
     */
    public String capitalize( String words ) {
        if (words == null) return null;
        String result = words.trim();
        if (result.length() == 0) return "";
        if (result.length() == 1) return result.toUpperCase();
        return "" + Character.toUpperCase(result.charAt(0)) + result.substring(1).toLowerCase();
    }

    /**
     * Capitalizes the first word and turns underscores into spaces and strips trailing "_id" and any supplied removable tokens.
     * Like {@link #titleCase(String, String[])}, this is meant for creating pretty output.
     * 
     * Examples:
     * 
     *      *   inflector.humanize("employee_salary")       #=> "Employee salary"
     *   inflector.humanize("author_id")             #=> "Author"
     * 
     * 
     * 
     * 
     * @param lowerCaseAndUnderscoredWords the input to be humanized
     * @param removableTokens optional array of tokens that are to be removed
     * @return the humanized string
     * @see #titleCase(String, String[])
     */
    public String humanize( String lowerCaseAndUnderscoredWords,
                            String... removableTokens ) {
        if (lowerCaseAndUnderscoredWords == null) return null;
        String result = lowerCaseAndUnderscoredWords.trim();
        if (result.length() == 0) return "";
        // Remove a trailing "_id" token
        result = result.replaceAll("_id$", "");
        // Remove all of the tokens that should be removed
        if (removableTokens != null) {
            for (String removableToken : removableTokens) {
                result = result.replaceAll(removableToken, "");
            }
        }
        result = result.replaceAll("_+", " "); // replace all adjacent underscores with a single space
        return capitalize(result);
    }

    /**
     * Capitalizes all the words and replaces some characters in the string to create a nicer looking title. Underscores are
     * changed to spaces, a trailing "_id" is removed, and any of the supplied tokens are removed. Like
     * {@link #humanize(String, String[])}, this is meant for creating pretty output.
     * 
     * Examples:
     * 
     *      *   inflector.titleCase("man from the boondocks")       #=> "Man From The Boondocks"
     *   inflector.titleCase("x-men: the last stand")        #=> "X Men: The Last Stand"
     * 
     * 
     * 
     * 
     * @param words the input to be turned into title case
     * @param removableTokens optional array of tokens that are to be removed
     * @return the title-case version of the supplied words
     */
    public String titleCase( String words,
                             String... removableTokens ) {
        String result = humanize(words, removableTokens);
        result = replaceAllWithUppercase(result, "\\b([a-z])", 1); // change first char of each word to uppercase
        return result;
    }

    /**
     * Turns a non-negative number into an ordinal string used to denote the position in an ordered sequence, such as 1st, 2nd,
     * 3rd, 4th.
     * 
     * @param number the non-negative number
     * @return the string with the number and ordinal suffix
     */
    public String ordinalize( int number ) {
        int remainder = number % 100;
        String numberStr = Integer.toString(number);
        if (11 <= number && number <= 13) return numberStr + "th";
        remainder = number % 10;
        if (remainder == 1) return numberStr + "st";
        if (remainder == 2) return numberStr + "nd";
        if (remainder == 3) return numberStr + "rd";
        return numberStr + "th";
    }

    // ------------------------------------------------------------------------------------------------
    // Management methods
    // ------------------------------------------------------------------------------------------------

    /**
     * Determine whether the supplied word is considered uncountable by the {@link #pluralize(Object) pluralize} and
     * {@link #singularize(Object) singularize} methods.
     * 
     * @param word the word
     * @return true if the plural and singular forms of the word are the same
     */
    public boolean isUncountable( String word ) {
        if (word == null) return false;
        String trimmedLower = word.trim().toLowerCase();
        return this.uncountables.contains(trimmedLower);
    }

    /**
     * Get the set of words that are not processed by the Inflector. The resulting map is directly modifiable.
     * 
     * @return the set of uncountable words
     */
    public Set getUncountables() {
        return uncountables;
    }

    public void addPluralize( String rule,
                              String replacement ) {
        final Rule pluralizeRule = new Rule(rule, replacement);
        this.plurals.addFirst(pluralizeRule);
    }

    public void addSingularize( String rule,
                                String replacement ) {
        final Rule singularizeRule = new Rule(rule, replacement);
        this.singulars.addFirst(singularizeRule);
    }

    public void addIrregular( String singular,
                              String plural ) {
        //CheckArg.isNotEmpty(singular, "singular rule");
        //CheckArg.isNotEmpty(plural, "plural rule");
        String singularRemainder = singular.length() > 1 ? singular.substring(1) : "";
        String pluralRemainder = plural.length() > 1 ? plural.substring(1) : "";
        addPluralize("(" + singular.charAt(0) + ")" + singularRemainder + "$", "$1" + pluralRemainder);
        addSingularize("(" + plural.charAt(0) + ")" + pluralRemainder + "$", "$1" + singularRemainder);
    }

    public void addUncountable( String... words ) {
        if (words == null || words.length == 0) return;
        for (String word : words) {
            if (word != null) uncountables.add(word.trim().toLowerCase());
        }
    }

    /**
     * Utility method to replace all occurrences given by the specific backreference with its uppercased form, and remove all
     * other backreferences.
     * 
     * The Java {@link Pattern regular expression processing} does not use the preprocessing directives \l,
     * \u, \L, and \U. If so, such directives could be used in the replacement string
     * to uppercase or lowercase the backreferences. For example, \L1 would lowercase the first backreference, and
     * \u3 would uppercase the 3rd backreference.
     * 
     * 
     * @param input
     * @param regex
     * @param groupNumberToUppercase
     * @return the input string with the appropriate characters converted to upper-case
     */
    protected static String replaceAllWithUppercase( String input,
                                                     String regex,
                                                     int groupNumberToUppercase ) {
        Pattern underscoreAndDotPattern = Pattern.compile(regex);
        Matcher matcher = underscoreAndDotPattern.matcher(input);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            matcher.appendReplacement(sb, matcher.group(groupNumberToUppercase).toUpperCase());
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /**
     * Completely remove all rules within this inflector.
     */
    public void clear() {
        this.uncountables.clear();
        this.plurals.clear();
        this.singulars.clear();
    }

    protected void initialize() {
        Inflector inflect = this;
        inflect.addPluralize("$", "s");
        inflect.addPluralize("s$", "s");
        inflect.addPluralize("(ax|test)is$", "$1es");
        inflect.addPluralize("(octop|vir)us$", "$1i");
        inflect.addPluralize("(octop|vir)i$", "$1i"); // already plural
        inflect.addPluralize("(alias|status)$", "$1es");
        inflect.addPluralize("(bu)s$", "$1ses");
        inflect.addPluralize("(buffal|tomat)o$", "$1oes");
        inflect.addPluralize("([ti])um$", "$1a");
        inflect.addPluralize("([ti])a$", "$1a"); // already plural
        inflect.addPluralize("sis$", "ses");
        inflect.addPluralize("(?:([^f])fe|([lr])f)$", "$1$2ves");
        inflect.addPluralize("(hive)$", "$1s");
        inflect.addPluralize("([^aeiouy]|qu)y$", "$1ies");
        inflect.addPluralize("(x|ch|ss|sh)$", "$1es");
        inflect.addPluralize("(matr|vert|ind)ix|ex$", "$1ices");
        inflect.addPluralize("([m|l])ouse$", "$1ice");
        inflect.addPluralize("([m|l])ice$", "$1ice");
        inflect.addPluralize("^(ox)$", "$1en");
        inflect.addPluralize("(quiz)$", "$1zes");
        // Need to check for the following words that are already pluralized:
        inflect.addPluralize("(people|men|children|sexes|moves|stadiums)$", "$1"); // irregulars
        inflect.addPluralize("(oxen|octopi|viri|aliases|quizzes)$", "$1"); // special rules

        inflect.addSingularize("s$", "");
        inflect.addSingularize("(s|si|u)s$", "$1s"); // '-us' and '-ss' are already singular
        inflect.addSingularize("(n)ews$", "$1ews");
        inflect.addSingularize("([ti])a$", "$1um");
        inflect.addSingularize("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis");
        inflect.addSingularize("(^analy)ses$", "$1sis");
        inflect.addSingularize("(^analy)sis$", "$1sis"); // already singular, but ends in 's'
        inflect.addSingularize("([^f])ves$", "$1fe");
        inflect.addSingularize("(hive)s$", "$1");
        inflect.addSingularize("(tive)s$", "$1");
        inflect.addSingularize("([lr])ves$", "$1f");
        inflect.addSingularize("([^aeiouy]|qu)ies$", "$1y");
        inflect.addSingularize("(s)eries$", "$1eries");
        inflect.addSingularize("(m)ovies$", "$1ovie");
        inflect.addSingularize("(x|ch|ss|sh)es$", "$1");
        inflect.addSingularize("([m|l])ice$", "$1ouse");
        inflect.addSingularize("(bus)es$", "$1");
        inflect.addSingularize("(o)es$", "$1");
        inflect.addSingularize("(shoe)s$", "$1");
        inflect.addSingularize("(cris|ax|test)is$", "$1is"); // already singular, but ends in 's'
        inflect.addSingularize("(cris|ax|test)es$", "$1is");
        inflect.addSingularize("(octop|vir)i$", "$1us");
        inflect.addSingularize("(octop|vir)us$", "$1us"); // already singular, but ends in 's'
        inflect.addSingularize("(alias|status)es$", "$1");
        inflect.addSingularize("(alias|status)$", "$1"); // already singular, but ends in 's'
        inflect.addSingularize("^(ox)en", "$1");
        inflect.addSingularize("(vert|ind)ices$", "$1ex");
        inflect.addSingularize("(matr)ices$", "$1ix");
        inflect.addSingularize("(quiz)zes$", "$1");

        inflect.addIrregular("person", "people");
        inflect.addIrregular("man", "men");
        inflect.addIrregular("child", "children");
        inflect.addIrregular("sex", "sexes");
        inflect.addIrregular("move", "moves");
        inflect.addIrregular("stadium", "stadiums");

        inflect.addUncountable("equipment", "information", "rice", "money", "species", "series", "fish", "sheep");
    }

}