// org.glassfish.jersey.jaxb.internal.NounInflector (Maven / Gradle / Ivy)
//
// Go to download
/*
 * Copyright (c) 2010, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package org.glassfish.jersey.jaxb.internal;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * API for performing inflections (pluralization, singularization, and so on)
 * on various strings.  These inflections will be useful in code generators that
 * convert things like database table names into Java class names.
 *
 * <p>The {@code getInstance()} method returns a singleton instance of
 * this class with a default set of rules, which can then be customized.
 * Rules added during customization will take precedence over the standard ones.
 * Use the {@code addIrregular()}, {@code addPlural()}, {@code addSingular()},
 * and {@code addUncountable()} methods to add additional rules to the default
 * ones.
 *
 * <p>IMPLEMENTATION NOTE - The default implementation is
 * intended to be functionally compatible with the {@code Inflector::inflections}
 * class in Ruby on Rails.  The {@code gsub()} method on Ruby strings
 * matches regular expressions anywhere in the input.  However, nearly all of
 * the actual patterns used in this component use {@code $} at the end to
 * match the end of the input string (so that only the last word in a multiple
 * word phrase will be singularized or pluralized).  Therefore, the Java versions
 * of the regular expressions have been modified to capture all text before the
 * interesting characters at the end, and emit them as part of the result, so
 * that the entire string can be matched against a pattern once.
 *
 * <p>The customization methods mutate the rule lists without any
 * synchronization, so customization must not run concurrently with lookups.
 *
 * @author Florian Rosenberg
 */
final class NounInflector {

    // -------------------------------------------------------- Static Variables

    /**
     * Holder for the singleton returned by {@link #getInstance()}.  The JVM
     * initializes this class on first access, which gives lazy creation and
     * safe publication without explicit locking.
     */
    private static final class InstanceHolder {

        private static final NounInflector INSTANCE = new NounInflector();

        private InstanceHolder() {
        }
    }

    // ------------------------------------------------------ Instance Variables

    /**
     * List of Replacers for performing replacement operations
     * on matches for singular words (producing plurals).  New rules are
     * inserted at the head, so the most recently added rule is tried first.
     */
    private final List<Replacer> plurals = new LinkedList<>();

    /**
     * List of Replacers for performing replacement operations
     * on matches for plural words (producing singulars).  New rules are
     * inserted at the head, so the most recently added rule is tried first.
     */
    private final List<Replacer> singulars = new ArrayList<>();

    /**
     * List of (lower cased) words that represent uncountable concepts that
     * cannot be pluralized or singularized.
     */
    private final List<String> uncountables = new LinkedList<>();

    // ------------------------------------------------------------ Constructors

    /**
     * Private constructor to avoid instantiation.  Registers the default
     * rule set; because every rule is inserted at the head of its list, the
     * rules registered LAST here are the ones consulted FIRST.
     */
    private NounInflector() {

        addPlural("$", "s", false);
        addPlural("(.*)$", "\\1s");
        addPlural("(.*)(ax|test)is$", "\\1\\2es");
        addPlural("(.*)(octop|vir)us$", "\\1\\2i");
        addPlural("(.*)(alias|status)$", "\\1\\2es");
        addPlural("(.*)(bu)s$", "\\1\\2ses");
        addPlural("(.*)(buffal|tomat)o$", "\\1\\2oes");
        addPlural("(.*)([ti])um$", "\\1\\2a");
        addPlural("(.*)sis$", "\\1ses");
        // Exactly one of groups 2 and 3 participates in a match; the other one
        // is null and contributes nothing ("wife" -> "wives", "half" -> "halves").
        addPlural("(.*)(?:([^f])fe|([lr])f)$", "\\1\\2\\3ves");
        addPlural("(.*)(hive)$", "\\1\\2s");
        addPlural("(.*)(tive)$", "\\1\\2s"); // Added for consistency with singular rules
        addPlural("(.*)([^aeiouy]|qu)y$", "\\1\\2ies");
        addPlural("(.*)(series)$", "\\1\\2"); // Added for consistency with singular rules
        addPlural("(.*)(movie)$", "\\1\\2s"); // Added for consistency with singular rules
        addPlural("(.*)(x|ch|ss|sh)$", "\\1\\2es");
        // The suffix alternation is grouped so that a bare "ex" cannot match on its own.
        addPlural("(.*)(matr|vert|ind)(?:ix|ex)$", "\\1\\2ices");
        addPlural("(.*)(o)$", "\\1\\2es"); // Added for consistency with singular rules
        addPlural("(.*)(shoe)$", "\\1\\2s"); // Added for consistency with singular rules
        addPlural("(.*)([ml])ouse$", "\\1\\2ice");
        addPlural("^(ox)$", "\\1en");
        addPlural("(.*)(vert|ind)ex$", "\\1\\2ices"); // Added for consistency with singular rules
        addPlural("(.*)(matr)ix$", "\\1\\2ices"); // Added for consistency with singular rules
        addPlural("(.*)(quiz)$", "\\1\\2zes");

        addSingular("(.*)s$", "\\1");
        addSingular("(.*)(n)ews$", "\\1\\2ews");
        addSingular("(.*)([ti])a$", "\\1\\2um");
        addSingular("(.*)((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "\\1\\2sis");
        addSingular("(.*)(^analy)ses$", "\\1\\2sis");
        addSingular("(.*)([^f])ves$", "\\1\\2fe");
        addSingular("(.*)(hive)s$", "\\1\\2");
        addSingular("(.*)(tive)s$", "\\1\\2");
        addSingular("(.*)([lr])ves$", "\\1\\2f");
        addSingular("(.*)([^aeiouy]|qu)ies$", "\\1\\2y");
        addSingular("(.*)(s)eries$", "\\1\\2eries");
        addSingular("(.*)(m)ovies$", "\\1\\2ovie");
        addSingular("(.*)(x|ch|ss|sh)es$", "\\1\\2");
        addSingular("(.*)([ml])ice$", "\\1\\2ouse");
        addSingular("(.*)(bus)es$", "\\1\\2");
        addSingular("(.*)(o)es$", "\\1\\2");
        addSingular("(.*)(shoe)s$", "\\1\\2");
        addSingular("(.*)(cris|ax|test)es$", "\\1\\2is");
        addSingular("(.*)(octop|vir)i$", "\\1\\2us");
        addSingular("(.*)(alias|status)es$", "\\1\\2");
        addSingular("^(ox)en", "\\1");
        addSingular("(.*)(vert|ind)ices$", "\\1\\2ex");
        addSingular("(.*)(matr)ices$", "\\1\\2ix");
        addSingular("(.*)(quiz)zes$", "\\1\\2");

        addIrregular("child", "children");
        addIrregular("man", "men");
        addIrregular("move", "moves");
        addIrregular("person", "people");
        addIrregular("sex", "sexes");

        addUncountable("equipment");
        addUncountable("fish");
        addUncountable("information");
        addUncountable("money");
        addUncountable("rice");
        addUncountable("series");
        addUncountable("sheep");
        addUncountable("species");

    }

    // ---------------------------------------------------------- Static Methods

    /**
     * Return a fully configured {@link NounInflector} instance that can be used
     * for performing transformations.
     *
     * @return the shared singleton instance
     */
    public static NounInflector getInstance() {

        return InstanceHolder.INSTANCE;

    }

    // ---------------------------------------------------------- Public Methods

    /**
     * Convert strings to {@code EmbeddedCamelCase}.  Embedded
     * underscores will be removed.
     *
     * @param word Word to be converted
     * @return the camel cased word, starting with an upper case character
     */
    public String camelize(final String word) {

        return camelize(word, false);

    }

    /**
     * Convert word strings consisting of lower case letters and
     * underscore characters between words into {@code embeddedCamelCase}
     * or {@code EmbeddedCamelCase}, depending on the {@code flag}
     * argument.  Embedded underscores will be removed.  Embedded '/'
     * characters will be replaced by '.', making this method useful
     * in converting path-like names into fully qualified classnames.
     *
     * <p>IMPLEMENTATION DIFFERENCE - The Rails version of this
     * method also converts '/' characters to '::' because that reflects
     * the normal syntax for fully qualified names in Ruby.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"foo_bar", false</td><td>"FooBar"</td></tr>
     *   <tr><td>"foo_bar", true</td><td>"fooBar"</td></tr>
     *   <tr><td>"foo_bar/baz", false</td><td>"FooBar.Baz"</td></tr>
     *   <tr><td>"foo_bar/baz", true</td><td>"fooBar.Baz"</td></tr>
     * </table>
     *
     * @param word Word to be converted
     * @param flag Flag indicating that the initial character should
     *  be lower cased instead of upper cased
     * @return the camel cased word; an empty input is returned unchanged
     */
    public String camelize(final String word, final boolean flag) {
        if (word.length() == 0) {
            return word;
        }

        final StringBuilder sb = new StringBuilder(word.length());
        if (flag) {
            sb.append(Character.toLowerCase(word.charAt(0)));
        } else {
            sb.append(Character.toUpperCase(word.charAt(0)));
        }
        // Set after a separator so that the NEXT character is upper cased.
        boolean capitalize = false;
        for (int i = 1; i < word.length(); i++) {
            final char ch = word.charAt(i);
            if (capitalize) {
                sb.append(Character.toUpperCase(ch));
                capitalize = false;
            } else if (ch == '_') {
                capitalize = true;
            } else if (ch == '/') {
                capitalize = true;
                sb.append('.');
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();

    }

    /**
     * Create and return a simple class name that corresponds to a
     * plural table name.  Any leading schema name will be trimmed.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"foo_bars"</td><td>"FooBar"</td></tr>
     *   <tr><td>"baz"</td><td>"Baz"</td></tr>
     * </table>
     *
     * @param tableName Table name to be converted
     * @return the singularized, camel cased class name
     */
    public String classify(String tableName) {

        final int period = tableName.lastIndexOf('.');
        if (period >= 0) {
            tableName = tableName.substring(period + 1);
        }
        return camelize(singularize(tableName));

    }

    /**
     * Replace underscores in the specified word with dashes.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"foo_bar"</td><td>"foo-bar"</td></tr>
     *   <tr><td>"baz"</td><td>"baz"</td></tr>
     * </table>
     *
     * @param word Word to be converted
     * @return the word with every '_' replaced by '-'
     */
    public String dasherize(final String word) {

        return word.replace('_', '-');

    }

    /**
     * Remove any package name from a fully qualified class name,
     * returning only the simple classname.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"java.util.Map"</td><td>"Map"</td></tr>
     *   <tr><td>"String"</td><td>"String"</td></tr>
     * </table>
     *
     * @param className Fully qualified class name to be converted
     * @return the text following the last '.', or the input if it has no '.'
     */
    public String demodulize(final String className) {

        final int period = className.lastIndexOf('.');
        if (period >= 0) {
            return className.substring(period + 1);
        } else {
            return className;
        }

    }

    /**
     * Create and return a foreign key name from a class name,
     * separating the "id" suffix with an underscore.
     *
     * @param className Class name for which to create a foreign key
     * @return the foreign key name, for example "order_id"
     */
    public String foreignKey(final String className) {

        return foreignKey(className, true);

    }

    /**
     * Create and return a foreign key name from a class name,
     * optionally inserting an underscore before the "id" portion.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"com.mymodel.Order", false</td><td>"orderid"</td></tr>
     *   <tr><td>"com.mymodel.Order", true</td><td>"order_id"</td></tr>
     *   <tr><td>"Message", false</td><td>"messageid"</td></tr>
     *   <tr><td>"Message", true</td><td>"message_id"</td></tr>
     * </table>
     *
     * @param className Class name for which to create a foreign key
     * @param underscore Flag indicating whether an underscore should
     *  be emitted between the class name and the "id" suffix
     * @return the foreign key name
     */
    public String foreignKey(final String className, final boolean underscore) {

        return underscore(demodulize(className) + (underscore ? "_id" : "id"));

    }

    /**
     * Capitalize the first word in a lower cased and underscored string,
     * turn underscores into spaces, and strip any trailing "_id".  Like
     * {@code titleize()}, this is meant for creating pretty output,
     * and is not intended for code generation.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"employee_salary"</td><td>"Employee salary"</td></tr>
     *   <tr><td>"author_id"</td><td>"Author"</td></tr>
     * </table>
     *
     * @param words Word string to be converted
     * @return the humanized string; empty if nothing is left after the
     *  "_id" suffix has been removed
     */
    public String humanize(String words) {

        if (words.endsWith("_id")) {
            words = words.substring(0, words.length() - 3);
        }
        // Nothing to capitalize: "" and "_id" both end up here.
        if (words.isEmpty()) {
            return words;
        }
        final StringBuilder sb = new StringBuilder(words.length());
        sb.append(Character.toUpperCase(words.charAt(0)));
        for (int i = 1; i < words.length(); i++) {
            final char ch = words.charAt(i);
            if (ch == '_') {
                sb.append(' ');
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();

    }

    /**
     * Turn a number into a corresponding ordinal string used to
     * denote the position in an ordered sequence.  The suffix is chosen
     * from the magnitude of the number, so negative numbers receive the same
     * suffix as their positive counterparts.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>1</td><td>"1st"</td></tr>
     *   <tr><td>2</td><td>"2nd"</td></tr>
     *   <tr><td>3</td><td>"3rd"</td></tr>
     *   <tr><td>4</td><td>"4th"</td></tr>
     *   <tr><td>1002</td><td>"1002nd"</td></tr>
     *   <tr><td>2012</td><td>"2012th"</td></tr>
     * </table>
     *
     * @param number Number to be converted
     * @return the number followed by its ordinal suffix
     */
    public String ordinalize(final int number) {

        // Take abs() of the remainder rather than of the number itself so that
        // Integer.MIN_VALUE cannot overflow.
        final int lastTwoDigits = Math.abs(number % 100);
        if ((lastTwoDigits >= 11) && (lastTwoDigits <= 13)) {
            return number + "th";
        }
        switch (lastTwoDigits % 10) {
            case 1:
                return number + "st";
            case 2:
                return number + "nd";
            case 3:
                return number + "rd";
            default:
                return number + "th";
        }

    }

    /**
     * Return a plural version of the specified (singular) word.
     *
     * @param word Singular word to be converted
     * @return the plural form, or the word itself if it is uncountable or
     *  no rule matches
     */
    public String pluralize(final String word) {

        // Scan uncountables and leave alone
        if (isUncountable(word)) {
            return word;
        }

        // Scan our patterns for a match and return the correct replacement
        for (final Replacer plural : plurals) {
            final String replacement = plural.replacement(word);
            if (replacement != null) {
                return replacement;
            }
        }

        // Return the original string unchanged
        return word;

    }

    /**
     * Return a singular version of the specified (plural) word.
     *
     * @param word Plural word to be converted
     * @return the singular form, or the word itself if it is uncountable or
     *  no rule matches
     */
    public String singularize(final String word) {

        // Scan uncountables and leave alone
        if (isUncountable(word)) {
            return word;
        }

        // Scan our patterns for a match and return the correct replacement
        for (final Replacer singular : singulars) {
            final String replacement = singular.replacement(word);
            if (replacement != null) {
                return replacement;
            }
        }

        // Return the original string unchanged
        return word;

    }

    /**
     * Convert the simple name of a model class into the corresponding
     * name of a database table, by uncamelizing, inserting underscores,
     * and pluralizing the last word.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"RawScaledScorer"</td><td>"raw_scaled_scorers"</td></tr>
     *   <tr><td>"fancyCategory"</td><td>"fancy_categories"</td></tr>
     * </table>
     *
     * @param className Class name to be converted
     * @return the underscored, pluralized table name
     */
    public String tableize(final String className) {

        return pluralize(underscore(className));

    }

    /**
     * Capitalize all the words, and replace some characters in the string
     * to create a nicer looking title.  This is meant for creating pretty
     * output, and is not intended for code generation.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"the honeymooners"</td><td>"The Honeymooners"</td></tr>
     *   <tr><td>"x-men: the last stand"</td><td>"X Men: The Last Stand"</td></tr>
     * </table>
     *
     * @param words Word string to be converted
     * @return the title cased string, with whitespace and '-' turned into ' '
     */
    public String titleize(final String words) {

        final StringBuilder sb = new StringBuilder(words.length());
        boolean capitalize = true; // To get the first character right
        for (int i = 0; i < words.length(); i++) {
            final char ch = words.charAt(i);
            if (Character.isWhitespace(ch) || ch == '-') {
                sb.append(' ');
                capitalize = true;
            } else if (capitalize) {
                sb.append(Character.toUpperCase(ch));
                capitalize = false;
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();

    }

    /**
     * Lower case the first character of the specified word.
     *
     * @param word Word to be converted, may be {@code null}
     * @return the word with its first character lower cased; {@code null},
     *  empty, and already decapitalized input is returned unchanged
     */
    public String decapitalize(final String word) {
        // do nothing if null or empty
        if ((word == null) || (word.length() < 1)) {
            return word;
        }
        // or if already decapitalized
        final char first = word.charAt(0);
        if (Character.isLowerCase(first)) {
            return word;
        }
        // otherwise turn the first character to lower case and attach the rest
        final StringBuilder sb = new StringBuilder(word.length());
        sb.append(Character.toLowerCase(first));
        sb.append(word, 1, word.length());
        return sb.toString();
    }

    /**
     * The reverse of {@code camelize()}, makes an underscored form
     * from the expression in the string.  Changes "." to "/" to convert
     * fully qualified class names into paths.
     *
     * <table>
     *   <caption>Examples</caption>
     *   <tr><th>Input</th><th>Output</th></tr>
     *   <tr><td>"FooBar"</td><td>"foo_bar"</td></tr>
     *   <tr><td>"fooBar"</td><td>"foo_bar"</td></tr>
     *   <tr><td>"FooBar.Baz"</td><td>"foo_bar/baz"</td></tr>
     *   <tr><td>"fooBar.Baz"</td><td>"foo_bar/baz"</td></tr>
     * </table>
     *
     * @param word Camel cased word to be converted
     * @return the lower cased, underscored form of the word
     */
    public String underscore(final String word) {

        final StringBuilder sb = new StringBuilder(word.length() + 5);
        // Set after a '.' so that the next character is lower cased WITHOUT
        // a leading underscore.
        boolean uncapitalize = false;
        for (int i = 0; i < word.length(); i++) {
            final char ch = word.charAt(i);
            if (uncapitalize) {
                sb.append(Character.toLowerCase(ch));
                uncapitalize = false;
            } else if (ch == '.') {
                sb.append('/');
                uncapitalize = true;
            } else if (Character.isUpperCase(ch)) {
                if (i > 0) {
                    sb.append('_');
                }
                sb.append(Character.toLowerCase(ch));
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();

    }

    // --------------------------------------------------- Customization Methods

    /**
     * Add the singular and plural forms of words that cannot be
     * converted using the normal rules.  The first character of each form is
     * captured and re-emitted so that the case of the input is preserved.
     * Both forms are embedded in a regular expression as-is and must not be
     * empty.
     *
     * @param singular Singular form of the word
     * @param plural Plural form of the word
     */
    public void addIrregular(final String singular, final String plural) {

        addPlural("(.*)(" + singular.substring(0, 1) + ")" + singular.substring(1) + "$",
                "\\1\\2" + plural.substring(1));
        addSingular("(.*)(" + plural.substring(0, 1) + ")" + plural.substring(1) + "$",
                "\\1\\2" + singular.substring(1));

    }

    /**
     * Add a match pattern and replacement rule for converting singular
     * forms to plural forms.  By default, matches will be case
     * insensitive.
     *
     * @param match Match pattern regular expression
     * @param rule Replacement rule
     */
    public void addPlural(final String match, final String rule) {

        addPlural(match, rule, true);

    }

    /**
     * Add a match pattern and replacement rule for converting singular
     * forms to plural forms.  The rule is placed in front of all rules
     * added earlier.
     *
     * @param match Match pattern regular expression
     * @param rule Replacement rule
     * @param insensitive Flag indicating this match should be case insensitive
     */
    public void addPlural(final String match, final String rule, final boolean insensitive) {

        plurals.add(0, new Replacer(match, rule, insensitive));

    }

    /**
     * Add a match pattern and replacement rule for converting plural
     * forms to singular forms.  By default, matches will be case insensitive.
     *
     * @param match Match pattern regular expression
     * @param rule Replacement rule
     */
    public void addSingular(final String match, final String rule) {

        addSingular(match, rule, true);

    }

    /**
     * Add a match pattern and replacement rule for converting plural
     * forms to singular forms.  The rule is placed in front of all rules
     * added earlier.
     *
     * @param match Match pattern regular expression
     * @param rule Replacement rule
     * @param insensitive Flag indicating this match should be case insensitive
     */
    public void addSingular(final String match, final String rule, final boolean insensitive) {

        singulars.add(0, new Replacer(match, rule, insensitive));

    }

    /**
     * Add a word that cannot be converted between singular and plural.
     * The word is stored lower cased and compared ignoring case.
     *
     * @param word Word to be added
     */
    public void addUncountable(final String word) {

        uncountables.add(0, word.toLowerCase(Locale.ROOT));

    }

    // --------------------------------------------------------- Private Methods

    /**
     * Check whether the word is registered as uncountable, ignoring case
     * (the registered words are stored lower cased).
     *
     * @param word Word to look up
     * @return {@code true} if the word must be left alone
     */
    private boolean isUncountable(final String word) {
        for (final String uncountable : uncountables) {
            if (uncountable.equalsIgnoreCase(word)) {
                return true;
            }
        }
        return false;
    }

    // --------------------------------------------------------- Private Classes

    /**
     * Internal class that uses a regular expression matcher to both
     * match the specified regular expression to a specified word, and
     * (if successful) perform the appropriate substitutions.
     */
    private static final class Replacer {

        // -------------------------------------------------- Instance Variables
        private final Pattern pattern;
        private final String rule;

        // --------------------------------------------------------- Constructor

        /**
         * Compile the match expression and remember the replacement rule.
         *
         * @param match regular expression that must match the WHOLE input
         * @param rule replacement text; a backslash followed by a digit
         *  refers to the capturing group with that number
         * @param insensitive whether to match case insensitively
         */
        public Replacer(final String match, final String rule, final boolean insensitive) {

            pattern = Pattern.compile(match,
                    insensitive ? Pattern.CASE_INSENSITIVE : 0);
            this.rule = rule;

        }

        // ------------------------------------------------------ Public Methods

        /**
         * Replace the input if it matches the pattern.  Groups that did not
         * take part in the match contribute nothing to the result, and a
         * backslash that is followed by a non-digit emits that character
         * literally.
         *
         * @param input the input string.
         * @return the replacement, if the input matches, otherwise null.
         */
        public String replacement(final String input) {
            final Matcher matcher = pattern.matcher(input);
            if (!matcher.matches()) {
                return null;
            }
            final StringBuilder sb = new StringBuilder(input.length() + rule.length());
            boolean escaped = false;
            for (int i = 0; i < rule.length(); i++) {
                final char ch = rule.charAt(i);
                if (escaped) {
                    escaped = false;
                    final int groupIndex = Character.digit(ch, 10);
                    if (groupIndex < 0) {
                        // Not a group reference: treat as an escaped literal.
                        sb.append(ch);
                    } else {
                        // group() is null for an alternative that was not taken.
                        final String captured = matcher.group(groupIndex);
                        if (captured != null) {
                            sb.append(captured);
                        }
                    }
                } else if (ch == '\\') {
                    escaped = true;
                } else {
                    sb.append(ch);
                }
            }
            return sb.toString();
        }
    }
}
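/*
 * Example tables (Input -> Output) repeating the rows listed in the Javadoc
 * of the corresponding NounInflector methods.
 *
 * camelize(word, flag):
 *   Input                          Output
 *   "foo_bar", false               "FooBar"
 *   "foo_bar", true                "fooBar"
 *   "foo_bar/baz", false           "FooBar.Baz"
 *   "foo_bar/baz", true            "fooBar.Baz"
 *
 * foreignKey(className, underscore):
 *   Input                          Output
 *   "com.mymodel.Order", false     "orderid"
 *   "com.mymodel.Order", true      "order_id"
 *   "Message", false               "messageid"
 *   "Message", true                "message_id"
 *
 * tableize(className):
 *   Input                          Output
 *   "RawScaledScorer"              "raw_scaled_scorers"
 *   "fancyCategory"                "fancy_categories"
 *
 * titleize(words):
 *   Input                          Output
 *   "the honeymooners"             "The Honeymooners"
 *   "x-men: the last stand"        "X Men: The Last Stand"
 *
 * underscore(word):
 *   Input                          Output
 *   "FooBar"                       "foo_bar"
 *   "fooBar"                       "foo_bar"
 *   "FooBar.Baz"                   "foo_bar/baz"
 *   "FooBar.Baz"                   "foo_bar/baz"
 */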