All downloads are free. The search and download functionality uses the official Maven repository.

de.unkrig.commons.lang.Characters Maven / Gradle / Ivy


/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2016, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
 *       products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.lang;

import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.nullanalysis.Nullable;

/**
 * Extensions for the JRE's {@link Character} class.
 */
public final
class Characters {

    private Characters() {}

    /**
     * Evaluates whether a given code point lies in the POSIX character class "lower" ({@code [a-z]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_LOWER = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return c >= 'a' && c <= 'z';
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "upper" ({@code [A-Z]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_UPPER = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return c >= 'A' && c <= 'Z';
        }
    };

    /**
     * Evaluates whether a given code point is in the ASCII range (0-127).
     */
    public static final Predicate
    IS_POSIX_ASCII = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return subject <= 0x7f; }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "alpha" ({@code [A-Za-z]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_ALPHA = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "digit" ({@code [0-9]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_DIGIT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return c >= '0' && c <= '9';
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "alnum" ({@code [A-Za-z0-9]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_ALNUM = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "punct" (one of ! " # $ % &
     * ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_PUNCT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return (
                (c >= '!' && c <= '/')    // !"#$%&'()*+,-./ (33...47)
                || (c >= ':' && c <= '@') // :;<=>?@         (58...64)
                || (c >= '[' && c <= '`') // [\]^_`          (91...96)
                || (c >= '{' && c <= '~') // {|}~            (123...126)
            );
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "graph"; the union of classes "alpha",
     * "digit", and "punct".
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_GRAPH = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            return Characters.IS_POSIX_ALNUM.evaluate(subject) || Characters.IS_POSIX_PUNCT.evaluate(subject);
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "print"; the union of classes "alpha",
     * "digit" and "punct", and the SPACE character.
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_PRINT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            return (
                subject == ' '
                || Characters.IS_POSIX_ALNUM.evaluate(subject)
                || Characters.IS_POSIX_PUNCT.evaluate(subject)
            );
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "blank"; which consists of the SPACE
     * character and the TAB character.
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_BLANK = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return c == ' ' || c == '\t';
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "cntrl" ({@code [\0-\x1f\x7f]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_CNTRL = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return c <= 0x1f || c == 0x7f;
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "xdigit" ({@code [0-9a-fA-F]}).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_XDIGIT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        }
    };

    /**
     * Evaluates whether a given code point lies in the POSIX character class "space" (consisting of the tab, newline,
     * vertical-tab, form-feed, carriage-return and space characters).
     *
     * @see The Open
     *      Group Base Specifications Issue 7, section 7.3.1: LC_CTYPE
     */
    public static final Predicate
    IS_POSIX_SPACE = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            int c = subject;
            return (
                (c >= 9 && c <= 13) // 0x09=tab, 0x0a=newline, 0x0b=vertical-tab, 0x0c=form-feed, 0x0d=carriage-return
                || c == ' '         // 0x20=space
            );
        }
    };

    /** A predicate for {@link Character#isLowerCase(int)}. */
    public static final Predicate
    IS_LOWER_CASE = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isLowerCase(subject); }
    };

    /** A predicate for {@link Character#isUpperCase(int)}. */
    public static final Predicate
    IS_UPPER_CASE = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isUpperCase(subject); }
    };

    /** A predicate for {@link Character#isWhitespace(int)}. */
    public static final Predicate
    IS_WHITESPACE = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isWhitespace(subject); }
    };

    /** A predicate for {@link Character#isMirrored(int)}. */
    public static final Predicate
    IS_MIRRORED = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isMirrored(subject); }
    };

    public static final Predicate
    IS_UNICODE_LOWER = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isLowerCase(subject); }
    };

    public static final Predicate
    IS_UNICODE_UPPER = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isUpperCase(subject); }
    };

    public static final Predicate
    IS_UNICODE_TITLE = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isTitleCase(subject); }
    };

    public static final Predicate
    IS_UNICODE_LETTER = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isLetter(subject); }
    };

    public static final Predicate
    IS_UNICODE_ALPHA = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            // "Character.isAlphabetic()" is only available since Java 1.7.
            int type = Character.getType(subject);
            return (
                type == Character.UPPERCASE_LETTER
                || type == Character.LOWERCASE_LETTER
                || type == Character.TITLECASE_LETTER
                || type == Character.MODIFIER_LETTER
                || type == Character.OTHER_LETTER
                || type == Character.LETTER_NUMBER
            );
        }
    };

    public static final Predicate
    IS_UNICODE_DIGIT = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return Character.isDigit(subject); }
    };

    public static final Predicate
    IS_UNICODE_ALNUM = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            return Characters.IS_UNICODE_ALPHA.evaluate(subject) || Characters.IS_UNICODE_DIGIT.evaluate(subject);
        }
    };

    public static final Predicate
    IS_UNICODE_PUNCT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            // See "UnicodeProp.PUNCTUATION"
            int type = Character.getType(subject);
            return (
                type == Character.CONNECTOR_PUNCTUATION
                || type == Character.DASH_PUNCTUATION
                || type == Character.START_PUNCTUATION
                || type == Character.END_PUNCTUATION
                || type == Character.OTHER_PUNCTUATION
                || type == Character.INITIAL_QUOTE_PUNCTUATION
                || type == Character.FINAL_QUOTE_PUNCTUATION
            );
        }
    };

    public static final Predicate
    IS_UNICODE_GRAPH = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            // See "UnicodeProp.GRAPH"
            int type = Character.getType(subject);
            return (
                type == Character.SPACE_SEPARATOR
                || type == Character.LINE_SEPARATOR
                || type == Character.PARAGRAPH_SEPARATOR
                || type == Character.CONTROL
                || type == Character.SURROGATE
                || type == Character.UNASSIGNED
            );
        }
    };

    public static final Predicate
    IS_UNICODE_PRINT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            // See "UnicodeProp.PRINT"
            return (
                (Characters.IS_UNICODE_GRAPH.evaluate(subject) || Characters.IS_UNICODE_BLANK.evaluate(subject))
                && !Characters.IS_UNICODE_CNTRL.evaluate(subject)
            );
        }
    };

    public static final Predicate
    IS_UNICODE_BLANK = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {
            return Character.getType(subject) == Character.SPACE_SEPARATOR || subject == 0x9;
        }
    };

    public static final Predicate
    IS_UNICODE_CNTRL = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) { return Character.getType(subject) == Character.CONTROL; }
    };

    public static final Predicate
    IS_UNICODE_HEX_DIGIT = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            // See "UnicodeProp.HEX_DIGIT"
            return (
                Character.isDigit(subject)
                || (subject >= '0'    && subject <= '9')
                || (subject >= 'A'    && subject <= 'F')
                || (subject >= 'a'    && subject <= 'f')
                || (subject >= 0xFF10 && subject <= 0xFF19)
                || (subject >= 0xFF21 && subject <= 0xFF26)
                || (subject >= 0xFF41 && subject <= 0xFF46)
            );
        }
    };

    public static final Predicate
    IS_UNICODE_ASSIGNED = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) { return Character.getType(subject) != Character.UNASSIGNED; }
    };

    public static final Predicate
    IS_UNICODE_NONCHARACTER = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) { return (subject & 0xfffe) == 0xfffe || (subject >= 0xfdd0 && subject <= 0xfdef); }
    };

    public static final Predicate
    IS_UNICODE_WHITE_SPACE = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            int type = Character.getType(subject);
            return (
                type == Character.SPACE_SEPARATOR
                || type == Character.LINE_SEPARATOR
                || type == Character.PARAGRAPH_SEPARATOR
                || (subject >= 0x9 && subject <= 0xd)
                || (subject == 0x85)
            );
        }
    };

    public static final Predicate
    IS_UNICODE_WORD = new Predicate() {

        @Override public boolean
        evaluate(Integer subject) {

            if (
                Characters.IS_UNICODE_ALPHA.evaluate(subject)
                || Characters.IS_UNICODE_JOIN_CONTROL.evaluate(subject)
            ) return true;

            int type = Character.getType(subject);
            return (
                type == Character.NON_SPACING_MARK
                || type == Character.ENCLOSING_MARK
                || type == Character.COMBINING_SPACING_MARK
                || type == Character.DECIMAL_DIGIT_NUMBER
                || type == Character.CONNECTOR_PUNCTUATION
            );
        }
    };

    public static final Predicate
    IS_UNICODE_JOIN_CONTROL = new Predicate() {
        @Override public boolean evaluate(Integer subject) { return subject == 0x200C || subject == 0x200D; }
    };

    @Nullable public static Predicate
    unicodePropertyFromName(String name) {
        return Characters.UNICODE_PROPERTIES.get(name.toUpperCase(Locale.US));
    }
    private static final Map> UNICODE_PROPERTIES;
    static {
        Map> m = new HashMap>();

        m.put("ALPHABETIC",              Characters.IS_UNICODE_ALPHA);
        m.put("LETTER",                  Characters.IS_UNICODE_LETTER);
//        m.put("IDEOGRAPHIC",             Characters.IS_UNICODE_IDEOGRAPHIC);
        m.put("LOWERCASE",               Characters.IS_UNICODE_LOWER);
        m.put("UPPERCASE",               Characters.IS_UNICODE_UPPER);
        m.put("TITLECASE",               Characters.IS_UNICODE_TITLE);
        m.put("WHITE_SPACE",             Characters.IS_UNICODE_WHITE_SPACE);
        m.put("CONTROL",                 Characters.IS_UNICODE_CNTRL);
        m.put("PUNCTUATION",             Characters.IS_UNICODE_PUNCT);
        m.put("HEX_DIGIT",               Characters.IS_UNICODE_HEX_DIGIT);
        m.put("ASSIGNED",                Characters.IS_UNICODE_ASSIGNED);
        m.put("NONCHARACTER_CODE_POINT", Characters.IS_UNICODE_NONCHARACTER);
        m.put("DIGIT",                   Characters.IS_UNICODE_DIGIT);
        m.put("ALNUM",                   Characters.IS_UNICODE_ALNUM);
        m.put("BLANK",                   Characters.IS_UNICODE_BLANK);
        m.put("GRAPH",                   Characters.IS_UNICODE_GRAPH);
        m.put("PRINT",                   Characters.IS_UNICODE_PRINT);
        m.put("WORD",                    Characters.IS_UNICODE_WORD);
        m.put("JOIN_CONTROL",            Characters.IS_UNICODE_JOIN_CONTROL);

        // Aliases.
        m.put("WHITESPACE",            m.get("WHITE_SPACE"));
        m.put("HEXDIGIT",              m.get("HEX_DIGIT"));
        m.put("NONCHARACTERCODEPOINT", m.get("NONCHARACTER_CODE_POINT"));
        m.put("JOINCONTROL",           m.get("JOIN_CONTROL"));

        UNICODE_PROPERTIES = Collections.unmodifiableMap(m);
    }

    @Nullable public static Predicate
    unicodePropertyFromPosixName(String name) {
        return Characters.UNICODE_PROPERTIES2.get(name.toUpperCase(Locale.US));
    }
    private static final Map> UNICODE_PROPERTIES2;
    static {
        Map> m = new HashMap>();

        m.put("ALPHA",  Characters.IS_UNICODE_ALPHA);
        m.put("LOWER",  Characters.IS_UNICODE_LOWER);
        m.put("UPPER",  Characters.IS_UNICODE_UPPER);
        m.put("SPACE",  Characters.IS_UNICODE_WHITE_SPACE);
        m.put("PUNCT",  Characters.IS_UNICODE_PUNCT);
        m.put("XDIGIT", Characters.IS_UNICODE_HEX_DIGIT);
        m.put("ALNUM",  Characters.IS_UNICODE_ALNUM);
        m.put("CNTRL",  Characters.IS_UNICODE_CNTRL);
        m.put("DIGIT",  Characters.IS_UNICODE_DIGIT);
        m.put("BLANK",  Characters.IS_UNICODE_BLANK);
        m.put("GRAPH",  Characters.IS_UNICODE_GRAPH);
        m.put("PRINT",  Characters.IS_UNICODE_PRINT);

        UNICODE_PROPERTIES2 = Collections.unmodifiableMap(m);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy