All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mysql.cj.util.SaslPrep Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta2
Show newest version
/*
 * Copyright (c) 2020, Oracle and/or its affiliates.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, version 2.0, as published by the
 * Free Software Foundation.
 *
 * This program is also distributed with certain software (including but not
 * limited to OpenSSL) that is licensed under separate terms, as designated in a
 * particular file or component or in included license documentation. The
 * authors of MySQL hereby grant you an additional permission to link the
 * program and your derivative works with the separately licensed software that
 * they have included with MySQL.
 *
 * Without limiting anything contained in the foregoing, this file, which is
 * part of MySQL Connector/J, is also subject to the Universal FOSS Exception,
 * version 1.0, a copy of which can be found at
 * http://oss.oracle.com/licenses/universal-foss-exception.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
 */

package com.mysql.cj.util;

import java.text.Normalizer;
import java.text.Normalizer.Form;

import com.mysql.cj.exceptions.ExceptionFactory;
import com.mysql.cj.exceptions.WrongArgumentException;

/**
 * Implementation for SASLprep: Stringprep Profile for User Names and Passwords, as specified in RFC 4013.
 *
 * <p>
 * Character classification after normalization is performed on Unicode code points, so a supplementary character (stored in a Java string as a surrogate
 * pair) is evaluated as one character. Only unpaired surrogates are treated as surrogate code points.
 *
 * @see <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>
 * @see <a href="https://tools.ietf.org/html/rfc3454">RFC 3454</a>
 */
public class SaslPrep {
    /**
     * The type of string usage regarding the support for unassigned code points as described in RFC
     * 3454, Section 7.
     */
    public enum StringType {
        /**
         * Stored strings using the profile MUST NOT contain any unassigned code points.
         */
        STORED,
        /**
         * Queries for matching strings MAY contain unassigned code points.
         */
        QUERY;
    }

    /**
     * Prepares the given string by applying the "SASLprep" profile of the "stringprep" algorithm.
     *
     * <p>
     * The steps are applied in this order: mapping (RFC 4013, Section 2.1), NFKC normalization (2.2), and then, code point by code point, the checks for
     * prohibited output (2.3), bidirectional characters (2.4) and, for {@link StringType#STORED} only, unassigned code points (2.5).
     *
     * @param str
     *            the string to prepare.
     * @param sType
     *            the type of preparation with regard to the support for unassigned code points.
     *
     * @return
     *         the prepared version of the given string; an empty string if the input is empty or consists only of characters that are mapped to nothing.
     * @throws WrongArgumentException
     *             if the string contains a prohibited character, violates the bidirectional rules or, for {@link StringType#STORED}, contains an unassigned
     *             code point.
     * @throws NullPointerException
     *             if <code>str</code> is <code>null</code>.
     * @see <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>
     * @see <a href="https://tools.ietf.org/html/rfc3454">RFC 3454</a>
     */
    public static String prepare(String str, StringType sType) {
        if (str.length() == 0) {
            return str;
        }

        StringBuilder sb = new StringBuilder(str.length());

        // 2.1. Mapping.
        // All characters affected by the mapping tables are in the BMP, so working on UTF-16 code units is safe here; surrogates are copied through untouched.
        for (int i = 0; i < str.length(); i++) {
            char chr = str.charAt(i);
            if (isNonAsciiSpaceChar(chr)) {
                sb.append(' ');
            } else if (!isMappableToNothing(chr)) {
                sb.append(chr);
            }
        }

        // 2.2. Normalization.
        String preparedStr = normalizeKc(sb);

        // The mapping step may have removed every character; there is nothing left to validate and the code point lookups below require a non-empty string.
        if (preparedStr.isEmpty()) {
            return preparedStr;
        }

        // 2.3. Prohibited Output & 2.4. Bidirectional Characters & 2.5. Unassigned Code Points.
        boolean startsWithRAndAlCat = isBidiRAndAlCat(preparedStr.codePointAt(0));
        // codePointBefore() yields the full code point for a trailing surrogate pair and the lone code unit otherwise, never indexing out of bounds.
        boolean endsWithRAndAlCat = isBidiRAndAlCat(preparedStr.codePointBefore(preparedStr.length()));
        boolean containsRAndAlCat = startsWithRAndAlCat || endsWithRAndAlCat;
        boolean containsLCat = false;
        for (int i = 0, ni; i < preparedStr.length(); i = ni) {
            int cp = preparedStr.codePointAt(i);
            ni = i + Character.charCount(cp);

            // 2.3. Prohibited Output.
            if (isProhibited(cp)) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Prohibited character at position " + i + ".");
            }

            // 2.4. Bidirectional Characters.
            // (Already covered: MUST be prohibited - change display properties or are deprecated.)
            // RFC 3454, Section 5.8.
            if (!containsRAndAlCat) {
                containsRAndAlCat = isBidiRAndAlCat(cp);
            }
            if (!containsLCat) {
                containsLCat = isBidiLCat(cp);
            }
            if (containsRAndAlCat && containsLCat) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Cannot contain both RandALCat characters and LCat characters.");
            }
            // Only decidable once the whole string was scanned, hence evaluated on the last code point.
            if (ni >= preparedStr.length() && containsRAndAlCat && (!startsWithRAndAlCat || !endsWithRAndAlCat)) {
                throw ExceptionFactory.createException(WrongArgumentException.class,
                        "Cannot contain RandALCat characters and not start and end with RandALCat characters.");
            }

            // 2.5. Unassigned Code Points.
            if (sType == StringType.STORED && isUnassigned(cp)) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Unassigned character at position " + i + ".");
            }
        }

        return preparedStr;
    }

    /**
     * Mapping: non-ASCII space characters [StringPrep, C.1.2] that can be mapped to SPACE
     * (U+0020).
     *
     * @param chr
     *            the character to check.
     * @return
     *         true if the character is one of the non-ASCII space characters, false otherwise.
     */
    private static boolean isNonAsciiSpaceChar(char chr) {
        return chr == '\u00A0' || chr == '\u1680' || chr >= '\u2000' && chr <= '\u200B' || chr == '\u202F' || chr == '\u205F' || chr == '\u3000';
    }

    /**
     * Mapping: the "commonly mapped to nothing" characters [StringPrep, B.1] that can be mapped
     * to nothing.
     *
     * <p>
     * Note that U+200B also matches {@link #isNonAsciiSpaceChar(char)}, which {@link #prepare(String, StringType)} evaluates first.
     *
     * @param chr
     *            the character to check.
     * @return
     *         true if the character is one of the "commonly mapped to nothing" characters, false otherwise.
     */
    private static boolean isMappableToNothing(char chr) {
        return chr == '\u00AD' || chr == '\u034F' || chr == '\u1806' || chr >= '\u180B' && chr <= '\u180D' || chr >= '\u200B' && chr <= '\u200D'
                || chr == '\u2060' || chr >= '\uFE00' && chr <= '\uFE0F' || chr == '\uFEFF';
    }

    /**
     * Normalization: Unicode normalization form KC.
     *
     * @param str
     *            the string to be normalized.
     * @return
     *         a normalized version of the given string by the rules of the Unicode normalization form KC.
     */
    private static String normalizeKc(CharSequence str) {
        return Normalizer.normalize(str, Form.NFKC);
    }

    /**
     * Checks if the given code point is one of the prohibited characters under the "SASLprep Profile" rules.
     *
     * <p>
     * Non-ASCII space characters [StringPrep, C.1.2] are not checked here because they were already replaced in the mapping step.
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is prohibited according to the profile rules, false otherwise.
     * @see <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013, Section 2.3</a>
     */
    private static boolean isProhibited(int cp) {
        return isAsciiControlCharacter(cp) || isNonAsciiControlCharacter(cp) || isPrivateUseCharacter(cp) || isNonCharacterCodePoint(cp)
                || isSurrogateCode(cp) || isInappropriateForPlainTextCharacter(cp) || isInappropriateForCanonicalRepresentationCharacter(cp)
                || isChangeDisplayPropertiesOrDeprecatedCharacter(cp) || isTaggingCharacter(cp);
    }

    /**
     * Prohibited Output: ASCII control characters [StringPrep, C.2.1].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the ASCII control characters, false otherwise.
     */
    private static boolean isAsciiControlCharacter(int cp) {
        return cp <= 0x001F || cp == 0x007F;
    }

    /**
     * Prohibited Output: non-ASCII control characters [StringPrep, C.2.2].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the non-ASCII control characters, false otherwise.
     */
    private static boolean isNonAsciiControlCharacter(int cp) {
        return cp >= 0x0080 && cp <= 0x009F || cp == 0x06DD || cp == 0x070F || cp == 0x180E || cp == 0x200C || cp == 0x200D || cp == 0x2028 || cp == 0x2029
                || cp >= 0x2060 && cp <= 0x2063 || cp >= 0x206A && cp <= 0x206F || cp == 0xFEFF || cp >= 0xFFF9 && cp <= 0xFFFC
                || cp >= 0x1D173 && cp <= 0x1D17A;
    }

    /**
     * Prohibited Output: private use characters [StringPrep, C.3].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the private use characters, false otherwise.
     */
    private static boolean isPrivateUseCharacter(int cp) {
        return cp >= 0xE000 && cp <= 0xF8FF || cp >= 0xF0000 && cp <= 0xFFFFD || cp >= 0x100000 && cp <= 0x10FFFD;
    }

    /**
     * Prohibited Output: non-character code points [StringPrep, C.4].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the non-character code points, false otherwise.
     */
    private static boolean isNonCharacterCodePoint(int cp) {
        // U+FDD0..U+FDEF plus the last two code points (xFFFE and xFFFF) of each of the 17 Unicode planes.
        return cp >= 0xFDD0 && cp <= 0xFDEF || (cp & 0xFFFE) == 0xFFFE;
    }

    /**
     * Prohibited Output: surrogate code points [StringPrep, C.5].
     *
     * <p>
     * A value in this range can only be obtained from {@link String#codePointAt(int)} for an unpaired surrogate; well-formed surrogate pairs are returned as
     * a single supplementary code point and, thus, are not matched.
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the surrogate code points, false otherwise.
     */
    private static boolean isSurrogateCode(int cp) {
        return cp >= 0xD800 && cp <= 0xDFFF;
    }

    /**
     * Prohibited Output: inappropriate for plain text characters [StringPrep, C.6].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the inappropriate for plain text characters, false otherwise.
     */
    private static boolean isInappropriateForPlainTextCharacter(int cp) {
        return cp >= 0xFFF9 && cp <= 0xFFFD;
    }

    /**
     * Prohibited Output: inappropriate for canonical representation characters [StringPrep,
     * C.7].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the inappropriate for canonical representation characters, false otherwise.
     */
    private static boolean isInappropriateForCanonicalRepresentationCharacter(int cp) {
        return cp >= 0x2FF0 && cp <= 0x2FFB;
    }

    /**
     * Prohibited Output: change display properties or deprecated characters [StringPrep, C.8].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the change display properties or deprecated characters, false otherwise.
     */
    private static boolean isChangeDisplayPropertiesOrDeprecatedCharacter(int cp) {
        return cp == 0x0340 || cp == 0x0341 || cp == 0x200E || cp == 0x200F || cp >= 0x202A && cp <= 0x202E || cp >= 0x206A && cp <= 0x206F;
    }

    /**
     * Prohibited Output: tagging characters [StringPrep, C.9].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the tagging characters, false otherwise.
     */
    private static boolean isTaggingCharacter(int cp) {
        return cp == 0xE0001 || cp >= 0xE0020 && cp <= 0xE007F;
    }

    /**
     * Bidirectional Characters: RandALCat characters.
     * See also RFC 3454, Section 6
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the RandALCat characters, false otherwise.
     */
    private static boolean isBidiRAndAlCat(int cp) {
        byte dir = Character.getDirectionality(cp);
        return dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
    }

    /**
     * Bidirectional Characters: LCat characters.
     * See also RFC 3454, Section 6
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is one of the LCat characters, false otherwise.
     */
    private static boolean isBidiLCat(int cp) {
        byte dir = Character.getDirectionality(cp);
        return dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT;
    }

    /**
     * Unassigned Code Points: list of unassigned code points.
     * See also RFC 3454, Section 7.
     *
     * <p>
     * Note that this implementation does not check exactly the unassigned code points as specified in the RFC since it is based on Java's Unicode support,
     * which is updated regularly while the specification is based on a static list of code points. This should have no major impact, though.
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         true if the character is unassigned, false otherwise.
     */
    private static boolean isUnassigned(int cp) {
        return !Character.isDefined(cp);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy