// com.mysql.cj.util.SaslPrep Maven / Gradle / Ivy
/*
* Copyright (c) 2020, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 2.0, as published by the
* Free Software Foundation.
*
* This program is also distributed with certain software (including but not
* limited to OpenSSL) that is licensed under separate terms, as designated in a
* particular file or component or in included license documentation. The
* authors of MySQL hereby grant you an additional permission to link the
* program and your derivative works with the separately licensed software that
* they have included with MySQL.
*
* Without limiting anything contained in the foregoing, this file, which is
* part of MySQL Connector/J, is also subject to the Universal FOSS Exception,
* version 1.0, a copy of which can be found at
* http://oss.oracle.com/licenses/universal-foss-exception.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
* for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.mysql.cj.util;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import com.mysql.cj.exceptions.ExceptionFactory;
import com.mysql.cj.exceptions.WrongArgumentException;
/**
 * Implementation for SASLprep: Stringprep Profile for User Names and Passwords, as specified in RFC 4013.
 *
 * @see <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>
 * @see <a href="https://tools.ietf.org/html/rfc3454">RFC 3454</a>
 */
public class SaslPrep {
    /**
     * The type of string usage regarding the support for unassigned code points as described in RFC 3454, Section 7.
     */
    public enum StringType {
        /**
         * Stored strings using the profile MUST NOT contain any unassigned code points.
         */
        STORED,
        /**
         * Queries for matching strings MAY contain unassigned code points.
         */
        QUERY;
    }

    /**
     * Prepares the given string by applying the "SASLprep" profile of the "stringprep" algorithm.
     * <p>
     * The steps are applied in the order defined by RFC 4013, Section 2: mapping, NFKC normalization, prohibited output check, bidirectional characters check
     * and, for {@link StringType#STORED} strings only, unassigned code points check.
     *
     * @param str
     *            the string to prepare; must not be <code>null</code>.
     * @param sType
     *            the type of preparation with regard to the support for unassigned code points.
     *
     * @return
     *         the prepared version of the given string. An empty string is returned if the given string is empty or if all its characters are mapped to
     *         nothing.
     * @throws WrongArgumentException
     *             if the string contains prohibited characters, violates the bidirectional characters rules or, for {@link StringType#STORED} strings,
     *             contains unassigned code points.
     * @see <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>
     * @see <a href="https://tools.ietf.org/html/rfc3454">RFC 3454</a>
     */
    public static String prepare(String str, StringType sType) {
        if (str.length() == 0) {
            return str;
        }

        StringBuilder sb = new StringBuilder(str.length());

        // 2.1. Mapping.
        // All mapped characters belong to the BMP, so iterating over UTF-16 code units is safe here; surrogates are copied through untouched.
        for (char chr : str.toCharArray()) {
            // U+200B is in both tables; the non-ASCII space check runs first, so it ends up mapped to SPACE.
            if (isNonAsciiSpaceChar(chr)) {
                sb.append(' ');
            } else if (!isMappeableToNothing(chr)) {
                sb.append(chr);
            }
        }

        // 2.2. Normalization.
        String preparedStr = normalizeKc(sb);

        // The mapping step may have removed every character (e.g. a string made only of soft hyphens). There is nothing left to validate and
        // the first/last code point lookups below would fail on an empty string.
        if (preparedStr.length() == 0) {
            return preparedStr;
        }

        // 2.3. Prohibited Output & 2.4. Bidirectional Characters & 2.5. Unassigned Code Points.
        boolean startsWithRAndAlCat = isBidiRAndAlCat(preparedStr.codePointAt(0));
        // codePointBefore() handles a trailing surrogate pair as well as a trailing unpaired surrogate without index arithmetic.
        boolean endsWithRAndAlCat = isBidiRAndAlCat(preparedStr.codePointBefore(preparedStr.length()));
        boolean containsRAndAlCat = startsWithRAndAlCat || endsWithRAndAlCat;
        boolean containsLCat = false;

        for (int i = 0, ni; i < preparedStr.length(); i = ni) {
            char chr = preparedStr.charAt(i);
            int cp = preparedStr.codePointAt(i);
            ni = i + Character.charCount(cp);

            // 2.3. Prohibited Output.
            if (isProhibited(chr, cp)) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Prohibited character at position " + i + ".");
            }

            // 2.4. Bidirectional Characters.
            // (Already covered: MUST be prohibited - change display properties or are deprecated.)
            // RFC 3454, Section 5.8.
            if (!containsRAndAlCat) {
                containsRAndAlCat = isBidiRAndAlCat(cp);
            }
            if (!containsLCat) {
                containsLCat = isBidiLCat(cp);
            }
            if (containsRAndAlCat && containsLCat) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Cannot contain both RandALCat characters and LCat characters.");
            }
            // Only evaluated on the last code point, once it is known whether the string contains any RandALCat character at all.
            if (ni >= preparedStr.length() && containsRAndAlCat && (!startsWithRAndAlCat || !endsWithRAndAlCat)) {
                throw ExceptionFactory.createException(WrongArgumentException.class,
                        "Cannot contain RandALCat characters and not start and end with RandALCat characters.");
            }

            // 2.5. Unassigned Code Points.
            if (sType == StringType.STORED && isUnassigned(cp)) {
                throw ExceptionFactory.createException(WrongArgumentException.class, "Unassigned character at position " + i + ".");
            }
        }

        return preparedStr;
    }

    /**
     * Mapping: non-ASCII space characters [StringPrep, C.1.2] that can be mapped to SPACE (U+0020).
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the non-ASCII space characters, <code>false</code> otherwise.
     */
    private static boolean isNonAsciiSpaceChar(char chr) {
        return chr == '\u00A0' || chr == '\u1680' || chr >= '\u2000' && chr <= '\u200B' || chr == '\u202F' || chr == '\u205F' || chr == '\u3000';
    }

    /**
     * Mapping: the "commonly mapped to nothing" characters [StringPrep, B.1] that can be mapped to nothing.
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the "commonly mapped to nothing" characters, <code>false</code> otherwise.
     */
    private static boolean isMappeableToNothing(char chr) {
        return chr == '\u00AD' || chr == '\u034F' || chr == '\u1806' || chr >= '\u180B' && chr <= '\u180D' || chr >= '\u200B' && chr <= '\u200D'
                || chr == '\u2060' || chr >= '\uFE00' && chr <= '\uFE0F' || chr == '\uFEFF';
    }

    /**
     * Normalization: Unicode normalization form KC.
     *
     * @param str
     *            the string to be normalized.
     * @return
     *         a normalized version of the given string by the rules of the Unicode normalization form KC.
     */
    private static String normalizeKc(CharSequence str) {
        return Normalizer.normalize(str, Form.NFKC);
    }

    /**
     * Checks if the given character is one of the prohibited characters under the "SASLprep Profile" rules.
     *
     * @param chr
     *            the first UTF-16 code unit of the character to check.
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is prohibited according to the profile rules, <code>false</code> otherwise.
     * @see <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013, Section 2.3</a>
     */
    private static boolean isProhibited(char chr, int cp) {
        // The surrogate check must look at the code point: for a well-formed surrogate pair 'chr' is a high surrogate while 'cp' is the
        // supplementary code point it encodes, which is not a surrogate code point and must not be rejected as such.
        return /* already covered: isNonAsciiSpaceChar(chr) || */ isAsciiControlCharacter(chr) || isNonAsciiControlCharacter(cp) || isPrivateUseCharacter(cp)
                || isNonCharacterCodePoint(cp) || isSurrogateCode(cp) || isInappropriateForPlainTextCharacter(chr)
                || isInappropriateForCanonicalRepresentationCharacter(chr) || isChangeDisplayPropertiesOrDeprecatedCharacter(chr) || isTaggingCharacter(cp);
    }

    /**
     * Prohibited Output: ASCII control characters [StringPrep, C.2.1].
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the ASCII control characters, <code>false</code> otherwise.
     */
    private static boolean isAsciiControlCharacter(char chr) {
        return chr <= '\u001F' || chr == '\u007F';
    }

    /**
     * Prohibited Output: non-ASCII control characters [StringPrep, C.2.2].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the non-ASCII control characters, <code>false</code> otherwise.
     */
    private static boolean isNonAsciiControlCharacter(int cp) {
        return cp >= 0x0080 && cp <= 0x009F || cp == 0x06DD || cp == 0x070F || cp == 0x180E || cp == 0x200C || cp == 0x200D || cp == 0x2028 || cp == 0x2029
                || cp >= 0x2060 && cp <= 0x2063 || cp >= 0x206A && cp <= 0x206F || cp == 0xFEFF || cp >= 0xFFF9 && cp <= 0xFFFC
                || cp >= 0x1D173 && cp <= 0x1D17A;
    }

    /**
     * Prohibited Output: private use characters [StringPrep, C.3].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the private use characters, <code>false</code> otherwise.
     */
    private static boolean isPrivateUseCharacter(int cp) {
        return cp >= 0xE000 && cp <= 0xF8FF || cp >= 0xF0000 && cp <= 0xFFFFD || cp >= 0x100000 && cp <= 0x10FFFD;
    }

    /**
     * Prohibited Output: non-character code points [StringPrep, C.4].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the non-character code points, <code>false</code> otherwise.
     */
    private static boolean isNonCharacterCodePoint(int cp) {
        return cp >= 0xFDD0 && cp <= 0xFDEF || cp >= 0xFFFE && cp <= 0xFFFF || cp >= 0x1FFFE && cp <= 0x1FFFF || cp >= 0x2FFFE && cp <= 0x2FFFF
                || cp >= 0x3FFFE && cp <= 0x3FFFF || cp >= 0x4FFFE && cp <= 0x4FFFF || cp >= 0x5FFFE && cp <= 0x5FFFF || cp >= 0x6FFFE && cp <= 0x6FFFF
                || cp >= 0x7FFFE && cp <= 0x7FFFF || cp >= 0x8FFFE && cp <= 0x8FFFF || cp >= 0x9FFFE && cp <= 0x9FFFF || cp >= 0xAFFFE && cp <= 0xAFFFF
                || cp >= 0xBFFFE && cp <= 0xBFFFF || cp >= 0xCFFFE && cp <= 0xCFFFF || cp >= 0xDFFFE && cp <= 0xDFFFF || cp >= 0xEFFFE && cp <= 0xEFFFF
                || cp >= 0xFFFFE && cp <= 0xFFFFF || cp >= 0x10FFFE && cp <= 0x10FFFF;
    }

    /**
     * Prohibited Output: surrogate code points [StringPrep, C.5].
     * <p>
     * This takes the code point rather than the UTF-16 code unit. A code point obtained from {@link String#codePointAt(int)} falls in this range only
     * when the surrogate is unpaired.
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the code point is one of the surrogate code points, <code>false</code> otherwise.
     */
    private static boolean isSurrogateCode(int cp) {
        return cp >= 0xD800 && cp <= 0xDFFF;
    }

    /**
     * Prohibited Output: inappropriate for plain text characters [StringPrep, C.6].
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the inappropriate for plain text characters, <code>false</code> otherwise.
     */
    private static boolean isInappropriateForPlainTextCharacter(char chr) {
        return chr == '\uFFF9' || chr >= '\uFFFA' && chr <= '\uFFFD';
    }

    /**
     * Prohibited Output: inappropriate for canonical representation characters [StringPrep, C.7].
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the inappropriate for canonical representation characters, <code>false</code> otherwise.
     */
    private static boolean isInappropriateForCanonicalRepresentationCharacter(char chr) {
        return chr >= '\u2FF0' && chr <= '\u2FFB';
    }

    /**
     * Prohibited Output: change display properties or deprecated characters [StringPrep, C.8].
     *
     * @param chr
     *            the character to check.
     * @return
     *         <code>true</code> if the character is one of the change display properties or deprecated characters, <code>false</code> otherwise.
     */
    private static boolean isChangeDisplayPropertiesOrDeprecatedCharacter(char chr) {
        return chr == '\u0340' || chr == '\u0341' || chr == '\u200E' || chr == '\u200F' || chr >= '\u202A' && chr <= '\u202E'
                || chr >= '\u206A' && chr <= '\u206F';
    }

    /**
     * Prohibited Output: tagging characters [StringPrep, C.9].
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the tagging characters, <code>false</code> otherwise.
     */
    private static boolean isTaggingCharacter(int cp) {
        return cp == 0xE0001 || cp >= 0xE0020 && cp <= 0xE007F;
    }

    /**
     * Bidirectional Characters: RandALCat characters.
     * See also RFC 3454, Section 6
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the RandALCat characters, <code>false</code> otherwise.
     */
    private static boolean isBidiRAndAlCat(int cp) {
        byte dir = Character.getDirectionality(cp);
        return dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
    }

    /**
     * Bidirectional Characters: LCat characters.
     * See also RFC 3454, Section 6
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is one of the LCat characters, <code>false</code> otherwise.
     */
    private static boolean isBidiLCat(int cp) {
        byte dir = Character.getDirectionality(cp);
        return dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT;
    }

    /**
     * Unassigned Code Points: list of unassigned code points.
     * See also RFC 3454, Section 7.
     *
     * <p>
     * Note that this implementation does not check exactly the unassigned code points as specified in the RFC since it is based on Java's Unicode support,
     * which is updated regularly while the specification is based on a static list of code points. This should have no major impact, though.
     *
     * @param cp
     *            the code point of the character to check.
     * @return
     *         <code>true</code> if the character is unassigned, <code>false</code> otherwise.
     */
    private static boolean isUnassigned(int cp) {
        return !Character.isDefined(cp);
    }
}