org.apache.pdfbox.pdmodel.encryption.SaslPrep Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.encryption;
import java.nio.CharBuffer;
import java.text.Normalizer;
/**
* Copied from https://github.com/tombentley/saslprep/blob/master/src/main/java/SaslPrep.java on
* 30.5.2019, commit 2e30daa.
*
* @author Tom Bentley
*/
class SaslPrep
{
    private SaslPrep()
    {
        // static utility class, never instantiated
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a query
     * string. This implements the {@code SASLPrep} algorithm defined in
     * <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>. Unassigned code points are
     * allowed in query strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the
     * requirements for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for
     * discussion of what a query string is.
     */
    static String saslPrepQuery(String str)
    {
        return saslPrep(str, true);
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a
     * stored string. This implements the {@code SASLPrep} algorithm defined in
     * <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>. Unassigned code points are
     * rejected in stored strings.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited or unassigned
     * codepoints, or broke the requirements for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a> for
     * discussion of what a stored string is.
     */
    static String saslPrepStored(String str)
    {
        return saslPrep(str, false);
    }

    /**
     * Run the four SASLprep steps (map, normalize, prohibit, check bidi) on {@code str}.
     *
     * @param str The string to canonicalise.
     * @param allowUnassigned {@code true} to accept code points for which
     * {@link Character#isDefined(int)} is false (query strings), {@code false} to reject them
     * (stored strings).
     * @return The mapped and NFKC-normalized string.
     * @throws IllegalArgumentException if a prohibited or (when not allowed) unassigned code point
     * is found, or if the bidirectional checks fail.
     */
    private static String saslPrep(String str, boolean allowUnassigned)
    {
        char[] chars = str.toCharArray();

        // 1. Map
        // non-ASCII space chars mapped to space
        // (every mapped character is in the BMP, so working on UTF-16 units is safe here)
        for (int i = 0; i < chars.length; i++)
        {
            if (nonAsciiSpace(chars[i]))
            {
                chars[i] = ' ';
            }
        }

        // "commonly mapped to nothing" chars are dropped by compacting the array in place
        int length = 0;
        for (int i = 0; i < chars.length; i++)
        {
            char ch = chars[i];
            if (!mappedToNothing(ch))
            {
                chars[length++] = ch;
            }
        }

        // 2. Normalize
        String normalized = Normalizer.normalize(CharBuffer.wrap(chars, 0, length), Normalizer.Form.NFKC);

        boolean containsRandALCat = false;
        boolean containsLCat = false;
        boolean initialRandALCat = false;
        // iterate by code point, not by char, so that supplementary characters are seen whole
        for (int i = 0; i < normalized.length();)
        {
            final int codepoint = normalized.codePointAt(i);

            // 3. Prohibit
            if (prohibited(codepoint))
            {
                throw new IllegalArgumentException("Prohibited character " +
                        codepoint + " at position " + i);
            }

            // 4. Check bidi
            final byte directionality = Character.getDirectionality(codepoint);
            final boolean isRandALcat = directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT
                    || directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
            containsRandALCat |= isRandALcat;
            containsLCat |= directionality == Character.DIRECTIONALITY_LEFT_TO_RIGHT;
            initialRandALCat |= i == 0 && isRandALcat;

            if (!allowUnassigned && !Character.isDefined(codepoint))
            {
                throw new IllegalArgumentException("Character at position " + i + " is unassigned");
            }

            i += Character.charCount(codepoint);

            // after the increment, i >= length means the code point just examined was the last one
            // NOTE(review): RFC 3454 section 6 also requires a string that contains any RandALCat
            // character to START with one; only the "starts with RandALCat => ends with RandALCat"
            // direction is enforced here. Left as is to avoid rejecting currently accepted input.
            if (initialRandALCat && i >= normalized.length() && !isRandALcat)
            {
                throw new IllegalArgumentException("First character is RandALCat, but last character is not");
            }
        }

        if (containsRandALCat && containsLCat)
        {
            throw new IllegalArgumentException("Contains both RandALCat characters and LCat characters");
        }
        return normalized;
    }

    /**
     * Return true if the given {@code codepoint} is a prohibited character
     * as defined by
     * <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013,
     * Section 2.3</a>.
     *
     * @param codepoint the Unicode code point to test; it is tested as a full code point and is
     * never truncated to 16 bits.
     * @return true if the code point falls into any of the prohibited tables.
     */
    static boolean prohibited(int codepoint)
    {
        return nonAsciiSpace(codepoint)
                || asciiControl(codepoint)
                || nonAsciiControl(codepoint)
                || privateUse(codepoint)
                || nonCharacterCodePoint(codepoint)
                || surrogateCodePoint(codepoint)
                || inappropriateForPlainText(codepoint)
                || inappropriateForCanonical(codepoint)
                || changeDisplayProperties(codepoint)
                || tagging(codepoint);
    }

    /**
     * Return true if the given {@code codepoint} is a tagging character
     * as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.9">RFC 3454,
     * Appendix C.9</a>.
     */
    private static boolean tagging(int codepoint)
    {
        return codepoint == 0xE0001
                || (0xE0020 <= codepoint && codepoint <= 0xE007F);
    }

    /**
     * Return true if the given {@code codepoint} is change display properties
     * or deprecated characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.8">RFC 3454,
     * Appendix C.8</a>.
     */
    private static boolean changeDisplayProperties(int codepoint)
    {
        return codepoint == 0x0340
                || codepoint == 0x0341
                || codepoint == 0x200E
                || codepoint == 0x200F
                || codepoint == 0x202A
                || codepoint == 0x202B
                || codepoint == 0x202C
                || codepoint == 0x202D
                || codepoint == 0x202E
                || codepoint == 0x206A
                || codepoint == 0x206B
                || codepoint == 0x206C
                || codepoint == 0x206D
                || codepoint == 0x206E
                || codepoint == 0x206F;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for
     * canonical representation characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.7">RFC 3454,
     * Appendix C.7</a>.
     */
    private static boolean inappropriateForCanonical(int codepoint)
    {
        return 0x2FF0 <= codepoint && codepoint <= 0x2FFB;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for plain
     * text characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.6">RFC 3454,
     * Appendix C.6</a>.
     */
    private static boolean inappropriateForPlainText(int codepoint)
    {
        return codepoint == 0xFFF9
                || codepoint == 0xFFFA
                || codepoint == 0xFFFB
                || codepoint == 0xFFFC
                || codepoint == 0xFFFD;
    }

    /**
     * Return true if the given {@code codepoint} is a surrogate
     * code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.5">RFC 3454,
     * Appendix C.5</a>.
     */
    private static boolean surrogateCodePoint(int codepoint)
    {
        return 0xD800 <= codepoint && codepoint <= 0xDFFF;
    }

    /**
     * Return true if the given {@code codepoint} is a non-character
     * code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.4">RFC 3454,
     * Appendix C.4</a>.
     */
    private static boolean nonCharacterCodePoint(int codepoint)
    {
        return (0xFDD0 <= codepoint && codepoint <= 0xFDEF)
                || (0xFFFE <= codepoint && codepoint <= 0xFFFF)
                || (0x1FFFE <= codepoint && codepoint <= 0x1FFFF)
                || (0x2FFFE <= codepoint && codepoint <= 0x2FFFF)
                || (0x3FFFE <= codepoint && codepoint <= 0x3FFFF)
                || (0x4FFFE <= codepoint && codepoint <= 0x4FFFF)
                || (0x5FFFE <= codepoint && codepoint <= 0x5FFFF)
                || (0x6FFFE <= codepoint && codepoint <= 0x6FFFF)
                || (0x7FFFE <= codepoint && codepoint <= 0x7FFFF)
                || (0x8FFFE <= codepoint && codepoint <= 0x8FFFF)
                || (0x9FFFE <= codepoint && codepoint <= 0x9FFFF)
                || (0xAFFFE <= codepoint && codepoint <= 0xAFFFF)
                || (0xBFFFE <= codepoint && codepoint <= 0xBFFFF)
                || (0xCFFFE <= codepoint && codepoint <= 0xCFFFF)
                || (0xDFFFE <= codepoint && codepoint <= 0xDFFFF)
                || (0xEFFFE <= codepoint && codepoint <= 0xEFFFF)
                || (0xFFFFE <= codepoint && codepoint <= 0xFFFFF)
                || (0x10FFFE <= codepoint && codepoint <= 0x10FFFF);
    }

    /**
     * Return true if the given {@code codepoint} is a private use character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.3">RFC 3454,
     * Appendix C.3</a>: the BMP private use area (E000-F8FF) and the two private use planes
     * (F0000-FFFFD and 100000-10FFFD).
     */
    private static boolean privateUse(int codepoint)
    {
        // the middle range starts at U+F0000 (plane 15), not U+F000
        return (0xE000 <= codepoint && codepoint <= 0xF8FF)
                || (0xF0000 <= codepoint && codepoint <= 0xFFFFD)
                || (0x100000 <= codepoint && codepoint <= 0x10FFFD);
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII control character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.2">RFC 3454,
     * Appendix C.2.2</a>.
     */
    private static boolean nonAsciiControl(int codepoint)
    {
        return (0x0080 <= codepoint && codepoint <= 0x009F)
                || codepoint == 0x06DD
                || codepoint == 0x070F
                || codepoint == 0x180E
                || codepoint == 0x200C
                || codepoint == 0x200D
                || codepoint == 0x2028
                || codepoint == 0x2029
                || codepoint == 0x2060
                || codepoint == 0x2061
                || codepoint == 0x2062
                || codepoint == 0x2063
                || (0x206A <= codepoint && codepoint <= 0x206F)
                || codepoint == 0xFEFF
                || (0xFFF9 <= codepoint && codepoint <= 0xFFFC)
                || (0x1D173 <= codepoint && codepoint <= 0x1D17A);
    }

    /**
     * Return true if the given {@code codepoint} is an ASCII control character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.1">RFC 3454,
     * Appendix C.2.1</a>. Takes a full code point so that supplementary characters are not
     * mistaken for controls by truncation.
     */
    private static boolean asciiControl(int codepoint)
    {
        return (0x0000 <= codepoint && codepoint <= 0x001F) || codepoint == 0x007F;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII space character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.1.2">RFC 3454,
     * Appendix C.1.2</a>. Takes a full code point so that supplementary characters are not
     * mistaken for spaces by truncation.
     */
    private static boolean nonAsciiSpace(int codepoint)
    {
        return codepoint == 0x00A0
                || codepoint == 0x1680
                || (0x2000 <= codepoint && codepoint <= 0x200B)
                || codepoint == 0x202F
                || codepoint == 0x205F
                || codepoint == 0x3000;
    }

    /**
     * Return true if the given {@code ch} is a "commonly mapped to nothing" character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-B.1">RFC 3454,
     * Appendix B.1</a>.
     */
    private static boolean mappedToNothing(char ch)
    {
        return ch == 0x00AD
                || ch == 0x034F
                || ch == 0x1806
                || ch == 0x180B
                || ch == 0x180C
                || ch == 0x180D
                || ch == 0x200B
                || ch == 0x200C
                || ch == 0x200D
                || ch == 0x2060
                || (0xFE00 <= ch && ch <= 0xFE0F)
                || ch == 0xFEFF;
    }
}