All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pdfbox.pdmodel.encryption.SaslPrep Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pdfbox.pdmodel.encryption;

import java.nio.CharBuffer;
import java.text.Normalizer;

/**
 * Implementation of the {@code SASLPrep} algorithm defined in
 * <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>.
 *
 * <p>Copied from https://github.com/tombentley/saslprep/blob/master/src/main/java/SaslPrep.java on
 * 30.5.2019, commit 2e30daa.
 *
 * @author Tom Bentley
 */
class SaslPrep
{

    private SaslPrep()
    {
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a query
     * string. This implements the {@code SASLPrep} algorithm defined in
     * <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>. Unassigned code points are
     * allowed in a query string.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the
     * requirements for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a>
     */
    static String saslPrepQuery(String str)
    {
        return saslPrep(str, true);
    }

    /**
     * Return the {@code SASLPrep}-canonicalised version of the given
     * {@code str} for use as a stored string. This implements the {@code SASLPrep} algorithm
     * defined in <a href="https://tools.ietf.org/html/rfc4013">RFC 4013</a>. Unassigned code
     * points are rejected in a stored string.
     *
     * @param str The string to canonicalise.
     * @return The canonicalised string.
     * @throws IllegalArgumentException if the string contained prohibited or unassigned
     * codepoints, or broke the requirements for bidirectional character handling.
     * @see <a href="https://tools.ietf.org/html/rfc3454#section-7">RFC 3454, Section 7</a>
     */
    static String saslPrepStored(String str)
    {
        return saslPrep(str, false);
    }

    /**
     * Run the four SASLPrep steps (map, normalize, prohibit, check bidi) on {@code str}.
     *
     * @param str The string to canonicalise.
     * @param allowUnassigned {@code true} to accept code points for which
     * {@link Character#isDefined(int)} is false, {@code false} to reject them.
     * @return The NFKC-normalized, mapped string.
     * @throws IllegalArgumentException if a prohibited (or, when not allowed, unassigned) code
     * point is found, or the bidi checks fail.
     */
    private static String saslPrep(String str, boolean allowUnassigned)
    {
        char[] chars = str.toCharArray();

        // 1. Map
        // non-ASCII space chars mapped to space
        for (int i = 0; i < str.length(); i++)
        {
            char ch = str.charAt(i);
            if (nonAsciiSpace(ch))
            {
                chars[i] = ' ';
            }
        }

        // "commonly mapped to nothing" chars are dropped by compacting the array in place;
        // only the first 'length' chars are meaningful afterwards
        int length = 0;
        for (int i = 0; i < str.length(); i++)
        {
            char ch = chars[i];
            if (!mappedToNothing(ch))
            {
                chars[length++] = ch;
            }
        }

        // 2. Normalize
        String normalized = Normalizer.normalize(CharBuffer.wrap(chars, 0, length),
                Normalizer.Form.NFKC);

        boolean containsRandALCat = false;
        boolean containsLCat = false;
        boolean initialRandALCat = false;
        // iterate by code point, not by char, so that supplementary characters are checked whole
        for (int i = 0; i < normalized.length();)
        {
            final int codepoint = normalized.codePointAt(i);
            // 3. Prohibit
            if (prohibited(codepoint))
            {
                throw new IllegalArgumentException("Prohibited character " +
                        codepoint + " at position " + i);
            }

            // 4. Check bidi
            final byte directionality = Character.getDirectionality(codepoint);
            final boolean isRandALcat = directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT
                    || directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
            containsRandALCat |= isRandALcat;
            containsLCat |= directionality == Character.DIRECTIONALITY_LEFT_TO_RIGHT;

            initialRandALCat |= i == 0 && isRandALcat;
            if (!allowUnassigned && !Character.isDefined(codepoint))
            {
                throw new IllegalArgumentException("Character at position " + i + " is unassigned");
            }

            i += Character.charCount(codepoint);

            // after advancing, i >= length means the code point just examined was the last one
            // NOTE(review): RFC 3454 Section 6 also requires the FIRST character to be RandALCat
            // whenever any RandALCat is present; that is not enforced here. Left unchanged to
            // avoid rejecting input that is accepted today - confirm before tightening.
            if (initialRandALCat && i >= normalized.length() && !isRandALcat)
            {
                throw new IllegalArgumentException("First character is RandALCat, but last character is not");
            }
        }
        if (containsRandALCat && containsLCat)
        {
            throw new IllegalArgumentException("Contains both RandALCat characters and LCat characters");
        }
        return normalized;
    }

    /**
     * Return true if the given {@code codepoint} is a prohibited character
     * as defined by
     * <a href="https://tools.ietf.org/html/rfc4013#section-2.3">RFC 4013,
     * Section 2.3</a>.
     *
     * @param codepoint the Unicode code point to test; may be a supplementary code point.
     * @return true if the code point falls in any of the prohibited tables.
     */
    static boolean prohibited(int codepoint)
    {
        // the code point is passed on as an int: narrowing it to char would fold supplementary
        // code points onto unrelated BMP characters (e.g. U+10000 onto U+0000)
        return nonAsciiSpace(codepoint)
                || asciiControl(codepoint)
                || nonAsciiControl(codepoint)
                || privateUse(codepoint)
                || nonCharacterCodePoint(codepoint)
                || surrogateCodePoint(codepoint)
                || inappropriateForPlainText(codepoint)
                || inappropriateForCanonical(codepoint)
                || changeDisplayProperties(codepoint)
                || tagging(codepoint);
    }

    /**
     * Return true if the given {@code codepoint} is a tagging character
     * as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.9">RFC 3454,
     * Appendix C.9</a>.
     */
    private static boolean tagging(int codepoint)
    {
        return codepoint == 0xE0001
                || 0xE0020 <= codepoint && codepoint <= 0xE007F;
    }

    /**
     * Return true if the given {@code codepoint} is change display properties
     * or deprecated characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.8">RFC 3454,
     * Appendix C.8</a>.
     */
    private static boolean changeDisplayProperties(int codepoint)
    {
        return codepoint == 0x0340
                || codepoint == 0x0341
                || codepoint == 0x200E
                || codepoint == 0x200F
                || codepoint == 0x202A
                || codepoint == 0x202B
                || codepoint == 0x202C
                || codepoint == 0x202D
                || codepoint == 0x202E
                || codepoint == 0x206A
                || codepoint == 0x206B
                || codepoint == 0x206C
                || codepoint == 0x206D
                || codepoint == 0x206E
                || codepoint == 0x206F
                ;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for
     * canonical representation characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.7">RFC 3454,
     * Appendix C.7</a>.
     */
    private static boolean inappropriateForCanonical(int codepoint)
    {
        return 0x2FF0 <= codepoint && codepoint <= 0x2FFB;
    }

    /**
     * Return true if the given {@code codepoint} is inappropriate for plain
     * text characters as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.6">RFC 3454,
     * Appendix C.6</a>.
     */
    private static boolean inappropriateForPlainText(int codepoint)
    {
        return codepoint == 0xFFF9
                || codepoint == 0xFFFA
                || codepoint == 0xFFFB
                || codepoint == 0xFFFC
                || codepoint == 0xFFFD
                ;
    }

    /**
     * Return true if the given {@code codepoint} is a surrogate
     * code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.5">RFC 3454,
     * Appendix C.5</a>.
     */
    private static boolean surrogateCodePoint(int codepoint)
    {
        return 0xD800 <= codepoint && codepoint <= 0xDFFF;
    }

    /**
     * Return true if the given {@code codepoint} is a non-character
     * code point as defined by
     * <a href="https://tools.ietf.org/html/rfc3454#appendix-C.4">RFC 3454,
     * Appendix C.4</a>.
     */
    private static boolean nonCharacterCodePoint(int codepoint)
    {
        return 0xFDD0 <= codepoint && codepoint <= 0xFDEF
                || 0xFFFE <= codepoint && codepoint <= 0xFFFF
                || 0x1FFFE <= codepoint && codepoint <= 0x1FFFF
                || 0x2FFFE <= codepoint && codepoint <= 0x2FFFF
                || 0x3FFFE <= codepoint && codepoint <= 0x3FFFF
                || 0x4FFFE <= codepoint && codepoint <= 0x4FFFF
                || 0x5FFFE <= codepoint && codepoint <= 0x5FFFF
                || 0x6FFFE <= codepoint && codepoint <= 0x6FFFF
                || 0x7FFFE <= codepoint && codepoint <= 0x7FFFF
                || 0x8FFFE <= codepoint && codepoint <= 0x8FFFF
                || 0x9FFFE <= codepoint && codepoint <= 0x9FFFF
                || 0xAFFFE <= codepoint && codepoint <= 0xAFFFF
                || 0xBFFFE <= codepoint && codepoint <= 0xBFFFF
                || 0xCFFFE <= codepoint && codepoint <= 0xCFFFF
                || 0xDFFFE <= codepoint && codepoint <= 0xDFFFF
                || 0xEFFFE <= codepoint && codepoint <= 0xEFFFF
                || 0xFFFFE <= codepoint && codepoint <= 0xFFFFF
                || 0x10FFFE <= codepoint && codepoint <= 0x10FFFF
                ;
    }

    /**
     * Return true if the given {@code codepoint} is a private use character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.3">RFC 3454,
     * Appendix C.3</a>.
     */
    private static boolean privateUse(int codepoint)
    {
        // the three private use ranges: BMP private use area, plane 15 and plane 16
        return 0xE000 <= codepoint && codepoint <= 0xF8FF
                || 0xF0000 <= codepoint && codepoint <= 0xFFFFD
                || 0x100000 <= codepoint && codepoint <= 0x10FFFD;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII control character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.2">RFC 3454,
     * Appendix C.2.2</a>.
     */
    private static boolean nonAsciiControl(int codepoint)
    {
        return 0x0080 <= codepoint && codepoint <= 0x009F
                || codepoint == 0x06DD
                || codepoint == 0x070F
                || codepoint == 0x180E
                || codepoint == 0x200C
                || codepoint == 0x200D
                || codepoint == 0x2028
                || codepoint == 0x2029
                || codepoint == 0x2060
                || codepoint == 0x2061
                || codepoint == 0x2062
                || codepoint == 0x2063
                || 0x206A <= codepoint && codepoint <= 0x206F
                || codepoint == 0xFEFF
                || 0xFFF9 <= codepoint && codepoint <= 0xFFFC
                || 0x1D173 <= codepoint && codepoint <= 0x1D17A;
    }

    /**
     * Return true if the given {@code codepoint} is an ASCII control character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.2.1">RFC 3454,
     * Appendix C.2.1</a>.
     */
    private static boolean asciiControl(int codepoint)
    {
        return 0x0000 <= codepoint && codepoint <= 0x001F || codepoint == 0x007F;
    }

    /**
     * Return true if the given {@code codepoint} is a non-ASCII space character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-C.1.2">RFC 3454,
     * Appendix C.1.2</a>.
     */
    private static boolean nonAsciiSpace(int codepoint)
    {
        return codepoint == 0x00A0
                || codepoint == 0x1680
                || 0x2000 <= codepoint && codepoint <= 0x200B
                || codepoint == 0x202F
                || codepoint == 0x205F
                || codepoint == 0x3000;
    }

    /**
     * Return true if the given {@code ch} is a "commonly mapped to nothing" character
     * as defined by <a href="https://tools.ietf.org/html/rfc3454#appendix-B.1">RFC 3454,
     * Appendix B.1</a>.
     */
    private static boolean mappedToNothing(char ch)
    {
        return ch == '\u00AD'
                || ch == '\u034F'
                || ch == '\u1806'
                || ch == '\u180B'
                || ch == '\u180C'
                || ch == '\u180D'
                || ch == '\u200B'
                || ch == '\u200C'
                || ch == '\u200D'
                || ch == '\u2060'
                || '\uFE00' <= ch && ch <= '\uFE0F'
                || ch == '\uFEFF';
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy