All Downloads are FREE. Search and download functionalities are using the official Maven repository.

graphql.parser.UnicodeUtil Maven / Gradle / Ivy

package graphql.parser;

import graphql.Internal;
import graphql.language.SourceLocation;

import java.io.IOException;
import java.io.StringWriter;

import static graphql.Assert.assertShouldNeverHappen;

/**
 * Contains Unicode helpers for parsing StringValue types in the grammar
 */
@Internal
public class UnicodeUtil {
    public static int MAX_UNICODE_CODE_POINT = 0x10FFFF;
    public static int LEADING_SURROGATE_LOWER_BOUND = 0xD800;
    public static int LEADING_SURROGATE_UPPER_BOUND = 0xDBFF;
    public static int TRAILING_SURROGATE_LOWER_BOUND = 0xDC00;
    public static int TRAILING_SURROGATE_UPPER_BOUND = 0xDFFF;

    public static int parseAndWriteUnicode(StringWriter writer, String string, int i, SourceLocation sourceLocation) {
        // Unicode code points can either be:
        //  1. Unbraced: four hex characters in the form \\u597D, or
        //  2. Braced: any number of hex characters surrounded by braces in the form \\u{1F37A}

        // Extract the code point hex digits. Index i points to 'u'
        int startIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
        int endIndexExclusive = getEndIndexExclusive(string, i, sourceLocation);
        // Index for parser to continue at, the last character of the escaped unicode character. Either } or hex digit
        int continueIndex = isBracedEscape(string, i) ? endIndexExclusive : endIndexExclusive - 1;

        String hexStr = string.substring(startIndex, endIndexExclusive);
        Integer codePoint = Integer.parseInt(hexStr, 16);

        if (isTrailingSurrogateValue(codePoint)) {
            throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - trailing surrogate must be preceded with a leading surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
        } else if (isLeadingSurrogateValue(codePoint)) {
            if (!isEscapedUnicode(string, continueIndex + 1)) {
                throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
            }

            // Shift parser ahead to 'u' in second escaped Unicode character
            i = continueIndex + 2;
            int trailingStartIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
            int trailingEndIndexExclusive = getEndIndexExclusive(string, i, sourceLocation);
            String trailingHexStr = string.substring(trailingStartIndex, trailingEndIndexExclusive);
            Integer trailingCodePoint = Integer.parseInt(trailingHexStr, 16);
            continueIndex = isBracedEscape(string, i) ? trailingEndIndexExclusive : trailingEndIndexExclusive - 1;

            if (isTrailingSurrogateValue(trailingCodePoint)) {
                writeCodePoint(writer, codePoint);
                writeCodePoint(writer, trailingCodePoint);
                return continueIndex;
            }

            throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
        } else if (isValidUnicodeCodePoint(codePoint)) {
            writeCodePoint(writer, codePoint);
            return continueIndex;
        }

        throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - not a valid code point -", null, string.substring(i - 1, continueIndex + 1), null);
    }

    private static int getEndIndexExclusive(String string, int i, SourceLocation sourceLocation) {
        // Unbraced case, with exactly 4 hex digits
        if (string.length() > i + 5 && !isBracedEscape(string, i)) {
            return i + 5;
        }

        // Braced case, with any number of hex digits
        int endIndexExclusive = i + 2;
        do {
            if (endIndexExclusive + 1 >= string.length()) {
                throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - incorrectly formatted escape -", null, string.substring(i - 1, endIndexExclusive), null);
            }
        } while (string.charAt(++endIndexExclusive) != '}');

        return endIndexExclusive;
    }

    private static boolean isValidUnicodeCodePoint(int value) {
        return value <= MAX_UNICODE_CODE_POINT;
    }

    private static boolean isEscapedUnicode(String string, int index) {
        if (index + 1 >= string.length()) {
            return false;
        }
        return string.charAt(index) == '\\' && string.charAt(index + 1) == 'u';
    }

    private static boolean isLeadingSurrogateValue(int value) {
        return LEADING_SURROGATE_LOWER_BOUND <= value && value <= LEADING_SURROGATE_UPPER_BOUND;
    }

    private static boolean isTrailingSurrogateValue(int value) {
        return TRAILING_SURROGATE_LOWER_BOUND <= value && value <= TRAILING_SURROGATE_UPPER_BOUND;
    }

    private static void writeCodePoint(StringWriter writer, int codepoint) {
        char[] chars = Character.toChars(codepoint);
        try {
            writer.write(chars);
        } catch (IOException e) {
            assertShouldNeverHappen();
        }
    }

    private static boolean isBracedEscape(String string, int i) {
        return string.charAt(i + 1) == '{';
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy