com.datastax.driver.core.ParseUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dse-java-driver-core Show documentation
A driver for DataStax Enterprise (DSE) and Apache Cassandra 1.2+ clusters that works exclusively with the Cassandra Query Language version 3 (CQL3) and Cassandra's binary protocol, supporting DSE-specific features such as geospatial types, DSE Graph and DSE authentication.
There is a newer version: 2.4.0
Show newest version
/*
 * Copyright DataStax, Inc.
 *
 * This software can be used solely with DataStax Enterprise. Please consult the license at
 * http://www.datastax.com/terms/datastax-dse-driver-license-terms
 */
package com.datastax.driver.core;

import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

/**
 * Simple utility class used to help parse CQL values (mainly UDT and collection values).
 */
public abstract class ParseUtils {

    /**
     * Valid ISO-8601 patterns for CQL timestamp literals.
     */
    private static final String[] ISO_8601_PATTERNS = new String[]{
            "yyyy-MM-dd HH:mm",
            "yyyy-MM-dd HH:mm:ss",
            "yyyy-MM-dd HH:mmZ",
            "yyyy-MM-dd HH:mm:ssZ",
            "yyyy-MM-dd HH:mm:ss.SSS",
            "yyyy-MM-dd HH:mm:ss.SSSZ",
            "yyyy-MM-dd'T'HH:mm",
            "yyyy-MM-dd'T'HH:mmZ",
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd'T'HH:mm:ssZ",
            "yyyy-MM-dd'T'HH:mm:ss.SSS",
            "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
            "yyyy-MM-dd",
            "yyyy-MM-ddZ"
    };

    /**
     * Matches a trailing time zone offset written with a colon, e.g. {@code +01:00}.
     */
    private static final Pattern COLON_OFFSET_SUFFIX = Pattern.compile("(\\+|\\-)(\\d\\d):(\\d\\d)$");

    /**
     * Matches a trailing {@code Z} (UTC designator).
     */
    private static final Pattern ZULU_SUFFIX = Pattern.compile("Z$");

    /**
     * Returns the index of the first character in toParse from idx that is not a "space".
     * <p>
     * If every character from {@code idx} onwards is a space, or if {@code idx} is already
     * at or past the end of the string, the returned index is not a valid character index
     * (it is at least {@code toParse.length()}).
     *
     * @param toParse the string to skip space on.
     * @param idx     the index to start skipping space from.
     * @return the index of the first character in toParse from idx that is not a "space".
     */
    public static int skipSpaces(String toParse, int idx) {
        // The bounds check must come first, otherwise charAt() throws when the
        // string ends with blanks (or when idx already equals the length).
        while (idx < toParse.length() && isBlank(toParse.charAt(idx))) {
            ++idx;
        }
        return idx;
    }

    /**
     * Assuming that idx points to the beginning of a CQL value in toParse, returns the
     * index of the first character after this value.
     *
     * @param toParse the string to skip a value from.
     * @param idx     the index to start parsing a value from.
     * @return the index ending the CQL value starting at {@code idx}.
     * @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL
     *                                  value.
     */
    public static int skipCQLValue(String toParse, int idx) {
        if (idx >= toParse.length()) {
            throw new IllegalArgumentException(
                    "Cannot read a CQL value at index " + idx + ": input length is " + toParse.length());
        }

        if (isBlank(toParse.charAt(idx))) {
            throw new IllegalArgumentException(
                    "Cannot read a CQL value at index " + idx + ": found whitespace");
        }

        final int startIdx = idx;
        int cbrackets = 0;
        int sbrackets = 0;
        int parens = 0;
        boolean inString = false;

        do {
            char c = toParse.charAt(idx);
            if (inString) {
                if (c == '\'') {
                    if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\'') {
                        ++idx; // this is an escaped quote, skip it
                    } else {
                        inString = false;
                        if (cbrackets == 0 && sbrackets == 0 && parens == 0) {
                            return idx + 1;
                        }
                    }
                }
                // Skip any other character
            } else if (c == '\'') {
                inString = true;
            } else if (c == '{') {
                ++cbrackets;
            } else if (c == '[') {
                ++sbrackets;
            } else if (c == '(') {
                ++parens;
            } else if (c == '}') {
                // A closing bracket that was never opened belongs to an enclosing value:
                // the current value ends right before it.
                if (cbrackets == 0) {
                    return idx;
                }

                --cbrackets;
                if (cbrackets == 0 && sbrackets == 0 && parens == 0) {
                    return idx + 1;
                }
            } else if (c == ']') {
                if (sbrackets == 0) {
                    return idx;
                }

                --sbrackets;
                if (cbrackets == 0 && sbrackets == 0 && parens == 0) {
                    return idx + 1;
                }
            } else if (c == ')') {
                if (parens == 0) {
                    return idx;
                }

                --parens;
                if (cbrackets == 0 && sbrackets == 0 && parens == 0) {
                    return idx + 1;
                }
            } else if (isBlank(c) || !isIdentifierChar(c)) {
                // Outside of any nesting, a blank or a non-identifier character ends the value.
                if (cbrackets == 0 && sbrackets == 0 && parens == 0) {
                    return idx;
                }
            }
        } while (++idx < toParse.length());

        if (inString || cbrackets != 0 || sbrackets != 0 || parens != 0) {
            throw new IllegalArgumentException(
                    "Unterminated CQL value starting at index " + startIdx);
        }
        return idx;
    }

    /**
     * Assuming that idx points to the beginning of a CQL identifier in toParse, returns the
     * index of the first character after this identifier.
     *
     * @param toParse the string to skip an identifier from.
     * @param idx     the index to start parsing an identifier from.
     * @return the index ending the CQL identifier starting at {@code idx}.
     * @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL
     *                                  identifier.
     */
    public static int skipCQLId(String toParse, int idx) {
        if (idx >= toParse.length()) {
            throw new IllegalArgumentException(
                    "Cannot read a CQL identifier at index " + idx + ": input length is " + toParse.length());
        }

        final int startIdx = idx;
        char c = toParse.charAt(idx);

        // Unquoted identifier: consume as many identifier characters as possible.
        if (isIdentifierChar(c)) {
            while (idx < toParse.length() && isIdentifierChar(toParse.charAt(idx))) {
                idx++;
            }
            return idx;
        }

        if (c != '"') {
            throw new IllegalArgumentException(
                    "Cannot read a CQL identifier at index " + idx + ": unexpected character");
        }

        // Quoted identifier: look for the closing double quote, skipping escaped ones ("").
        while (++idx < toParse.length()) {
            c = toParse.charAt(idx);
            if (c != '"') {
                continue;
            }

            if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\"') {
                ++idx; // this is an escaped double quote, skip it
            } else {
                return idx + 1;
            }
        }
        throw new IllegalArgumentException(
                "Unterminated quoted CQL identifier starting at index " + startIdx);
    }

    /**
     * Return {@code true} if the given character
     * is allowed in a CQL identifier, that is,
     * if it is in the range: {@code [0..9a..zA..Z-+._&]}.
     *
     * @param c The character to inspect.
     * @return {@code true} if the given character
     * is allowed in a CQL identifier, {@code false} otherwise.
     */
    public static boolean isIdentifierChar(int c) {
        return (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                || c == '-' || c == '+' || c == '.' || c == '_' || c == '&';
    }

    /**
     * Return {@code true} if the given character
     * is a valid whitespace character in CQL, that is,
     * if it is a regular space, a tabulation sign,
     * or a new line sign.
     *
     * @param c The character to inspect.
     * @return {@code true} if the given character
     * is a valid whitespace character, {@code false} otherwise.
     */
    public static boolean isBlank(int c) {
        return c == ' ' || c == '\t' || c == '\n';
    }

    /**
     * Check whether the given string corresponds
     * to a valid CQL long literal.
     * Long literals are composed solely by digits,
     * but can have an optional leading minus sign.
     * <p>
     * A {@code null} or empty string, as well as a lone minus sign, are not valid literals.
     *
     * @param str The string to inspect.
     * @return {@code true} if the given string corresponds
     * to a valid CQL integer literal, {@code false} otherwise.
     */
    public static boolean isLongLiteral(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int firstDigit = str.charAt(0) == '-' ? 1 : 0;
        // A minus sign must be followed by at least one digit.
        if (firstDigit == str.length()) {
            return false;
        }
        for (int i = firstDigit; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Return {@code true} if the given string is surrounded
     * by single quotes, and {@code false} otherwise.
     *
     * @param value The string to inspect.
     * @return {@code true} if the given string is surrounded
     * by single quotes, and {@code false} otherwise.
     */
    public static boolean isQuoted(String value) {
        return isQuoted(value, '\'');
    }

    /**
     * Quote the given string; single quotes are escaped.
     * If the given string is null, this method returns a quoted empty string ({@code ''}).
     *
     * @param value The value to quote.
     * @return The quoted string.
     */
    public static String quote(String value) {
        return quote(value, '\'');
    }

    /**
     * Unquote the given string if it is quoted; single quotes are unescaped.
     * If the given string is not quoted, it is returned without any modification.
     *
     * @param value The string to unquote.
     * @return The unquoted string.
     */
    public static String unquote(String value) {
        return unquote(value, '\'');
    }

    /**
     * Return {@code true} if the given string is surrounded
     * by double quotes, and {@code false} otherwise.
     *
     * @param value The string to inspect.
     * @return {@code true} if the given string is surrounded
     * by double quotes, and {@code false} otherwise.
     */
    public static boolean isDoubleQuoted(String value) {
        return isQuoted(value, '\"');
    }

    /**
     * Double quote the given string; double quotes are escaped.
     * If the given string is null, this method returns a quoted empty string ({@code ""}).
     *
     * @param value The value to double quote.
     * @return The double quoted string.
     */
    public static String doubleQuote(String value) {
        return quote(value, '"');
    }

    /**
     * Unquote the given string if it is double quoted; double quotes are unescaped.
     * If the given string is not double quoted, it is returned without any modification.
     *
     * @param value The string to un-double quote.
     * @return The un-double quoted string.
     */
    public static String unDoubleQuote(String value) {
        return unquote(value, '"');
    }

    /**
     * Parse the given string as a date, using one of the accepted ISO-8601 date patterns.
     * <p>
     * This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is used Cassandra side
     * to parse date strings).
     * <p>
     * Parsing is strict (non-lenient) and the whole string must be consumed by the pattern.
     * Patterns that do not carry a time zone are interpreted in UTC.
     *
     * @param str the string to parse.
     * @return the parsed date.
     * @throws ParseException If the given string is not a valid ISO-8601 date.
     * @see "'Working with timestamps' section of CQL specification"
     */
    public static Date parseDate(String str) throws ParseException {
        SimpleDateFormat parser = new SimpleDateFormat();
        parser.setLenient(false);
        // set a default timezone for patterns that do not provide one
        parser.setTimeZone(TimeZone.getTimeZone("UTC"));
        str = normalizeTimeZone(str);
        ParsePosition pos = new ParsePosition(0);
        for (String parsePattern : ISO_8601_PATTERNS) {
            parser.applyPattern(parsePattern);
            pos.setIndex(0);
            Date date = parser.parse(str, pos);
            // Only accept the result if the pattern consumed the whole input.
            if (date != null && pos.getIndex() == str.length()) {
                return date;
            }
        }
        throw new ParseException("Unable to parse the date: " + str, -1);
    }

    /**
     * Parse the given string as a date, using the supplied date pattern.
     * <p>
     * This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is used Cassandra side
     * to parse date strings).
     * <p>
     * This is equivalent to calling {@link #parseDate(String, String, boolean)} with a non-lenient parser.
     *
     * @param str     the string to parse.
     * @param pattern the pattern to use.
     * @return the parsed date.
     * @throws ParseException If the given string cannot be parsed with the given pattern.
     * @see "'Working with timestamps' section of CQL specification"
     */
    public static Date parseDate(String str, String pattern) throws ParseException {
        return parseDate(str, pattern, false);
    }

    /**
     * Parse the given string as a date, using the supplied date pattern.
     * <p>
     * This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is used Cassandra side
     * to parse date strings).
     * <p>
     * The whole string must be consumed by the pattern. If the pattern does not carry a time zone,
     * the string is interpreted in UTC. The underlying calendar is a proleptic Gregorian calendar
     * whose weeks start on Monday and whose first week has at least 4 days.
     *
     * @param str     the string to parse.
     * @param pattern the pattern to use.
     * @param lenient whether the parser is lenient or not.
     * @return the parsed date.
     * @throws ParseException If the given string cannot be parsed with the given pattern.
     * @see "'Working with timestamps' section of CQL specification"
     */
    public static Date parseDate(String str, String pattern, boolean lenient) throws ParseException {
        SimpleDateFormat parser = new SimpleDateFormat();
        parser.setLenient(lenient);
        // set a default timezone for patterns that do not provide one
        parser.setTimeZone(TimeZone.getTimeZone("UTC"));
        // make the parser proleptic, see java.util.GregorianCalendar.from(ZonedDateTime)
        GregorianCalendar cal = (GregorianCalendar) parser.getCalendar();
        cal.setGregorianChange(new Date(Long.MIN_VALUE));
        cal.setFirstDayOfWeek(Calendar.MONDAY);
        cal.setMinimalDaysInFirstWeek(4);
        str = normalizeTimeZone(str);
        parser.applyPattern(pattern);
        ParsePosition pos = new ParsePosition(0);
        Date date = parser.parse(str, pos);
        // Only accept the result if the pattern consumed the whole input.
        if (date != null && pos.getIndex() == str.length()) {
            return date;
        }
        throw new ParseException("Unable to parse the date: " + str, -1);
    }

    /**
     * Parse the given string as a time, using the following time pattern: {@code hh:mm:ss[.fffffffff]}.
     * <p>
     * This method is loosely based on {@code java.sql.Timestamp}.
     * <p>
     * Leading and trailing whitespace is ignored.
     *
     * @param str The string to parse.
     * @return A long value representing the number of nanoseconds since midnight.
     * @throws ParseException           if the string does not follow the general
     *                                  {@code hh:mm:ss[.fffffffff]} layout, or if the seconds are out of
     *                                  bounds when no fractional part is present.
     * @throws IllegalArgumentException if the string is {@code null}, if the hours, minutes, or (when a
     *                                  fractional part is present) seconds are out of bounds, if the
     *                                  fractional part is malformed, or if one of the fields is not a
     *                                  valid integer (in which case the exception is a
     *                                  {@link NumberFormatException}).
     * @see "'Working with time' section of CQL specification"
     */
    public static long parseTime(String str) throws ParseException {
        String formatError = "Timestamp format must be hh:mm:ss[.fffffffff]";
        String zeros = "000000000";

        if (str == null) {
            throw new IllegalArgumentException(formatError);
        }
        str = str.trim();

        // Locate the field separators
        int firstColon = str.indexOf(':');
        int secondColon = str.indexOf(':', firstColon + 1);
        if (firstColon <= 0 || secondColon <= 0 || secondColon >= str.length() - 1) {
            throw new ParseException(formatError, -1);
        }
        int period = str.indexOf('.', secondColon + 1);

        long hour = Integer.parseInt(str.substring(0, firstColon));
        if (hour < 0 || hour >= 24) {
            throw new IllegalArgumentException("Hour out of bounds.");
        }

        long minute = Integer.parseInt(str.substring(firstColon + 1, secondColon));
        if (minute < 0 || minute >= 60) {
            throw new IllegalArgumentException("Minute out of bounds.");
        }

        long second;
        long nanos = 0; // default when the fractional part is missing
        if (period > 0 && period < str.length() - 1) {
            second = Integer.parseInt(str.substring(secondColon + 1, period));
            if (second < 0 || second >= 60) {
                throw new IllegalArgumentException("Second out of bounds.");
            }

            String fraction = str.substring(period + 1);
            if (fraction.length() > 9) {
                throw new IllegalArgumentException(formatError);
            }
            // Reject a sign in front of the fractional part (Integer.parseInt would accept it).
            if (!Character.isDigit(fraction.charAt(0))) {
                throw new IllegalArgumentException(formatError);
            }
            // Right-pad with zeros so that the fraction is expressed in nanoseconds.
            fraction = fraction + zeros.substring(0, 9 - fraction.length());
            nanos = Integer.parseInt(fraction);
        } else if (period > 0) {
            // A trailing period without any fractional digit.
            throw new ParseException(formatError, -1);
        } else {
            second = Integer.parseInt(str.substring(secondColon + 1));
            if (second < 0 || second >= 60) {
                throw new ParseException("Second out of bounds.", -1);
            }
        }

        return TimeUnit.HOURS.toNanos(hour)
                + TimeUnit.MINUTES.toNanos(minute)
                + TimeUnit.SECONDS.toNanos(second)
                + nanos;
    }

    /**
     * Format the given long value as a CQL time literal, using the following time pattern: {@code hh:mm:ss[.fffffffff]}.
     * <p>
     * The fractional part is always written with 9 digits, and the output always uses ASCII digits,
     * regardless of the default locale.
     *
     * @param value A long value representing the number of nanoseconds since midnight.
     * @return The formatted value.
     * @see "'Working with time' section of CQL specification"
     */
    public static String formatTime(long value) {
        int nano = (int) (value % 1000000000);
        value /= 1000000000;
        int seconds = (int) (value % 60);
        value /= 60;
        int minutes = (int) (value % 60);
        value /= 60;
        int hours = (int) (value % 24);
        value /= 24;
        // Anything left over means the value spans more than one day.
        assert (value == 0);
        StringBuilder sb = new StringBuilder();
        leftPadZeros(hours, 2, sb);
        sb.append(":");
        leftPadZeros(minutes, 2, sb);
        sb.append(":");
        leftPadZeros(seconds, 2, sb);
        sb.append(".");
        leftPadZeros(nano, 9, sb);
        return sb.toString();
    }

    /**
     * Rewrite the trailing time zone of the given date string into the {@code +HHmm} form.
     * <p>
     * Java 6 has very limited support for ISO-8601 time zone formats,
     * so the string needs to be transformed first
     * so that accepted patterns are correctly handled,
     * such as Z for UTC, or "+00:00" instead of "+0000".
     * Note: the X pattern letter cannot be used
     * because it has been introduced in Java 7.
     *
     * @param str The date string to normalize.
     * @return The date string, with its trailing time zone (if any) in the {@code +HHmm} form.
     */
    private static String normalizeTimeZone(String str) {
        str = COLON_OFFSET_SUFFIX.matcher(str).replaceAll("$1$2$3");
        return ZULU_SUFFIX.matcher(str).replaceAll("+0000");
    }

    /**
     * Return {@code true} if the given string is surrounded
     * by the quote character given, and {@code false} otherwise.
     *
     * @param value     The string to inspect.
     * @param quoteChar The quote character to look for.
     * @return {@code true} if the given string is surrounded
     * by the quote character, and {@code false} otherwise.
     */
    private static boolean isQuoted(String value, char quoteChar) {
        return value != null && value.length() > 1
                && value.charAt(0) == quoteChar && value.charAt(value.length() - 1) == quoteChar;
    }

    /**
     * @param quoteChar " or '
     * @return A quoted empty string.
     */
    private static String emptyQuoted(char quoteChar) {
        // don't handle non quote characters, this is done so that these are interned and don't create
        // repeated empty quoted strings.
        assert quoteChar == '"' || quoteChar == '\'';
        if (quoteChar == '"') {
            return "\"\"";
        } else {
            return "''";
        }
    }

    /**
     * Quotes text and escapes any existing quotes in the text.
     * {@code String.replace()} is a bit too inefficient (see JAVA-67, JAVA-1262).
     *
     * @param text      The text.
     * @param quoteChar The character to use as a quote.
     * @return The text surrounded in quotes, with all existing quotes escaped (i.e. ' becomes '')
     */
    private static String quote(String text, char quoteChar) {
        if (text == null || text.isEmpty()) {
            return emptyQuoted(quoteChar);
        }

        // Count the quotes that need to be escaped.
        int nbMatch = 0;
        int start = -1;
        do {
            start = text.indexOf(quoteChar, start + 1);
            if (start != -1) {
                ++nbMatch;
            }
        } while (start != -1);

        // no quotes found that need to be escaped, simply surround in quotes and return.
        if (nbMatch == 0) {
            return quoteChar + text + quoteChar;
        }

        // 2 for beginning and end quotes.
        // length for original text
        // nbMatch for escape characters to add to quotes to be escaped.
        int newLength = 2 + text.length() + nbMatch;
        char[] result = new char[newLength];
        result[0] = quoteChar;
        result[newLength - 1] = quoteChar;
        int newIdx = 1;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == quoteChar) {
                // escape quote with another occurrence.
                result[newIdx++] = c;
            }
            result[newIdx++] = c;
        }
        return new String(result);
    }

    /**
     * Unquotes text and unescapes non surrounding quotes.
     * {@code String.replace()} is a bit too inefficient (see JAVA-67, JAVA-1262).
     *
     * @param text      The text
     * @param quoteChar The character to use as a quote.
     * @return The text with surrounding quotes removed and non surrounding quotes unescaped (i.e. '' becomes ')
     */
    private static String unquote(String text, char quoteChar) {
        if (!isQuoted(text, quoteChar)) {
            return text;
        }

        if (text.length() == 2) {
            return "";
        }

        // Count the escaped quotes (two consecutive quote characters).
        String search = emptyQuoted(quoteChar);
        int nbMatch = 0;
        int start = -1;
        do {
            start = text.indexOf(search, start + 2);
            // ignore the second to last character occurrence, as the last character is a quote.
            if (start != -1 && start != text.length() - 2) {
                ++nbMatch;
            }
        } while (start != -1);

        // no escaped quotes found, simply remove surrounding quotes and return.
        if (nbMatch == 0) {
            return text.substring(1, text.length() - 1);
        }

        // length of the new string will be its current length - the number of occurrences
        // - the 2 surrounding quotes.
        int newLength = text.length() - nbMatch - 2;
        char[] result = new char[newLength];
        int newIdx = 0;
        // track whenever a quoteChar is encountered and the previous character is not a quoteChar.
        boolean firstFound = false;
        for (int i = 1; i < text.length() - 1; i++) {
            char c = text.charAt(i);
            if (c == quoteChar) {
                if (firstFound) {
                    // The previous character was a quoteChar, don't add this to result, this action in
                    // effect removes consecutive quotes.
                    firstFound = false;
                } else {
                    // found a quoteChar and the previous character was not a quoteChar, include in result.
                    firstFound = true;
                    result[newIdx++] = c;
                }
            } else {
                // non quoteChar encountered, include in result.
                result[newIdx++] = c;
                firstFound = false;
            }
        }
        return new String(result);
    }

    /**
     * Appends the given value to the builder, left-padded with zeros up to the given number of digits.
     *
     * @param value  The value to append.
     * @param digits The minimum number of characters to write.
     * @param sb     The builder to append to.
     */
    private static void leftPadZeros(int value, int digits, StringBuilder sb) {
        // Locale.ROOT guarantees ASCII digits; the default locale could produce localized digits,
        // which would not form a valid CQL literal.
        sb.append(String.format(Locale.ROOT, "%0" + digits + "d", value));
    }

    private ParseUtils() {
    }

}