com.zeroc.IceUtilInternal.StringUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of ice Show documentation
Ice is a comprehensive RPC framework that helps you build distributed applications with minimal effort using familiar object-oriented idioms
There is a newer version: 3.7.10
Show newest version
//
// Copyright (c) ZeroC, Inc. All rights reserved.
//

package com.zeroc.IceUtilInternal;

public final class StringUtil
{
    //
    // Scan str from index 0 for the first character that also
    // occurs in match. Returns that index, or -1 when no character
    // of str occurs in match.
    //
    public static int
    findFirstOf(String str, String match)
    {
        return findFirstOf(str, match, 0);
    }

    //
    // Scan str from index start for the first character that also
    // occurs in match. Returns that index, or -1 when no character
    // at or after start occurs in match.
    //
    public static int
    findFirstOf(String str, String match, int start)
    {
        int pos = start;
        while(pos < str.length())
        {
            if(match.indexOf(str.charAt(pos)) >= 0)
            {
                return pos;
            }
            ++pos;
        }

        // Ran off the end of str without a hit.
        return -1;
    }

    //
    // Scan str from index 0 for the first character that does not
    // occur in match. Returns that index, or -1 when every character
    // of str occurs in match.
    //
    public static int
    findFirstNotOf(String str, String match)
    {
        return findFirstNotOf(str, match, 0);
    }

    //
    // Scan str from index start for the first character that does
    // not occur in match. Returns that index, or -1 when every
    // character at or after start occurs in match.
    //
    public static int
    findFirstNotOf(String str, String match, int start)
    {
        for(int pos = start, n = str.length(); pos < n; ++pos)
        {
            final boolean listed = match.indexOf(str.charAt(pos)) != -1;
            if(!listed)
            {
                return pos;
            }
        }

        return -1;
    }

    //
    // Append the escaped representation of c to sb.
    //
    // Backslash, single quote and double quote are always backslash-escaped,
    // and BEL, BS, FF, LF, CR, HT and VT are written with their short escapes
    // (BEL and VT are written as \007 and \013 when toStringMode is Compat).
    // Any other character listed in special is prefixed with a backslash, and
    // the remaining printable ASCII characters (32-126) are copied unchanged.
    // What is left is handled according to toStringMode:
    //  - Compat: a 3-digit octal escape (c is expected to be a UTF-8 byte);
    //  - c < 32, c == 127, or ASCII mode: a \\unnnn escape;
    //  - otherwise: copied unchanged.
    //
    private static void
    encodeChar(char c, StringBuilder sb, String special, com.zeroc.Ice.ToStringMode toStringMode)
    {
        final boolean compat = toStringMode == com.zeroc.Ice.ToStringMode.Compat;

        switch(c)
        {
            case '\\':
                sb.append("\\\\");
                return;
            case '\'':
                sb.append("\\'");
                return;
            case '"':
                sb.append("\\\"");
                return;
            case '\007':
                // Octal escape for compatibility with 3.6 and earlier
                sb.append(compat ? "\\007" : "\\a");
                return;
            case '\b':
                sb.append("\\b");
                return;
            case '\f':
                sb.append("\\f");
                return;
            case '\n':
                sb.append("\\n");
                return;
            case '\r':
                sb.append("\\r");
                return;
            case '\t':
                sb.append("\\t");
                return;
            case '\013':
                // Octal escape for compatibility with 3.6 and earlier
                sb.append(compat ? "\\013" : "\\v");
                return;
            default:
                break;
        }

        if(special != null && special.indexOf(c) != -1)
        {
            sb.append('\\').append(c);
            return;
        }

        if(c >= 32 && c <= 126)
        {
            // printable ASCII character
            sb.append(c);
            return;
        }

        if(compat)
        {
            //
            // When ToStringMode=Compat, c is a UTF-8 byte
            //
            assert(c < 256);

            //
            // Always emit three octal digits. Without the leading zeroes,
            // a character with value 1 followed by the character '3' would
            // be written as \13 and decoded as the single character 11.
            //
            final String digits = Integer.toOctalString(c);
            sb.append('\\');
            for(int pad = 3 - digits.length(); pad > 0; --pad)
            {
                sb.append('0');
            }
            sb.append(digits);
        }
        else if(c < 32 || c == 127 || toStringMode == com.zeroc.Ice.ToStringMode.ASCII)
        {
            // \\unnnn, zero-padded to four hex digits
            final String digits = Integer.toHexString(c);
            sb.append("\\u");
            for(int pad = 4 - digits.length(); pad > 0; --pad)
            {
                sb.append('0');
            }
            sb.append(digits);
        }
        else
        {
            // non-ASCII character kept as is
            sb.append(c);
        }
    }

    //
    // Add escape sequences (such as "\n") to the input string.
    //
    // special lists additional characters to prefix with a backslash; it may
    // be null or empty, and every character in it must be in the ASCII range
    // 32-126, otherwise IllegalArgumentException is thrown.
    //
    // toStringMode selects the encoding of non-printable and non-ASCII
    // characters:
    //  - Compat: s is converted to UTF-8 and each byte is escaped individually;
    //  - Unicode: each UTF-16 code unit is passed to encodeChar as is;
    //  - ASCII: as Unicode, except that a surrogate pair is written as a single
    //    \\Unnnnnnnn escape. An unpaired surrogate raises IllegalArgumentException.
    //
    public static String
    escapeString(String s, String special, com.zeroc.Ice.ToStringMode toStringMode)
    {
        if(special != null)
        {
            for(int i = 0; i < special.length(); ++i)
            {
                if(special.charAt(i) < 32 || special.charAt(i) > 126)
                {
                    throw new IllegalArgumentException("special characters must be in ASCII range 32-126");
                }
            }
        }

        if(toStringMode == com.zeroc.Ice.ToStringMode.Compat)
        {
            // Encode UTF-8 bytes. The charset constant cannot fail to resolve,
            // so no UnsupportedEncodingException handling is needed.
            final byte[] bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);

            StringBuilder result = new StringBuilder(bytes.length);
            for(byte b : bytes)
            {
                // Mask to get the unsigned byte value (0-255).
                encodeChar((char)(b & 0xFF), result, special, toStringMode);
            }
            return result.toString();
        }

        StringBuilder result = new StringBuilder(s.length());
        for(int i = 0; i < s.length(); i++)
        {
            char c = s.charAt(i);
            if(toStringMode == com.zeroc.Ice.ToStringMode.Unicode || !Character.isSurrogate(c))
            {
                encodeChar(c, result, special, toStringMode);
                continue;
            }

            assert(toStringMode == com.zeroc.Ice.ToStringMode.ASCII && Character.isSurrogate(c));

            //
            // Only a high surrogate immediately followed by a low surrogate
            // forms a code point; anything else would make toCodePoint return
            // a meaningless value and swallow the following character.
            //
            if(!Character.isHighSurrogate(c))
            {
                throw new IllegalArgumentException("Low surrogate without high surrogate");
            }
            if(i + 1 == s.length() || !Character.isLowSurrogate(s.charAt(i + 1)))
            {
                throw new IllegalArgumentException("High surrogate without low surrogate");
            }

            i++;
            int codePoint = Character.toCodePoint(c, s.charAt(i));

            // append \Unnnnnnnn, zero-padded to eight hex digits
            result.append("\\U");
            String hex = Integer.toHexString(codePoint);
            for(int j = hex.length(); j < 8; j++)
            {
                result.append('0');
            }
            result.append(hex);
        }

        return result.toString();
    }

    //
    // Return the character of s at pos. Throws IllegalArgumentException when
    // that character is a control character (ordinal below 32, or 127).
    //
    private static char
    checkChar(String s, int pos)
    {
        char c = s.charAt(pos);
        if(c < 32 || c == 127)
        {
            String msg;
            if(pos > 0)
            {
                msg = "character after `" + s.substring(0, pos) + "'";
            }
            else
            {
                msg = "first character";
            }
            msg += " has invalid ordinal value " + (int)c;
            throw new IllegalArgumentException(msg);
        }
        return c;
    }

    //
    // Return the value (0-15) of the ASCII hexadecimal digit c, or -1 when c
    // is not one of 0-9, a-f, A-F.
    //
    private static int
    hexDigitValue(char c)
    {
        if(c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if(c >= 'a' && c <= 'f')
        {
            return 10 + (c - 'a');
        }
        if(c >= 'A' && c <= 'F')
        {
            return 10 + (c - 'A');
        }
        return -1;
    }

    //
    // Decode the character or escape sequence starting at start and append it
    // to result; return the index of the first character following the decoded
    // character or escape sequence. Only s[start, end) is examined.
    //
    // Recognized escapes: \\ \' \" \? \a \b \f \n \r \t \v, \\unnnn and
    // \\Unnnnnnnn (universal character names), and runs of \ooo (octal) and
    // \xhh (hex) escapes, which are decoded together as one UTF-8 byte
    // sequence. For any other escaped character the backslash is dropped when
    // the character is listed in special and kept otherwise. A backslash that
    // is the last character before end is kept literally.
    //
    // Throws IllegalArgumentException for control characters, malformed
    // universal character names, \x without a hex digit, and octal values
    // above 255.
    //
    private static int
    decodeChar(String s, int start, int end, String special, StringBuilder result)
    {
        assert(start >= 0);
        assert(start < end);
        assert(end <= s.length());

        if(s.charAt(start) != '\\')
        {
            result.append(checkChar(s, start++));
        }
        else if(start + 1 == end)
        {
            // Trailing backslash: nothing to escape, keep it as is.
            ++start;
            result.append('\\');
        }
        else
        {
            char c = s.charAt(++start);

            switch(c)
            {
                case '\\':
                case '\'':
                case '"':
                case '?':
                {
                    ++start;
                    result.append(c);
                    break;
                }
                case 'a':
                {
                    ++start;
                    result.append('\u0007');
                    break;
                }
                case 'b':
                {
                    ++start;
                    result.append('\b');
                    break;
                }
                case 'f':
                {
                    ++start;
                    result.append('\f');
                    break;
                }
                case 'n':
                {
                    ++start;
                    result.append('\n');
                    break;
                }
                case 'r':
                {
                    ++start;
                    result.append('\r');
                    break;
                }
                case 't':
                {
                    ++start;
                    result.append('\t');
                    break;
                }
                case 'v':
                {
                    ++start;
                    result.append('\u000b');
                    // Must not fall through into the universal character
                    // name parser below.
                    break;
                }
                case 'u':
                case 'U':
                {
                    int codePoint = 0;
                    boolean inBMP = (c == 'u');
                    int size = inBMP ? 4 : 8; // number of hex digits required
                    ++start;
                    while(size > 0 && start < end)
                    {
                        int charVal = hexDigitValue(s.charAt(start++));
                        if(charVal < 0)
                        {
                            break; // while; size > 0, so the sequence is rejected below
                        }
                        codePoint = codePoint * 16 + charVal;
                        --size;
                    }
                    if(size > 0)
                    {
                        throw new IllegalArgumentException("Invalid universal character name: too few hex digits");
                    }
                    if(codePoint >= 0xD800 && codePoint <= 0xDFFF)
                    {
                        throw new IllegalArgumentException("A universal character name cannot designate a surrogate");
                    }
                    if(inBMP || Character.isBmpCodePoint(codePoint))
                    {
                        result.append((char)codePoint);
                    }
                    else
                    {
                        // Supplementary code point: appended as a surrogate pair.
                        result.append(Character.toChars(codePoint));
                    }
                    break;
                }

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case 'x':
                {
                    // UTF-8 byte sequence encoded with octal or hex escapes

                    // Every escaped byte consumes at least two input
                    // characters, so end - start is a safe upper bound.
                    byte[] arr = new byte[end - start];
                    int i = 0;
                    boolean more = true;
                    while(more)
                    {
                        int val = 0;
                        if(c == 'x')
                        {
                            // Up to two hex digits, at least one required.
                            int size = 2;
                            ++start; // skip the 'x'
                            while(size > 0 && start < end)
                            {
                                int charVal = hexDigitValue(s.charAt(start));
                                if(charVal < 0)
                                {
                                    break; // while; leave start on the non-hex character
                                }
                                ++start;
                                val = val * 16 + charVal;
                                --size;
                            }
                            if(size == 2)
                            {
                                throw new IllegalArgumentException("Invalid \\x escape sequence: no hex digit");
                            }
                        }
                        else
                        {
                            // Up to three octal digits.
                            for(int j = 0; j < 3 && start < end; ++j)
                            {
                                int charVal = s.charAt(start) - '0';
                                if(charVal < 0 || charVal > 7)
                                {
                                    assert(j != 0); // must be at least one digit
                                    break; // for; leave start on the non-octal character
                                }
                                ++start;
                                val = val * 8 + charVal;
                            }
                            if(val > 255)
                            {
                                String msg = "octal value \\" + Integer.toOctalString(val) + " (" + val + ") is out of range";
                                throw new IllegalArgumentException(msg);
                            }
                        }

                        arr[i++] = (byte)val;

                        more = false;

                        // Continue only when another octal or hex escape
                        // follows immediately. '8' and '9' are not octal
                        // digits, so \8 and \9 are left for the next call.
                        if((start + 1 < end) && s.charAt(start) == '\\')
                        {
                            c = s.charAt(start + 1);
                            if(c == 'x' || (c >= '0' && c <= '7'))
                            {
                                start++;
                                more = true;
                            }
                        }
                    }

                    result.append(new String(arr, 0, i, java.nio.charset.StandardCharsets.UTF_8));
                    break;
                }
                default:
                {
                    if(special == null || special.isEmpty() || special.indexOf(c) == -1)
                    {
                        result.append('\\'); // not in special, so we keep the backslash
                    }
                    result.append(checkChar(s, start++));
                    break;
                }
            }
        }

        return start;
    }

    //
    // Remove escape sequences added by escapeString from s[start, end) and
    // return the decoded text. A backslash before a character listed in
    // special is removed; special may be null or empty, and all of its
    // characters must be in the ASCII range 32-126.
    //
    // Throws IllegalArgumentException for an invalid input string or an
    // invalid special.
    //
    public static String
    unescapeString(String s, int start, int end, String special)
    {
        assert(start >= 0 && start <= end && end <= s.length());

        if(special != null)
        {
            for(int i = 0; i < special.length(); ++i)
            {
                if(special.charAt(i) < 32 || special.charAt(i) > 126)
                {
                    throw new IllegalArgumentException("special characters must be in ASCII range 32-126");
                }
            }
        }

        // Optimization for strings without escapes: validate the characters
        // and return the substring without building a copy piecewise.
        int p = s.indexOf('\\', start);
        if(p == -1 || p >= end)
        {
            p = start;
            while(p < end)
            {
                checkChar(s, p++);
            }
            return s.substring(start, end);
        }
        else
        {
            StringBuilder sb = new StringBuilder(end - start);
            while(start < end)
            {
                start = decodeChar(s, start, end, special, sb);
            }
            return sb.toString();
        }
    }

    //
    // Join a list of strings using the given delimiter: the delimiter is
    // inserted between consecutive elements, never before the first or
    // after the last. An empty list yields the empty string.
    //
    public static String
    joinString(java.util.List<String> values, String delimiter)
    {
        // StringBuilder: the buffer is local, so no synchronization is needed.
        StringBuilder s = new StringBuilder();
        boolean first = true;
        for(String v : values)
        {
            if(!first)
            {
                s.append(delimiter);
            }
            s.append(v);
            first = false;
        }
        return s.toString();
    }

    //
    // Split str into tokens separated by any of the characters in delim.
    //
    // Single or double quotes group characters (delimiters included) into one
    // token; the quotes themselves are removed. Outside quotes, a backslash
    // before either quote character is dropped and the quote kept literally;
    // inside quotes, the same applies to a backslash before the active quote
    // character. Empty tokens are not returned.
    //
    // Returns null for unmatched quotes.
    //
    public static String[]
    splitString(String str, String delim)
    {
        final java.util.List<String> tokens = new java.util.ArrayList<>();
        final StringBuilder current = new StringBuilder();
        final int len = str.length();

        int pos = 0;
        char quoteChar = '\0'; // '\0' means "not inside a quoted section"
        while(pos < len)
        {
            final char ch = str.charAt(pos);
            if(quoteChar == '\0')
            {
                if(ch == '"' || ch == '\'')
                {
                    quoteChar = ch;
                    ++pos;
                    continue; // Skip the opening quote.
                }

                if(ch == '\\' && pos + 1 < len && (str.charAt(pos + 1) == '"' || str.charAt(pos + 1) == '\''))
                {
                    ++pos; // Skip the backslash; the quote is appended below.
                }
                else if(delim.indexOf(ch) != -1)
                {
                    ++pos;
                    if(current.length() > 0)
                    {
                        tokens.add(current.toString());
                        current.setLength(0);
                    }
                    continue;
                }
            }
            else
            {
                if(ch == '\\' && pos + 1 < len && str.charAt(pos + 1) == quoteChar)
                {
                    ++pos; // Skip the backslash; the quote is appended below.
                }
                else if(ch == quoteChar)
                {
                    ++pos;
                    quoteChar = '\0';
                    continue; // Skip the closing quote.
                }
            }

            current.append(str.charAt(pos++));
        }

        if(current.length() > 0)
        {
            tokens.add(current.toString());
        }
        if(quoteChar != '\0')
        {
            return null; // Unmatched quote.
        }
        return tokens.toArray(new String[0]);
    }

    //
    // Same as checkQuote(s, 0).
    //
    public static int
    checkQuote(String s)
    {
        return checkQuote(s, 0);
    }

    //
    // Look for a quoted section beginning at start.
    //
    // Returns 0 when the character at start is neither a single nor a
    // double quotation mark. Otherwise returns the position of the next
    // occurrence of that same quotation mark which is not immediately
    // preceded by a backslash, or -1 when there is no such occurrence.
    //
    public static int
    checkQuote(String s, int start)
    {
        final char quote = s.charAt(start);
        if(quote != '"' && quote != '\'')
        {
            return 0; // Not quoted
        }

        int from = start + 1;
        while(from < s.length())
        {
            final int candidate = s.indexOf(quote, from);
            if(candidate == -1)
            {
                break;
            }
            if(s.charAt(candidate - 1) != '\\')
            {
                return candidate;
            }
            // Quote preceded by a backslash: keep searching after it.
            from = candidate + 1;
        }

        return -1; // Unmatched quote
    }

    //
    // Test s against pat, where the first '*' in pat (if any) is a wildcard.
    //
    // Without a '*', the strings must be equal. Otherwise s must start with
    // the part of pat before the '*' and end with the part after it, and the
    // two parts must not overlap in s. Any further '*' in pat is compared
    // literally.
    //
    // emptyMatch controls whether the wildcard may match the empty string
    // when pat has text after the '*'. When the '*' is the last character of
    // pat, a matching prefix is sufficient regardless of emptyMatch.
    //
    // Both s and pat are expected to be non-empty.
    //
    public static boolean
    match(String s, String pat, boolean emptyMatch)
    {
        assert(s.length() > 0);
        assert(pat.length() > 0);

        //
        // If pattern does not contain a wildcard just compare strings.
        //
        final int beginIndex = pat.indexOf('*');
        if(beginIndex < 0)
        {
            return s.equals(pat);
        }

        //
        // Make sure start of the strings match. regionMatches returns false
        // (rather than throwing) when s is shorter than the prefix.
        //
        if(!s.regionMatches(0, pat, 0, beginIndex))
        {
            return false;
        }

        //
        // Nothing after the wildcard: the prefix match is enough.
        //
        final int endLength = pat.length() - beginIndex - 1;
        if(endLength == 0)
        {
            return true;
        }

        //
        // Make sure there is something present in the middle to match the
        // wildcard. If emptyMatch is true, allow a match of "". A negative
        // endIndex means s is shorter than the suffix.
        //
        final int endIndex = s.length() - endLength;
        if(endIndex < beginIndex || (!emptyMatch && endIndex == beginIndex))
        {
            return false;
        }

        //
        // Make sure end of the strings match
        //
        return s.regionMatches(endIndex, pat, beginIndex + 1, endLength);
    }
}