org.eclipse.jetty.util.StringUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of a-jetty-base Show documentation
A-Jetty Base can run on Android Java as well as on standard Java 7+ and it can run precompiled JSP/JSTL.
There is a newer version: 1.0.5
//
//  ========================================================================
//  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
//  ------------------------------------------------------------------------
//  All rights reserved. This program and the accompanying materials
//  are made available under the terms of the Eclipse Public License v1.0
//  and Apache License v2.0 which accompanies this distribution.
//
//      The Eclipse Public License is available at
//      http://www.eclipse.org/legal/epl-v10.html
//
//      The Apache License v2.0 is available at
//      http://www.opensource.org/licenses/apache2.0.php
//
//  You may elect to redistribute this code under either of these licenses.
//  ========================================================================
//

package org.eclipse.jetty.util;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.jetty.util.log.Log;
import org.eclipse.jetty.util.log.Logger;

/** Fast String Utilities.
 *
 * These string utilities provide both convenience methods and
 * performance improvements over most standard library versions. The
 * main aim of the optimizations is to avoid object creation unless
 * absolutely required.
 *
 * 
 */
public class StringUtil
{
    private static final Logger LOG = Log.getLogger(StringUtil.class);
    
    /**
     * Lookup from accepted charset spellings to the normalized name
     * constants declared below; populated by the static initializer.
     */
    private final static Trie<String> CHARSETS= new ArrayTrie<>(256);
    
    /** Address literal denoting all interfaces. */
    public static final String ALL_INTERFACES="0.0.0.0";
    /** Carriage return (octal 015) followed by line feed (octal 012). */
    public static final String CRLF="\015\012";
    
    /** @deprecated use {@link System#lineSeparator()} instead */
    @Deprecated
    public static final String __LINE_SEPARATOR = System.lineSeparator();
       
    /** Normalized name of the ISO-8859-1 charset. */
    public static final String __ISO_8859_1="ISO-8859-1";
    /** Normalized name of the UTF-8 charset. */
    public final static String __UTF8="UTF-8";
    /** Normalized name of the UTF-16 charset. */
    public final static String __UTF16="UTF-16";

    /**
     * @deprecated Use {@link StandardCharsets#UTF_8}
     */
    @Deprecated
    public final static Charset __UTF8_CHARSET=StandardCharsets.UTF_8;
    /**
     * @deprecated Use {@link StandardCharsets#ISO_8859_1}
     */
    @Deprecated
    public final static Charset __ISO_8859_1_CHARSET=StandardCharsets.ISO_8859_1;
    /**
     * @deprecated Use {@link StandardCharsets#UTF_16}
     */
    @Deprecated
    public final static Charset __UTF16_CHARSET=StandardCharsets.UTF_16;
    /**
     * @deprecated Use {@link StandardCharsets#US_ASCII}
     */
    @Deprecated
    public final static Charset __US_ASCII_CHARSET=StandardCharsets.US_ASCII;
    
    // Register each accepted spelling against its normalized name.
    static
    {
        CHARSETS.put("UTF-8",__UTF8);
        CHARSETS.put("UTF8",__UTF8);
        CHARSETS.put("UTF-16",__UTF16);
        CHARSETS.put("UTF16",__UTF16);
        CHARSETS.put("ISO-8859-1",__ISO_8859_1);
        CHARSETS.put("ISO_8859_1",__ISO_8859_1);
    }
    
    /* ------------------------------------------------------------ */
    /**
     * Map an alternate charset spelling (eg utf8) onto its normalized
     * name (eg UTF-8).
     *
     * @param s the charset name to look up
     * @return the normalized name registered for {@code s}, or {@code s}
     *         itself when no mapping is registered
     */
    public static String normalizeCharset(String s)
    {
        String canonical=CHARSETS.get(s);
        if (canonical!=null)
            return canonical;
        return s;
    }
    
    /* ------------------------------------------------------------ */
    /**
     * Map an alternate charset spelling (eg utf8), given as a region of
     * a larger string, onto its normalized name (eg UTF-8).
     *
     * @param s the string containing the charset name
     * @param offset index of the first character of the name within {@code s}
     * @param length number of characters making up the name
     * @return the normalized name registered for the region, or the region
     *         itself as a new substring when no mapping is registered
     */
    public static String normalizeCharset(String s,int offset,int length)
    {
        String canonical=CHARSETS.get(s,offset,length);
        if (canonical!=null)
            return canonical;
        return s.substring(offset,offset+length);
    }
    

    /* ------------------------------------------------------------ */
    /**
     * ASCII lower-casing table with 128 entries, indexed by character code.
     * The entries at octal 101-132 ('A'-'Z') hold octal 141-172 ('a'-'z');
     * every other entry holds its own index, so those characters map to
     * themselves. Callers must only index it with values 0-127.
     */
    public static final char[] lowercases = {
          '\000','\001','\002','\003','\004','\005','\006','\007',
          '\010','\011','\012','\013','\014','\015','\016','\017',
          '\020','\021','\022','\023','\024','\025','\026','\027',
          '\030','\031','\032','\033','\034','\035','\036','\037',
          '\040','\041','\042','\043','\044','\045','\046','\047',
          '\050','\051','\052','\053','\054','\055','\056','\057',
          '\060','\061','\062','\063','\064','\065','\066','\067',
          '\070','\071','\072','\073','\074','\075','\076','\077',
          '\100','\141','\142','\143','\144','\145','\146','\147',
          '\150','\151','\152','\153','\154','\155','\156','\157',
          '\160','\161','\162','\163','\164','\165','\166','\167',
          '\170','\171','\172','\133','\134','\135','\136','\137',
          '\140','\141','\142','\143','\144','\145','\146','\147',
          '\150','\151','\152','\153','\154','\155','\156','\157',
          '\160','\161','\162','\163','\164','\165','\166','\167',
          '\170','\171','\172','\173','\174','\175','\176','\177' };

    /* ------------------------------------------------------------ */
    /**
     * Lower-case the ASCII letters of a string, leaving every character
     * above code 127 untouched.
     * <p>
     * The string is scanned from its end; a character array is only
     * allocated once a character that actually changes is found, so an
     * already lower-case input is returned as the same instance.
     *
     * @param s the string to convert
     * @return {@code s} itself when nothing needed converting, otherwise a
     *         new string with the ASCII upper-case letters lowered
     */
    public static String asciiToLowerCase(String s)
    {
        char[] converted = null;

        for (int idx=s.length()-1; idx>=0; idx--)
        {
            char current = (converted==null) ? s.charAt(idx) : converted[idx];
            if (current>127)
                continue;

            char lowered = lowercases[current];
            if (lowered==current)
                continue;

            // first character that changes: take a private copy to edit
            if (converted==null)
                converted = s.toCharArray();
            converted[idx] = lowered;
        }

        if (converted==null)
            return s;
        return new String(converted);
    }


    /* ------------------------------------------------------------ */
    public static boolean startsWithIgnoreCase(String s,String w)
    {
        if (w==null)
            return true;
        
        if (s==null || s.length()0;)
        {
            char c1=s.charAt(--sl);
            char c2=w.charAt(i);
            if (c1!=c2)
            {
                if (c1<=127)
                    c1=lowercases[c1];
                if (c2<=127)
                    c2=lowercases[c2];
                if (c1!=c2)
                    return false;
            }
        }
        return true;
    }
    
    /* ------------------------------------------------------------ */
    /**
     * returns the next index of a character from the chars string
     */
    public static int indexFrom(String s,String chars)
    {
        for (int i=0;i=0)
              return i;
        return -1;
    }
    
    /* ------------------------------------------------------------ */
    /**
     * replace substrings within string.
     */
    public static String replace(String s, String sub, String with)
    {
        int c=0;
        int i=s.indexOf(sub,c);
        if (i == -1)
            return s;
    
        StringBuilder buf = new StringBuilder(s.length()+with.length());

        do
        {
            buf.append(s.substring(c,i));
            buf.append(with);
            c=i+sub.length();
        } while ((i=s.indexOf(sub,c))!=-1);

        if (c=s.length())
                    break;
                buf.append(s.charAt(i));
            }
        }
    }

    
    /* ------------------------------------------------------------ */
    /**
     * Append a byte to a builder as two digits in the given base.
     * <p>
     * The byte is treated as unsigned. The first digit appended is
     * {@code (value/base)%base} and the second is {@code value%base};
     * digit values of ten and above are written as lower-case letters
     * starting at 'a'.
     *
     * @param buf the builder to append to
     * @param b the byte to render
     * @param base the numeric base, e.g. 16 for hex
     */
    public static void append(StringBuilder buf,byte b,int base)
    {
        int unsigned = b & 0xff;
        int[] digits = { (unsigned/base)%base, unsigned%base };

        for (int digit : digits)
        {
            int ch = (digit<10) ? ('0'+digit) : ('a'+(digit-10));
            buf.append((char)ch);
        }
    }

    /* ------------------------------------------------------------ */
    /**
     * Append a number to a StringBuffer as exactly two decimal digits,
     * zero padded on the left. Values of 100 or more append nothing.
     *
     * @param buf the buffer to append to
     * @param i the value to render
     */
    public static void append2digits(StringBuffer buf,int i)
    {
        if (i>=100)
            return;

        char tens = (char)('0'+i/10);
        char units = (char)('0'+i%10);
        buf.append(tens).append(units);
    }
    
    /* ------------------------------------------------------------ */
    /**
     * Append a number to a StringBuilder as exactly two decimal digits,
     * zero padded on the left. Values of 100 or more append nothing.
     *
     * @param buf the builder to append to
     * @param i the value to render
     */
    public static void append2digits(StringBuilder buf,int i)
    {
        if (i>=100)
            return;

        char tens = (char)('0'+i/10);
        char units = (char)('0'+i%10);
        buf.append(tens).append(units);
    }
    
    /* ------------------------------------------------------------ */
    /**
     * Substitute the empty string for a null reference.
     *
     * @param s the string, possibly null
     * @return {@code s} when it is not null, otherwise the empty string
     */
    public static String nonNull(String s)
    {
        return (s!=null) ? s : "";
    }
    
    /* ------------------------------------------------------------ */
    public static boolean equals(String s,char[] buf, int offset, int length)
    {
        if (s.length()!=length)
            return false;
        for (int i=0;i
     * This will return a result on the first occurrence of a control character, regardless if
     * there are more than one.
     * 
     * 
     * Note: uses codepoint version of {@link Character#isISOControl(int)} to support Unicode better.
     * 
     *
     *      *   indexOfControlChars(null)      == -1
     *   indexOfControlChars("")        == -1
     *   indexOfControlChars("\r\n")    == 0
     *   indexOfControlChars("\t")      == 0
     *   indexOfControlChars("   ")     == -1
     *   indexOfControlChars("a")       == -1
     *   indexOfControlChars(".")       == -1
     *   indexOfControlChars(";\n")     == 1
     *   indexOfControlChars("abc\f")   == 3
     *   indexOfControlChars("z\010")   == 1
     *   indexOfControlChars(":\u001c") == 1
     * 
     *
     * @param str
     *            the string to test.
     * @return the index of first control character in string, -1 if no control characters encountered
     */
    public static int indexOfControlChars(String str)
    {
        if (str == null)
        {
            return -1;
        }
        int len = str.length();
        for (int i = 0; i < len; i++)
        {
            if (Character.isISOControl(str.codePointAt(i)))
            {
                // found a control character, we can stop searching  now
                return i;
            }
        }
        // no control characters
        return -1;
    }

    /* ------------------------------------------------------------ */
    /**
     * Report whether a string is null, empty, or made up solely of
     * whitespace.
     * <p>
     * Note: each position is examined with the codepoint version of
     * {@link Character#isWhitespace(int)}.
     * </p>
     *
     * <pre>
     *   isBlank(null)   == true
     *   isBlank("")     == true
     *   isBlank("\r\n") == true
     *   isBlank("\t")   == true
     *   isBlank("   ")  == true
     *   isBlank("a")    == false
     *   isBlank(".")    == false
     *   isBlank(";\n")  == false
     * </pre>
     *
     * @param str
     *            the string to test.
     * @return true if string is null or only whitespace characters, false if non-whitespace characters encountered.
     */
    public static boolean isBlank(String str)
    {
        if (str != null)
        {
            for (int pos = 0, end = str.length(); pos < end; pos++)
            {
                int codePoint = str.codePointAt(pos);
                if (!Character.isWhitespace(codePoint))
                {
                    // a single non-whitespace character settles it
                    return false;
                }
            }
        }
        // null, empty, or whitespace throughout
        return true;
    }
    
    /* ------------------------------------------------------------ */
    /**
     * Report whether a string is non-null and holds at least one
     * non-whitespace character.
     * <p>
     * Note: each position is examined with the codepoint version of
     * {@link Character#isWhitespace(int)}.
     * </p>
     *
     * <pre>
     *   isNotBlank(null)   == false
     *   isNotBlank("")     == false
     *   isNotBlank("\r\n") == false
     *   isNotBlank("\t")   == false
     *   isNotBlank("   ")  == false
     *   isNotBlank("a")    == true
     *   isNotBlank(".")    == true
     *   isNotBlank(";\n")  == true
     * </pre>
     *
     * @param str
     *            the string to test.
     * @return true if string is not null and has at least 1 non-whitespace character, false if null or all-whitespace characters.
     */
    public static boolean isNotBlank(String str)
    {
        if (str != null)
        {
            for (int pos = 0, end = str.length(); pos < end; pos++)
            {
                int codePoint = str.codePointAt(pos);
                if (!Character.isWhitespace(codePoint))
                {
                    // a single non-whitespace character settles it
                    return true;
                }
            }
        }
        // null, empty, or whitespace throughout
        return false;
    }

    /* ------------------------------------------------------------ */
    /**
     * Report whether a charset name denotes UTF-8.
     *
     * @param charset the charset name to test
     * @return true if the name equals "UTF-8" ignoring case, either
     *         directly or after {@link #normalizeCharset(String)}
     */
    public static boolean isUTF8(String charset)
    {
        if (__UTF8.equalsIgnoreCase(charset))
            return true;
        // fall back to the alternate spellings known to normalizeCharset
        String normalized = normalizeCharset(charset);
        return __UTF8.equalsIgnoreCase(normalized);
    }


    /* ------------------------------------------------------------ */
    public static String printable(String name)
    {
        if (name==null)
            return null;
        StringBuilder buf = new StringBuilder(name.length());
        for (int i=0;i' ' && c<0x7f)
                buf.append(c);
            else 
            {
                buf.append("0x");
                TypeUtil.toHex(b[i],buf);
            }
        }
        return buf.toString();
    }
    
    /**
     * Encode a string as ISO-8859-1 bytes.
     *
     * @param s the string to encode
     * @return the encoded bytes
     */
    public static byte[] getBytes(String s)
    {
        final Charset latin1 = StandardCharsets.ISO_8859_1;
        return s.getBytes(latin1);
    }
    
    /**
     * Encode a string as UTF-8 bytes.
     *
     * @param s the string to encode
     * @return the encoded bytes
     */
    public static byte[] getUtf8Bytes(String s)
    {
        final Charset utf8 = StandardCharsets.UTF_8;
        return s.getBytes(utf8);
    }
    
    /**
     * Encode a string using a named charset, falling back to the
     * platform default charset when that fails.
     * <p>
     * Any exception raised by the named encoding is logged as a warning
     * before the fallback encoding is attempted.
     *
     * @param s the string to encode
     * @param charset the name of the charset to use
     * @return the encoded bytes
     */
    public static byte[] getBytes(String s,String charset)
    {
        byte[] encoded;
        try
        {
            encoded = s.getBytes(charset);
        }
        catch(Exception e)
        {
            LOG.warn(e);
            encoded = s.getBytes();
        }
        return encoded;
    }
    
    
    
    /**
     * Converts a binary SID to a string SID
     * 
     * http://en.wikipedia.org/wiki/Security_Identifier
     * 
     * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
     * <p>
     * Layout read from the array: byte 0 is the revision, byte 1 the
     * number of sub authorities, bytes 2-7 the identifier authority (most
     * significant byte first) and each following group of four bytes one
     * sub authority (least significant byte first).
     *
     * @param sidBytes the binary SID
     * @return the SID in its string form
     */
    public static String sidBytesToString(byte[] sidBytes)
    {
        StringBuilder sidString = new StringBuilder();
        
        // Identify this as a SID
        sidString.append("S-");
        
        // Add SID revision level (expect 1 but may change someday)
        sidString.append(Byte.toString(sidBytes[0])).append('-');
        
        // Fold the six authority bytes into one value. Shifting keeps the
        // full eight bits of every byte; concatenating unpadded hex digits
        // would lose the leading zero of any byte below 0x10.
        long authority = 0;
        for (int i = 2; i <= 7; ++i)
        {
            authority = (authority << 8) | (sidBytes[i] & 0xFF);
        }
        
        sidString.append(authority); // '-' is in the subauth loop
   
        // the number of subAuthorities we need to attach
        int subAuthorityCount = sidBytes[1];

        // attach each of the subAuthorities
        for (int i = 0; i < subAuthorityCount; ++i)
        {
            int offset = 8 + i * 4;
            // little endian, read as an unsigned 32 bit value
            long subAuthority = (sidBytes[offset] & 0xFFL)
                    | ((sidBytes[offset + 1] & 0xFFL) << 8)
                    | ((sidBytes[offset + 2] & 0xFFL) << 16)
                    | ((sidBytes[offset + 3] & 0xFFL) << 24);
            sidString.append('-').append(subAuthority);
        }
        
        return sidString.toString();
    }
    
    /**
     * Converts a string SID to a binary SID
     * 
     * http://en.wikipedia.org/wiki/Security_Identifier
     * 
     * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
     * <p>
     * The result holds the revision byte, the sub authority count byte,
     * the identifier authority as six bytes (most significant first) and
     * then each sub authority as four bytes (least significant first).
     *
     * @param sidString the SID in its string form
     * @return the binary SID
     */
    public static byte[] sidStringToBytes( String sidString )
    {
        final String[] parts = sidString.split("-");
        
        // everything after "S", the revision and the authority
        final int subCount = parts.length - 3;
        
        final byte[] out = new byte[8 + 4 * subCount];
        int pos = 0;
        
        // the revision byte, then the # of sub authorities byte
        out[pos++] = (byte)Integer.parseInt(parts[1]);
        out[pos++] = (byte)subCount;

        // the authority: 12 hex digits, copied in reading order
        StringBuilder hex = new StringBuilder(Long.toHexString(Long.parseLong(parts[2])));
        while (hex.length() < 12)
        {
            hex.insert(0,'0');
        }
        for (int start = 0; start < hex.length(); start += 2)
        {
            out[pos++] = (byte)Integer.parseInt(hex.substring(start, start + 2),16);
        }
        
        // each sub authority: 8 hex digits, copied pair by pair from the end
        for (int part = 3; part < parts.length; part++)
        {
            hex = new StringBuilder(Long.toHexString(Long.parseLong(parts[part])));
            while (hex.length() < 8)
            {
                hex.insert(0,'0');
            }
            for (int end = hex.length(); end > 0; end -= 2)
            {
                out[pos++] = (byte)Integer.parseInt(hex.substring(end - 2, end),16);
            }
        }
      
        return out;
    }
    

    /**
     * Convert the leading number of a String to an int.
     * <p>
     * Characters at or below the space character are skipped before the
     * number and end it afterwards; a '-' seen before the first digit
     * makes the result negative; any other character ends the parse.
     * Overflow is not detected.
     *
     * @param string
     *            A String containing an integer.
     * @return the parsed value
     * @throws NumberFormatException if no digit was found
     */
    public static int toInt(String string)
    {
        int value = 0;
        boolean seenDigit = false;
        boolean negative = false;

        final int end = string.length();
        for (int pos = 0; pos < end; pos++)
        {
            char ch = string.charAt(pos);

            if (ch <= ' ')
            {
                // leading blanks are skipped, a trailing one ends the number
                if (seenDigit)
                    break;
                continue;
            }

            if (ch >= '0' && ch <= '9')
            {
                value = value * 10 + (ch - '0');
                seenDigit = true;
                continue;
            }

            if (ch == '-' && !seenDigit)
            {
                negative = true;
                continue;
            }

            break;
        }

        if (!seenDigit)
            throw new NumberFormatException(string);
        return negative ? -value : value;
    }

    /**
     * Convert the leading number of a String to a long.
     * <p>
     * Characters at or below the space character are skipped before the
     * number and end it afterwards; a '-' seen before the first digit
     * makes the result negative; any other character ends the parse.
     * Overflow is not detected.
     *
     * @param string
     *            A String containing an integer.
     * @return the parsed value
     * @throws NumberFormatException if no digit was found
     */
    public static long toLong(String string)
    {
        long value = 0;
        boolean seenDigit = false;
        boolean negative = false;

        final int end = string.length();
        for (int pos = 0; pos < end; pos++)
        {
            char ch = string.charAt(pos);

            if (ch <= ' ')
            {
                // leading blanks are skipped, a trailing one ends the number
                if (seenDigit)
                    break;
                continue;
            }

            if (ch >= '0' && ch <= '9')
            {
                value = value * 10L + (ch - '0');
                seenDigit = true;
                continue;
            }

            if (ch == '-' && !seenDigit)
            {
                negative = true;
                continue;
            }

            break;
        }

        if (!seenDigit)
            throw new NumberFormatException(string);
        return negative ? -value : value;
    }
    
    /**
     * Cut a string down to at most a given number of characters.
     * 
     * @param str the string to possibly truncate
     * @param maxSize the maximum size of the string
     * @return {@code str} itself when it is null or already short enough,
     *         otherwise its first {@code maxSize} characters
     */
    public static String truncate(String str, int maxSize)
    {
        boolean fits = (str == null) || (str.length() <= maxSize);
        return fits ? str : str.substring(0,maxSize);
    }

    /**
    * Parse the string representation of a list, handing the text between
    * the brackets to {@link #csvSplit(String,int,int)}.
    * @param s The string to parse, expected to be enclosed as '[...]'
    * @return An array of parsed values; an empty array when {@code s} is null or "[]".
    * @throws IllegalArgumentException if {@code s} does not start with '[' and end with ']'
    */
    public static String[] arrayFromString(String s) 
    {
        if (s==null)
            return new String[0];

        boolean bracketed = s.startsWith("[") && s.endsWith("]");
        if (!bracketed)
            throw new IllegalArgumentException();

        // "[]" carries no values
        if (s.length()==2)
            return new String[0];

        int innerLength = s.length()-2;
        return csvSplit(s,1,innerLength);
    }
    
    /**
    * Parse a whole CSV string by delegating to {@link #csvSplit(String,int,int)}.
    * @param s The string to parse
    * @return An array of parsed values, or null if {@code s} is null.
    */
    public static String[] csvSplit(String s)
    {
        return (s==null) ? null : csvSplit(s,0,s.length());
    }
    
    /**
     * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
     * @param s The string to parse
     * @param off The offset into the string to start parsing
     * @param len The len in characters to parse
     * @return An array of parsed values, or null if {@code s} is null.
     * @throws IllegalArgumentException if {@code off} or {@code len} is
     *         negative, or {@code off} is beyond the end of {@code s}
     */
    public static String[] csvSplit(String s, int off,int len)
    {
        if (s==null)
            return null;
        if (off<0 || len<0 || off>s.length())
            throw new IllegalArgumentException();

        // typed list so that toArray yields a String[]
        List<String> list = new ArrayList<>();
        csvSplit(list,s,off,len);
        return list.toArray(new String[list.size()]);
    }

    /** Parser states used by {@link #csvSplit(List,String,int,int)}. */
    enum CsvSplitState { PRE_DATA, QUOTE, SLOSH, DATA, WHITE, POST_DATA };

    /** Split a quoted comma separated string to a list
     * <p>Handle rfc4180-like 
     * CSV strings, with the exceptions:</p>
     * <ul>
     * <li>quoted values may contain double quotes escaped with back-slash</li>
     * <li>Non-quoted values are trimmed of leading trailing white space</li>
     * <li>trailing commas are ignored</li>
     * <li>double commas result in a empty string value</li>
     * </ul>
     * @param list The Collection to split to (or null to get a new list)
     * @param s The string to parse
     * @param off The offset into the string to start parsing
     * @param len The len in characters to parse
     * @return list containing the parsed list values
     */
    public static List<String> csvSplit(List<String> list,String s, int off,int len)
    {
        if (list==null)
            list=new ArrayList<>();
        CsvSplitState state = CsvSplitState.PRE_DATA;
        StringBuilder out = new StringBuilder();
        // length of 'out' before the current run of trailing white space
        int last=-1;
        while (len>0)
        {
            char ch = s.charAt(off++);
            len--;
            
            switch(state)
            {
                case PRE_DATA:
                    // skip white space ahead of a value
                    if (Character.isWhitespace(ch))
                        continue;

                    if ('"'==ch)
                    {
                        state=CsvSplitState.QUOTE;
                        continue;
                    }
                    
                    // a comma with no value before it is an empty value
                    if (','==ch)
                    {
                        list.add("");
                        continue;
                    }

                    state=CsvSplitState.DATA;
                    out.append(ch);
                    continue;

                case DATA:
                    if (Character.isWhitespace(ch))
                    {
                        // may be trailing white space: remember where it began
                        last=out.length();
                        out.append(ch);
                        state=CsvSplitState.WHITE;
                        continue;
                    }
                    
                    if (','==ch)
                    {
                        list.add(out.toString());
                        out.setLength(0);
                        state=CsvSplitState.PRE_DATA;
                        continue;
                    }

                    out.append(ch);
                    continue;
                    
                case WHITE:
                    if (Character.isWhitespace(ch))
                    {
                        out.append(ch);
                        continue;
                    }
                    
                    if (','==ch)
                    {
                        // the white space was trailing: drop it
                        out.setLength(last);
                        list.add(out.toString());
                        out.setLength(0);
                        state=CsvSplitState.PRE_DATA;
                        continue;
                    }
                    
                    // the white space was inside the value: keep it
                    state=CsvSplitState.DATA;
                    out.append(ch);
                    last=-1;
                    continue;

                case QUOTE:
                    if ('\\'==ch)
                    {
                        state=CsvSplitState.SLOSH;
                        continue;
                    }
                    if ('"'==ch)
                    {
                        list.add(out.toString());
                        out.setLength(0);
                        state=CsvSplitState.POST_DATA;
                        continue;
                    }
                    out.append(ch);
                    continue;
                    
                case SLOSH:
                    // the character after a back-slash is taken literally
                    out.append(ch);
                    state=CsvSplitState.QUOTE;
                    continue;
                    
                case POST_DATA:
                    // ignore everything after a closing quote up to the comma
                    if (','==ch)
                    {
                        state=CsvSplitState.PRE_DATA;
                        continue;
                    }
                    continue;
            }
        }

        // flush whatever value was in progress when the input ended
        switch(state)
        {
            case PRE_DATA:
            case POST_DATA:
                break;

            case DATA:
            case QUOTE:
            case SLOSH:
                list.add(out.toString());
                break;
                
            case WHITE:
                out.setLength(last);
                list.add(out.toString());
                break;
        }
        
        return list;
    }

    public static String sanitizeXmlString(String html)
    {
        if (html==null)
            return null;
        
        int i=0;
        
        // Are there any characters that need sanitizing?
        loop: for (;i' :
                case '\'':
                case '"':
                    break loop;

                default:
                    if (Character.isISOControl(c) && !Character.isWhitespace(c))
                        break loop;
            }
        }

        // No characters need sanitizing, so return original string
        if (i==html.length())
            return html;
        
        // Create builder with OK content so far 
        StringBuilder out = new StringBuilder(html.length()*4/3);
        out.append(html,0,i);
        
        // sanitize remaining content
        for (;i' :
                    out.append(">");
                    break;
                case '\'':
                    out.append("'");
                    break;
                case '"':
                    out.append(""");
                    break;

                default:
                    if (Character.isISOControl(c) && !Character.isWhitespace(c))
                        out.append('?');
                    else
                        out.append(c);
            }
        }
        return out.toString();
    }

}