All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dell.doradus.common.Utils Maven / Gradle / Ivy

/*
 * Copyright (C) 2014 Dell, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dell.doradus.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.Socket;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import javax.xml.bind.DatatypeConverter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;

/**
 * Static helper functions used by Doradus. No instances are allowed.
 */
final public class Utils {
    /**
     * The Charset object for the UTF-8 character set.
     */
    public static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
    
    /**
     * The UTC timezone (aka GMT or Zulu time).
     */
    public static final TimeZone UTC_TIMEZONE = TimeZone.getTimeZone("GMT");
    
    // 32-symbol alphabet (digits followed by 'a'..'v') in ascending character order.
    // getUniqueId() indexes it with 5-bit groups (value & 31) to encode long values.
    private static final char[] UID_CHARS = "0123456789abcdefghijklmnopqrstuv".toCharArray();
    
    // Static methods only: the constructor is private and always throws, so no instance
    // can be created, even from inside this class.
    private Utils() {
        throw new AssertionError();
    }

    /**
     * Test whether a string consists solely of letters, digits, and underscores.
     *
     * @param   string  Candidate string; may be null.
     * @return          False for a null or empty string, or as soon as a character is
     *                  found that is neither accepted by isLetter()/isDigit() nor equal
     *                  to '_'. True otherwise.
     */
    public static boolean allAlphaNumUnderscore(String string) {
        if (string == null || string.isEmpty()) {
            return false;
        }
        for (char ch : string.toCharArray()) {
            boolean bAllowed = isLetter(ch) || isDigit(ch) || ch == '_';
            if (!bAllowed) {
                return false;
            }
        }
        return true;
    }   // allAlphaNumUnderscore

    /**
     * Test whether a string consists solely of the ASCII digits '0' through '9'.
     *
     * @param   string  Candidate string; may be null.
     * @return          False for a null or empty string or when any character lies
     *                  outside the range '0'..'9'; true otherwise.
     */
    public static boolean allDigits(String string) {
        if (string == null || string.isEmpty()) {
            return false;
        }
        // Scan from the end; the order does not matter because every char must pass.
        int pos = string.length();
        while (--pos >= 0) {
            char ch = string.charAt(pos);
            boolean bDigit = ch >= '0' && ch <= '9';
            if (!bDigit) {
                return false;
            }
        }
        return true;
    }   // allDigits
    
    /**
     * Decode a Base64-encoded String into the bytes it represents.
     *
     * The length of the string is first checked to be a multiple of 4 via
     * Utils.require(); the decoding itself is delegated to
     * DatatypeConverter.parseBase64Binary().
     *
     * @param base64Value               Base64-encoded string. Must not be null (its
     *                                  length is read before anything else).
     * @return                          Decoded binary value.
     * @throws IllegalArgumentException If the given string is not a valid Base64 value
     *                                  (presumably raised by Utils.require() when the
     *                                  length check fails -- confirm against require()).
     */
    public static byte[] base64ToBinary(String base64Value) throws IllegalArgumentException {
        boolean bMultipleOf4 = (base64Value.length() & 3) == 0;
        String errMsg = "Invalid base64 value (must be a multiple of 4 chars): " + base64Value;
        Utils.require(bMultipleOf4, errMsg);
        byte[] decoded = DatatypeConverter.parseBase64Binary(base64Value);
        return decoded;
    }   // base64ToBinary
    
    /**
     * Re-encode a Base64 string as a string of hex digits: the value is decoded to
     * binary with {@link #base64ToBinary(String)} and the bytes are then printed with
     * DatatypeConverter.printHexBinary().
     * 
     * @param base64Value               Base64-encoded string.
     * @return                          Decoded binary value re-encoded as a hex string.
     * @throws IllegalArgumentException If the given string is not a valid Base64 value.
     */
    public static String base64ToHex(String base64Value) throws IllegalArgumentException {
        return DatatypeConverter.printHexBinary(base64ToBinary(base64Value));
    }   // base64ToHex
    
    /**
     * Encode a binary value as a Base64 string. This is a thin wrapper around
     * DatatypeConverter.printBase64Binary(); null handling is whatever that method does.
     * 
     * @param  value                    A binary value.
     * @return                          Base64-encoded value.
     * @throws IllegalArgumentException Declared for a null value -- NOTE(review): raised,
     *                                  if at all, by DatatypeConverter; confirm.
     */
    public static String base64FromBinary(byte[] value) throws IllegalArgumentException {
        String encoded = DatatypeConverter.printBase64Binary(value);
        return encoded;
    }   // base64FromBinary
    
    /**
     * Re-encode a hex string as Base64: the hex digits are parsed to binary with
     * DatatypeConverter.parseHexBinary() and the bytes are then encoded by
     * {@link #base64FromBinary(byte[])}.
     * 
     * @param hexValue                  String of hexadecimal characters.
     * @return                          Decoded binary value re-encoded with Base64.
     * @throws IllegalArgumentException If the given value is null or invalid
     *                                  (NOTE(review): raised by DatatypeConverter).
     */
    public static String base64FromHex(String hexValue) throws IllegalArgumentException {
        return base64FromBinary(DatatypeConverter.parseHexBinary(hexValue));
    }   // base64FromHex
    
    /**
     * Encode a slice of a binary value as a Base64 string. The slice
     * [offset, offset + length) is first copied out of the array and the copy is then
     * passed to DatatypeConverter.printBase64Binary().
     * 
     * @param  value                    A binary value.
     * @param  offset                   Zero-based index where data begins.
     * @param  length                   Number of bytes to encode.
     * @return                          Base64-encoded value of the slice.
     * @throws IllegalArgumentException Declared for invalid input; Arrays.copyOfRange()
     *                                  throws it when the computed end precedes offset.
     */
    public static String base64FromBinary(byte[] value, int offset, int length) throws IllegalArgumentException {
        int endExclusive = offset + length;
        byte[] slice = Arrays.copyOfRange(value, offset, endExclusive);
        return DatatypeConverter.printBase64Binary(slice);
    }   // base64FromBinary
    
    /**
     * Encode a String as Base64. The string is first turned into bytes by toBytes()
     * (UTF-8 according to this method's contract) and those bytes are then encoded with
     * DatatypeConverter.printBase64Binary().
     * 
     * @param   value   Unicode String value.
     * @return          Base64 encoding of the String's byte representation.
     */
    public static String base64FromString(String value) {
        byte[] encodedChars = toBytes(value);
        return DatatypeConverter.printBase64Binary(encodedChars);
    }   // base64FromString
    
    /**
     * Decode a Base64 value into a String. The value is decoded to binary by
     * {@link #base64ToBinary(String)} (which checks that the length is a multiple of 4
     * before decoding) and the resulting bytes are converted to a String by toString()
     * (UTF-8 according to this method's contract).
     * 
     * @param base64Value   Base64-encoded value of a UTF-8 encoded string.
     * @return              Decoded string value.
     */
    public static String base64ToString(String base64Value) {
        return toString(base64ToBinary(base64Value));
    }   // base64ToString
    
    /**
     * Build the Java Unicode escape sequence for a character: a backslash, the letter
     * 'u', and the character's code as exactly four lowercase hex digits. For example,
     * the null character (0x00) yields the six-character string "\u0000". Useful for
     * making hidden non-printable characters visible in display strings.
     *
     * @param ch    Character to be converted.
     * @return      String containing the Java Unicode escape sequence for the given
     *              character.
     */
    public static String charToEscape(char ch) {
        String hexDigits = Integer.toHexString(ch);
        StringBuilder escape = new StringBuilder(6).append("\\u");
        // A char needs at most 4 hex digits; left-pad shorter values with zeros.
        for (int pad = 4 - hexDigits.length(); pad > 0; pad--) {
            escape.append('0');
        }
        return escape.append(hexDigits).toString();
    }   // charToEscape

    /**
     * Silently close the given object: a null argument is a no-op and any IOException
     * thrown by close() is deliberately discarded.
     *
     * @param closeable A closeable object, or null.
     */
    public static void close(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best-effort close: the caller has no use for a failure here.
        }
    }   // close (Closeable)

    /**
     * Silently close the given socket: a null argument is a no-op and any exception
     * thrown by socket.close() is deliberately discarded.
     *
     * @param socket    A socket to be closed, or null.
     */
    public static void close(Socket socket) {
        if (socket == null) {
            return;
        }
        try {
            socket.close();
        } catch (Exception ignored) {
            // Best-effort close: nothing useful can be done with a failure here.
        }
    }   // close (socket)

    // Most recent value handed out by getTimeMicros(); only touched while holding
    // g_lastMicroLock.
    private static long g_lastMicroValue = 0;
    private static final Object g_lastMicroLock = new Object();

    /**
     * Get the current time in microseconds since the epoch. Calls are serialized on a
     * private lock and every call returns a value strictly greater than the previous
     * one, even across threads.
     *
     * @return  Current time in microseconds. The value is derived from
     *          System.currentTimeMillis() * 1000, so its real resolution is one
     *          millisecond; values handed out within the same millisecond (or after the
     *          clock steps back) are the previous value plus one.
     */
    public static long getTimeMicros() {
        // A dedicated lock object is used instead of a synchronized method so that this
        // method does not contend with anything else locking Utils.class.
        synchronized (g_lastMicroLock) {
            // Millisecond clock scaled to microsecond units, for compatibility with the
            // CLI and other tools.
            long clockMicros = System.currentTimeMillis() * 1000;
            // If the clock has not advanced past the last value (rapid calls or the
            // clock was set back), hand out last + 1; the clock eventually catches up.
            g_lastMicroValue = Math.max(clockMicros, g_lastMicroValue + 1);
            return g_lastMicroValue;
        }
    }   // getTimeMicros

    
    /**
     * Get globally unique id as string of 36 characters (same length as UUID.toString)  
     * Subsequent IDs are in almost increasing order (random within same millisecond)
     * Format is the following:
     * 
     *  xxxxxxxxx-yyyyyyyyyyyyyzzzzzzzzzzzzz
     *  x: timestamp in milliseconds (9 characters, encoded with 32 characters keeping the order)
     *  y: high 8-byte value of UUID (13 characters, same encoding)
     *  z: low 8-byte value of UUID (13 characters, same encoding)
     */
    public static String getUniqueId() {
        char[] data = new char[36]; 
        long l0 = System.currentTimeMillis();
        UUID uuid = UUID.randomUUID();
        long l1 = uuid.getMostSignificantBits();
        long l2 = uuid.getLeastSignificantBits();
        //we don't use Long.toString(long, radix) because we want to treat values as unsigned
        for(int i = 0; i < 9; i++) {
            data[8 - i] = UID_CHARS[(int)(l0 & 31)];
            l0 >>>= 5;
        }
        if(l0 != 0) throw new RuntimeException("ERROR");
        data[9] = '-';
        for(int i = 0; i < 13; i++) {
            data[22 - i] = UID_CHARS[(int)(l1 & 31)];
            l1 >>>= 5;
        }
        if(l1 != 0) throw new RuntimeException("ERROR");
        for(int i = 0; i < 13; i++) {
            data[35 - i] = UID_CHARS[(int)(l2 & 31)];
            l2 >>>= 5;
        }
        if(l2 != 0) throw new RuntimeException("ERROR");
        
        String v = new String(data);
        return v;
    }
    
    /**
     * Turn the given iterable collection into a simple comma-separated value (CSV)
     * String. For example, ["abc", "def", "xyz"] becomes "abc, def, xyz". Each member in
     * the collection is turned into a string using its toString() method. The separator
     * ", " is written between every pair of adjacent members, including members whose
     * string form is empty (["", "a"] becomes ", a").
     * 
     * @param strIterable   Values to join; neither the iterable nor its members may be
     *                      null.
     * @return              Comma-separated list of values; empty if there are no members.
     */
    public static String collToCSVString(Iterable<?> strIterable) {
        StringBuilder buffer = new StringBuilder();
        // Track the first member explicitly: testing buffer.length() would skip the
        // separator after leading members whose string form is empty.
        boolean bFirst = true;
        for (Object value : strIterable) {
            if (bFirst) {
                bFirst = false;
            } else {
                buffer.append(", ");
            }
            buffer.append(value.toString());
        }
        return buffer.toString();
    }   // collToCSVString
    
    /**
     * Compress the given message using GZIP, returning the compressed result as a byte[].
     *
     * @param  message      Message to be compressed (must be non-null).
     * @return              Compressed message in GZIP format.
     * @throws IOException  If an error occurs while writing the compressed data.
     */
    public static byte[] compressGZIP(byte[] message) throws IOException {
        // Write data through a GZIPOutputStream into a ByteArrayOutputStream.
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        GZIPOutputStream gzipOut = new GZIPOutputStream(bytesOut);
        try {
            gzipOut.write(message);
            gzipOut.finish();
        } finally {
            // Always close so the stream's compressor resources are released promptly,
            // including when write() fails.
            gzipOut.close();
        }
        return bytesOut.toByteArray();
    }   // compressGZIP

    /**
     * Join together the objects in the given collection into a single string with values
     * separated by the given separator. An empty collection returns an empty String. A
     * collection with a single object returns the toString() of the given object.
     * Otherwise, the toString() value of each object is concatenated in iteration order
     * with the given separation string added between values (but not before the first
     * value or after the last value).
     *
     * @param <T>       Element type of the collection.
     * @param values    A Collection of objects; neither it nor its members may be null.
     * @param sepStr    Separator string to use between values.
     * @return          The Strings concatenated together with the given separate string
     *                  between values.
     */
    public static <T> String concatenate(Collection<T> values, String sepStr) {
        assert values != null;
        assert sepStr != null;

        // Watch for the empty case first.
        if (values.isEmpty()) {
            return "";
        }

        // This handles any size >= 1.
        StringBuilder buffer = new StringBuilder();
        boolean bFirst = true;
        for (T value : values) {
            if (bFirst) {
                bFirst = false;
            } else {
                buffer.append(sepStr);
            }
            buffer.append(value.toString());
        }
        return buffer.toString();
    }   // concatenate

    /**
     * Join the Strings in the given array into one string, inserting the separator
     * between adjacent values (never before the first or after the last). An empty
     * array yields an empty String and a one-element array yields that element.
     *
     * @param values    An array of Strings.
     * @param sepStr    Separator string to use between values.
     * @return          The values concatenated in array order with the separator
     *                  between them.
     */
    public static String concatenate(String[] values, String sepStr) {
        assert values != null;
        assert sepStr != null;

        StringBuilder joined = new StringBuilder();
        for (int inx = 0; inx < values.length; inx++) {
            // Every value except the first is preceded by the separator.
            if (inx > 0) {
                joined.append(sepStr);
            }
            joined.append(values[inx]);
        }
        return joined.toString();
    }   // concatenate

    /**
     * Returns true if the given string contains any characters that are considered illegal
     * in XML. See http://www.w3.org/TR/xml/#charsets. The legal XML characters XML are:
     * 
     *      #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     * 
* Note that Java String chars can only be up to 0xFFFF * * @param str Non-null string to be tested. * @return True if the string contains a character considered illegal in XML. */ public static boolean containsIllegalXML(String str) { assert str != null; for (int index = 0; index < str.length(); index++) { char ch = str.charAt(index); if ((ch <= 0x08) || (ch >= 0x0B && ch <= 0x0C) || (ch >= 0x0E && ch <= 0x19) || (ch >= 0xD800 && ch <= 0xDFFF) || (ch >= 0xFFFE)) { return true; } } return false; } // containsIllegalXML /** * Concatenate the contents of all provided byte[] arrays into a single value from * left to right. The result byte[] will equal the sum of all the individual byte[] * lengths. The result may be zero-length but it won't be null. * * @param arrays One or more byte[] arrays. * @return Single byte[] with all input byte[] arrays concatenated from left * to right. */ public static byte[] concatenate(byte[]... arrays) { // Compute the total size needed. int totalLen = 0; for (byte[] array : arrays) { totalLen += array.length; } byte[] result = new byte[totalLen]; // Concatenate from left to right. int offset = 0; for (byte[] array : arrays) { for (int inx = 0; inx < array.length; inx++) { result[offset++] = array[inx]; } } return result; } // concatenate // This should be greater than any values already in the database since the last run. private static AtomicLong g_nextDocID = new AtomicLong(System.currentTimeMillis()); /** * Create a unique value that can be used as an object ID. This method will return a * unique value, even in a concurrent environment. * * @return A unique value as a string. */ public static String createObjectID() { return Long.toString(g_nextDocID.incrementAndGet()); } // createObjectID /** * Convert a date in the format yyyy-MM-dd HH:mm:ss.SSS into a Date object using the * UTC timezone. All trailing components are optional in right-to-left order, hence * all of the following are valid: *
     *      2012-12-12 12:12:12.123
     *      2012-12-12 12:12:12
     *      2012-12-12 12:12
     *      2012-12-12 12
     *      2012-12-12
     *      2012-12
     *      2012
     * 
* Omitted time components default to 0; omitted date components default to 1. If the * given string is badly formatted, an exception is thrown. * * @param dateString Date string in the format "yyyy-MM-dd HH:mm:ss.SSS". * @return Date object in UTC time zone representing the give value. * @throws IllegalArgumentException If the given date string is badly formatted. */ public static Date dateFromString(String dateString) throws IllegalArgumentException { // parseDate() does all the work. return parseDate(dateString).getTime(); } // dateFromString /** * Decompress the given buffer using GZIP, returning the result as a byte[]. * * @param buffer Buffer contain data to decompress. * @return Decompressed data as a byte[]. * @throws IOException If an error occurs reading from the stream, etc. */ public static byte[] decompressGZIP(byte[] buffer) throws IOException { // Wrap the buffer in a ByteArrayInputStream and extract the decompressed pieces // into a ByteArrayOutputStream. ByteArrayInputStream bytesIn = new ByteArrayInputStream(buffer); GZIPInputStream gzipIn = new GZIPInputStream(bytesIn); ByteArrayOutputStream bytesBuffer = new ByteArrayOutputStream(); byte[] chunkBuffer = new byte[65536]; for (int bytesRead = gzipIn.read(chunkBuffer); bytesRead > 0; bytesRead = gzipIn.read(chunkBuffer)) { // Push this chunk into the ByteArrayOutputStream bytesBuffer.write(chunkBuffer, 0, bytesRead); } gzipIn.close(); return bytesBuffer.toByteArray(); } // decompressGZIP /** * Create and return an InputStream that will read the given byte[] and decompress it * as bytes are read from the stream. Reading the byte[] as a decompression stream * saves memory when the array is large. * * @param buffer A byte[] containing a GZIP-compressed value. * @return An InputStream that will stream the decompressed value as * bytes as read. * @throws IOException If an error occurs creating the GZIPInputStream (e.g., the * byte[] value is not a valid GZIP-compressed message). 
*/ public static InputStream getGZIPDecompressStream(byte[] buffer) throws IOException { ByteArrayInputStream bytesIn = new ByteArrayInputStream(buffer); GZIPInputStream gzipIn = new GZIPInputStream(bytesIn); return gzipIn; } // getGZIPDecompressStream /** * Create a String by converting each byte directly into a character. However, if any * byte in the given value is less than a space, a hex string is returned instead. * * @param value A byte[] to convert. * @return A String of the same values as characters replaced 1-to-1 or converted * into a hex string. */ public static String deWhite(byte[] value) { // If the value contains anything less than a space. StringBuilder buffer = new StringBuilder(); boolean bAllPrintable = true; for (byte b : value) { if ((int)(b & 0xFF) < ' ') { bAllPrintable = false; break; } } if (bAllPrintable) { // All >= space; convert directly to chars. for (byte b : value) { buffer.append((char)b); } } else { // At least one non-printable. Convert to hex. buffer.append("0x"); for (byte b : value) { buffer.append(toHexChar(((int)b & 0xF0) >> 4)); buffer.append(toHexChar(((int)b & 0xF))); } } return buffer.toString(); } // deWhite /** * Create a String by converting each byte in the given ByteBuffer directly into a * character. However, if any byte in the given value is less than a space, a hex * string is returned instead. This method calls {@link #copyBytes(ByteBuffer)} to * safely copy the bytes in the given buffer and then calls {@link #deWhite(byte[])} * to do the work. * * @param value A ByteBuffer to convert. * @return A String of the same values as characters replaced 1-to-1 or converted * into a hex string. */ public static String deWhite(ByteBuffer value) { return deWhite(copyBytes(value)); } // deWhite /** * Indicate if the given string ends with the given character. This is essentially * the same as {@link String#endsWith(String)} except that it tests for ending with * a character instead of a String. 
False is returned if the given string is null, * empty, or its last character does not match ch. * * @param str String to be tested. * @param ch Char to test for. * @return True if the string's last character is ch. */ public static boolean endsWith(String str, char ch) { return str != null && str.length() > 0 && str.charAt(str.length() - 1) == ch; } // endsWith // Hexadecimal digits private final static String hexChars = "0123456789ABCDEF"; /** * Converts the given bytes into a hexadecimal representation. * * @param bytes Source bytes * @return Hexadecimal string */ public static String toHexBytes(byte[] bytes) { StringBuilder builder = new StringBuilder(); for (byte b : bytes) { int first = (b >> 4) & 15; int second = b & 15; builder.append(hexChars.charAt(first)).append(hexChars.charAt(second)); } return builder.toString(); } // toHexBytes ///// Calendar formatting /** * Format a Calendar date with a given precision. 'precision' must be a Calendar * "field" value such as Calendar.MINUTE. The allowed precisions and the corresponding * string formats returned are: *
     *      Calendar.MILLISECOND:   YYYY-MM-DD hh:mm:ss.SSS 
     *      Calendar.SECOND:        YYYY-MM-DD hh:mm:ss 
     *      Calendar.MINUTE:        YYYY-MM-DD hh:mm 
     *      Calendar.HOUR:          YYYY-MM-DD hh 
     *      Calendar.DATE:          YYYY-MM-DD
     *      Calendar.MONTH:         YYYY-MM
     *      Calendar.YEAR:          YYYY
     * 
* Note that Calendar.DAY_OF_MONTH is a synonym for Calendar.DATE. * * @param date Date as a Calendar to be formatted. * @param precision Calendar field value of desired precision. * @return String formatted to the requested precision. */ public static String formatDate(Calendar date, int precision) { assert date != null; // Remember that the bloody month field is zero-relative! switch (precision) { case Calendar.MILLISECOND: // YYYY-MM-DD hh:mm:ss.SSS return String.format("%04d-%02d-%02d %02d:%02d:%02d.%03d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1, date.get(Calendar.DAY_OF_MONTH), date.get(Calendar.HOUR_OF_DAY), date.get(Calendar.MINUTE), date.get(Calendar.SECOND), date.get(Calendar.MILLISECOND)); case Calendar.SECOND: // YYYY-MM-DD hh:mm:ss return String.format("%04d-%02d-%02d %02d:%02d:%02d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1, date.get(Calendar.DAY_OF_MONTH), date.get(Calendar.HOUR_OF_DAY), date.get(Calendar.MINUTE), date.get(Calendar.SECOND)); case Calendar.MINUTE: // YYYY-MM-DD hh:mm return String.format("%04d-%02d-%02d %02d:%02d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1, date.get(Calendar.DAY_OF_MONTH), date.get(Calendar.HOUR_OF_DAY), date.get(Calendar.MINUTE)); case Calendar.HOUR: // YYYY-MM-DD hh return String.format("%04d-%02d-%02d %02d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1, date.get(Calendar.DAY_OF_MONTH), date.get(Calendar.HOUR_OF_DAY)); case Calendar.DATE: // YYYY-MM-DD return String.format("%04d-%02d-%02d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1, date.get(Calendar.DAY_OF_MONTH)); case Calendar.MONTH: // YYYY-MM return String.format("%04d-%02d", date.get(Calendar.YEAR), date.get(Calendar.MONTH)+1); case Calendar.YEAR: // YYYY return String.format("%04d", date.get(Calendar.YEAR)); } throw new IllegalArgumentException("Unknown precision: " + precision); } // formatDate /** * Format a Calendar date as "YYYY-MM-DD HH:mm:ss". 
This is a convenience method * that calles {@link #formatDate(Calendar, int)} with Calendar.SECOND for 'precision'. * * @param date Date as a Calendar to be formatted. * @return "YYYY-MM-DD HH:mm:ss". */ public static String formatDate(Calendar date) { return formatDate(date, Calendar.SECOND); } // formatDate ///// Date as milliseconds (long) formatting /** * Format a Date.getTime() value in the UTC time zone as a string with a given * precision. 'precision' must be a Calendar "field" value such as Calendar.MINUTE. * The allowed precisions and the corresponding string formats returned are: *
     *      Calendar.MILLISECOND:   YYYY-MM-DD hh:mm:ss.SSS 
     *      Calendar.SECOND:        YYYY-MM-DD hh:mm:ss 
     *      Calendar.MINUTE:        YYYY-MM-DD hh:mm 
     *      Calendar.HOUR:          YYYY-MM-DD hh 
     *      Calendar.DATE:          YYYY-MM-DD
     *      Calendar.MONTH:         YYYY-MM
     *      Calendar.YEAR:          YYYY
     * 
* Note that Calendar.DAY_OF_MONTH is a synonym for Calendar.DATE. This is a * convenience that converts the given time value to a GregorianCalendar object and * then calls {@link #formatDate(Calendar, int)}. * * @param time Date.getTime() value to be formatted in UTC time zone. * @param precision Calendar field value of desired precision. * @return String formatted to the requested precision. */ public static String formatDateUTC(long time, int precision) { // Map date/time to a GregorianCalendar object (GMT time zone). GregorianCalendar date = new GregorianCalendar(UTC_TIMEZONE); date.setTimeInMillis(time); return formatDate(date, precision); } // formatDateUTC /** * Format a Date.getTime() value as "YYYY-MM-DD HH:mm:ss". This method creates a * GregorianCalendar object using the local time zone and then calls * {@link #formatDate(Calendar)}. * * @param time Date/time in Date.getTime() format (milliseconds since the epoch). * @return "YYYY-MM-DD HH:mm:ss". */ public static String formatDate(long time) { // Map date/time to a GregorianCalendar object (local time zone). GregorianCalendar date = new GregorianCalendar(); date.setTimeInMillis(time); return formatDate(date, Calendar.SECOND); } // formatDate /** * Format a Date.getTime() value as "YYYY-MM-DD HH:mm:ss". This method creates a * GregorianCalendar object using the GMT time zone and then calls * {@link #formatDate(Calendar)}. * * @param time Date/time in Date.getTime() format (milliseconds since the epoch). * @return "YYYY-MM-DD HH:mm:ss". */ public static String formatDateUTC(long time) { // Map date/time to a GregorianCalendar object (GMT time zone). GregorianCalendar date = new GregorianCalendar(UTC_TIMEZONE); date.setTimeInMillis(time); return formatDate(date, Calendar.SECOND); } // formatDateUTC ///// Date formatting /** * Format a Date in the UTC time zone with a given set of precision. 'precision' * must be a Calendar "field" value such as Calendar.MINUTE. 
The allowed precisions * and the corresponding string formats returned are: *
     *      Calendar.MILLISECOND:   YYYY-MM-DD hh:mm:ss.SSS 
     *      Calendar.SECOND:        YYYY-MM-DD hh:mm:ss 
     *      Calendar.MINUTE:        YYYY-MM-DD hh:mm 
     *      Calendar.HOUR:          YYYY-MM-DD hh 
     *      Calendar.DATE:          YYYY-MM-DD
     *      Calendar.MONTH:         YYYY-MM
     *      Calendar.YEAR:          YYYY
     * 
* Note that Calendar.DAY_OF_MONTH is a synonym for Calendar.DATE. This is a * convenience that converts the given Date to a GregorianCalendar object and then * calls {@link #formatDate(Calendar, int)}. * * @param time Java.util.Date value to be formatted in UTC time zone. * @param precision Calendar field value of desired precision. * @return String formatted to the requested precision. */ public static String formatDateUTC(Date time, int precision) { // Map date/time to a GregorianCalendar object (GMT time zone). GregorianCalendar date = new GregorianCalendar(UTC_TIMEZONE); date.setTimeInMillis(time.getTime()); return formatDate(date, precision); } // formatDateUTC /** * Format a Date value as "YYYY-MM-DD HH:mm:ss". This method creates a * GregorianCalendar object using the GMT time zone and then calls * {@link #formatDate(Calendar)}. * * @param time Date value. * @return "YYYY-MM-DD HH:mm:ss". */ public static String formatDateUTC(Date time) { // Map date/time to a GregorianCalendar object (GMT time zone). GregorianCalendar date = new GregorianCalendar(UTC_TIMEZONE); date.setTimeInMillis(time.getTime()); return formatDate(date); } // formatDateUTC /** * Format the given elapsed time in milliseconds as a nice readable string. For * example: * 3721000 returns "1 hour, 2 minutes, 1 second" * 1500 returns "2 seconds" * 7201000 returns "2 hours, 1 second" * Milliseconds are rounded to the nearest second. * * @param millis Elapsed time in milliseconds. * @return A string in the format "[[h hour[s][, ]][m minute[s][, ]][s second[s]]" */ public static String formatElapsedTime(long millis) { // Round to the nearest second. long secs = (millis + 500) / 1000; StringBuilder buffer = new StringBuilder(); if (secs > 3600) { // >= 1 hour. long hours = secs / 3600; if (hours == 1) { buffer.append("1 hour"); } else { buffer.append("" + hours + " hours"); } secs -= hours * 3600; } if (secs > 60) { // Non-zero minutes. 
if (buffer.length() > 0) { buffer.append(", "); } long mins = secs / 60; if (mins == 1) { buffer.append("1 minute"); } else { buffer.append("" + mins + " minutes"); } secs -= mins * 60; } if (secs > 0 || buffer.length() == 0) { // Non-zero seconds or the enter value is zero. if (buffer.length() > 0) { buffer.append(", "); } if (secs == 1) { buffer.append("1 second"); } else { buffer.append("" + secs + " seconds"); } } return buffer.toString(); } // formatElapsedTime /** * Extract the bytes in the given ByteBuffer and return it as a byte[] without * affecting the mark, position, or limit of the given buffer. This method should be * used instead of {@link #getBytes(ByteBuffer)} when the ByteBuffer might be re-read * again. * * @param bytes ByteBuffer. * @return Contents between 'position' and 'limit' (aka 'remaining') as a * byte[]. Parameter object is unaffected. */ public static byte[] copyBytes(ByteBuffer bytes) { ByteBuffer copy = bytes.duplicate(); byte[] result = new byte[copy.remaining()]; // bytes between position and limit copy.get(result); return result; } // getBytes /** * Extract the bytes in the given ByteBuffer and return it as a byte[]. CAUTION: this * method calls ByteBuffer.get(), which transfers bytes from the ByteBuffer to * the result buffer. Hence, it is "destructive" in the sense that the value cannot be * examined again without calling ByteBuffer.rewind() or something else. * * @param bytes ByteBuffer. * @return Contents between 'position' and 'limit' (aka 'remaining') as a * byte[]. * @see #copyBytes(ByteBuffer) */ public static byte[] getBytes(ByteBuffer bytes) { byte[] result = new byte[bytes.remaining()]; // bytes between position and limit bytes.get(result); return result; } // getBytes /** * Verify that the given value is either "true" or "false" and return the corresponding * boolean value. If the value is invalid, an IllegalArgumentException is thrown. * * @param value Candidate boolean value in string form. 
* @return Boolean value of string if valid. * @throws IllegalArgumentException If the valie is not "true" or "false". */ public static boolean getBooleanValue(String value) throws IllegalArgumentException { require("true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value), "'true' or 'false' expected: " + value); return "true".equalsIgnoreCase(value); } // getBooleanValue /** * Compute the MD5 of the given byte[] array. If the MD5 algorithm is not available * from the MessageDigest registry, an IllegalArgumentException will be thrown. * * @param src Binary value to compute the MD5 digest for. Can be empty but not null. * @return 16-byte MD5 digest value. */ public static byte[] getMD5(byte[] src) { assert src != null; try { return MessageDigest.getInstance("MD5").digest(src); } catch (NoSuchAlgorithmException ex) { throw new IllegalArgumentException("Missing 'MD5' algorithm", ex); } } // getMD5 /** * Compute the MD5 of the given Unicode string. Because the MD5 algorithm is * byte-oriented, this method first converts the string to bytes using UTF-8 and then * calls {@link #getMD5(byte[])}. * * @param src String value to compute the MD5 digest for. Can be empty but not null. * @return 16-byte MD5 digest value. */ public static byte[] getMD5(String src) { assert src != null; return getMD5(toBytes(src)); // converts via UTF-8 } // getMD5 /** * Get the stack trace from the given exception or error as a string. By default, * Throwable objects only allow accessing the stack trace as an StackTraceElement[] * or via a PrintStream or PrintWriter. * * @param ex Exception, Error, or other object that implements Throwable. * @return The exception's stack trace as a string (with embedded newlines). */ public static String getStackTrace(Throwable ex) { // Wrap a ByteArrayOutputStream with a PrintStream and write the stack trace to // the PrintStream. 
ByteArrayOutputStream outStream = new ByteArrayOutputStream(); PrintStream prnStrm = new PrintStream(outStream); ex.printStackTrace(prnStrm); prnStrm.close(); // flushes content to the byte stream // Extract the byte stream as a single string. return outStream.toString(); } // getStackTrace /** * Get the concatenated value of all Text nodes that are immediate children of the * given Element. If the element has no content, it will not have a child Text node. * If it does have content, it will usually have a single child Text node. But in * rare cases it could have multiple child Text nodes. If multiple child Text nodes * are found, their content is concatenated into a single string, each separated by a * single space. The value returned is trimmed of beginning and ending whitespace. * If the element has no child Text nodes, or if all child Text nodes are empty or * have whitespace-only values, an empty string is returned. * * @param elem Element to examine. * @return Concatenated text of all child Text nodes. An empty string is returned * if there are no child Text nodes or they are all empty or contain only * whitespace. */ public static String getElementText(Element elem) { StringBuilder result = new StringBuilder(); NodeList nodeList = elem.getChildNodes(); for (int index = 0; index < nodeList.getLength(); index++) { Node childNode = nodeList.item(index); if (childNode != null && (childNode instanceof Text)) { result.append(" "); result.append(((Text)childNode).getData()); } } return result.toString().trim(); } // getElementText /** * Convert (encode) the given binary value to a hex string. * * @param value Binary value. * @return Hex string representation of same value. * @throws IllegalArgumentException If the given value is null. */ public static String hexFromBinary(byte[] value) throws IllegalArgumentException { return DatatypeConverter.printHexBinary(value); } // hexFromBinary /** * Convert (decode) the given Hex-encoded String to its binary form. 
* * @param hexValue Hex-encoded string. * @return Decoded binary value. * @throws IllegalArgumentException If the given string is not a valid hex value. */ public static byte[] hexToBinary(String hexValue) throws IllegalArgumentException { Utils.require(hexValue.length() % 2 == 0, "Invalid hex value (must be a multiple of 2 chars): " + hexValue); return DatatypeConverter.parseHexBinary(hexValue); } // hexToBinary /** * Return the first index where the given character occurs in the given buffer or -1 * if is not found. This is like String.indexOf() but it works on byte[] arrays. If * the given buffer is null or empty, -1 is returned. * * @param buffer byte[] to search. * @param ch Character to find. * @return Zero-relative index where character was first found or -1 if the * character does not occur or is not found. */ public static int indexOf(byte[] buffer, char ch) { if (buffer == null) { return -1; } for (int index = 0; index < buffer.length; index++) { if (buffer[index] == ch) { return index; } } return -1; } // indexOf /** * Return true if the given character is a decimal digit: 0-9. Compared to * Character.isDigit(), this method is stricter and allow recognizes ISO-LATIN-1 * digits. * * @param ch Char to test. * @return True if the given character is a valid digit: '0' to '9'. */ public static boolean isDigit(char ch) { return (ch >= '0' && ch <= '9'); } // isHexDigit /** * Return true if the given character is a valid hex digit: 0-9, a-z, or A-Z. * * @param ch Char to test. * @return True if the given character is a valid hex digit: 0-9, a-z, or A-Z. */ public static boolean isHexDigit(char ch) { return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } // isHexDigit /** * Convenience method that tests if the given string is null or empty. This prevents * having to write (str == null || str.isEmpty()). * * @param str String to test, possible null. 
* @return True if str == null || str.isEmpty()
     */
    public static boolean isEmpty(String str) {
        return str == null || str.isEmpty();
    }   // isEmpty

    /**
     * Return true if the given character is an upper or lower case letter. Compared to
     * Character.isLetter(), this method is stricter and only recognizes ASCII characters
     * "A" to "Z" and "a" to "z" as letters.
     * 
     * @param ch    Char to test.
     * @return      True if the given character is an upper- or lower-case letter.
     */
    public static boolean isLetter(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }   // isLetter

    /**
     * Indicate if the given character is considered a wildcard character ('*' or '?').
     * 
     * @param ch    Character to test
     * @return      True if the given character is considered a wildcard ('*' or '?').
     */
    public static boolean isWildcardChar(char ch) {
        return ch == '?' || ch == '*';
    }   // isWildcardChar

    /**
     * Indicate if the given string matches the given pattern, which can contain '*'
     * and/or '?' wildcards. The strings are compared case-insensitive. If the given
     * string is null or empty, false is returned. The given pattern must have a value.
     * 
     * NOTE(review): when a '*' is followed directly by further wildcard characters
     * (including '?'), all of them are skipped together with the '*', so such a '?'
     * does not appear to demand an extra character -- confirm this is intended.
     * 
     * @param strIn     String to be tested.
     * @param patternIn Pattern to be matched.
     * @return          True if the string matches the pattern.
     * @throws IllegalArgumentException If the given pattern is null or empty (the
     *                  exception is thrown without a message).
     */
    public static boolean matchesPattern(String strIn, String patternIn) throws IllegalArgumentException {
        if (patternIn == null || patternIn.length() == 0) {
            throw new IllegalArgumentException();
        }

        // If the test string is empty, we say that it doesn't match the given pattern.
        if (strIn == null || strIn.length() == 0) {
            return false;
        }

        // Upcase both strings so that we perform a case-insensitive comparison.
        // (toUpperCase() without arguments uses the default locale.)
        String str = strIn.toUpperCase();
        String pattern = patternIn.toUpperCase();

        // Move through string as it matches pattern.
        int strInx = 0;
        int patInx = 0;
        while (strInx < str.length()) {
            // Did we consume all pattern chars?
            if (patInx >= pattern.length()) {
                // Pattern ended but more chars in string
                return false;
            }
            if (pattern.charAt(patInx) == '*') {
                // Multi-char wildcard; start by skipping all next wildcard chars
                do patInx++;
                while (patInx < pattern.length() && isWildcardChar(pattern.charAt(patInx)));
                if (patInx >= pattern.length()) {
                    // Rest of pattern was wildcards; string is considered matched.
                    return true;
                }

                // See if string contains the current non-wildcard pattern char subset.
                // Each failed attempt restarts the search one character further right
                // (strStartInx) until the subset matches or the string is exhausted.
                boolean bSubsetMatched = false;
                int strStartInx = strInx;
                do {
                    // Skip to next string char that matches current char in pattern
                    strInx = strStartInx;
                    while (strInx < str.length() && str.charAt(strInx) != pattern.charAt(patInx)) {
                        strInx++;
                    }
                    if (strInx >= str.length()) {
                        // Hit end of string without finding a match.
                        return false;
                    }

                    // See how far string and pattern characters match.
                    int subPatInx = patInx;
                    do {
                        // Current string and subset chars match; skip both.
                        subPatInx++;
                        strInx++;
                    } while (strInx < str.length() &&
                             subPatInx < pattern.length() &&
                             pattern.charAt(subPatInx) != '*' &&
                             (str.charAt(strInx) == pattern.charAt(subPatInx) ||
                              pattern.charAt(subPatInx) == '?'));
                    if ((subPatInx >= pattern.length() && strInx >= str.length()) ||
                        (subPatInx < pattern.length() && pattern.charAt(subPatInx) == '*')) {
                        // String matched pattern subset (*) or entire rest of pattern.
                        bSubsetMatched = true;
                        patInx = subPatInx;
                    } else {
                        strStartInx++;
                    }
                } while (!bSubsetMatched);
            } else if (pattern.charAt(patInx) == '?' ||
                       str.charAt(strInx) == pattern.charAt(patInx)) {
                // single char matched; advance to next char
                strInx++;
                patInx++;
            } else {
                return false;   // String char didn't match pattern char
            }
        }

        // If we get here, we hit the end of string; it matches the pattern if the
        // rest of the pattern consists only of '*'
        while (patInx < pattern.length() && pattern.charAt(patInx) == '*') {
            patInx++;
        }
        return patInx >= pattern.length();
    }   // matchesPattern

    /**
     * Compute the MD5 of the given string and return it as a Base64-encoded value. The
     * string is first converted to bytes using UTF-8, and the MD5 is computed on that
     * value. The MD5 value is 16 bytes, but the Base64-encoded string is 24 chars.
     * Any exception raised while computing the digest is rethrown wrapped in a
     * RuntimeException.
     * 
     * @param strIn A Unicode string.
     * @return      Base64-encoded MD5 digest of the string's UTF-8 encoded value.
     */
    public static String md5Encode(String strIn) {
        try {
            MessageDigest md5 = MessageDigest.getInstance("md5");
            byte[] bin = toBytes(strIn);
            byte[] bout = md5.digest(bin);
            String strOut = javax.xml.bind.DatatypeConverter.printBase64Binary(bout);
            return strOut;
        }catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }   // md5Encode

    /**
     * Convert a date in the format yyyy-MM-dd HH:mm:ss.SSS into a GregorianCalendar object
     * using the UTC timezone. All trailing components are optional in right-to-left order,
     * hence all of the following are valid:
     *
     *      2012-12-12 12:12:12.123
     *      2012-12-12 12:12:12
     *      2012-12-12 12:12
     *      2012-12-12 12
     *      2012-12-12
     *      2012-12
     *      2012
     * 
* Omitted time components default to 0; omitted date components default to 1. If the * given string is badly formatted, an exception is thrown. * * @param dateString Date string in the format "yyyy-MM-dd HH:mm:ss.SSS". * @return GregorianCalendar object in UTC time zone representing the * given value. * @throws IllegalArgumentException If the given date string is badly formatted. */ public static GregorianCalendar parseDate(String dateString) throws IllegalArgumentException { // SimpleDateFormat is kinda slow and not thread safe, so we'll parse manually. Utils.require(dateString != null, "Date string cannot be null"); String str = dateString.trim(); Utils.require(str.length() >= 0, "Invalid date format: " + dateString); AtomicInteger pos = new AtomicInteger(); try { // Scan elements int year = scanDatePart('\0', 0, str, pos, 4, 4, 1, 9999); int month = scanDatePart('-', 1, str, pos, 1, 2, 1, 12); int day = scanDatePart('-', 1, str, pos, 1, 2, 1, 31); int hour = scanDatePart(' ', 0, str, pos, 1, 2, 0, 23); int min = scanDatePart(':', 0, str, pos, 1, 2, 0, 59); int sec = scanDatePart(':', 0, str, pos, 1, 2, 0, 59); int milli = scanDatePart('.', 0, str, pos, 0, 3, 0, 999); // Assemble parts into a GregorianCalendar in UTC timezone. GregorianCalendar date = new GregorianCalendar(UTC_TIMEZONE); date.set(Calendar.YEAR, year); date.set(Calendar.MONTH, month - 1); // 0-relative date.set(Calendar.DAY_OF_MONTH, day); date.set(Calendar.HOUR_OF_DAY, hour); // 0-23 date.set(Calendar.MINUTE, min); date.set(Calendar.SECOND, sec); date.set(Calendar.MILLISECOND, milli); return date; } catch (Exception e) { throw new IllegalArgumentException("Invalid date format"); } } // parseDate /** * Parse the given URI and return its path nodes, query, and fragment parts as * separate components. The extracted parts are not decoded. For example, if the * URI string is: *
     *      /foo%20bar/baz?x=%20+y=2%20#thisbethefragment
     * 
* the extracted parts are: *
     *      path list:  {"foo%20bar", "baz"}
     *      query:      "x=%20+y=2%20"
     *      fragment:   "thisbethefragment"
     * 
* All parameters must be non-null. * * @param uriStr URI string to be split. * @param uriPathList Will contain path nodes extracted from URI in order, not decoded. * @param uriQuery Will contain query extracted from URI, if any, not decoded. * @param uriFragment Will contain fragment extracted from URI, if any, not decoded. * @see #splitURI(String, StringBuilder, StringBuilder, StringBuilder) */ public static void parseURI(String uriStr, List uriPathList, StringBuilder uriQuery, StringBuilder uriFragment) { assert uriStr != null; assert uriPathList != null; assert uriQuery != null; assert uriFragment != null; // Start with everything empty. uriPathList.clear(); uriQuery.setLength(0); uriFragment.setLength(0); // Find location of query (?) and fragment (#) markers, if any. int quesInx = uriStr.indexOf('?'); int hashInx = uriStr.indexOf('#'); if (hashInx >= 0 && quesInx >= 0 && hashInx < quesInx) { // Technically this is an invalid URI since the fragment should always follow // the query. We'll just pretend we didn't see the hash. hashInx = -1; } // The path starts at index 0. Point to where it ends. int pathEndInx = quesInx >= 0 ? quesInx : hashInx >= 0 ? hashInx : uriStr.length(); // Split path into nodes based on "/". Append non-empty nodes to path list. String[] pathNodes = uriStr.substring(0, pathEndInx).split("/"); for (String pathNode : pathNodes) { if (pathNode.length() > 0) { uriPathList.add(pathNode); } } // Extract the query part, if any. if (quesInx >= pathEndInx) { int quesEndInx = hashInx > quesInx ? hashInx : uriStr.length(); uriQuery.append(uriStr.substring(quesInx + 1, quesEndInx)); } // Extract the fragment part, if any. if (hashInx >= 0) { uriFragment.append(uriStr.substring(hashInx + 1, uriStr.length())); } } // parseURI /** * Parse the given XML document, creating a DOM tree whose root Document object is * returned. An IllegalArgumentException is thrown if the XML is malformed. * * @param xmlDoc XML document as a String. 
* @return Root document element of the parsed DOM tree. * @throws IllegalArgumentException If the XML is malformed. */ public static Element parseXMLDocument(String xmlDoc) throws IllegalArgumentException { // Parse the given XML document returning its root document Element if it parses. // Wrap the document payload as an InputSource. Reader stringReader = new StringReader(xmlDoc); InputSource inputSource = new InputSource(stringReader); // Parse the document into a DOM tree. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder parser = null; Document doc = null; try { parser = dbf.newDocumentBuilder(); doc = parser.parse(inputSource); } catch (Exception ex) { // Turn ParserConfigurationException, SAXException, etc. into an IllegalArgumentException throw new IllegalArgumentException("Error parsing XML document: " + ex.getMessage()); } return doc.getDocumentElement(); } // parseXMLDocument /** * Parse an XML document from the given Reader, creating a DOM tree whose root * Document object is returned. An IllegalArgumentException is thrown if the XML is * malformed. * * @param reader Reader from which XML text is read. * @return Root document element of the parsed DOM tree. * @throws IllegalArgumentException If the XML is malformed. */ public static Element parseXMLDocument(Reader reader) throws IllegalArgumentException { // Wrap the document payload as an InputSource. InputSource inputSource = new InputSource(reader); // Parse the document into a DOM tree. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder parser = null; Document doc = null; try { parser = dbf.newDocumentBuilder(); doc = parser.parse(inputSource); } catch (Exception ex) { // Turn ParserConfigurationException, SAXException, etc. 
into an IllegalArgumentException throw new IllegalArgumentException("Error parsing XML document: " + ex.getMessage()); } return doc.getDocumentElement(); } // parseXMLDocument /** * Read all data from the given reader into a buffer and return it as a single String. * If an I/O error occurs while reading the reader, it is passed through to the caller. * The reader is closed when before returning. * * @param reader Open character reader. * @return All characters read from reader accumulated as a single String. * @throws IOException If an error occurs reading from the reader. */ public static String readerToString(Reader reader) throws IOException { assert reader != null; StringWriter strWriter = new StringWriter(); char[] buffer = new char[65536]; int charsRead = reader.read(buffer); while (charsRead > 0) { strWriter.write(buffer, 0, charsRead); charsRead = reader.read(buffer); } reader.close(); return strWriter.toString(); } // readerToString /** * Assert that the given expression and throw an IllegalArgumentException if it is * false. This check is performed even if -enableassertions is not in effect. * * @param assertion Boolean expression that must be true. * @param errMsg String used to in the IllegalArgumentException * constructor if thrown. * @throws IllegalArgumentException If the expression is false. * @see #require(boolean, String, Object...) */ public static void require(boolean assertion, String errMsg) throws IllegalArgumentException { if (!assertion) { throw new IllegalArgumentException(errMsg); } } // require /** * Assert that the given expression and throw an IllegalArgumentException if it is * false. This check is performed even if -enableassertions is not in effect. This * method allows the IllegalArgumentException text to be formatted using a * String.format() format string and a variable argument list. * * @param assertion Boolean expression that must be true. * @param errMsgFormat Format string used to compose the error message. 
* Must follow the conventions of String.format() * (e.g., "Error on '%s': occurred: %d). * @param args Variable argument list passed to String.format(). * @throws IllegalArgumentException If the expression is false. * @see #require(boolean, String) */ public static void require(boolean assertion, String errMsgFormat, Object... args) throws IllegalArgumentException { if (!assertion) { throw new IllegalArgumentException(String.format(errMsgFormat, args)); } } // require /** * Assert that the given org.w3c.doc.Node is a comment element or a Text element and * that it ontains whitespace only, otherwise throw an IllegalArgumentException using * the given error message. This is helpful when nothing is expected at a certain * place in a DOM tree, yet comments or whitespace text nodes can appear. * * @param node A DOM Node object. * @param errMsg String used to in the IllegalArgumentException * constructor if thrown. * @throws IllegalArgumentException If the expression is false. */ public static void requireEmptyText(Node node, String errMsg) throws IllegalArgumentException { require((node instanceof Text) || (node instanceof Comment), errMsg + ": " + node.toString()); if (node instanceof Text) { Text text = (Text)node; String textValue = text.getData(); require(textValue.trim().length() == 0, errMsg + ": " + textValue); } } // requireEmptyText /** * Split the given string using the given separate, returning the components as a * set. This method does the opposite as {@link #concatenate(Collection, String)}. * If a null or empty string is passed, an empty set is returned. * * @param str String to be split. * @param sepStr Separator string that lies between values. * @return Set of separated substrings. The set may be empty but it will * not be null. */ public static Set split(String str, String sepStr) { // Split but watch out for empty substrings. 
Set result = new HashSet(); if (str != null) { for (String value : str.split(sepStr)) { if (value.length() > 0) { result.add(value); } } } return result; } // split /** * Split the given string using the given separate, returning the components as a * SortedSet. This method is similar to {@link #split(String, String)} except that * values are sorted by string value. If a null or empty string is passed, an empty * set is returned. * * @param str String to be split. * @param sepStr Separator string that lies between values. * @return SortedSet of separated substrings. The set may be empty but it * will not be null. */ public static SortedSet splitSorted(String str, String sepStr) { // Split but watch out for empty substrings. SortedSet result = new TreeSet(); if (str != null) { for (String value : str.split(sepStr)) { if (value.length() > 0) { result.add(value); } } } return result; } // splitSorted /** * Split the given string by a separator char. Unlike split(String,String), doesn't use RegEx * * @param str String to be split. * @param sepChr Separator character that lies between values. * @return List of separated substrings */ public static List split(String str, char sepChr) { List result = new ArrayList(); int idx = 0; while(true) { int idx2 = str.indexOf(sepChr, idx); if(idx2 < 0) { result.add(str.substring(idx)); break; } result.add(str.substring(idx, idx2)); idx = idx2 + 1; } return result; } /** * Split-out the path, query, and fragment parts of the given URI string. The URI is * expected to that obtained from a GET or other HTTP request. The extracted parts * are not decoded. For example, if the URI string is: *
     *      /foo/bar?x=%20+y=2%20#thisbethefragment
     * 
* the extract parts are: *
     *      path:       /foo/bar
     *      query:      x=%20+y=2%20
     *      fragment:   thisbethefragment
     * 
* All parameters must be non-null. * * @param uriStr URI string to be split. * @param uriPath Will contain path extracted from URI. * @param uriQuery Will contain query extracted from URI, if any, not decoded. * @param uriFragment Will contain fragment extracted from URI, if any, not decoded. */ public static void splitURI(String uriStr, StringBuilder uriPath, StringBuilder uriQuery, StringBuilder uriFragment) { assert uriStr != null; assert uriPath != null; assert uriQuery != null; assert uriFragment != null; // Find location of query (?) and fragment (#) markers, if any. int quesInx = uriStr.indexOf('?'); int hashInx = uriStr.indexOf('#'); if (hashInx >= 0 && quesInx >= 0 && hashInx < quesInx) { // Technically this is an invalid URI since the fragment should always follow // the query. We'll just pretend we didn't see the hash. hashInx = -1; } // The path starts at index 0. Point to where it ends. uriPath.setLength(0); int pathEndInx = quesInx >= 0 ? quesInx : hashInx >= 0 ? hashInx : uriStr.length(); uriPath.append(uriStr.substring(0, pathEndInx)); // Extract the query part, if any. uriQuery.setLength(0); if (quesInx >= pathEndInx) { int quesEndInx = hashInx > quesInx ? hashInx : uriStr.length(); uriQuery.append(uriStr.substring(quesInx + 1, quesEndInx)); } // Extract the fragment part, if any. uriFragment.setLength(0); if (hashInx >= 0) { uriFragment.append(uriStr.substring(hashInx + 1, uriStr.length())); } } // splitURI /** * Split the given query component of a URI into its decoded parts. First, parts * delimited by non-encoded '&'s are separated. Then, each unencoded '+' is replaced * with a space within each part. Finally, the parts are URL-decoded and stored in the * result string array. For example, if the URI is: *
     *      "/foo/bar?a=cat&b=dog+%24sheep"
     * 
* The query component can be extracted using * {@link #splitURI(String, StringBuilder, StringBuilder, StringBuilder)}, yielding: *
     *    "a=cat&b=dog+%24sheep".
     * 
* If this string is then passed to this method, it would return the following length- * two array of Strings: *
     *    result[0] = "a=cat"
     *    result[1] = "b=dog $sheep"
     * 
* * @param uriQuery Query component of a URI (cannot be null). * @return An array of separated, decoded strings, one per part. */ public static String[] splitURIQuery(String uriQuery) { assert uriQuery != null; // Separate '&' parts into separate strings. String[] parts = uriQuery.split("&"); for (int inx = 0; inx < parts.length; inx++) { // Replace '+' signs with ' ' and decode this part. parts[inx] = Utils.urlDecode(parts[inx].replace('+', ' ')); } return parts; } // splitURIQuery /** * Split the given query component of a URI into its decoded parts and return them as * a name/value map. First, parts delimited by non-encoded '&'s are separated. Then, * each unencoded '+' is replaced with a space within each part. Finally, the parts * are URL-decoded and stored in the result map keyed by name. For example, if the URI * is: *
     *      "/foo/bar?a=cat&b=dog+%24sheep&c"
     * 
* The query component can be extracted using * {@link #splitURI(String, StringBuilder, StringBuilder, StringBuilder)}, yielding: *
     *    "a=cat&b=dog+%24sheep&c".
     * 
* If this string is then passed to this method, it would return the following Map: *
     *    "a": "cat"
     *    "b": "dog $sheep"
     *    "c": ""
     * 
* As shown, if a parameter has no "=" sign, the whole parameter is used as the name * and the value is an empty string. This method calls {@link #splitURIQuery(String)}. * * @param uriQuery Query component of a URI (cannot be null). * @return A map of decoded parameter name/value pairs. * @throws IllegalArgumentException If a parameter is specified twice. */ public static Map parseURIQuery(String uriQuery) throws IllegalArgumentException { if (uriQuery == null) { return new HashMap(0); } String[] queryParts = Utils.splitURIQuery(uriQuery); Map map = new HashMap(queryParts.length); for (String queryPart : queryParts) { int eqInx = queryPart.indexOf('='); String paramName = eqInx < 0 ? queryPart : queryPart.substring(0, eqInx); String paramValue = eqInx < 0 ? "" : queryPart.substring(eqInx + 1); require(map.put(paramName, paramValue) == null, "Query parameter can only be specified once: " + paramName); } return map; } // parseURIQuery /** * Concatenate and encode the given name/value pairs into a valid URI query string. * This method is the complement of {@link #parseURIQuery(String)}. * * @param uriParams Unencoded name/value pairs. * @return URI query in the form {name 1}={value 1}&...&{name}={value n}. */ public static String joinURIQuery(Map uriParams) { StringBuilder buffer = new StringBuilder(); for (String name : uriParams.keySet()) { String value = uriParams.get(name); if (buffer.length() > 0) { buffer.append("&"); } buffer.append(Utils.urlEncode(name)); if (!Utils.isEmpty(value)) { buffer.append("="); buffer.append(Utils.urlEncode(value)); } } return buffer.toString(); } // joinURIQuery /** * Indicate if the given string starts with the given prefix. This is a more compact * way of writing string.regionMatches(true, 0, prefix, 0, prefix.length()). * * @param string String to be tested. * @param prefix Prefix to compare against string. * @return True if string starts with prefix, case-insensitive. 
*/ public static boolean startsWith(String string, String prefix) { return string.regionMatches(true, 0, prefix, 0, prefix.length()); } // startsWith /** * Convert the given String to a byte[] value using UTF-8 and wrap in a ByteBuffer. * * @param value String value. * @return UTF-8 converted value wrapped in a ByteBuffer. */ public static ByteBuffer toByteBuffer(String value) { return ByteBuffer.wrap(toBytes(value)); } // toByteBuffer /** * Convert the given string to a byte[] in using the {@link #UTF8_CHARSET} encoder. * This is the inverse of {@link #toString(byte[])}. A null value is allowed, which * begets a null result. * * @param str String value to be converted. * @return Lossless, encoded value as a byte[], or null if str is null. */ public static byte[] toBytes(String str) { if (str == null) { return null; } //optimization for ascii strings byte[] ascii = toAsciiBytes(str); if(ascii != null) return ascii; ByteBuffer bb = UTF8_CHARSET.encode(str); return getBytes(bb); } // toBytes // return string as bytes if it has only ascii symbols, or null private static byte[] toAsciiBytes(String str) { for(int i = 0; i < str.length(); i++) { if(str.charAt(i) > 127) return null; } byte[] bytes = new byte[str.length()]; for(int i = 0; i < str.length(); i++) { bytes[i] = (byte)str.charAt(i); } return bytes; } /** * Convert a long to a byte[] using the format Cassandra wants for a column or * supercolumn name. The antimethod for this one is {@link #toLong(byte[])}. * * @param value Long value to be converted. * @return Same value encoded into an byte[8] array. */ public static byte[] toBytes(long value) { byte[] bytes = new byte[8]; ByteBuffer buffer = ByteBuffer.wrap(bytes); buffer.putLong(value); return bytes; } // toBytes /** * Convert the given value, which must be between 0 and 15, into its equivalent hex * character between 0 and F. * * @param value Integer value between 0 and 15. * @return Equivalent hex character between 0 and F. 
* @throws IllegalArgumentException If the value is out of range. */ public static char toHexChar(int value) { switch (value) { case 0: return '0'; case 1: return '1'; case 2: return '2'; case 3: return '3'; case 4: return '4'; case 5: return '5'; case 6: return '6'; case 7: return '7'; case 8: return '8'; case 9: return '9'; case 10: return 'A'; case 11: return 'B'; case 12: return 'C'; case 13: return 'D'; case 14: return 'E'; case 15: return 'F'; default: throw new IllegalArgumentException("Value must be between 0 and 15: " + value); } } // toHexChar /** * Convert the given hex character (0-9, A-Z, or a-z) into its decimal equivalent value. * * @param ch A hex character. * @return Decimal equivalent of value (0-15). */ public static int fromHexChar(char ch) { switch (ch) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: throw new IllegalArgumentException("Must be a hex char: " + ch); } } // fromHexChar /** * Parse the given string and return a list of the whitespace-delimited tokens that it * contains mapped to the number of occurrences of each token. Only whitespace * characters (SP, CR, LF, TAB, FF, VT) are used to delimit tokens. * * @param string String to be tokenized. * @return Map of tokens to occurrence counts. */ public static Map tokenize(String string) { Map result = new HashMap(); String[] tokens = string.split("\\s"); // regular expression for "all whitespace" for (String token : tokens) { // For some reasons, sometimes split() creates empty values. if (token.length() == 0) { continue; } // Tokens are returned down-cased. 
String tokenDown = token.toLowerCase(); AtomicInteger count = result.get(tokenDown); if (count == null) { result.put(tokenDown, new AtomicInteger(1)); } else { count.incrementAndGet(); } } return result; } // tokenize /** * Convert a long value encoded as a byte[] back into its long value. This method is * the opposite of {@link #toBytes(long)}. * * @param bytes byte[8] array with an encoded long value. * @return Decoded long value. */ public static long toLong(byte[] bytes) { // Extract a long stored in a byte[] using a ByteBuffer method. ByteBuffer buffer = ByteBuffer.wrap(bytes); return buffer.getLong(); } // toLong /** * Convert the given byte[] to a String using the {@link #UTF8_CHARSET} decoder. This * is the inverse of {@link #toBytes(String)}. As with that method, null begets null. * * @param bytes A byte[] representing a String value. * @return The decoded String value, or null if null is given. */ public static String toString(byte[] bytes) { if (bytes == null) { return null; } //optimization for ASCII string String ascii = toAsciiString(bytes); if(ascii != null) return ascii; return new String(bytes, UTF8_CHARSET); } // toString // return string if bytes have only ascii symbols, or null private static String toAsciiString(byte[] bytes) { for(int i = 0; i < bytes.length; i++) { if(bytes[i] < 0) return null; } char[] chars = new char[bytes.length]; for(int i = 0; i < bytes.length; i++) { chars[i] = (char)bytes[i]; } return new String(chars); } /** * Extract the byte[] within the given ByteBuffer and decode into a String using UTF-8. * This method calls {@link #copyBytes(ByteBuffer)}, which examines the ByteBuffer * without side-effects, therefore allowing it to be read again. * * @param bytes ByteBuffer object. * @return Internal byte[] value converted to a String using UTF-8. 
*/ public static String toString(ByteBuffer bytes) { return toString(copyBytes(bytes)); } // toString /** * Convert the a subset of given byte[] starting at index 'offset' for 'length' bytes * to a String using the reverse process used by {@link #toBytes(String)}. As with * that method, null begets null. * * @param bytes Byte[] to convert. * @param offset Index of first byte to convert. * @param length Number of bytes to convert. * @return Decoded string, or null if null is given. */ public static String toString(byte[] bytes, int offset, int length) { if (bytes == null) { return null; } //optimization for ASCII string String ascii = toAsciiString(bytes, offset, length); if(ascii != null) return ascii; return new String(bytes, offset, length, UTF8_CHARSET); } // toString // return string if bytes have only ascii symbols, or null private static String toAsciiString(byte[] bytes, int offset, int length) { for(int i = 0; i < length; i++) { if(bytes[offset + i] < 0) return null; } char[] chars = new char[length]; for(int i = 0; i < length; i++) { chars[i] = (char)bytes[offset + i]; } return new String(chars); } /** * Ensure that the given string is no longer than the given max length, truncating it * if necessary. If string.length() is <= maxLength, the same string is returned. * Otherwise, a substring of the first maxLength characters is returned. * * @param string String to test. * @param maxLength Maximum length. * @return Same or truncated string as described above. */ public static String truncateTo(String string, int maxLength) { if (string.length() <= maxLength) { return string; } return string.substring(0, maxLength); } // truncateTo /** * Truncate the given GregorianCalendar date to the nearest week. This is done by * cloning it and rounding the value down to the closest Monday. If the given date * already occurs on a Monday, a copy of the same date is returned. * * @param date A GregorianCalendar object. 
* @return A copy of the same value, truncated to the nearest Monday. */ public static GregorianCalendar truncateToWeek(GregorianCalendar date) { // Round the date down to the MONDAY of the same week. GregorianCalendar result = (GregorianCalendar)date.clone(); switch (result.get(Calendar.DAY_OF_WEEK)) { case Calendar.TUESDAY: result.add(Calendar.DAY_OF_MONTH, -1); break; case Calendar.WEDNESDAY: result.add(Calendar.DAY_OF_MONTH, -2); break; case Calendar.THURSDAY: result.add(Calendar.DAY_OF_MONTH, -3); break; case Calendar.FRIDAY: result.add(Calendar.DAY_OF_MONTH, -4); break; case Calendar.SATURDAY: result.add(Calendar.DAY_OF_MONTH, -5); break; case Calendar.SUNDAY: result.add(Calendar.DAY_OF_MONTH, -6); break; default: break; } return result; } // truncateToWeek /** * Decode the given string by replacing %-escape sequences with the corresponding * real characters. For example "A%20B" returns "A B". If a null string is passed, * the result is null. This method calls URLDecoder.decode("UTF-8", strIn), which * converts UTF-8 sequences to Unicode characters. It also converts non-escaped * '+' signs into a space. * * @param strIn Input string to decode. * @return Ouput string with %hh sequences replaced by the corresponding * character, or null if strIn is null. * @see #urlEncode(String) */ public static String urlDecode(String strIn) { // Null begets null. if (strIn == null) { return null; } try { return URLDecoder.decode(strIn, "UTF-8"); } catch (UnsupportedEncodingException e) { // This should never happen since UTF-8 always exists. throw new IllegalArgumentException("UTF-8"); } } // urlDecode /** * Encode the given string by replacing characters not legal in URLs with the * appropriate escape sequences. For example "A B" becomes "A%20B". If a null string * is passed, the result is null. This method calls URLEncoder.decode("UTF-8", strIn), * which converts non-ASCII characters to UTF-8 sequences and then escapes the UTF-8 * sequences. 
It also converts spaces into '+' signs. * * @param strIn Input string to encode. * @return URL-encoded version of the same string, or null if strIn is null. * @see #urlDecode(String) */ public static String urlEncode(String strIn) { // Null begets null. if (strIn == null) { return null; } try { return URLEncoder.encode(strIn, "UTF-8"); } catch (UnsupportedEncodingException e) { // This should never happen since UTF-8 always exists. throw new IllegalArgumentException("UTF-8"); } } // urlEncode ////// Private methods /** * Scan a date/time part, verify its value, and return it. All parts are considered * optional, so if pos.get() is >= str.length(), the default value is returned and * nothing is parsed. Otherwise, the part must begin with given prefix, consist of * digits in the required range, and denote a value in the given range. pos is * incremented to reflect characters parsed. * * @param prefix If not '\0', the part must begin with this character. * @param defaultValue If all characters are consumed, this value is returned. * @param str The string to be parsed. * @param pos The current parse position (index into str). * @param minDigits Minimum number of digits the part must have. * @param maxDigits Maximum number of digits parsed for part. * @param minValue Minimum value part can have. * @param maxValue Maximum value part can have. * @throws IllegalArgumentException If the part is present but has the * wrong prefix, too few digits, or is out of range. * @return Value of scanned date/time part. */ private static int scanDatePart(char prefix, int defaultValue, String str, AtomicInteger pos, int minDigits, int maxDigits, int minValue, int maxValue) throws IllegalArgumentException { // If all characters are consumed, just return the default value. if (pos.get() >= str.length()) { return defaultValue; } // If there's a prefix character, require it. 
if (prefix != '\0') { require(str.charAt(pos.getAndIncrement()) == prefix, "'" + prefix + "' expected"); } // Scan up to maxDigits into a numeric value. int value = 0; int digitsScanned = 0; while (pos.get() < str.length() && digitsScanned < maxDigits) { char ch = str.charAt(pos.get()); if (ch >= '0' && ch <= '9') { value = value * 10 + (ch - '0'); digitsScanned++; pos.incrementAndGet(); } else { break; } } // Ensure we got the required minimum digits and the value is within range. Utils.require(digitsScanned >= minDigits && value >= minValue && value <= maxValue, "Invalid value for date/time part"); return value; } // scanDatePart /** * Deletes a directory recursively * @param dir * directory to delete */ public static boolean deleteDirectory(final File dir) { boolean success = true; if (dir != null && dir.exists()) { try { if (dir.isDirectory()) { for (final File file : dir.listFiles()) { if(file == null) { return false; } if (!deleteDirectory(file)) { success = false; return success; } } } if (!dir.delete()) { success = false; return success; } return success; } catch (Exception e) { // Failed to delete files or directory } } return false; } } // class Utils




© 2015 - 2025 Weber Informatics LLC | Privacy Policy