// edu.emory.mathcs.util.security.CharStrings Maven / Gradle / Ivy
//
// Go to download
/*
 * Written by Dawid Kurzyniec and released to the public domain, as explained
 * at http://creativecommons.org/licenses/publicdomain
 */

package edu.emory.mathcs.util.security;

import java.util.*;
import java.io.*;

/**
 * Utility methods for securely manipulating character arrays. The methods
 * allow character arrays to be treated similarly to strings, yet they ensure
 * that all temporary arrays are zeroed-out before being discarded.
 *
 * The motivation for this class is that the String class is not
 * appropriate for holding passwords and other sensitive information.
 * Strings cannot be zeroed-out before unreferencing, thus the content
 * may be dangling in memory for quite a while before it is garbage-collected,
 * and it can stay in the process data block even longer, until it is
 * overwritten. It has been demonstrated that an attacker can obtain the data
 * in clear text by forcing the operating system to swap out the application
 * in question, and then by reading the swap file.
 * To minimize the risk, sensitive data should be cleared explicitly as soon as
 * possible, which suggests using mutable character arrays instead of strings.
 * This class allows operating on such arrays much like on strings; in
 * particular, it provides methods to securely concatenate them, as well as
 * to write them to, and read them from, streams in the UTF format.
 *
 * The "UTF" format used throughout is the one implemented below: each char
 * is encoded independently as 1, 2 or 3 bytes, the NUL character (code 0)
 * takes the two-byte form, and the stream methods prefix the encoded bytes
 * with a two-byte big-endian length. Four-byte sequences are rejected by
 * the decoder.
 *
 * @author Dawid Kurzyniec
 * @version 1.0
 */

public class CharStrings {
    /** This is a utility class; it is never instantiated. */
    private CharStrings() {}

    /**
     * Returns a concatenation of two character arrays.
     * When both arrays are non-null the result is a freshly allocated array.
     * When exactly one of them is null, the other array is returned as-is
     * (not copied), so clearing the result also clears that argument.
     * When both are null, null is returned.
     *
     * @param s1 first array, may be null
     * @param s2 second array, may be null
     * @return array containing s1 concatenated with s2
     */
    public static char[] concat(char[] s1, char[] s2) {
        if (s1 == null) return s2;
        if (s2 == null) return s1;
        char[] s3 = new char[s1.length + s2.length];
        System.arraycopy(s1, 0, s3, 0, s1.length);
        System.arraycopy(s2, 0, s3, s1.length, s2.length);
        return s3;
    }

    /**
     * Compares two character arrays by delegating to
     * {@link Arrays#equals(char[], char[])}.
     *
     * NOTE(review): Arrays.equals is not documented to run in constant time;
     * confirm whether that matters for the callers of this method.
     *
     * @param s1 first array
     * @param s2 second array
     * @return true if arrays have identical content; false otherwise
     */
    public static boolean equals(char[] s1, char[] s2) {
        return Arrays.equals(s1, s2);
    }

    /**
     * Zeroes-out the specified character array. Does nothing if the array
     * is null.
     *
     * @param s the array to zero-out, may be null
     */
    public static void clear(char[] s) {
        if (s != null) Arrays.fill(s, (char)0);
    }

    /**
     * Writes the specified character array to the output stream as a
     * two-byte length followed by its UTF encoding.
     *
     * @param out the output to write to
     * @param s array to write
     * @return the number of bytes written (including the length prefix)
     * @throws UTFDataFormatException if the encoding exceeds 65535 bytes
     * @throws IOException if I/O error occurs
     */
    public static int writeUTF(OutputStream out, char[] s) throws IOException {
        return writeUTF(out, s, 0, s.length);
    }

    /**
     * Writes a portion of the specified character array to the output stream
     * as a two-byte length followed by its UTF encoding. The temporary byte
     * buffer is zeroed-out before this method returns, whether or not the
     * write succeeds.
     *
     * @param out the output to write to
     * @param s array to write
     * @param off start offset within s
     * @param len number of characters to write
     * @return the number of bytes written (including the length prefix)
     * @throws UTFDataFormatException if the encoding exceeds 65535 bytes
     * @throws IOException if I/O error occurs
     * @throws IndexOutOfBoundsException if off/len do not denote a valid
     *         range within s
     */
    public static int writeUTF(OutputStream out, char[] s, int off, int len)
        throws IOException
    {
        int utflen = getUTFLen(s, off, len);
        if (utflen > 65535) {
            // the length prefix is only two bytes wide
            throw new UTFDataFormatException();
        }
        byte[] dest = new byte[utflen + 2];
        try {
            dest[0] = (byte) ((utflen >>> 8) & 0xFF);
            dest[1] = (byte) ((utflen >>> 0) & 0xFF);
            toUTF(s, off, len, dest, 2);
            out.write(dest);
        }
        finally {
            // never leave the encoded secret behind, even if write() failed
            Arrays.fill(dest, (byte)0);
        }
        return dest.length;
    }

    /**
     * Converts the specified character array into its UTF encoding (without
     * a length prefix). The caller owns the returned array and is
     * responsible for clearing it.
     *
     * @param s the array to encode
     * @return UTF encoded array
     */
    public static byte[] toUTF(char[] s) {
        return toUTF(s, 0, s.length);
    }

    /**
     * Converts a portion of the specified character array into its UTF
     * encoding (without a length prefix). The caller owns the returned array
     * and is responsible for clearing it.
     *
     * @param s the array to encode
     * @param off the start offset within s
     * @param len the number of characters to encode
     * @return UTF encoded array
     * @throws IndexOutOfBoundsException if off/len do not denote a valid
     *         range within s
     */
    public static byte[] toUTF(char[] s, int off, int len) {
        byte[] dest = new byte[getUTFLen(s, off, len)];
        toUTF(s, off, len, dest, 0);
        return dest;
    }

    /**
     * Returns the number of bytes of the UTF encoding of a portion of the
     * specified character array.
     *
     * @param s the character array
     * @param off the start offset within s
     * @param len the number of characters to include
     * @return the number of bytes of the UTF encoding
     * @throws NullPointerException if s is null
     * @throws IndexOutOfBoundsException if off/len do not denote a valid
     *         range within s
     */
    public static int getUTFLen(char[] s, int off, int len) {
        if (s == null) {
            throw new NullPointerException();
        } else if ((off < 0) || (off > s.length) || (len < 0) ||
                   ((off + len) > s.length) || ((off + len) < 0)) {
            throw new IndexOutOfBoundsException();
        }

        int utflen = 0;
        int c;

        for (int i = 0; i < len; i++) {
            // index relative to the requested offset, not to the array start
            c = s[off + i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                utflen++;
            } else if (c > 0x07FF) {
                utflen += 3;
            } else {
                // 0x0080..0x07FF, and also 0x0000 (two-byte form)
                utflen += 2;
            }
        }

        return utflen;
    }

    /**
     * Encodes len characters of s, starting at off, into dest starting at
     * destoff. The caller must have validated the source range and sized
     * dest using {@link #getUTFLen}.
     */
    private static void toUTF(char[] s, int off, int len, byte[] dest, int destoff) {
        int c, count = destoff;

        for (int i = 0; i < len; i++) {
            // index relative to the requested offset, not to the array start
            c = s[off + i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                dest[count++] = (byte) c;
            } else if (c > 0x07FF) {
                dest[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                dest[count++] = (byte) (0x80 | ((c >>  6) & 0x3F));
                dest[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
            } else {
                dest[count++] = (byte) (0xC0 | ((c >>  6) & 0x1F));
                dest[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
            }
        }
    }

    /**
     * Reads a length-prefixed, UTF encoded character array from an input
     * stream. If the stream already implements DataInput it is used
     * directly; otherwise it is wrapped in a DataInputStream.
     *
     * Note that an argument whose static type is both an InputStream and a
     * DataInput (e.g. DataInputStream) must be cast to one of the two types
     * to select an overload.
     *
     * @param in the input stream to read from
     * @return decoded character array
     * @throws IOException if I/O error occurs
     */
    public final static char[] readUTF(InputStream in) throws IOException {
        DataInput din = (in instanceof DataInput)
            ? (DataInput)in
            : new DataInputStream(in);
        return readUTF(din);
    }

    /**
     * Reads a length-prefixed, UTF encoded character array from a data
     * input. The temporary byte buffer is zeroed-out before this method
     * returns, whether or not decoding succeeds.
     *
     * @param in the data input to read from
     * @return decoded character array
     * @throws IOException if I/O error occurs
     */
    public final static char[] readUTF(DataInput in) throws IOException {
        int utflen = in.readUnsignedShort();
        byte bytearr[] = new byte[utflen];
        try {
            in.readFully(bytearr, 0, utflen);
            return fromUTF(bytearr);
        }
        finally {
            Arrays.fill(bytearr, (byte)0);
        }
    }

    /**
     * Recovers a character array out of its UTF encoding.
     *
     * @param utfString array containing the UTF encoded character array
     * @return the decoded character array
     * @throws UTFDataFormatException if the input is malformed
     * @throws IOException if I/O error occurs
     */
    public final static char[] fromUTF(final byte[] utfString) throws IOException {
        return fromUTF(utfString, 0, utfString.length);
    }

    /**
     * Recovers a character array out of a portion of a byte array holding
     * its UTF encoding. Any temporary array that is not returned to the
     * caller is zeroed-out before this method exits, including when
     * decoding fails.
     *
     * @param utfString array containing the UTF encoded character array
     * @param off start offset within utfString
     * @param len number of bytes to include
     * @return the decoded character array
     * @throws UTFDataFormatException if the input is malformed or a
     *         multi-byte sequence is cut short by the end of the range
     * @throws IOException if I/O error occurs
     * @throws NullPointerException if utfString is null
     * @throws IndexOutOfBoundsException if off/len do not denote a valid
     *         range within utfString
     */
    public final static char[] fromUTF(final byte[] utfString, int off, int len) throws IOException {
        if (utfString == null) {
            throw new NullPointerException();
        } else if ((off < 0) || (len < 0) || (off > utfString.length - len)) {
            throw new IndexOutOfBoundsException();
        }

        // len is a byte count, so decoding stops at off + len
        final int end = off + len;
        // each char needs at least one byte, so len chars is an upper bound
        char[] scratch = new char[len];
        boolean scratchReturned = false;
        int c, char2, char3;
        int pos = off;
        int idx = 0;

        try {
            while (pos < end) {
                c = (int) utfString[pos] & 0xff;
                switch (c >> 4) {
                    case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                        /* 0xxxxxxx*/
                        pos++;
                        scratch[idx++] = (char) c;
                        break;
                    case 12: case 13:
                        /* 110x xxxx   10xx xxxx*/
                        pos += 2;
                        if (pos > end)
                            throw new UTFDataFormatException();
                        char2 = (int) utfString[pos - 1];
                        if ( (char2 & 0xC0) != 0x80)
                            throw new UTFDataFormatException();
                        scratch[idx++] = (char) ( ( (c & 0x1F) << 6) |
                                                 (char2 & 0x3F));
                        break;
                    case 14:
                        /* 1110 xxxx  10xx xxxx  10xx xxxx */
                        pos += 3;
                        if (pos > end)
                            throw new UTFDataFormatException();
                        char2 = (int) utfString[pos - 2];
                        char3 = (int) utfString[pos - 1];
                        if ( ( (char2 & 0xC0) != 0x80) ||
                            ( (char3 & 0xC0) != 0x80))
                            throw new UTFDataFormatException();
                        scratch[idx++] = (char) ( ( (c & 0x0F) << 12) |
                                                 ( (char2 & 0x3F) << 6) |
                                                 ( (char3 & 0x3F) << 0));
                        break;
                    default:
                        /* 10xx xxxx,  1111 xxxx */
                        throw new UTFDataFormatException();
                }
            }

            // The number of chars produced may be less than the byte count
            if (idx == len) {
                scratchReturned = true;
                return scratch;
            }
            char[] trimmed = new char[idx];
            System.arraycopy(scratch, 0, trimmed, 0, idx);
            return trimmed;
        }
        finally {
            // wipe the scratch array unless it is the one handed to the caller
            if (!scratchReturned) {
                Arrays.fill(scratch, (char)0);
            }
        }
    }

}