All downloads are free. Search and download functionality uses the official Maven repository.

org.tomitribe.util.Escapes Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.tomitribe.util;

public class Escapes {

    private Escapes() {
    }

    /**
     * Expands backslash escape sequences found in {@code oldstr}.
     *
     * <p>After a backslash the following are recognized:
     * <ul>
     *   <li>{@code r n f t a e} - carriage return, newline, form feed, tab,
     *       bell (octal 007) and escape (octal 033)</li>
     *   <li>{@code b} - passed through unchanged as backslash + {@code b}</li>
     *   <li>a second backslash - passed through unchanged as two backslashes</li>
     *   <li>{@code cX} - the ASCII character {@code X} xor 64</li>
     *   <li>{@code 0} followed by up to three octal digits, or {@code 1}-{@code 7}
     *       starting a run of up to three octal digits</li>
     *   <li>{@code xHH} (exactly two hex digits) or <code>x{H...}</code>
     *       (one to eight hex digits)</li>
     *   <li>{@code u} followed by exactly four hex digits</li>
     *   <li>{@code U} followed by exactly eight hex digits</li>
     * </ul>
     * Any other escaped character is emitted together with its backslash, and a
     * single trailing backslash is kept as is.
     *
     * <p>Credit to tchrist on StackOverflow
     *
     *  - https://stackoverflow.com/a/4298836/190816
     *  - https://stackoverflow.com/users/471272/tchrist
     *
     * @param oldstr the text to unescape; must not be {@code null}
     * @return the text with the recognized escapes expanded
     * @throws IllegalArgumentException if an escape is truncated, contains an
     *         illegal digit, or denotes a value that is not a valid code point
     * @throws NullPointerException if {@code oldstr} is {@code null}
     */
    public static final String unescape(String oldstr) {

        final int length = oldstr.length();
        final StringBuilder newString = new StringBuilder(length);

        boolean sawBackslash = false;

        for (int i = 0; i < length; i++) {
            int cp = oldstr.codePointAt(i);
            if (cp > Character.MAX_VALUE) {
                /* supplementary code point: it occupies two chars */
                i++;
            }

            if (!sawBackslash) {
                if (cp == '\\') {
                    sawBackslash = true;
                } else {
                    newString.appendCodePoint(cp);
                }
                continue;
            }

            if (cp == '\\') {
                /* an escaped backslash is deliberately passed through as-is */
                sawBackslash = false;
                newString.append('\\');
                newString.append('\\');
                continue;
            }

            switch (cp) {

                case 'r':
                    newString.append('\r');
                    break;

                case 'n':
                    newString.append('\n');
                    break;

                case 'f':
                    newString.append('\f');
                    break;

                /* PASS a \b THROUGH!! */
                case 'b':
                    newString.append("\\b");
                    break;

                case 't':
                    newString.append('\t');
                    break;

                case 'a':
                    newString.append('\007');
                    break;

                case 'e':
                    newString.append('\033');
                    break;

                /*
                 * A "control" character is what you get when you xor its
                 * codepoint with '@'==64.  This only makes sense for ASCII,
                 * and may not yield a "control" character after all.
                 *
                 * Strange but true: "\c{" is ";", "\c}" is "=", etc.
                 */
                case 'c': {
                    if (++i == length) {
                        throw new IllegalArgumentException("trailing \\c");
                    }
                    cp = oldstr.codePointAt(i);
                    /* no surrogate handling needed: anything non-ASCII is rejected */
                    if (cp > 0x7f) {
                        throw new IllegalArgumentException("expected ASCII after \\c");
                    }
                    newString.append(Character.toChars(cp ^ 64));
                    break;
                }

                case '8':
                case '9':
                    throw new IllegalArgumentException("illegal octal digit");

                /*
                 * This digit is the first of a run of up to three octal
                 * digits: unread it and fall through to the shared octal
                 * parsing below.
                 */
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    --i;
                    /* FALLTHROUGH */

                /*
                 * Up to three octal digits are consumed here, which permits
                 * values larger than octal 377, up to octal 777.
                 */
                case '0': {
                    if (i + 1 == length) {
                        /* found \0 at end of string */
                        newString.append('\0');
                        break;
                    }
                    i++;
                    int digits = 0;
                    for (int j = 0; j <= 2; j++) {
                        if (i + j == length) {
                            break;
                        }
                        /* charAt is safe: a surrogate is not an octal digit */
                        final int ch = oldstr.charAt(i + j);
                        if (ch < '0' || ch > '7') {
                            break;
                        }
                        digits++;
                    }
                    if (digits == 0) {
                        /* a bare \0: step back so the next char is processed normally */
                        --i;
                        newString.append('\0');
                        break;
                    }
                    try {
                        final int value = Integer.parseInt(oldstr.substring(i, i + digits), 8);
                        newString.append(Character.toChars(value));
                    } catch (NumberFormatException nfe) {
                        throw new IllegalArgumentException("invalid octal value for \\0 escape", nfe);
                    }
                    i += digits - 1;
                    break;
                } /* end case '0' */

                case 'x': {
                    if (i + 2 > length) {
                        throw new IllegalArgumentException("string too short for \\x escape");
                    }
                    i++;

                    boolean sawBrace = false;
                    if (oldstr.charAt(i) == '{') {
                        /* ok to ignore surrogates here */
                        i++;
                        sawBrace = true;
                    }
                    int j;
                    for (j = 0; j < 8; j++) {

                        /* the unbraced form is exactly two digits */
                        if (!sawBrace && j == 2) {
                            break;
                        }

                        /* a truncated escape must not run off the end of the input */
                        if (i + j >= length) {
                            throw new IllegalArgumentException("string too short for \\x escape");
                        }

                        /* ASCII test also catches surrogates */
                        final int ch = oldstr.charAt(i + j);
                        if (ch > 127) {
                            throw new IllegalArgumentException("illegal non-ASCII hex digit in \\x escape");
                        }

                        if (sawBrace && ch == '}') {
                            break;
                        }

                        if (!isHexDigit(ch)) {
                            throw new IllegalArgumentException(String.format("illegal hex digit #%d '%c' in \\x", ch, ch));
                        }
                    }

                    if (j == 0) {
                        throw new IllegalArgumentException("empty braces in \\x{} escape");
                    }

                    /* after the maximum of eight digits the brace must still be closed */
                    if (sawBrace && (i + j >= length || oldstr.charAt(i + j) != '}')) {
                        throw new IllegalArgumentException("missing closing brace in \\x{} escape");
                    }

                    final int value;
                    try {
                        value = Integer.parseInt(oldstr.substring(i, i + j), 16);
                    } catch (NumberFormatException nfe) {
                        throw new IllegalArgumentException("invalid hex value for \\x escape", nfe);
                    }
                    newString.appendCodePoint(value);
                    if (sawBrace) {
                        /* also consume the closing brace */
                        j++;
                    }
                    i += j - 1;
                    break;
                }

                case 'u': {
                    /* i is at the 'u'; four more chars must follow it */
                    if (i + 4 >= length) {
                        throw new IllegalArgumentException("string too short for \\u escape");
                    }
                    i++;
                    newString.appendCodePoint(parseHexEscape(oldstr, i, 4, 'u'));
                    i += 3;
                    break;
                }

                case 'U': {
                    /* i is at the 'U'; eight more chars must follow it */
                    if (i + 8 >= length) {
                        throw new IllegalArgumentException("string too short for \\U escape");
                    }
                    i++;
                    newString.appendCodePoint(parseHexEscape(oldstr, i, 8, 'U'));
                    i += 7;
                    break;
                }

                default:
                    /* unrecognized escape: pass it through with its backslash */
                    newString.append('\\');
                    newString.appendCodePoint(cp);
                    break;

            }
            sawBackslash = false;
        }

        /* weird to leave one at the end */
        if (sawBackslash) {
            newString.append('\\');
        }

        return newString.toString();
    }

    /** Tells whether {@code ch} is one of 0-9, a-f or A-F. */
    private static boolean isHexDigit(final int ch) {
        return (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
    }

    /**
     * Parses exactly {@code count} hex digits of {@code str} beginning at
     * {@code start}; the caller guarantees that many chars are available.
     * Every char is validated so that sign characters, which
     * {@link Integer#parseInt(String, int)} would accept, are rejected.
     */
    private static int parseHexEscape(final String str, final int start, final int count, final char escape) {
        for (int j = 0; j < count; j++) {
            final char ch = str.charAt(start + j);
            /* the ASCII test also rejects surrogates */
            if (ch > 127) {
                throw new IllegalArgumentException("illegal non-ASCII hex digit in \\" + escape + " escape");
            }
            if (!isHexDigit(ch)) {
                throw new IllegalArgumentException("invalid hex value for \\" + escape + " escape");
            }
        }
        try {
            return Integer.parseInt(str.substring(start, start + count), 16);
        } catch (NumberFormatException nfe) {
            /* eight digits can still overflow an int */
            throw new IllegalArgumentException("invalid hex value for \\" + escape + " escape", nfe);
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy