All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xwiki.rendering.wikimodel.impl.WikiScannerUtil Maven / Gradle / Ivy

The newest version!
/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.xwiki.rendering.wikimodel.impl;

import java.util.List;

import org.xwiki.rendering.wikimodel.WikiParameter;

/**
 * This class contains some utility methods used by scanners.
 *
 * @version $Id: e40537f670f6c8c1b6df36312c26a57599eff7ff $
 * @since 4.0M1
 */
public class WikiScannerUtil
{
    /**
     * The default character to use has escaping char.
     */
    private static final char DEFAULT_ESCAPECHAR = '\u005c\u005c';

    /**
     * Extracts and returns a substring of the given string starting from the
     * given open sequence and finishing by the specified close sequence. This
     * method unescapes all symbols prefixed by the given escape symbol.
     *
     * @param str from this string the substring framed by the specified open
     * and close sequence will be returned
     * @param open the start substring sequence
     * @param close the closing substring sequence
     * @return a substring of the given string starting from the given open
     *         sequence and finishing by the specified close sequence
     */
    public static String extractSubstring(String str, String open, String close)
    {
        return extractSubstring(str, open, close, DEFAULT_ESCAPECHAR, true);
    }

    /**
     * Extracts and returns a substring of the given string starting from the
     * given open sequence and finishing by the specified close sequence. This
     * method unescapes all symbols prefixed by the given escape symbol.
     *
     * @param str from this string the substring framed by the specified open
     * and close sequence will be returned
     * @param open the start substring sequence
     * @param close the closing substring sequence
     * @param escape the escape symbol
     * @return a substring of the given string starting from the given open
     *         sequence and finishing by the specified close sequence
     */
    public static String extractSubstring(
        String str,
        String open,
        String close,
        char escape)
    {
        return extractSubstring(str, open, close, escape, true);
    }

    /**
     * Extracts and returns a substring of the given string starting from the
     * given open sequence and finishing by the specified close sequence. This
     * method unescapes all symbols prefixed by the given escape symbol.
     *
     * @param str from this string the substring framed by the specified open
     * and close sequence will be returned
     * @param open the start substring sequence
     * @param close the closing substring sequence
     * @param escape the escape symbol
     * @param cleanEscape indicate if the escaping char has to be removed.
     * Useful when the substring use the same escaping that the string.
     * @return a substring of the given string starting from the given open
     *         sequence and finishing by the specified close sequence
     */
    public static String extractSubstring(
        String str,
        String open,
        String close,
        char escape,
        boolean cleanEscape)
    {
        int i;
        StringBuffer buf = new StringBuffer();
        int len = str.length();
        for (i = 0; i < len; i++) {
            if (str.startsWith(open, i)) {
                i += open.length();
                break;
            }
        }

        boolean escaped = false;
        for (; i < len; i++) {
            if (escaped) {
                char ch = str.charAt(i);
                buf.append(ch);
                escaped = false;
            } else {
                if (str.startsWith(close, i)) {
                    break;
                }
                char ch = str.charAt(i);
                escaped = ch == escape;
                if (!escaped || !cleanEscape) {
                    buf.append(ch);
                }
            }
        }

        return buf.toString();
    }

    /**
     * This method copies to the given buffer all characters from the specified
     * position of the character array to the next delimiter position. This
     * method returns the next position just after the delimiter (or the length
     * of the array if no delimiters was found).
     *
     * @param array the array of character used as a source of chars
     * @param pos the start position in the array
     * @param delimiter the delimiter; this method copies all character from the
     * current position to the first delimiter sequence
     * @param buf the buffer where the content should be appended
     * @return the next position just after the delimiter of the end of the
     *         sequence
     */
    public static int getNextSequence(
        char[] array,
        int pos,
        char[] delimiter,
        StringBuffer buf)
    {
        buf.delete(0, buf.length());
        char quot = 0;
        for (; pos < array.length; pos++) {
            char ch = array[pos];
            if (quot != 0) {
                if (ch == quot) {
                    quot = 0;
                }
            } else {
                if (ch == '\"') {
                    quot = ch;
                } else {
                    // Checks if a new delimiter sequence was found in the
                    // current position
                    int i = skipSequence(array, pos, delimiter);
                    if (i > pos) {
                        pos = i;
                        break;
                    }
                }
            }
            buf.append(ch);
        }
        return pos;
    }

    /**
     * @param array from this array of bytes the next token will be returned
     * @param pos the current position in the array of bytes
     * @param buf to this buffer the extracted token value will be appended
     * @param trim this array is used to return the boolean flag specifying if
     * the value collected in the buffer should be trimmed or not
     * @param escapeChar the escaping character
     * @return the new position in the array after extracting of a new token
     */
    private static int getNextToken(
        char[] array,
        int pos,
        char[] delimiter,
        StringBuffer buf,
        boolean[] trim,
        char escapeChar)
    {
        buf.delete(0, buf.length());
        boolean escaped = false;
        if (pos < array.length && (array[pos] == '\'' || array[pos] == '"')) {
            trim[0] = false;
            char endChar = array[pos];
            pos++;
            for (; pos < array.length && (escaped || array[pos] != endChar); pos++) {
                if (escaped) {
                    buf.append(array[pos]);
                    escaped = false;
                } else {
                    escaped = array[pos] == escapeChar;
                    if (!escaped) {
                        buf.append(array[pos]);
                    }
                }
            }
            if (pos < array.length) {
                pos++;
            }
        } else {
            trim[0] = true;
            for (; pos < array.length; pos++) {
                if (escaped) {
                    buf.append(array[pos]);
                    escaped = false;
                } else {
                    if ((array[pos] == '=' || skipSequence(
                        array,
                        pos,
                        delimiter) > pos))
                    {
                        break;
                    }
                    if (array[pos] == '\'' || array[pos] == '"') {
                        break;
                    }

                    escaped = array[pos] == escapeChar;
                    if (!escaped) {
                        buf.append(array[pos]);
                    }
                }
            }
        }

        return pos;
    }

    /**
     * Indicate if the specified sequence starts from the given position in the
     * character array.
     *
     * @param array the array of characters
     * @param arrayPos the position of the first character in the array;
     * starting from this position the sequence should be skipped
     * @param sequence the sequence of characters to match
     * @return true if the sequence is found, false otherwise
     */
    public static boolean matchesSequence(
        char[] array,
        int arrayPos,
        char[] sequence)
    {
        int i;
        int j;
        for (i = arrayPos, j = 0; i < array.length && j < sequence.length; i++, j++) {
            if (array[i] != sequence[j]) {
                break;
            }
        }
        return j == sequence.length;
    }

    /**
     * Moves forward the current position in the array until the first not empty
     * character is found.
     *
     * @param array the array of characters where the spaces are searched
     * @param pos the current position in the array; starting from this position
     * the spaces will be searched
     * @param buf to this buffer all not empty characters will be added
     * @return the new position int the array of characters
     */
    private static int removeWhitespaces(char[] array, int pos, StringBuffer buf)
    {
        buf.delete(0, buf.length());
        for (; pos < array.length
            && (array[pos] == '=' || Character.isWhitespace(array[pos])); pos++)
        {
            if (array[pos] == '=') {
                buf.append(array[pos]);
            }
        }
        return pos;
    }

    /**
     * Skips the specified sequence if it starts from the given position in the
     * character array.
     *
     * @param array the array of characters
     * @param arrayPos the position of the first character in the array;
     * starting from this position the sequence should be skipped
     * @param sequence the sequence of characters to skip
     * @return a new value of the character counter
     */
    public static int skipSequence(char[] array, int arrayPos, char[] sequence)
    {
        int i;
        int j;
        for (i = arrayPos, j = 0; i < array.length && j < sequence.length; i++, j++) {
            if (array[i] != sequence[j]) {
                break;
            }
        }
        return j == sequence.length ? i : arrayPos;
    }

    /**
     * Splits the given string into a set of key-value pairs; all extracted
     * values will be added to the given list
     *
     * @param str the string to split
     * @param list to this list all extracted values will be added
     */
    public static int splitToPairs(String str, List list)
    {
        return splitToPairs(str, list, null);
    }

    public static int splitToPairs(
        String str,
        List list,
        char escapeChar)
    {
        return splitToPairs(str, list, null, null, escapeChar);
    }

    /**
     * Splits the given string into a set of key-value pairs; all extracted
     * values will be added to the given list
     *
     * @param str the string to split
     * @param list to this list all extracted values will be added
     * @param delimiter a delimiter for individual key/value pairs
     */
    public static int splitToPairs(
        String str,
        List list,
        String delimiter)
    {
        return splitToPairs(str, list, delimiter, null);
    }

    public static int splitToPairs(
        String str,
        List list,
        String delimiter,
        String end)
    {
        return splitToPairs(str, list, delimiter, end, DEFAULT_ESCAPECHAR);
    }

    /**
     * Splits the given string into a set of key-value pairs; all extracted
     * values will be added to the given list
     *
     * @param str the string to split
     * @param list to this list all extracted values will be added
     * @param delimiter a delimiter for individual key/value pairs
     * @param end the ending sequence, if null it's not taken into account
     * @param escapeChar the escaping character
     * @return the index where parser stopped
     */
    public static int splitToPairs(
        String str,
        List list,
        String delimiter,
        String end,
        char escapeChar)
    {
        if (str == null) {
            return 0;
        }
        char[] array = str.toCharArray();
        if (delimiter == null) {
            delimiter = " ";
        }
        char[] delimiterArray = delimiter.toCharArray();
        char[] endArray = end != null ? end.toCharArray() : new char[0];
        StringBuffer buf = new StringBuffer();
        int i = 0;
        boolean[] trim = {false};
        for (; i < array.length; ) {
            String key = null;
            String value = null;
            i = removeWhitespaces(array, i, buf);
            if (i >= array.length) {
                break;
            }
            int prev = i;
            i = skipSequence(array, i, delimiterArray);
            if (i >= array.length) {
                break;
            }
            if (i > prev) {
                i = removeWhitespaces(array, i, buf);
                if (i >= array.length) {
                    break;
                }
            }
            // if provided ending sequence is found, we stop parsing
            if (end != null && matchesSequence(array, i, endArray)) {
                break;
            }

            i = getNextToken(array, i, delimiterArray, buf, trim, escapeChar);
            key = buf.toString().trim();

            i = removeWhitespaces(array, i, buf);
            if (buf.indexOf("=") >= 0) {
                i = getNextToken(
                    array,
                    i,
                    delimiterArray,
                    buf,
                    trim,
                    escapeChar);
                value = buf.toString();
                if (trim[0]) {
                    value = value.trim();
                }
            }

            WikiParameter entry = new WikiParameter(key, value);
            list.add(entry);
        }

        return i;
    }

    /**
     * Unescapes the given string and returns the result. This method uses the
     * default escape symbol (see {@link #DEFAULT_ESCAPECHAR}).
     *
     * @param str the string to unescape
     * @return an unescaped string
     */
    public static String unescape(String str)
    {
        return unescape(str, DEFAULT_ESCAPECHAR);
    }

    /**
     * Unescapes the given string and returns the result.
     *
     * @param str the string to unescape
     * @param escape the symbol used to escape characters
     * @return an unescaped string
     */
    public static String unescape(String str, char escape)
    {
        if (str == null) {
            return "";
        }
        StringBuffer buf = new StringBuffer();
        char[] array = str.toCharArray();
        boolean escaped = false;
        for (int i = 0; i < array.length; i++) {
            char ch = array[i];
            if (escaped) {
                buf.append(ch);
                escaped = false;
            } else {
                escaped = (ch == escape);
                if (!escaped) {
                    buf.append(ch);
                }
            }
        }
        return buf.toString();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy