net.sourceforge.pmd.util.StringUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of pmd-core Show documentation
PMD is an extensible multilanguage static code analyzer. It finds common programming flaws like unused variables, empty catch blocks, unnecessary object creation, and so forth. It's mainly concerned with Java and Apex, but supports 16 other languages. It comes with 400+ built-in rules. It can be extended with custom rules. It uses JavaCC and Antlr to parse source files into abstract syntax trees (AST) and runs rules against them to find violations. Rules can be written in Java or using a XPath query. Currently, PMD supports Java, JavaScript, Salesforce.com Apex and Visualforce, Kotlin, Swift, Modelica, PLSQL, Apache Velocity, JSP, WSDL, Maven POM, HTML, XML and XSL. Scala is supported, but there are currently no Scala rules available. Additionally, it includes CPD, the copy-paste-detector. CPD finds duplicated code in Coco, C/C++, C#, Dart, Fortran, Gherkin, Go, Groovy, HTML, Java, JavaScript, JSP, Julia, Kotlin, Lua, Matlab, Modelica, Objective-C, Perl, PHP, PLSQL, Python, Ruby, Salesforce.com Apex and Visualforce, Scala, Swift, T-SQL, Typescript, Apache Velocity, WSDL, XML and XSL.
There is a newer version: 7.5.0-metrics
Show newest version
/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.util;

import java.text.MessageFormat;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.checkerframework.checker.nullness.qual.NonNull;

import net.sourceforge.pmd.lang.document.Chars;

/**
 * String-related utility functions. See also {@link StringUtils}.
 *
 * @author BrianRemedios
 * @author Clément Fournier
 */
public final class StringUtil {


    /**
     * Matches the control characters in the range 0x00 to 0x1F, except
     * tab (0x09), line feed (0x0A) and carriage return (0x0D).
     */
    private static final Pattern XML_10_INVALID_CHARS = Pattern.compile("[[\\x00-\\x1F]&&[^\\x09\\x0A\\x0D]]");

    /** Utility class, not meant to be instantiated. */
    private StringUtil() {
    }

    /**
     * Wraps the given string in single quotes. A {@code null} argument
     * is treated like the empty string, so the result is then {@code ''}.
     *
     * @param s String to quote, may be null
     *
     * @return The quoted string
     */
    public static String inSingleQuotes(String s) {
        String content = s == null ? "" : s;
        return "'" + content + "'";
    }

    /**
     * Wraps the given string in double quotes. A {@code null} argument is
     * not special-cased: it goes through plain string concatenation, so the
     * text {@code null} ends up between the quotes.
     *
     * @param expected String to quote
     *
     * @return The quoted string
     */
    public static @NonNull String inDoubleQuotes(String expected) {
        final char quote = '"';
        return quote + expected + quote;
    }


    /**
     * Returns the (1-based) line number of the character at the given index.
     * Line terminators (\r, \n) are assumed to be on the line they *end*
     * and not on the following line. A {@code \r\n} pair counts as a single
     * terminator, and both of its characters are on the line it ends.
     * The method also accepts that the given offset be the length of the
     * string (in which case there's no targeted character), to get the line
     * number of a character that would be inserted at the end of the string.
     *
     * <pre>{@code
     *     lineNumberAt("a\nb", 0)  = 1
     *     lineNumberAt("a\nb", 1)  = 1
     *     lineNumberAt("a\nb", 2)  = 2
     *     lineNumberAt("a\nb", 3)  = 2  // charAt(3) doesn't exist though
     *     lineNumberAt("a\nb", 4)  = -1
     *
     *     lineNumberAt("", 0) = 1
     *     lineNumberAt("", _) = -1
     * }</pre>
     *
     * @param charSeq         Char sequence
     * @param offsetInclusive Offset in the sequence of the targeted character.
     *                        May be the length of the sequence.
     * @return -1 if the offset is not in {@code [0, length]}, otherwise
     * the line number
     */
    public static int lineNumberAt(CharSequence charSeq, int offsetInclusive) {
        int len = charSeq.length();

        if (offsetInclusive > len || offsetInclusive < 0) {
            return -1;
        }

        int line = 1;
        // Here curOffset < offsetInclusive <= len always holds, so every
        // charAt(curOffset) below is within bounds.
        for (int curOffset = 0; curOffset < offsetInclusive; curOffset++) {
            char c = charSeq.charAt(curOffset);
            if (c == '\n') {
                line++;
            } else if (c == '\r') {
                if (curOffset + 1 < len && charSeq.charAt(curOffset + 1) == '\n') {
                    if (curOffset == offsetInclusive - 1) {
                        // the targeted character is the LF of a CRLF pair:
                        // it is on the same line as the CR
                        return line;
                    }
                    curOffset++; // SUPPRESS CHECKSTYLE jump to after the \n
                }
                line++;
            }
        }
        return line;
    }

    /**
     * Returns the (1-based) column number of the character at the given index.
     * Line terminators are by convention taken to be part of the line they end,
     * and not the new line they start. Each character has width 1 (including {@code \t}).
     * The method also accepts that the given offset be the length of the
     * string (in which case there's no targeted character); the result is
     * then one more than the column of the last character of the string.
     *
     * <pre>{@code
     *     columnNumberAt("a\nb", 0)  = 1
     *     columnNumberAt("a\nb", 1)  = 2
     *     columnNumberAt("a\nb", 2)  = 1
     *     columnNumberAt("a\nb", 3)  = 2   // charAt(3) doesn't exist though
     *     columnNumberAt("a\nb", 4)  = -1
     *
     *     columnNumberAt("a\r\n", 2)  = 3
     * }</pre>
     *
     * @param charSeq         Char sequence
     * @param offsetInclusive Offset in the sequence
     * @return -1 if the offset is not in {@code [0, length]}, otherwise
     * the column number
     */
    public static int columnNumberAt(CharSequence charSeq, final int offsetInclusive) {
        final int length = charSeq.length();
        if (offsetInclusive < 0 || offsetInclusive > length) {
            return -1;
        }
        if (offsetInclusive == length) {
            // no targeted character: one past the column of the last character
            return length == 0 ? 1 : 1 + columnNumberAt(charSeq, offsetInclusive - 1);
        }

        // the targeted character always counts, even if it is a line terminator
        int column = 1;
        char following = charSeq.charAt(offsetInclusive);
        // walk backwards until the terminator of the previous line is found
        for (int pos = offsetInclusive - 1; pos >= 0; pos--) {
            final char current = charSeq.charAt(pos);
            final boolean isLineFeed = current == '\n';
            // a CR directly followed by LF is part of a CRLF pair, not a terminator on its own
            final boolean isLoneCarriageReturn = current == '\r' && following != '\n';
            if (isLineFeed || isLoneCarriageReturn) {
                break;
            }
            column++;
            following = current;
        }
        return column;
    }

    /**
     * Appends the given char sequence to the builder, like
     * {@link StringBuilder#append(CharSequence)}. When the sequence is a
     * {@link Chars}, the work is delegated to {@code Chars#appendChars}
     * (presumably a bulk copy that is cheaper than the generic path);
     * any other sequence is appended by the builder itself.
     *
     * @param sb      Builder to append to
     * @param charSeq Sequence to append
     *
     * @return The builder
     */
    public static StringBuilder append(StringBuilder sb, CharSequence charSeq) {
        if (!(charSeq instanceof Chars)) {
            return sb.append(charSeq);
        }
        Chars chars = (Chars) charSeq;
        chars.appendChars(sb);
        return sb;
    }

    /**
     * Returns the part of the string that follows the last occurrence
     * of the given character. When the character does not occur at all,
     * the whole string is returned; this differs from
     * {@link StringUtils#substringAfterLast(String, String)},
     * which returns the empty string in that case.
     *
     * @param str String to cut
     * @param c   Delimiter (a character or code point, as accepted by {@link String#lastIndexOf(int)})
     *
     * @return The suffix after the last delimiter, or {@code str} itself
     */
    public static String substringAfterLast(String str, int c) {
        final int lastDelimiter = str.lastIndexOf(c);
        if (lastDelimiter < 0) {
            return str;
        }
        return str.substring(lastDelimiter + 1);
    }

    /**
     * Formats a double to a percentage, keeping {@code numDecimals} decimal places.
     * The number is formatted with {@link Locale#ROOT}, so the decimal
     * separator is always a dot.
     *
     * @param val         a double value between 0 and 1 (inclusive)
     * @param numDecimals The number of decimal places to keep, must not be negative
     *
     * @return A formatted string, e.g. {@code "12.50%"}
     *
     * @throws IllegalArgumentException if the double to format is not between 0 and 1
     *                                  (this includes NaN), or if {@code numDecimals}
     *                                  is negative
     */
    public static String percentageString(double val, int numDecimals) {
        // Written as a negated conjunction on purpose: every comparison with
        // NaN is false, so "val < 0 || val > 1" would let NaN through.
        if (!(val >= 0 && val <= 1)) {
            throw new IllegalArgumentException("Expected a number between 0 and 1");
        }
        if (numDecimals < 0) {
            // fail with a clear message rather than with an obscure format error
            throw new IllegalArgumentException("Expected a non-negative number of decimals, got " + numDecimals);
        }

        return String.format(Locale.ROOT, "%." + numDecimals + "f%%", 100 * val);
    }


    /**
     * Strips a prefix from the given non-null text. The prefixes are tried
     * in the order they are listed, and only the first one that the text
     * starts with is removed. If none matches, the text is returned as is.
     *
     * @param text     Text to strip, must not be null
     * @param prefixes Candidate prefixes, in order of priority
     *
     * @return The text without the first matching prefix
     */
    public static String withoutPrefixes(String text, String... prefixes) {
        for (int i = 0; i < prefixes.length; i++) {
            final String candidate = prefixes[i];
            if (!text.startsWith(candidate)) {
                continue;
            }
            return text.substring(candidate.length());
        }
        return text;
    }

    /**
     * Removes control characters that are not allowed in XML 1.0 documents.
     *
     * <p>Allowed characters in XML 1.0 are:
     * <pre>
     * Char    ::=      #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     *  // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
     * </pre>
     * (see Extensible Markup Language (XML) 1.0 (Fifth Edition)).
     *
     * <p>This method only deletes the characters matched by
     * {@code XML_10_INVALID_CHARS}, i.e. the control characters below
     * {@code #x20} other than tab, line feed and carriage return. Other
     * characters are left untouched.
     *
     * @param text Text to clean up
     *
     * @return The text without the matched control characters
     */
    public static String removedInvalidXml10Characters(String text) {
        final Matcher invalidChars = XML_10_INVALID_CHARS.matcher(text);
        final String cleaned = invalidChars.replaceAll("");
        return cleaned;
    }

    /**
     * Makes some whitespace characters visually apparent: in the string
     * representation of the given object, each line feed, carriage return
     * and tab is replaced by its two-character backslash escape.
     *
     * @param o Object to render, may be null
     *
     * @return The escaped string representation, or null if the parameter is null
     */
    public static String escapeWhitespace(Object o) {
        if (o == null) {
            return null;
        }
        return String.valueOf(o)
                     .replace("\n", "\\n")
                     .replace("\r", "\\r")
                     .replace("\t", "\\t");
    }


    /**
     * Determine the maximum number of common leading whitespace characters the
     * strings share in the same sequence. Useful for determining how many
     * leading characters can be removed to shift all the text in the strings to
     * the left without misaligning them.
     *
     * <p>Note: the spec is described in the JDK's {@code String#stripIndent}:
     *
     * <blockquote>
     * The minimum indentation (min) is determined as follows:
     * <ul>
     *     <li>For each non-blank line (as defined by isBlank()), the leading white space characters are counted.</li>
     *     <li>The leading white space characters on the last line are also counted even if blank.</li>
     * </ul>
     * The min value is the smallest of these counts.
     * </blockquote>
     *
     * @param lines Lines to inspect
     *
     * @return The smallest indentation among the counted lines, or 0 if
     *         the list is empty
     *
     * @throws NullPointerException If the parameter is null
     */
    private static int maxCommonLeadingWhitespaceForAll(List<? extends CharSequence> lines) {
        int maxCommonWs = Integer.MAX_VALUE;
        final int lastIndex = lines.size() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            CharSequence line = lines.get(i);
            // blank lines are ignored, except the last line which always counts
            if (i == lastIndex || !StringUtils.isBlank(line)) {
                maxCommonWs = Math.min(maxCommonWs, countLeadingWhitespace(line));
            }
        }
        if (maxCommonWs == Integer.MAX_VALUE) {
            // no line was counted (empty list): nothing to trim
            maxCommonWs = 0;
        }
        return maxCommonWs;
    }

    /**
     * Returns the lines of the given source as a new mutable list, after
     * processing them with {@link #trimIndentInPlace(List)}: the common
     * indentation is removed from each line, and each line is trimmed
     * at its end.
     *
     * @param source Text to split into lines
     *
     * @return A new list of trimmed lines
     */
    public static List<Chars> linesWithTrimIndent(Chars source) {
        List<Chars> result = source.lineStream().collect(CollectionUtil.toMutableList());
        trimIndentInPlace(result);
        return result;
    }

    /**
     * Trim the common indentation of each line in place in the input list.
     * Trailing whitespace is removed on each line. Note that blank lines do
     * not count towards computing the max common indentation, except
     * the last one.
     *
     * @param lines mutable list, each element is replaced by its trimmed version
     */
    public static void trimIndentInPlace(List<Chars> lines) {
        int trimDepth = maxCommonLeadingWhitespaceForAll(lines);
        // Lines shorter than the common indentation (necessarily blank lines
        // that were ignored when computing it) cannot be cut at trimDepth,
        // they are only trimmed at the end.
        lines.replaceAll(chars -> chars.length() >= trimDepth
                                  ? chars.subSequence(trimDepth).trimEnd()
                                  : chars.trimEnd());
    }

    /**
     * Trim common indentation in the lines of the string. Like
     * {@link #trimIndentInPlace(List)} called with the list of lines
     * and joined with {@code \n}.
     *
     * @param string Text whose lines should be trimmed
     *
     * @return A new builder containing the trimmed lines
     */
    public static StringBuilder trimIndent(Chars string) {
        List<Chars> lines = string.lineStream().collect(CollectionUtil.toMutableList());
        trimIndentInPlace(lines);
        return CollectionUtil.joinCharsIntoStringBuilder(lines, "\n");
    }


    /**
     * Counts the whitespace characters (as defined by
     * {@link Character#isWhitespace(char)}) at the start of the sequence.
     * For a sequence made only of whitespace, this is its length.
     */
    private static int countLeadingWhitespace(CharSequence s) {
        final int length = s.length();
        for (int idx = 0; idx < length; idx++) {
            if (!Character.isWhitespace(s.charAt(idx))) {
                return idx;
            }
        }
        return length;
    }


    /**
     * Compares two strings for equality, after optionally normalizing them.
     * The normalizations are applied in this order: trimming, then collapsing
     * each run of whitespace into a single space. The final comparison may
     * ignore case. Two null values are treated as equal, a null value is
     * never equal to a non-null one.
     *
     * @param s1                    The first String.
     * @param s2                    The second String.
     * @param trim                  Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase            Indicates if the case of the Strings should ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     *
     * @return true if the Strings are the same, false otherwise.
     */
    public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase,
                                 boolean standardizeWhitespace) {
        if (s1 == null || s2 == null) {
            // equal only if both are missing
            return s1 == null && s2 == null;
        }

        String left = trim ? s1.trim() : s1;
        String right = trim ? s2.trim() : s2;

        if (standardizeWhitespace) {
            // every run of whitespace becomes one space character
            left = left.replaceAll("\\s+", " ");
            right = right.replaceAll("\\s+", " ");
        }

        if (ignoreCase) {
            return left.equalsIgnoreCase(right);
        }
        return left.equals(right);
    }


    /**
     * Formats all items onto a string with separators if more than one exists,
     * return an empty string if the items are null or empty. Every item is
     * rendered like {@link String#valueOf(Object)} does, so a null item,
     * whatever its position, shows up as {@code null} in the output.
     *
     * @param items     Items to join, may be null
     * @param separator Text inserted between two consecutive items
     *
     * @return The joined string, never null
     */
    public static String asString(Object[] items, String separator) {

        if (items == null || items.length == 0) {
            return "";
        }

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < items.length; i++) {
            if (i > 0) {
                sb.append(separator);
            }
            // append(Object) is null-safe, so the first item gets the same
            // treatment as the following ones
            sb.append(items[i]);
        }

        return sb.toString();
    }

    /**
     * If the string starts and ends with the delimiter, returns the substring
     * within the delimiters. Otherwise returns the original string. The
     * start and end delimiter must be 2 separate characters, so a string
     * of length 1 is never unwrapped.
     *
     * <pre>{@code
     * removeSurrounding("",     _ )  = ""
     * removeSurrounding("q",   'q')  = "q"
     * removeSurrounding("qq",  'q')  = ""
     * removeSurrounding("q_q", 'q')  = "_"
     * }</pre>
     *
     * @param string    String to unwrap
     * @param delimiter Delimiter expected at both ends
     *
     * @return The unwrapped string, or the original string
     */
    public static String removeSurrounding(String string, char delimiter) {
        final int last = string.length() - 1;
        final boolean wrapped = last >= 1
            && string.charAt(0) == delimiter
            && string.charAt(last) == delimiter;
        return wrapped ? string.substring(1, last) : string;
    }

    /**
     * Strips one pair of surrounding double quotes, if present. This is
     * {@link #removeSurrounding(String, char) removeSurrounding} with
     * a double quote as a delimiter.
     *
     * @param string String to unwrap
     *
     * @return The unwrapped string, or the original string
     */
    public static String removeDoubleQuotes(String string) {
        final char doubleQuote = '"';
        return removeSurrounding(string, doubleQuote);
    }

    /**
     * Truncate the given string to some maximum length. If it needs
     * truncation, the ellipsis string is appended. The length of the
     * returned string is always lower-or-equal to the maxOutputLength,
     * even when truncation occurs. The truncation never happens in the
     * middle of a surrogate pair: when the cut would fall between the
     * two halves of a pair, the whole pair is dropped, and the result
     * is then one char shorter than the maximum.
     *
     * @param string          String to truncate
     * @param maxOutputLength Maximum length of the result, checked with
     *                        {@code AssertionUtil.requireNonNegative}
     * @param ellipsis        Text marking the truncation, must not be longer
     *                        than {@code maxOutputLength}
     *
     * @return The string itself if it is short enough, otherwise a truncated
     *         prefix followed by the ellipsis
     *
     * @throws IllegalArgumentException If the ellipsis is longer than {@code maxOutputLength}
     */
    public static String elide(String string, int maxOutputLength, String ellipsis) {
        AssertionUtil.requireNonNegative("maxOutputLength", maxOutputLength);
        if (ellipsis.length() > maxOutputLength) {
            throw new IllegalArgumentException("Ellipsis too long '" + ellipsis + "', maxOutputLength=" + maxOutputLength);
        }
        if (string.length() <= maxOutputLength) {
            return string;
        }

        // here string.length() > maxOutputLength >= cut, so charAt(cut) is valid
        int cut = maxOutputLength - ellipsis.length();
        if (cut > 0
            && Character.isHighSurrogate(string.charAt(cut - 1))
            && Character.isLowSurrogate(string.charAt(cut))) {
            // don't leave an unpaired high surrogate at the end of the prefix
            cut--;
        }
        return string.substring(0, cut) + ellipsis;
    }


    /**
     * Escapes the given string in the style of a Java string literal.
     * Backspace, tab, line feed, form feed, carriage return, both kinds
     * of quotes and the backslash get their two-character backslash escape.
     * Any other character outside of the printable ASCII range (0x20 to 0x7e)
     * is replaced by a unicode escape with four lowercase hexadecimal digits.
     * NUL characters are dropped from the output.
     *
     * @param str String to escape
     *
     * @return The escaped string
     */
    public static String escapeJava(String str) {
        final int length = str.length();
        final StringBuilder escaped = new StringBuilder();
        for (int pos = 0; pos < length; pos++) {
            final char ch = str.charAt(pos);
            if (ch == 0) {
                // NUL is skipped altogether
                continue;
            }

            if (ch == '\b') {
                escaped.append("\\b");
            } else if (ch == '\t') {
                escaped.append("\\t");
            } else if (ch == '\n') {
                escaped.append("\\n");
            } else if (ch == '\f') {
                escaped.append("\\f");
            } else if (ch == '\r') {
                escaped.append("\\r");
            } else if (ch == '\"') {
                escaped.append("\\\"");
            } else if (ch == '\'') {
                escaped.append("\\'");
            } else if (ch == '\\') {
                escaped.append("\\\\");
            } else if (ch >= 0x20 && ch <= 0x7e) {
                // printable ASCII is kept as is
                escaped.append(ch);
            } else {
                // a char has at most 4 hex digits, left-pad with zeros
                final String hex = Integer.toHexString(ch);
                escaped.append("\\u");
                for (int pad = hex.length(); pad < 4; pad++) {
                    escaped.append('0');
                }
                escaped.append(hex);
            }
        }
        return escaped.toString();
    }

    /**
     * Doubles every single quote of the string, so that the quotes
     * appear literally when the result is interpreted by a
     * {@link MessageFormat}. No other character is touched; in
     * particular curly braces are not escaped.
     *
     * @param str String to escape
     *
     * @return The string with each {@code '} replaced by {@code ''}
     */
    public static String quoteMessageFormat(String str) {
        // plain literal replacement, there is no need for a regex here
        return str.replace("'", "''");
    }


    /**
     * Replaces a null string by the empty string.
     *
     * @param value String, may be null
     *
     * @return The parameter if it is not null, otherwise the empty string
     */
    public static String nullToEmpty(final String value) {
        if (value != null) {
            return value;
        }
        return "";
    }


    public enum CaseConvention {
        /** SCREAMING_SNAKE_CASE. */
        SCREAMING_SNAKE_CASE {
            @Override
            List toWords(String name) {
                return CollectionUtil.map(name.split("_"), s -> s.toLowerCase(Locale.ROOT));
            }

            @Override
            String joinWords(List words) {
                return words.stream().map(s -> s.toUpperCase(Locale.ROOT)).collect(Collectors.joining("_"));
            }
        },
        /** camelCase. */
        CAMEL_CASE {
            @Override
            List toWords(String name) {
                return PASCAL_CASE.toWords(name);
            }

            @Override
            String joinWords(List words) {
                if (words.isEmpty()) {
                    return "";
                }
                return words.get(0).toLowerCase(Locale.ROOT) + PASCAL_CASE.joinWords(words.subList(1, words.size()));
            }
        },
        /** PascalCase. */
        PASCAL_CASE {
            @Override
            List toWords(String name) {
                return CollectionUtil.map(name.split("(? s.toLowerCase(Locale.ROOT));
            }

            @Override
            String joinWords(List words) {
                return words.stream().map(StringUtils::capitalize).collect(Collectors.joining());
            }
        },
        /** space separated. */
        SPACE_SEPARATED {
            @Override
            List toWords(String name) {
                return CollectionUtil.map(name.split("\\s++"), s -> s.toLowerCase(Locale.ROOT));
            }

            @Override
            String joinWords(List words) {
                return String.join(" ", words);
            }
        };

        /** Split a name written with this convention into a list of *lowercase* words. */
        abstract List toWords(String name);

        /** Takes a list of lowercase words and joins them into a name following this convention. */
        abstract String joinWords(List words);

        public String convertTo(CaseConvention to, String name) {
            return to.joinWords(toWords(name));
        }
    }
}