// com.itextpdf.styledxmlparser.jsoup.internal.StringUtil - Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.styledxmlparser.jsoup.internal;
import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Stack;
import java.util.regex.Pattern;
/**
A minimal String utility class. Designed for internal jsoup use only - the API and outcome may change without
notice.
*/
public final class StringUtil {
// Memoised padding strings: padding[n] is exactly n spaces, for n = 0..20 (21 entries).
// The table is generated rather than spelled out as literals so the entry widths cannot be
// silently corrupted by whitespace-collapsing tools.
static final String[] padding = createPaddingCache(21);
private static final int maxPaddingWidth = 30; // so very deeply nested nodes don't get insane padding amounts

// Builds a table whose element at index n is a string of n space characters.
private static String[] createPaddingCache(int entries) {
    final String[] cache = new String[entries];
    final StringBuilder spaces = new StringBuilder(entries);
    for (int width = 0; width < entries; width++) {
        cache[width] = spaces.toString();
        spaces.append(' ');
    }
    return cache;
}
/**
* Join a collection of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Collection> strings, String sep) {
return join(strings.iterator(), sep);
}
/**
* Join a collection of strings by a separator
* @param strings iterator of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Iterator> strings, String sep) {
if (!strings.hasNext())
return "";
String start = strings.next().toString();
if (!strings.hasNext()) // only one, avoid builder
return start;
StringJoiner j = new StringJoiner(sep);
j.add(start);
while (strings.hasNext()) {
j.add(strings.next());
}
return j.complete();
}
/**
 * Join an array of strings by a separator. The array is viewed as a list and handed to the
 * collection-based overload.
 * @param strings array of string objects
 * @param sep string to place between strings
 * @return joined string
 */
public static String join(String[] strings, String sep) {
    final Collection<String> asCollection = Arrays.asList(strings);
    return join(asCollection, sep);
}
/**
 * Incrementally joins stringable objects with a fixed separator, allowing callers to filter or
 * extend items as they go.
 * @since 1.14.1
 */
public static class StringJoiner {
    // Borrowed from the builder pool; set to null by complete() so a finished joiner can't be reused.
    StringBuilder sb = borrowBuilder();
    final String separator;
    boolean first = true;

    /**
     * Create a new joiner that uses the specified separator. Callers MUST call {@link #complete()},
     * otherwise the borrowed thread-local string builder is never returned to the pool.
     * @param separator the token to insert between strings
     */
    public StringJoiner(String separator) {
        this.separator = separator;
    }

    /**
     * Add another item to the joiner; every item after the first is preceded by the separator.
     * @param stringy the item to add
     * @return this joiner, for chaining
     */
    public StringJoiner add(Object stringy) {
        Validate.notNull(sb); // rejects use after complete()
        if (!first) {
            sb.append(separator);
        }
        sb.append(stringy);
        first = false;
        return this;
    }

    /**
     * Append content to the current item without inserting a separator.
     * @param stringy the content to append
     * @return this joiner, for chaining
     */
    public StringJoiner append(Object stringy) {
        Validate.notNull(sb); // rejects use after complete()
        sb.append(stringy);
        return this;
    }

    /**
     * Return the joined string and release the builder back to the pool. This joiner cannot be
     * used again afterwards.
     * @return the joined string
     */
    public String complete() {
        final String joined = releaseBuilder(sb);
        sb = null;
        return joined;
    }
}
/**
 * Returns space padding (up to a max of 30).
 * <p>
 * Widths covered by the memoised table are served from it; larger widths are capped at the
 * maximum padding width and built on demand.
 * @param width amount of padding desired; must not be negative
 * @return string of spaces * width (capped at the maximum)
 * @throws IllegalArgumentException if {@code width} is negative
 */
public static String padding(int width) {
    if (width < 0) {
        // zero is a valid width, so the message must say ">= 0"
        throw new IllegalArgumentException("width must be >= 0");
    }
    if (width < padding.length) {
        return padding[width];
    }

    final char[] out = new char[Math.min(width, maxPaddingWidth)];
    Arrays.fill(out, ' ');
    return String.valueOf(out);
}
/**
 * Tests if a string is blank: null, empty, or made up only of characters accepted by
 * {@link #isWhitespace(int)}.
 * @param string string to test
 * @return true if the string is blank, false as soon as a non-whitespace character is found
 */
public static boolean isBlank(String string) {
    if (string == null) {
        return true;
    }

    final int length = string.length();
    int index = 0;
    while (index < length) {
        final int codePoint = string.codePointAt(index);
        if (!StringUtil.isWhitespace(codePoint)) {
            return false;
        }
        index++;
    }
    // also reached for the empty string, which is blank by definition
    return true;
}
/**
 * Tests if a string is numeric, i.e. every position holds a character for which
 * {@link Character#isDigit(int)} is true.
 * @param string string to test
 * @return true if only digit chars, false if empty or null or contains non-digit chars
 */
public static boolean isNumeric(String string) {
    final int length = (string == null) ? 0 : string.length();
    if (length == 0) {
        return false;
    }

    // the string is examined at every char index
    for (int index = 0; index < length; index++) {
        final int codePoint = string.codePointAt(index);
        if (!Character.isDigit(codePoint)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
 * Accepted values are space, tab, line feed, form feed and carriage return.
 * @param c code point to test
 * @return true if code point is whitespace, false otherwise
 * @see #isActuallyWhitespace(int)
 */
public static boolean isWhitespace(int c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}
/**
 * Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
 * Accepts space, tab, line feed, form feed, carriage return, and code point 160.
 * @param c code point to test
 * @return true if code point is whitespace, false otherwise
 */
public static boolean isActuallyWhitespace(int c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
        case 160: // non-breaking space (&nbsp;). Not in the spec but expected.
            return true;
        default:
            return false;
    }
}
/**
 * Tests if a code point is one of the invisible characters dropped during whitespace
 * normalisation: the zero width space or the soft hyphen.
 * @param c code point to test
 * @return true if the code point is 8203 or 173, false otherwise
 */
public static boolean isInvisibleChar(int c) {
    final int zeroWidthSpace = 8203;
    final int softHyphen = 173;
    // previously also included zw non join, zw join - but removing those breaks semantic meaning of text
    return c == zeroWidthSpace || c == softHyphen;
}
/**
 * Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
 * (e.g. newline, tab) convert to a simple space. Leading whitespace is not stripped.
 * @param string content to normalise
 * @return normalised string
 */
public static String normaliseWhitespace(String string) {
    final StringBuilder normalised = StringUtil.borrowBuilder();
    final boolean stripLeading = false;
    appendNormalisedWhitespace(normalised, string, stripLeading);
    // releasing the builder both returns it to the pool and yields its content
    return StringUtil.releaseBuilder(normalised);
}
/**
 * After normalizing the whitespace within a string, appends it to a string builder.
 * <p>
 * Each run of whitespace (per {@link #isActuallyWhitespace(int)}) becomes one space, and
 * characters reported by {@link #isInvisibleChar(int)} are left out.
 * @param accum builder to append to
 * @param string string to normalize whitespace within
 * @param stripLeading set to true if you wish to remove any leading whitespace
 */
public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
    final int length = string.length();
    boolean previousWasSpace = false;
    boolean seenContent = false;

    int position = 0;
    while (position < length) {
        final int codePoint = string.codePointAt(position);
        // advance by whole code points so surrogate pairs are consumed together
        position += Character.charCount(codePoint);

        if (isActuallyWhitespace(codePoint)) {
            final boolean stillLeading = stripLeading && !seenContent;
            if (!stillLeading && !previousWasSpace) {
                accum.append(' ');
                previousWasSpace = true;
            }
        } else if (!isInvisibleChar(codePoint)) {
            accum.appendCodePoint(codePoint);
            previousWasSpace = false;
            seenContent = true;
        }
        // invisible characters fall through: not appended, and no state is changed
    }
}
/**
 * Tests whether a string equals any of the supplied candidates, using a linear scan.
 * @param needle the string to look for
 * @param haystack the candidate strings
 * @return true if some candidate equals the needle, false otherwise
 */
public static boolean in(final String needle, final String... haystack) {
    for (final String candidate : haystack) {
        if (candidate.equals(needle)) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether a string is present in an array by binary search.
 * @param needle the string to look for
 * @param haystack the array to search (binary search requires it to be sorted)
 * @return true if the search finds the needle, false otherwise
 */
public static boolean inSorted(String needle, String[] haystack) {
    final int position = Arrays.binarySearch(haystack, needle);
    // a negative result encodes the insertion point, i.e. "not found"
    return position >= 0;
}
/**
 * Tests that a String contains only ASCII characters.
 * @param string scanned string; validated as non-null before scanning
 * @return true if all characters are in range 0 - 127
 */
public static boolean isAscii(String string) {
    Validate.notNull(string);

    final int length = string.length();
    int index = 0;
    while (index < length) {
        final char current = string.charAt(index);
        if (current > 127) { // outside the ascii range
            return false;
        }
        index++;
    }
    return true;
}
// Matches one or more leading "./" or "../" segments directly after the root slash of a file part.
private static final Pattern extraDotSegmentsPattern = Pattern.compile("^/((\\.{1,2}/)+)");
/**
 * Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
 * <p>
 * The result is rebuilt from the resolved URL's protocol, host, port, file and fragment only.
 * @param base the existing absolute base URL
 * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
 * @return the resolved absolute URL
 * @throws MalformedURLException if an error occurred generating the URL
 */
public static URL resolve(URL base, String relUrl) throws MalformedURLException {
    // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
    final String relative = relUrl.startsWith("?") ? base.getPath() + relUrl : relUrl;
    final URL url = new URL(base, relative);

    // workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo
    String fixedFile = extraDotSegmentsPattern.matcher(url.getFile()).replaceFirst("/");

    // getFile() excludes the fragment, so re-attach it before rebuilding the URL
    final String ref = url.getRef();
    if (ref != null) {
        fixedFile = fixedFile + "#" + ref;
    }
    return new URL(url.getProtocol(), url.getHost(), url.getPort(), fixedFile);
}
/**
 * Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
 * <p>
 * If the base cannot be parsed as a URL, the relative component is tried on its own, since it
 * may already be absolute.
 * @param baseUrl the existing absolute base URL
 * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
 * @return an absolute URL if one was able to be generated, or the empty string if not
 */
public static String resolve(final String baseUrl, final String relUrl) {
    URL base = null;
    try {
        base = new URL(baseUrl);
    } catch (MalformedURLException ignored) {
        // the base is unsuitable, but the attribute/rel may be abs on its own, so try that below
    }

    try {
        final URL resolved = (base == null) ? new URL(relUrl) : resolve(base, relUrl);
        return resolved.toExternalForm();
    } catch (MalformedURLException e) {
        return "";
    }
}
private static final ThreadLocal> threadLocalBuilders = new ThreadLocal>() {
@Override
protected Stack initialValue() {
return new Stack<>();
}
};
/**
* Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
* prevented from growing too large.
*
* Care must be taken to release the builder once its work has been completed, with {@link #releaseBuilder}
* @return an empty StringBuilder
*/
public static StringBuilder borrowBuilder() {
Stack builders = threadLocalBuilders.get();
return builders.empty() ?
new StringBuilder(MaxCachedBuilderSize) :
builders.pop();
}
/**
* Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its
* contents may be changed by this method, or by a concurrent thread.
* @param sb the StringBuilder to release.
* @return the string value of the released String Builder (as an incentive to release it!).
*/
public static String releaseBuilder(StringBuilder sb) {
Validate.notNull(sb);
String string = sb.toString();
if (sb.length() > MaxCachedBuilderSize)
sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big
else
sb.delete(0, sb.length()); // make sure it's emptied on release
Stack builders = threadLocalBuilders.get();
builders.push(sb);
while (builders.size() > MaxIdleBuilders) {
builders.pop();
}
return string;
}
private static final int MaxCachedBuilderSize = 8 * 1024;
private static final int MaxIdleBuilders = 8;
}