org.pkl.thirdparty.commonmark.internal.util.Escaping Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pkl-tools Show documentation
Show all versions of pkl-tools Show documentation
Fat Jar containing pkl-cli, pkl-codegen-java, pkl-codegen-kotlin, pkl-config-java, pkl-core, pkl-doc, and their shaded third-party dependencies.
package org.pkl.thirdparty.commonmark.internal.util;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Escaping {
public static final String ESCAPABLE = "[!\"#$%&\'()*+,./:;<=>?@\\[\\\\\\]^_`{|}~-]";
public static final String ENTITY = "&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});";
private static final Pattern BACKSLASH_OR_AMP = Pattern.compile("[\\\\&]");
private static final Pattern ENTITY_OR_ESCAPED_CHAR =
Pattern.compile("\\\\" + ESCAPABLE + '|' + ENTITY, Pattern.CASE_INSENSITIVE);
// From RFC 3986 (see "reserved", "unreserved") except don't escape '[' or ']' to be compatible with JS encodeURI
private static final Pattern ESCAPE_IN_URI =
Pattern.compile("(%[a-fA-F0-9]{0,2}|[^:/?#@!$&'()*+,;=a-zA-Z0-9\\-._~])");
private static final char[] HEX_DIGITS =
new char[]{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
private static final Pattern WHITESPACE = Pattern.compile("[ \t\r\n]+");
private static final Replacer UNESCAPE_REPLACER = new Replacer() {
@Override
public void replace(String input, StringBuilder sb) {
if (input.charAt(0) == '\\') {
sb.append(input, 1, input.length());
} else {
sb.append(Html5Entities.entityToString(input));
}
}
};
private static final Replacer URI_REPLACER = new Replacer() {
@Override
public void replace(String input, StringBuilder sb) {
if (input.startsWith("%")) {
if (input.length() == 3) {
// Already percent-encoded, preserve
sb.append(input);
} else {
// %25 is the percent-encoding for %
sb.append("%25");
sb.append(input, 1, input.length());
}
} else {
byte[] bytes = input.getBytes(StandardCharsets.UTF_8);
for (byte b : bytes) {
sb.append('%');
sb.append(HEX_DIGITS[(b >> 4) & 0xF]);
sb.append(HEX_DIGITS[b & 0xF]);
}
}
}
};
public static String escapeHtml(String input) {
// Avoid building a new string in the majority of cases (nothing to escape)
StringBuilder sb = null;
loop:
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
String replacement;
switch (c) {
case '&':
replacement = "&";
break;
case '<':
replacement = "<";
break;
case '>':
replacement = ">";
break;
case '\"':
replacement = """;
break;
default:
if (sb != null) {
sb.append(c);
}
continue loop;
}
if (sb == null) {
sb = new StringBuilder();
sb.append(input, 0, i);
}
sb.append(replacement);
}
return sb != null ? sb.toString() : input;
}
/**
* Replace entities and backslash escapes with literal characters.
*/
public static String unescapeString(String s) {
if (BACKSLASH_OR_AMP.matcher(s).find()) {
return replaceAll(ENTITY_OR_ESCAPED_CHAR, s, UNESCAPE_REPLACER);
} else {
return s;
}
}
public static String percentEncodeUrl(String s) {
return replaceAll(ESCAPE_IN_URI, s, URI_REPLACER);
}
public static String normalizeLabelContent(String input) {
String trimmed = input.trim();
// This is necessary to correctly case fold "\u1E9E" (LATIN CAPITAL LETTER SHARP S) to "SS":
// "\u1E9E".toLowerCase(Locale.ROOT) -> "\u00DF" (LATIN SMALL LETTER SHARP S)
// "\u00DF".toUpperCase(Locale.ROOT) -> "SS"
// Note that doing upper first (or only upper without lower) wouldn't work because:
// "\u1E9E".toUpperCase(Locale.ROOT) -> "\u1E9E"
String caseFolded = trimmed.toLowerCase(Locale.ROOT).toUpperCase(Locale.ROOT);
return WHITESPACE.matcher(caseFolded).replaceAll(" ");
}
private static String replaceAll(Pattern p, String s, Replacer replacer) {
Matcher matcher = p.matcher(s);
if (!matcher.find()) {
return s;
}
StringBuilder sb = new StringBuilder(s.length() + 16);
int lastEnd = 0;
do {
sb.append(s, lastEnd, matcher.start());
replacer.replace(matcher.group(), sb);
lastEnd = matcher.end();
} while (matcher.find());
if (lastEnd != s.length()) {
sb.append(s, lastEnd, s.length());
}
return sb.toString();
}
private interface Replacer {
void replace(String input, StringBuilder sb);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy