// org.eclipse.rdf4j.common.text.StringUtil Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.common.text;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
/**
 * Static string helpers: lightweight IRI escaping, repeated-character appending and double-quote trimming.
 */
public class StringUtil {

    /**
     * Non-alphanumeric characters that {@link #simpleEscapeIRI(String, Appendable, boolean)} copies to the output
     * unchanged. Note that '%' is part of this list, so a '%' already present in the input is copied as-is rather
     * than being encoded again.
     */
    private static final char[] IRI_DONT_ESCAPE = new char[] { '_', '~', '.', '-', '!', '$', '&', '\'', '(',
            ')', '*', '+', ',', ';', '=', ':', '/', '?', '#', '@', '%', '[', ']' };

    static {
        // sorting array to allow simple binary search for char lookup.
        Arrays.sort(IRI_DONT_ESCAPE);
    }

    /**
     * Returns the upper-case hexadecimal representation of the given value, without any zero padding.
     */
    private static String hex(int c) {
        return Integer.toHexString(c).toUpperCase(Locale.US);
    }

    /**
     * Appends the upper-case hexadecimal representation of a value, left-padded with zeros to a minimum width.
     *
     * @param appendable the target to append to
     * @param value      the value to render
     * @param minDigits  the minimum number of hexadecimal digits to emit
     * @throws IOException if the appendable throws it
     */
    private static void appendPaddedHex(Appendable appendable, int value, int minDigits) throws IOException {
        String digits = hex(value);
        for (int pad = minDigits - digits.length(); pad > 0; pad--) {
            appendable.append('0');
        }
        appendable.append(digits);
    }

    /**
     * Escapes a string to a (mostly) conforming IRI value and appends it to the appendable.
     *
     * Characters accepted by {@code ASCIIUtil.isLetterOrNumber} and the punctuation characters listed in
     * {@code IRI_DONT_ESCAPE} are copied unchanged. Every other character below U+00A0 - like a white space or control
     * character - is percent-encoded as '%' followed by exactly two upper-case hexadecimal digits.
     *
     * Characters from U+00A0 upwards are copied unchanged unless escapeUnicode is true, in which case they are
     * written as a numeric escape: a backslash, 'u' and four hexadecimal digits for a single char, or a backslash,
     * 'U' and eight hexadecimal digits for a valid surrogate pair. An unpaired surrogate is written as a four digit
     * escape of its own and does not consume the character that follows it.
     *
     * This is slightly faster than {@link org.eclipse.rdf4j.common.net.ParsedIRI#create(String)} for valid IRI (without
     * percents) and much faster for IRI with invalid (percent-encoded) characters, though it is less accurate.
     *
     * @param str           the string to escape, must not be null
     * @param appendable    the target the escaped characters are appended to
     * @param escapeUnicode escape non-ASCII values numerically
     * @throws IOException if the appendable throws it
     */
    public static void simpleEscapeIRI(String str, Appendable appendable, boolean escapeUnicode) throws IOException {
        int strlen = str.length();
        for (int i = 0; i < strlen; i++) {
            char c = str.charAt(i);
            if (ASCIIUtil.isLetterOrNumber(c)) {
                appendable.append(c);
            } else if (c < 0xA0) {
                if (Arrays.binarySearch(IRI_DONT_ESCAPE, c) >= 0) {
                    appendable.append(c);
                } else {
                    // always two digits: values below 0x10 would otherwise yield an invalid escape like "%9"
                    appendable.append('%');
                    appendPaddedHex(appendable, c, 2);
                }
            } else if (!escapeUnicode) {
                appendable.append(c);
            } else if (Character.isHighSurrogate(c) && i + 1 < strlen
                    && Character.isLowSurrogate(str.charAt(i + 1))) {
                // U+10000 - U+10FFFF: a real surrogate pair, so two chars are consumed.
                // Padding to eight digits covers both 5 and 6 digit code points.
                appendable.append("\\U");
                appendPaddedHex(appendable, str.codePointAt(i), 8);
                i++;
            } else {
                // BMP character or unpaired surrogate: escape this single char only
                appendable.append("\\u");
                appendPaddedHex(appendable, c, 4);
            }
        }
    }

    /**
     * Appends the specified character n times to the supplied StringBuilder. Nothing is appended when n is zero or
     * negative.
     *
     * @param c  The character to append.
     * @param n  The number of times the character should be appended.
     * @param sb The StringBuilder to append the character(s) to.
     */
    public static void appendN(char c, int n, StringBuilder sb) {
        for (int i = n; i > 0; i--) {
            sb.append(c);
        }
    }

    /**
     * Removes the double quote from the start and end of the supplied string if it starts and ends with this character.
     * This method does not create a new string if text doesn't start and end with double quotes, the
     * text object itself is returned in that case. A string consisting of a single double quote is also returned
     * as-is, because at least two characters are required.
     *
     * @param text The string to remove the double quotes from.
     * @return The trimmed string, or a reference to text if it did not start and end with double quotes.
     */
    public static String trimDoubleQuotes(String text) {
        int textLength = text.length();
        if (textLength >= 2 && text.charAt(0) == '"' && text.charAt(textLength - 1) == '"') {
            return text.substring(1, textLength - 1);
        }
        return text;
    }
}