![JAR search and dependency download from the Maven repository](/logo.png)
com.googlecode.protobuf.format.util.TextUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of protobuf-java-format Show documentation
Show all versions of protobuf-java-format Show documentation
Provide serialization and de-serialization of different formats based on Google’s protobuf Message. Enables overriding the default (byte array) output to text based formats such as XML, JSON and HTML.
The newest version!
/**
* Copyright 2000-2011 NeuStar, Inc. All rights reserved.
* NeuStar, the Neustar logo and related names and logos are registered
* trademarks, service marks or tradenames of NeuStar, Inc. All other
* product names, company names, marks, logos and symbols may be trademarks
* of their respective owners.
*/
package com.googlecode.protobuf.format.util;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
import com.google.protobuf.ByteString;
/**
* Utilities for coercing types
* largely follows google/protobuf/text_format.cc.
*/
public class TextUtils {
private static final Pattern DOUBLE_INFINITY =
Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE);
private static final Pattern FLOAT_INFINITY =
Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE);
private static final Pattern FLOAT_NAN =
Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE);
private static final Pattern DIGITS =
Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE);
/**
* Convert an unsigned 64-bit integer to a string.
*/
public static String unsignedToString(final long value) {
if (value >= 0) {
return Long.toString(value);
} else {
// Pull off the most-significant bit so that BigInteger doesn't think
// the number is negative, then set it again using setBit().
return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString();
}
}
/**
* Convert an unsigned 32-bit integer to a string.
*/
public static String unsignedToString(final int value) {
if (value >= 0) {
return Integer.toString(value);
} else {
return Long.toString((value) & 0x00000000FFFFFFFFL);
}
}
/**
* Convert an unsigned 64-bit integer to a {@link BigInteger}.
*/
public static BigInteger unsignedLong(long value) {
if (value < 0) {
// Pull off the most-significant bit so that BigInteger doesn't think
// the number is negative, then set it again using setBit().
return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63);
}
return BigInteger.valueOf(value);
}
/**
* Is this a hex digit?
*/
public static boolean isHex(final char c) {
return ('0' <= c && c <= '9') ||
('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F');
}
/**
* Is this an octal digit?
*/
public static boolean isOctal(final char c) {
return '0' <= c && c <= '7';
}
/**
* Interpret a character as a digit (in any base up to 36) and return the
* numeric value. This is like {@code Character.digit()} but we don't accept
* non-ASCII digits.
*/
public static int digitValue(final char c) {
if ('0' <= c && c <= '9') {
return c - '0';
} else if ('a' <= c && c <= 'z') {
return c - 'a' + 10;
} else {
return c - 'A' + 10;
}
}
public static boolean isDigits(final String text) {
return DIGITS.matcher(text).matches();
}
private static final int BUFFER_SIZE = 4096;
// TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
// overhead is worthwhile
public static StringBuilder toStringBuilder(Readable input) throws IOException {
StringBuilder text = new StringBuilder();
CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
while (true) {
int n = input.read(buffer);
if (n == -1) {
break;
}
buffer.flip();
text.append(buffer, 0, n);
}
return text;
}
public static InputStream toInputStream(String input) {
return toInputStream(input, Charset.defaultCharset());
}
public static InputStream toInputStream(String input, Charset cs) {
return new ByteArrayInputStream(input.getBytes(cs));
}
/**
* If the next token is a double and return its value.
* Otherwise, throw a {@link NumberFormatException}.
*/
public static double parseDouble(final String text) throws NumberFormatException {
// We need to parse infinity and nan separately because
// Double.parseDouble() does not accept "inf", "infinity", or "nan".
if (DOUBLE_INFINITY.matcher(text).matches()) {
final boolean negative = text.startsWith("-");
return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
}
if (text.equalsIgnoreCase("nan")) {
return Double.NaN;
}
final double result = Double.parseDouble(text);
return result;
}
/**
* Parse a float and return its value.
* Otherwise, throw a {@link NumberFormatException}.
*/
public static float parseFloat(final String text) throws NumberFormatException {
// We need to parse infinity and nan separately because
// Float.parseFloat() does not accept "inf", "infinity", or "nan".
if (FLOAT_INFINITY.matcher(text).matches()) {
final boolean negative = text.startsWith("-");
return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
}
if (FLOAT_NAN.matcher(text).matches()) {
return Float.NaN;
}
final float result = Float.parseFloat(text);
return result;
}
/**
* Parse a boolean and return its value.
* Otherwise, throw a {@link IllegalArgumentException}.
*/
public static boolean parseBoolean(final String text) throws IllegalArgumentException {
if (text.equalsIgnoreCase("true") || text.equalsIgnoreCase("t") ||
text.equals("1")) {
return true;
} else if (text.equalsIgnoreCase("false") || text.equalsIgnoreCase("f") ||
text.equals("0")) {
return false;
} else {
throw new IllegalArgumentException("Expected \"true\" or \"false\".");
}
}
/**
* Parse a 32-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively.
*/
public static int parseInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, true, false);
}
/**
* Parse a 32-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code int} when returned since Java has
* no unsigned integer type.
*/
public static int parseUInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, false, false);
}
/**
* Parse a 64-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively.
*/
public static long parseInt64(final String text) throws NumberFormatException {
return parseInteger(text, true, true);
}
/**
* Parse a 64-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code long} when returned since Java has
* no unsigned long type.
*/
public static long parseUInt64(final String text) throws NumberFormatException {
return parseInteger(text, false, true);
}
public static long parseInteger(final String text,
final boolean isSigned, final boolean isLong) throws NumberFormatException {
int pos = 0;
boolean negative = false;
if (text.startsWith("-", pos)) {
if (!isSigned) {
throw new NumberFormatException("Number must be positive: " + text);
}
++pos;
negative = true;
}
int radix = 10;
if (text.startsWith("0x", pos)) {
pos += 2;
radix = 16;
} else if (text.startsWith("0", pos)) {
radix = 8;
}
final String numberText = text.substring(pos);
long result = 0;
if (numberText.length() < 16) {
// Can safely assume no overflow.
result = Long.parseLong(numberText, radix);
if (negative) {
result = -result;
}
// Check bounds.
// No need to check for 64-bit numbers since they'd have to be 16 chars
// or longer to overflow.
if (!isLong) {
if (isSigned) {
if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
throw new NumberFormatException(
"Number out of range for 32-bit signed integer: " + text);
}
} else {
if (result >= (1L << 32) || result < 0) {
throw new NumberFormatException(
"Number out of range for 32-bit unsigned integer: " + text);
}
}
}
} else {
BigInteger bigValue = new BigInteger(numberText, radix);
if (negative) {
bigValue = bigValue.negate();
}
// Check bounds.
if (!isLong) {
if (isSigned) {
if (bigValue.bitLength() > 31) {
throw new NumberFormatException(
"Number out of range for 32-bit signed integer: " + text);
}
} else {
if (bigValue.bitLength() > 32) {
throw new NumberFormatException(
"Number out of range for 32-bit unsigned integer: " + text);
}
}
} else {
if (isSigned) {
if (bigValue.bitLength() > 63) {
throw new NumberFormatException(
"Number out of range for 64-bit signed integer: " + text);
}
} else {
if (bigValue.bitLength() > 64) {
throw new NumberFormatException(
"Number out of range for 64-bit unsigned integer: " + text);
}
}
}
result = bigValue.longValue();
}
return result;
}
/**
* Escapes bytes in the format used in protocol buffer text format, which is the same as the
* format used for C string literals. All bytes that are not printable 7-bit ASCII characters
* are escaped, as well as backslash, single-quote, and double-quote characters. Characters for
* which no defined short-hand escape sequence is defined will be escaped using 3-digit octal
* sequences.
*/
public static String escapeBytes(final ByteString input) {
final StringBuilder builder = new StringBuilder(input.size());
for (int i = 0; i < input.size(); i++) {
final byte b = input.byteAt(i);
switch (b) {
// Java does not recognize \a or \v, apparently.
case 0x07: builder.append("\\a" ); break;
case '\b': builder.append("\\b" ); break;
case '\f': builder.append("\\f" ); break;
case '\n': builder.append("\\n" ); break;
case '\r': builder.append("\\r" ); break;
case '\t': builder.append("\\t" ); break;
case 0x0b: builder.append("\\v" ); break;
case '\\': builder.append("\\\\"); break;
case '\'': builder.append("\\\'"); break;
case '"' : builder.append("\\\""); break;
default:
// Note: Bytes with the high-order bit set should be escaped. Since
// bytes are signed, such bytes will compare less than 0x20, hence
// the following line is correct.
if (b >= 0x20) {
builder.append((char) b);
} else {
builder.append('\\');
builder.append((char) ('0' + ((b >>> 6) & 3)));
builder.append((char) ('0' + ((b >>> 3) & 7)));
builder.append((char) ('0' + (b & 7)));
}
break;
}
}
return builder.toString();
}
/**
* Un-escape a byte sequence as escaped using
* {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with
* "\x") are also recognized.
*/
public static ByteString unescapeBytes(final CharSequence charString) {
// First convert the Java characater sequence to UTF-8 bytes.
ByteString input = ByteString.copyFromUtf8(charString.toString());
// Then unescape certain byte sequences introduced by ASCII '\\'. The valid
// escapes can all be expressed with ASCII characters, so it is safe to
// operate on bytes here.
//
// Unescaping the input byte array will result in a byte sequence that's no
// longer than the input. That's because each escape sequence is between
// two and four bytes long and stands for a single byte.
final byte[] result = new byte[input.size()];
int pos = 0;
for (int i = 0; i < input.size(); i++) {
byte c = input.byteAt(i);
if (c == '\\') {
if (i + 1 < input.size()) {
++i;
c = input.byteAt(i);
if (isOctal((char)c)) {
// Octal escape.
int code = digitValue((char) c);
if (i + 1 < input.size() && isOctal((char) input.byteAt(i + 1))) {
++i;
code = code * 8 + digitValue((char) input.byteAt(i));
}
if (i + 1 < input.size() && isOctal((char) input.byteAt(i + 1))) {
++i;
code = code * 8 + digitValue((char) input.byteAt(i));
}
// TODO: Check that 0 <= code && code <= 0xFF.
result[pos++] = (byte)code;
} else {
switch (c) {
case 'a' : result[pos++] = 0x07; break;
case 'b' : result[pos++] = '\b'; break;
case 'f' : result[pos++] = '\f'; break;
case 'n' : result[pos++] = '\n'; break;
case 'r' : result[pos++] = '\r'; break;
case 't' : result[pos++] = '\t'; break;
case 'v' : result[pos++] = 0x0b; break;
case '\\': result[pos++] = '\\'; break;
case '\'': result[pos++] = '\''; break;
case '"' : result[pos++] = '\"'; break;
case 'x':
// hex escape
int code = 0;
if (i + 1 < input.size() && isHex((char) input.byteAt(i + 1))) {
++i;
code = digitValue((char) input.byteAt(i));
} else {
throw new IllegalArgumentException(
"Invalid escape sequence: '\\x' with no digits");
}
if (i + 1 < input.size() && isHex((char) input.byteAt(i + 1))) {
++i;
code = code * 16 + digitValue((char) input.byteAt(i));
}
result[pos++] = (byte)code;
break;
default:
throw new IllegalArgumentException(
"Invalid escape sequence: '\\" + (char)c + '\'');
}
}
} else {
throw new IllegalArgumentException(
"Invalid escape sequence: '\\' at end of string.");
}
} else {
result[pos++] = c;
}
}
return ByteString.copyFrom(result, 0, pos);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy