
com.dyuproject.protostuff.parser.TextFormat Maven / Gradle / Ivy
The newest version!
//========================================================================
//Copyright 2007-2009 David Yu [email protected]
//------------------------------------------------------------------------
//Licensed under the Apache License, Version 2.0 (the "License");
//you may not use this file except in compliance with the License.
//You may obtain a copy of the License at
//http://www.apache.org/licenses/LICENSE-2.0
//Unless required by applicable law or agreed to in writing, software
//distributed under the License is distributed on an "AS IS" BASIS,
//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//See the License for the specific language governing permissions and
//limitations under the License.
//========================================================================
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.dyuproject.protostuff.parser;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
/**
* Provide ascii text parsing and formatting support for proto2 instances.
* The implementation largely follows google/protobuf/text_format.cc.
*
* @author [email protected] Wenbo Zhu
* @author [email protected] Kenton Varda
* @author David Yu
*/
public final class TextFormat {
private TextFormat() {
}
static final Charset UTF8 = Charset.forName("UTF-8"), ISO_8859_1 = Charset.forName("ISO-8859-1");
// =================================================================
// Utility functions
//
// Some of these methods are package-private because Descriptors.java uses
// them.
/**
* Escapes bytes in the format used in protocol buffer text format, which
* is the same as the format used for C string literals. All bytes
* that are not printable 7-bit ASCII characters are escaped, as well as
* backslash, single-quote, and double-quote characters. Characters for
* which no defined short-hand escape sequence is defined will be escaped
* using 3-digit octal sequences.
*/
static StringBuilder escapeBytes(ByteBuffer input) {
//input.flip();
int length = input.limit();
final StringBuilder builder = new StringBuilder(length);
for (int i = 0; i < length; i++) {
final byte b = input.get(i);
switch (b) {
// Java does not recognize \a or \v, apparently.
case 0x07: builder.append("\\007" ); break;
case '\b': builder.append("\\b" ); break;
case '\f': builder.append("\\f" ); break;
case '\n': builder.append("\\n" ); break;
case '\r': builder.append("\\r" ); break;
case '\t': builder.append("\\t" ); break;
case 0x0b: builder.append("\\013" ); break;
case '\\': builder.append("\\\\"); break;
case '\'': builder.append("\\\'"); break;
case '"' : builder.append("\\\""); break;
default:
if (b >= 0x20) {
builder.append((char) b);
} else {
builder.append('\\');
builder.append((char) ('0' + ((b >>> 6) & 3)));
builder.append((char) ('0' + ((b >>> 3) & 7)));
builder.append((char) ('0' + (b & 7)));
}
break;
}
}
return builder;
}
/**
* Un-escape a byte sequence as escaped using
* {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with
* "\x") are also recognized.
*/
static ByteBuffer unescapeBytes(final CharSequence input) {
int pos = 0, len = input.length();
final byte[] result = new byte[len];
ByteBuffer buffer = ByteBuffer.wrap(result);
for (int i = 0; i < len; i++) {
char c = input.charAt(i);
if (c == '\\') {
if (i + 1 < len) {
++i;
c = input.charAt(i);
if (isOctal(c)) {
// Octal escape.
int code = digitValue(c);
if (i + 1 < len && isOctal(input.charAt(i + 1))) {
++i;
code = code * 8 + digitValue(input.charAt(i));
}
if (i + 1 < len && isOctal(input.charAt(i + 1))) {
++i;
code = code * 8 + digitValue(input.charAt(i));
}
result[pos++] = (byte)code;
} else {
switch (c) {
case 'a' : result[pos++] = 0x07; break;
case 'b' : result[pos++] = '\b'; break;
case 'f' : result[pos++] = '\f'; break;
case 'n' : result[pos++] = '\n'; break;
case 'r' : result[pos++] = '\r'; break;
case 't' : result[pos++] = '\t'; break;
case 'v' : result[pos++] = 0x0b; break;
case '\\': result[pos++] = '\\'; break;
case '\'': result[pos++] = '\''; break;
case '"' : result[pos++] = '\"'; break;
case 'x':
// hex escape
int code = 0;
if (i + 1 < len && isHex(input.charAt(i + 1))) {
++i;
code = digitValue(input.charAt(i));
} else {
throw new InvalidEscapeSequenceException(
"Invalid escape sequence: '\\x' with no digits");
}
if (i + 1 < len && isHex(input.charAt(i + 1))) {
++i;
code = code * 16 + digitValue(input.charAt(i));
}
result[pos++] = (byte)code;
break;
default:
throw new InvalidEscapeSequenceException(
"Invalid escape sequence: '\\" + c + '\'');
}
}
} else {
throw new InvalidEscapeSequenceException(
"Invalid escape sequence: '\\' at end of string.");
}
} else {
result[pos++] = (byte)c;
}
}
buffer.limit(pos);
return buffer;
}
/**
* Thrown by {@link TextFormat#unescapeBytes} and
* {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
*/
static class InvalidEscapeSequenceException extends RuntimeException {
private static final long serialVersionUID = -8164033650142593305L;
InvalidEscapeSequenceException(final String description) {
super(description);
}
}
/**
* Like {@link #escapeBytes(ByteString)}, but escapes a text string.
* Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
* individually as a 3-digit octal escape. Yes, it's weird.
*/
static String escapeText(final String input) {
return escapeBytes(ByteBuffer.wrap(input.getBytes(ISO_8859_1))).toString();
}
/**
* Un-escape a text string as escaped using {@link #escapeText(String)}.
* Two-digit hex escapes (starting with "\x") are also recognized.
*/
static String unescapeText(String input) {
ByteBuffer buffer = unescapeBytes(input);
return new String(buffer.array(), buffer.position(), buffer.limit(), ISO_8859_1);
}
/** Is this an octal digit? */
private static boolean isOctal(final char c) {
return '0' <= c && c <= '7';
}
/** Is this a hex digit? */
private static boolean isHex(final char c) {
return ('0' <= c && c <= '9') ||
('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F');
}
/**
* Interpret a character as a digit (in any base up to 36) and return the
* numeric value. This is like {@code Character.digit()} but we don't accept
* non-ASCII digits.
*/
private static int digitValue(final char c) {
if ('0' <= c && c <= '9') {
return c - '0';
} else if ('a' <= c && c <= 'z') {
return c - 'a' + 10;
} else {
return c - 'A' + 10;
}
}
/**
* Parse a 32-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively.
*/
static int parseInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, true, false);
}
/**
* Parse a 32-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code int} when returned since Java has
* no unsigned integer type.
*/
static int parseUInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, false, false);
}
/**
* Parse a 64-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively.
*/
static long parseInt64(final String text) throws NumberFormatException {
return parseInteger(text, true, true);
}
/**
* Parse a 64-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
* and "0" to signify hexidecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code long} when returned since Java has
* no unsigned long type.
*/
static long parseUInt64(final String text) throws NumberFormatException {
return parseInteger(text, false, true);
}
private static long parseInteger(final String text,
final boolean isSigned,
final boolean isLong)
throws NumberFormatException {
int pos = 0;
boolean negative = false;
if (text.startsWith("-", pos)) {
if (!isSigned) {
throw new NumberFormatException("Number must be positive: " + text);
}
++pos;
negative = true;
}
int radix = 10;
if (text.startsWith("0x", pos)) {
pos += 2;
radix = 16;
} else if (text.startsWith("0", pos)) {
radix = 8;
}
final String numberText = text.substring(pos);
long result = 0;
if (numberText.length() < 16) {
// Can safely assume no overflow.
result = Long.parseLong(numberText, radix);
if (negative) {
result = -result;
}
// Check bounds.
// No need to check for 64-bit numbers since they'd have to be 16 chars
// or longer to overflow.
if (!isLong) {
if (isSigned) {
if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
throw new NumberFormatException(
"Number out of range for 32-bit signed integer: " + text);
}
} else {
if (result >= (1L << 32) || result < 0) {
throw new NumberFormatException(
"Number out of range for 32-bit unsigned integer: " + text);
}
}
}
} else {
BigInteger bigValue = new BigInteger(numberText, radix);
if (negative) {
bigValue = bigValue.negate();
}
// Check bounds.
if (!isLong) {
if (isSigned) {
if (bigValue.bitLength() > 31) {
throw new NumberFormatException(
"Number out of range for 32-bit signed integer: " + text);
}
} else {
if (bigValue.bitLength() > 32) {
throw new NumberFormatException(
"Number out of range for 32-bit unsigned integer: " + text);
}
}
} else {
if (isSigned) {
if (bigValue.bitLength() > 63) {
throw new NumberFormatException(
"Number out of range for 64-bit signed integer: " + text);
}
} else {
if (bigValue.bitLength() > 64) {
throw new NumberFormatException(
"Number out of range for 64-bit unsigned integer: " + text);
}
}
}
result = bigValue.longValue();
}
return result;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy