All downloads are free. Search and download functionality uses the official Maven repository.

com.googlecode.protobuf.format.JavaPropsFormat Maven / Gradle / Ivy

Go to download

Provide serialization and de-serialization of different formats based on Google’s protobuf Message. Enables overriding the default (byte array) output to text based formats such as XML, JSON and HTML.

The newest version!
package com.googlecode.protobuf.format;

import java.io.IOException;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.protobuf.ByteString;
import com.google.protobuf.Descriptors;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.Message;
import com.google.protobuf.UnknownFieldSet;
import static com.googlecode.protobuf.format.util.TextUtils.*;

/**
 * Provide ascii text parsing and formatting support for proto2 instances. The implementation
 * largely follows google/protobuf/text_format.cc.
 * 

* (c) 2009-10 Orbitz World Wide. All Rights Reserved. * * @author [email protected] Alex Antonov *

* Based on the original code by: * @author [email protected] Wenbo Zhu * @author [email protected] Kenton Varda */ public class JavaPropsFormat extends AbstractCharBasedFormatter { /** * Outputs a textual representation of the Protocol Message supplied into * the parameter output. (This representation is the new version of the * classic "ProtocolPrinter" output from the original Protocol Buffer system) */ public void print(final Message message, Appendable output) throws IOException { final JavaPropsGenerator generator = new JavaPropsGenerator(output); print(message, generator); } /** Outputs a textual representation of {@code fields} to {@code output}. */ public void print(final UnknownFieldSet fields, Appendable output) throws IOException { final JavaPropsGenerator generator = new JavaPropsGenerator(output); printUnknownFields(fields, generator); } private void print(final Message message, final JavaPropsGenerator generator) throws IOException { for (final Map.Entry field : message.getAllFields().entrySet()) { printField(field.getKey(), field.getValue(), generator); } printUnknownFields(message.getUnknownFields(), generator); } public void printField(final Descriptors.FieldDescriptor field, final Object value, final Appendable output) throws IOException { final JavaPropsGenerator generator = new JavaPropsGenerator(output); printField(field, value, generator); } public String printFieldToString(final Descriptors.FieldDescriptor field, final Object value) { try { final StringBuilder text = new StringBuilder(); printField(field, value, text); return text.toString(); } catch (IOException e) { throw new RuntimeException( "Writing to a StringBuilder threw an IOException (should never " + "happen).", e); } } private void printField(final Descriptors.FieldDescriptor field, final Object value, final JavaPropsGenerator generator) throws IOException { if (field.isRepeated()) { // Repeated field. Print each element. 
List list = (List) value; for (int i = 0; i < list.size(); i++) { printSingleField(field, list.get(i), i, generator); } } else { printSingleField(field, value, null, generator); } } private void printSingleField(final Descriptors.FieldDescriptor field, final Object value, final Integer collectionIndex, final JavaPropsGenerator generator) throws IOException { if (field.isExtension()) { generator.print("["); // We special-case MessageSet elements for compatibility with proto1. if (field.getContainingType().getOptions().getMessageSetWireFormat() && (field.getType() == Descriptors.FieldDescriptor.Type.MESSAGE) && (field.isOptional()) // object equality && (field.getExtensionScope() == field.getMessageType())) { generator.print(field.getMessageType().getFullName()); } else { generator.print(field.getFullName()); } generator.print("]"); } else { if (field.getType() != Descriptors.FieldDescriptor.Type.GROUP && field.getType() != Descriptors.FieldDescriptor.Type.MESSAGE) { // The field is a primitive value, no need to unwind the path. generator.print(createFieldNameCollectionIndex(field.getName(), collectionIndex)); } } if (field.getType() == Descriptors.FieldDescriptor.Type.GROUP) { // Groups must be serialized with their original capitalization. 
generator.indent(createFieldNameCollectionIndex(field.getMessageType().getName(), collectionIndex)); } else if (field.getType() == Descriptors.FieldDescriptor.Type.MESSAGE) { //generator.print(" {\n"); generator.indent(createFieldNameCollectionIndex(field.getName(), collectionIndex)); } else { generator.print("="); } printFieldValue(field, value, generator); if (field.getType() == Descriptors.FieldDescriptor.Type.MESSAGE) { generator.outdent(createFieldNameCollectionIndex(field.getName(), collectionIndex)); //generator.print(""); } else if (field.getType() == Descriptors.FieldDescriptor.Type.GROUP) { generator.outdent(createFieldNameCollectionIndex(field.getMessageType().getName(), collectionIndex)); //generator.print(""); } else { generator.print("\n"); } } private String createFieldNameCollectionIndex(final String fieldName, final Integer collectionIndex) throws IOException{ if (collectionIndex != null) { return fieldName + "[" + collectionIndex.toString() + "]"; } else { return fieldName; } } private void printFieldValue(final Descriptors.FieldDescriptor field, final Object value, final JavaPropsGenerator generator) throws IOException { switch (field.getType()) { case INT32: case INT64: case SINT32: case SINT64: case SFIXED32: case SFIXED64: case FLOAT: case DOUBLE: case BOOL: // Good old toString() does what we want for these types. 
generator.print(value.toString()); break; case UINT32: case FIXED32: generator.print(unsignedToString((Integer) value)); break; case UINT64: case FIXED64: generator.print(unsignedToString((Long) value)); break; case STRING: generator.print("\""); generator.print(escapeText((String) value)); generator.print("\""); break; case BYTES: generator.print("\""); generator.print(escapeBytes((ByteString) value)); generator.print("\""); break; case ENUM: generator.print(((Descriptors.EnumValueDescriptor) value).getName()); break; case MESSAGE: case GROUP: print((Message) value, generator); break; } } private void printUnknownFields(final UnknownFieldSet unknownFields, final JavaPropsGenerator generator) throws IOException { for (final Map.Entry entry : unknownFields.asMap().entrySet()) { final UnknownFieldSet.Field field = entry.getValue(); for (final long value : field.getVarintList()) { generator.print(entry.getKey().toString()); generator.print("="); generator.print(unsignedToString(value)); generator.print("\n"); } for (final int value : field.getFixed32List()) { generator.print(entry.getKey().toString()); generator.print("="); generator.print(String.format((Locale) null, "0x%08x", value)); generator.print("\n"); } for (final long value : field.getFixed64List()) { generator.print(entry.getKey().toString()); generator.print("="); generator.print(String.format((Locale) null, "0x%016x", value)); generator.print("\n"); } for (final ByteString value : field.getLengthDelimitedList()) { generator.print(entry.getKey().toString()); generator.print("=\""); generator.print(escapeBytes(value)); generator.print("\"\n"); } for (final UnknownFieldSet value : field.getGroupList()) { //generator.print(entry.getKey().toString()); //generator.print("={\n"); generator.indent(entry.getKey().toString()); printUnknownFields(value, generator); generator.outdent(entry.getKey().toString()); //generator.print("}\n"); generator.print("\n"); } } } /** * An inner class for writing text to the output 
stream. */ private static final class JavaPropsGenerator { private Appendable output; private boolean atStartOfLine = true; private final StringBuilder indent = new StringBuilder(); private JavaPropsGenerator(final Appendable output) { this.output = output; } /** * Indent text by two spaces. After calling Indent(), two spaces will be * inserted at the beginning of each line of text. Indent() may be called * multiple times to produce deeper indents. */ public void indent(String objectPath) { indent.append(objectPath); indent.append("."); //atStartOfLine = true; } /** * Reduces the current indent level by two spaces, or crashes if the indent * level is zero. */ public void outdent(String objectPath) { final int length = indent.length(); final int objectPathLength = objectPath.length() + 1; if (length == 0) { throw new IllegalArgumentException( " Outdent() without matching Indent()."); } indent.delete(length - objectPathLength, length); } /** * Print text to the output stream. */ public void print(final CharSequence text) throws IOException { final int size = text.length(); int pos = 0; for (int i = 0; i < size; i++) { if (text.charAt(i) == '\n') { write(text.subSequence(pos, size), i - pos + 1); pos = i + 1; atStartOfLine = true; } } write(text.subSequence(pos, size), size - pos); } private void write(final CharSequence data, final int size) throws IOException { if (size == 0) { return; } if (atStartOfLine) { atStartOfLine = false; output.append(indent); } output.append(data); } } // ================================================================= // Parsing /** * Represents a stream of tokens parsed from a {@code String}. * *

The Java standard library provides many classes that you might think * would be useful for implementing this, but aren't. For example: * *

    *
  • {@code java.io.StreamTokenizer}: This almost does what we want -- or, * at least, something that would get us close to what we want -- except * for one fatal flaw: It automatically un-escapes strings using Java * escape sequences, which do not include all the escape sequences we * need to support (e.g. '\x'). *
  • {@code java.util.Scanner}: This seems like a great way at least to * parse regular expressions out of a stream (so we wouldn't have to load * the entire input into a single string before parsing). Sadly, * {@code Scanner} requires that tokens be delimited with some delimiter. * Thus, although the text "foo:" should parse to two tokens ("foo" and * ":"), {@code Scanner} would recognize it only as a single token. * Furthermore, {@code Scanner} provides no way to inspect the contents * of delimiters, making it impossible to keep track of line and column * numbers. *
* *

Luckily, Java's regular expression support does manage to be useful to * us. (Barely: We need {@code Matcher.usePattern()}, which is new in * Java 1.5.) So, we can use that, at least. Unfortunately, this implies * that we need to have the entire input in one contiguous string. */ private static final class Tokenizer { private final CharSequence text; private final Matcher matcher; private String currentToken; // The character index within this.text at which the current token begins. private int pos = 0; // The line and column numbers of the current token. private int line = 0; private int column = 0; // The line and column numbers of the previous token (allows throwing // errors *after* consuming). private int previousLine = 0; private int previousColumn = 0; // We use possesive quantifiers (*+ and ++) because otherwise the Java // regex matcher has stack overflows on large inputs. private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); private static final Pattern TOKEN = Pattern.compile( "[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string Pattern.MULTILINE); private static final Pattern DOUBLE_INFINITY = Pattern.compile( "-?inf(inity)?", Pattern.CASE_INSENSITIVE); private static final Pattern FLOAT_INFINITY = Pattern.compile( "-?inf(inity)?f?", Pattern.CASE_INSENSITIVE); private static final Pattern FLOAT_NAN = Pattern.compile( "nanf?", Pattern.CASE_INSENSITIVE); /** Construct a tokenizer that parses tokens from the given text. */ private Tokenizer(final CharSequence text) { this.text = text; this.matcher = WHITESPACE.matcher(text); skipWhitespace(); nextToken(); } /** Are we at the end of the input? */ public boolean atEnd() { return currentToken.length() == 0; } /** Advance to the next token. 
*/ public void nextToken() { previousLine = line; previousColumn = column; // Advance the line counter to the current position. while (pos < matcher.regionStart()) { if (text.charAt(pos) == '\n') { ++line; column = 0; } else { ++column; } ++pos; } // Match the next token. if (matcher.regionStart() == matcher.regionEnd()) { // EOF currentToken = ""; } else { matcher.usePattern(TOKEN); if (matcher.lookingAt()) { currentToken = matcher.group(); matcher.region(matcher.end(), matcher.regionEnd()); } else { // Take one character. currentToken = String.valueOf(text.charAt(pos)); matcher.region(pos + 1, matcher.regionEnd()); } skipWhitespace(); } } /** * Skip over any whitespace so that the matcher region starts at the next * token. */ private void skipWhitespace() { matcher.usePattern(WHITESPACE); if (matcher.lookingAt()) { matcher.region(matcher.end(), matcher.regionEnd()); } } /** * If the next token exactly matches {@code token}, consume it and return * {@code true}. Otherwise, return {@code false} without doing anything. */ public boolean tryConsume(final String token) { if (currentToken.equals(token)) { nextToken(); return true; } else { return false; } } /** * If the next token exactly matches {@code token}, consume it. Otherwise, * throw a {@link ParseException}. */ public void consume(final String token) throws ParseException { if (!tryConsume(token)) { throw parseException("Expected \"" + token + "\"."); } } /** * Returns {@code true} if the next token is an integer, but does * not consume it. */ public boolean lookingAtInteger() { if (currentToken.length() == 0) { return false; } final char c = currentToken.charAt(0); return ('0' <= c && c <= '9') || c == '-' || c == '+'; } /** * If the next token is an identifier, consume it and return its value. * Otherwise, throw a {@link ParseException}. 
*/ public String consumeIdentifier() throws ParseException { for (int i = 0; i < currentToken.length(); i++) { final char c = currentToken.charAt(i); if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || (c == '_') //|| (c == '.') ) { // OK } else { throw parseException("Expected identifier."); } } final String result = currentToken; nextToken(); return result; } /** * If the next token is a 32-bit signed integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ public int consumeInt32() throws ParseException { try { final int result = parseInt32(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 32-bit unsigned integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ public int consumeUInt32() throws ParseException { try { final int result = parseUInt32(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 64-bit signed integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ public long consumeInt64() throws ParseException { try { final long result = parseInt64(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 64-bit unsigned integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ public long consumeUInt64() throws ParseException { try { final long result = parseUInt64(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a double, consume it and return its value. * Otherwise, throw a {@link ParseException}. 
*/ public double consumeDouble() throws ParseException { // We need to parse infinity and nan separately because // Double.parseDouble() does not accept "inf", "infinity", or "nan". if (DOUBLE_INFINITY.matcher(currentToken).matches()) { final boolean negative = currentToken.startsWith("-"); nextToken(); return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; } if (currentToken.equalsIgnoreCase("nan")) { nextToken(); return Double.NaN; } try { final double result = Double.parseDouble(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw floatParseException(e); } } /** * If the next token is a float, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ public float consumeFloat() throws ParseException { // We need to parse infinity and nan separately because // Float.parseFloat() does not accept "inf", "infinity", or "nan". if (FLOAT_INFINITY.matcher(currentToken).matches()) { final boolean negative = currentToken.startsWith("-"); nextToken(); return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; } if (FLOAT_NAN.matcher(currentToken).matches()) { nextToken(); return Float.NaN; } try { final float result = Float.parseFloat(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw floatParseException(e); } } /** * If the next token is a boolean, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ public boolean consumeBoolean() throws ParseException { if (currentToken.equals("true")) { nextToken(); return true; } else if (currentToken.equals("false")) { nextToken(); return false; } else { throw parseException("Expected \"true\" or \"false\"."); } } /** * If the next token is a string, consume it and return its (unescaped) * value. Otherwise, throw a {@link ParseException}. 
*/ public String consumeString() throws ParseException { return consumeByteString().toStringUtf8(); } /** * If the next token is a string, consume it, unescape it as a * {@link ByteString}, and return it. Otherwise, throw a * {@link ParseException}. */ public ByteString consumeByteString() throws ParseException { List list = new ArrayList(); consumeByteString(list); while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { consumeByteString(list); } return ByteString.copyFrom(list); } /** * Like {@link #consumeByteString()} but adds each token of the string to * the given list. String literals (whether bytes or text) may come in * multiple adjacent tokens which are automatically concatenated, like in * C or Python. */ private void consumeByteString(List list) throws ParseException { final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; if (quote != '\"' && quote != '\'') { throw parseException("Expected string."); } if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) { throw parseException("String missing ending quote."); } try { final String escaped = currentToken.substring(1, currentToken.length() - 1); final ByteString result = unescapeBytes(escaped); nextToken(); list.add(result); } catch (InvalidEscapeSequenceException e) { throw parseException(e.getMessage()); } } /** * Returns a {@link ParseException} with the current line and column * numbers in the description, suitable for throwing. */ public ParseException parseException(final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException( (line + 1) + ":" + (column + 1) + ": " + description); } /** * Returns a {@link ParseException} with the line and column numbers of * the previous token in the description, suitable for throwing. 
*/ public ParseException parseExceptionPreviousToken( final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException( (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); } /** * Constructs an appropriate {@link ParseException} for the given * {@code NumberFormatException} when trying to parse an integer. */ private ParseException integerParseException( final NumberFormatException e) { return parseException("Couldn't parse integer: " + e.getMessage()); } /** * Constructs an appropriate {@link ParseException} for the given * {@code NumberFormatException} when trying to parse a float or double. */ private ParseException floatParseException(final NumberFormatException e) { return parseException("Couldn't parse number: " + e.getMessage()); } } /** Thrown when parsing an invalid text format message. */ public static class ParseException extends IOException { private static final long serialVersionUID = 3196188060225107702L; public ParseException(final String message) { super(message); } } /** * Parse a text-format message from {@code input} and merge the contents * into {@code builder}. Extensions will be recognized if they are * registered in {@code extensionRegistry}. */ public void merge(final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder) throws ParseException { final Tokenizer tokenizer = new Tokenizer(input); final Map subMessages = new HashMap(); while (!tokenizer.atEnd()) { mergeField(tokenizer, extensionRegistry, subMessages, builder); } } /** * Parse a single field from {@code tokenizer} and merge it into * {@code builder}. 
*/ private void mergeField(final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final Map subMessages, final Message.Builder builder) throws ParseException { Descriptors.FieldDescriptor field; final Descriptors.Descriptor type = builder.getDescriptorForType(); ExtensionRegistry.ExtensionInfo extension = null; if (tokenizer.tryConsume("[")) { // An extension. final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier()); while (tokenizer.tryConsume(".")) { name.append('.'); name.append(tokenizer.consumeIdentifier()); } extension = extensionRegistry.findExtensionByName(name.toString()); if (extension == null) { throw tokenizer.parseExceptionPreviousToken( "Extension \"" + name + "\" not found in the ExtensionRegistry."); } else if (extension.descriptor.getContainingType() != type) { throw tokenizer.parseExceptionPreviousToken( "Extension \"" + name + "\" does not extend message type \"" + type.getFullName() + "\"."); } tokenizer.consume("]"); field = extension.descriptor; } else { final String name = tokenizer.consumeIdentifier(); field = type.findFieldByName(name); // Group names are expected to be capitalized as they appear in the // .proto file, which actually matches their type names, not their field // names. if (field == null) { // Explicitly specify US locale so that this code does not break when // executing in Turkey. final String lowerName = name.toLowerCase(Locale.US); field = type.findFieldByName(lowerName); // If the case-insensitive match worked but the field is NOT a group, if (field != null && field.getType() != Descriptors.FieldDescriptor.Type.GROUP) { field = null; } } // Again, special-case group names as described above. 
if (field != null && field.getType() == Descriptors.FieldDescriptor.Type.GROUP && !field.getMessageType().getName().equals(name)) { field = null; } if (field == null) { throw tokenizer.parseExceptionPreviousToken( "Message type \"" + type.getFullName() + "\" has no field named \"" + name + "\"."); } } Object value = null; Integer collectionIndex = null; if (field.isRepeated()) { tokenizer.consume("["); collectionIndex = tokenizer.consumeInt32(); tokenizer.consume("]"); } if (field.getJavaType() == Descriptors.FieldDescriptor.JavaType.MESSAGE) { tokenizer.consume("."); //endToken = "}"; final Message.Builder subBuilder; if (extension == null) { subBuilder = builder.newBuilderForField(field); } else { subBuilder = extension.defaultInstance.newBuilderForType(); } final Message subMessage = subMessages.get(field.getFullName()); if (subMessage != null) { subBuilder.mergeFrom(subMessage); } mergeField(tokenizer, extensionRegistry, subMessages, subBuilder); value = subBuilder.buildPartial(); subMessages.put(field.getFullName(), (Message) value); } else { tokenizer.consume("="); switch (field.getType()) { case INT32: case SINT32: case SFIXED32: value = tokenizer.consumeInt32(); break; case INT64: case SINT64: case SFIXED64: value = tokenizer.consumeInt64(); break; case UINT32: case FIXED32: value = tokenizer.consumeUInt32(); break; case UINT64: case FIXED64: value = tokenizer.consumeUInt64(); break; case FLOAT: value = tokenizer.consumeFloat(); break; case DOUBLE: value = tokenizer.consumeDouble(); break; case BOOL: value = tokenizer.consumeBoolean(); break; case STRING: value = tokenizer.consumeString(); break; case BYTES: value = tokenizer.consumeByteString(); break; case ENUM: final Descriptors.EnumDescriptor enumType = field.getEnumType(); if (tokenizer.lookingAtInteger()) { final int number = tokenizer.consumeInt32(); value = enumType.findValueByNumber(number); if (value == null) { throw tokenizer.parseExceptionPreviousToken( "Enum type \"" + enumType.getFullName() + 
"\" has no value with number " + number + '.'); } } else { final String id = tokenizer.consumeIdentifier(); value = enumType.findValueByName(id); if (value == null) { throw tokenizer.parseExceptionPreviousToken( "Enum type \"" + enumType.getFullName() + "\" has no value named \"" + id + "\"."); } } break; case MESSAGE: case GROUP: throw new RuntimeException("Can't get here."); } } if (field.isRepeated()) { int collectionCount = builder.getRepeatedFieldCount(field) - 1; if (collectionCount < collectionIndex) { // Need to initialize the list. Apparently setRepeatedField does not initialize it :( builder.addRepeatedField(field, value); } else { builder.setRepeatedField(field, collectionIndex, value); } } else { builder.setField(field, value); } } // ================================================================= // Utility functions // // Some of these methods are package-private because Descriptors.java uses // them. /** * Escapes bytes in the format used in protocol buffer text format, which * is the same as the format used for C string literals. All bytes * that are not printable 7-bit ASCII characters are escaped, as well as * backslash, single-quote, and double-quote characters. Characters for * which no defined short-hand escape sequence is defined will be escaped * using 3-digit octal sequences. */ static String escapeBytes(final ByteString input) { final StringBuilder builder = new StringBuilder(input.size()); for (int i = 0; i < input.size(); i++) { final byte b = input.byteAt(i); switch (b) { // Java does not recognize \a or \v, apparently. 
case 0x07: builder.append("\\a" ); break; case '\b': builder.append("\\b" ); break; case '\f': builder.append("\\f" ); break; case '\n': builder.append("\\n" ); break; case '\r': builder.append("\\r" ); break; case '\t': builder.append("\\t" ); break; case 0x0b: builder.append("\\v" ); break; case '\\': builder.append("\\\\"); break; case '\'': builder.append("\\\'"); break; case '"' : builder.append("\\\""); break; default: if (b >= 0x20) { builder.append((char) b); } else { builder.append('\\'); builder.append((char) ('0' + ((b >>> 6) & 3))); builder.append((char) ('0' + ((b >>> 3) & 7))); builder.append((char) ('0' + (b & 7))); } break; } } return builder.toString(); } /** * Un-escape a byte sequence as escaped using * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with * "\x") are also recognized. */ static ByteString unescapeBytes(final CharSequence input) throws InvalidEscapeSequenceException { final byte[] result = new byte[input.length()]; int pos = 0; for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (c == '\\') { if (i + 1 < input.length()) { ++i; c = input.charAt(i); if (isOctal(c)) { // Octal escape. 
int code = digitValue(c); if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + digitValue(input.charAt(i)); } if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + digitValue(input.charAt(i)); } result[pos++] = (byte)code; } else { switch (c) { case 'a' : result[pos++] = 0x07; break; case 'b' : result[pos++] = '\b'; break; case 'f' : result[pos++] = '\f'; break; case 'n' : result[pos++] = '\n'; break; case 'r' : result[pos++] = '\r'; break; case 't' : result[pos++] = '\t'; break; case 'v' : result[pos++] = 0x0b; break; case '\\': result[pos++] = '\\'; break; case '\'': result[pos++] = '\''; break; case '"' : result[pos++] = '\"'; break; case 'x': // hex escape int code = 0; if (i + 1 < input.length() && isHex(input.charAt(i + 1))) { ++i; code = digitValue(input.charAt(i)); } else { throw new InvalidEscapeSequenceException( "Invalid escape sequence: '\\x' with no digits"); } if (i + 1 < input.length() && isHex(input.charAt(i + 1))) { ++i; code = code * 16 + digitValue(input.charAt(i)); } result[pos++] = (byte)code; break; default: throw new InvalidEscapeSequenceException( "Invalid escape sequence: '\\" + c + '\''); } } } else { throw new InvalidEscapeSequenceException( "Invalid escape sequence: '\\' at end of string."); } } else { result[pos++] = (byte)c; } } return ByteString.copyFrom(result, 0, pos); } /** * Thrown by {@link JavaPropsFormat#unescapeBytes(CharSequence)} and * {@link JavaPropsFormat#unescapeText(String)} when an invalid escape sequence is seen. */ static class InvalidEscapeSequenceException extends IOException { private static final long serialVersionUID = -8164033650142593304L; InvalidEscapeSequenceException(final String description) { super(description); } } /** * Like {@link #escapeBytes(ByteString)}, but escapes a text string. * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped * individually as a 3-digit octal escape. Yes, it's weird. 
*/ static String escapeText(final String input) { return escapeBytes(ByteString.copyFromUtf8(input)); } /** * Un-escape a text string as escaped using {@link #escapeText(String)}. * Two-digit hex escapes (starting with "\x") are also recognized. */ static String unescapeText(final String input) throws InvalidEscapeSequenceException { return unescapeBytes(input).toStringUtf8(); } /** * Parse a 32-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" * and "0" to signify hexidecimal and octal numbers, respectively. */ static int parseInt32(final String text) throws NumberFormatException { return (int) parseInteger(text, true, false); } /** * Parse a 32-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" * and "0" to signify hexidecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code int} when returned since Java has * no unsigned integer type. */ static int parseUInt32(final String text) throws NumberFormatException { return (int) parseInteger(text, false, false); } /** * Parse a 64-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" * and "0" to signify hexidecimal and octal numbers, respectively. */ static long parseInt64(final String text) throws NumberFormatException { return parseInteger(text, true, true); } /** * Parse a 64-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" * and "0" to signify hexidecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code long} when returned since Java has * no unsigned long type. 
*/ static long parseUInt64(final String text) throws NumberFormatException { return parseInteger(text, false, true); } private static long parseInteger(final String text, final boolean isSigned, final boolean isLong) throws NumberFormatException { int pos = 0; boolean negative = false; if (text.startsWith("-", pos)) { if (!isSigned) { throw new NumberFormatException("Number must be positive: " + text); } ++pos; negative = true; } int radix = 10; if (text.startsWith("0x", pos)) { pos += 2; radix = 16; } else if (text.startsWith("0", pos)) { radix = 8; } final String numberText = text.substring(pos); long result = 0; if (numberText.length() < 16) { // Can safely assume no overflow. result = Long.parseLong(numberText, radix); if (negative) { result = -result; } // Check bounds. // No need to check for 64-bit numbers since they'd have to be 16 chars // or longer to overflow. if (!isLong) { if (isSigned) { if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { throw new NumberFormatException( "Number out of range for 32-bit signed integer: " + text); } } else { if (result >= (1L << 32) || result < 0) { throw new NumberFormatException( "Number out of range for 32-bit unsigned integer: " + text); } } } } else { BigInteger bigValue = new BigInteger(numberText, radix); if (negative) { bigValue = bigValue.negate(); } // Check bounds. if (!isLong) { if (isSigned) { if (bigValue.bitLength() > 31) { throw new NumberFormatException( "Number out of range for 32-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 32) { throw new NumberFormatException( "Number out of range for 32-bit unsigned integer: " + text); } } } else { if (isSigned) { if (bigValue.bitLength() > 63) { throw new NumberFormatException( "Number out of range for 64-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 64) { throw new NumberFormatException( "Number out of range for 64-bit unsigned integer: " + text); } } } result = bigValue.longValue(); } return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy