All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.googlecode.protobuf.format.XmlFormat Maven / Gradle / Ivy

Go to download

Provide serialization and de-serialization of different formats based on Google’s protobuf Message. Enables overriding the default (byte array) output to text based formats such as XML, JSON and HTML.

The newest version!
package com.googlecode.protobuf.format;
/* 
    Copyright (c) 2009, Orbitz World Wide
    All rights reserved.

    Redistribution and use in source and binary forms, with or without modification, 
    are permitted provided that the following conditions are met:

        * Redistributions of source code must retain the above copyright notice, 
          this list of conditions and the following disclaimer.
        * Redistributions in binary form must reproduce the above copyright notice, 
          this list of conditions and the following disclaimer in the documentation 
          and/or other materials provided with the distribution.
        * Neither the name of the Orbitz World Wide nor the names of its contributors 
          may be used to endorse or promote products derived from this software 
          without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


import java.io.IOException;
import java.math.BigInteger;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.nio.CharBuffer;

import com.google.protobuf.ByteString;
import com.google.protobuf.Descriptors;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.Message;
import com.google.protobuf.UnknownFieldSet;
import com.google.protobuf.Descriptors.EnumValueDescriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
import static com.googlecode.protobuf.format.util.TextUtils.*;

/**
 * Provide ascii text parsing and formatting support for proto2 instances. The implementation
 * largely follows google/protobuf/text_format.cc.
 * 

* (c) 2009-10 Orbitz World Wide. All Rights Reserved. * * @author [email protected] Eliran Bivas * @author [email protected] Alex Antonov *

* Based on the original code by: * @author [email protected] Wenbo Zhu * @author [email protected] Kenton Varda */ public final class XmlFormat extends AbstractCharBasedFormatter { /** * Outputs a textual representation of the Protocol Message supplied into the parameter output. * (This representation is the new version of the classic "ProtocolPrinter" output from the * original Protocol Buffer system) */ public void print(final Message message, Appendable output) throws IOException { XmlGenerator generator = new XmlGenerator(output); final String messageName = message.getDescriptorForType().getName(); generator.print("<"); generator.print(messageName); generator.print(">"); print(message, generator); generator.print(""); } /** * Outputs a textual representation of {@code fields} to {@code output}. */ public void print(final UnknownFieldSet fields, Appendable output) throws IOException { XmlGenerator generator = new XmlGenerator(output); generator.print(""); printUnknownFields(fields, generator); generator.print(""); } private void print(Message message, XmlGenerator generator) throws IOException { for (Map.Entry field : message.getAllFields().entrySet()) { printField(field.getKey(), field.getValue(), generator); } printUnknownFields(message.getUnknownFields(), generator); } public void printField(FieldDescriptor field, Object value, XmlGenerator generator) throws IOException { if (field.isRepeated()) { // Repeated field. Print each element. for (Object element : (List) value) { printSingleField(field, element, generator); } } else { printSingleField(field, value, generator); } } private void printSingleField(FieldDescriptor field, Object value, XmlGenerator generator) throws IOException { if (field.isExtension()) { generator.print(""); } else { generator.print("<"); if (field.getType() == FieldDescriptor.Type.GROUP) { // Groups must be serialized with their original capitalization. generator.print(field.getMessageType().getName()); } else { generator.print(field.getName()); } generator.print(">"); } printFieldValue(field, value, generator); if (!field.isExtension()) { generator.print(""); } else { generator.print(""); } } private void printFieldValue(FieldDescriptor field, Object value, XmlGenerator generator) throws IOException { switch (field.getType()) { case INT32: case INT64: case SINT32: case SINT64: case SFIXED32: case SFIXED64: case FLOAT: case DOUBLE: case BOOL: // Good old toString() does what we want for these types. generator.print(value.toString()); break; case UINT32: case FIXED32: generator.print(unsignedToString((Integer) value)); break; case UINT64: case FIXED64: generator.print(unsignedToString((Long) value)); break; case STRING: generator.print(escapeText((String) value)); break; case BYTES: { generator.print(escapeBytes((ByteString) value)); break; } case ENUM: { generator.print(((EnumValueDescriptor) value).getName()); break; } case MESSAGE: case GROUP: print((Message) value, generator); break; } } private void printUnknownFields(UnknownFieldSet unknownFields, XmlGenerator generator) throws IOException { for (Map.Entry entry : unknownFields.asMap().entrySet()) { UnknownFieldSet.Field field = entry.getValue(); final String key = entry.getKey().toString(); for (long value : field.getVarintList()) { printUnknownField(key, unsignedToString(value), generator); } for (int value : field.getFixed32List()) { printUnknownField(key, String.format((Locale) null, "0x%08x", value), generator); } for (long value : field.getFixed64List()) { printUnknownField(key, String.format((Locale) null, "0x%016x", value), generator); } for (ByteString value : field.getLengthDelimitedList()) { printUnknownField(key, escapeBytes(value), generator); } for (UnknownFieldSet value : field.getGroupList()) { generator.print(""); printUnknownFields(value, generator); generator.print(""); } } } private void printUnknownField(CharSequence fieldKey, CharSequence fieldValue, XmlGenerator generator) throws IOException { generator.print(""); generator.print(fieldValue); generator.print(""); } /** * An inner class for writing text to the output stream. */ private static final class XmlGenerator { Appendable output; public XmlGenerator(Appendable output) { this.output = output; } /** * Print text to the output stream. */ public void print(CharSequence text) throws IOException { int size = text.length(); int pos = 0; write(text.subSequence(pos, size), size - pos); } private void write(CharSequence data, int size) throws IOException { if (size == 0) { return; } output.append(data); } } // ================================================================= // Parsing /** * Represents a stream of tokens parsed from a {@code String}. *

*

* The Java standard library provides many classes that you might think would be useful for * implementing this, but aren't. For example: *

*

    *
  • {@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something * that would get us close to what we want -- except for one fatal flaw: It automatically * un-escapes strings using Java escape sequences, which do not include all the escape sequences * we need to support (e.g. '\x'). *
  • {@code java.util.Scanner}: This seems like a great way at least to parse regular * expressions out of a stream (so we wouldn't have to load the entire input into a single * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with some * delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and ":"), {@code * Scanner} would recognize it only as a single token. Furthermore, {@code Scanner} provides no * way to inspect the contents of delimiters, making it impossible to keep track of line and * column numbers. *
*

*

* Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least. * Unfortunately, this implies that we need to have the entire input in one contiguous string. */ private static final class Tokenizer { private final CharSequence text; private final Matcher matcher; private String currentToken; // The character index within this.text at which the current token begins. private int pos = 0; // The line and column numbers of the current token. private int line = 0; private int column = 0; // The line and column numbers of the previous token (allows throwing // errors *after* consuming). private int previousLine = 0; private int previousColumn = 0; // We use possesive quantifiers (*+ and ++) because otherwise the Java // regex matcher has stack overflows on large inputs. private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); private static final Pattern TOKEN = Pattern.compile( "extension|" + "[a-zA-Z_\\s;@][0-9a-zA-Z_\\s;@+-]*+|" + // an identifier with special handling for 'extension' "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number " 0 ? currentToken.charAt(0) : '\0'; if ((quote != '\"') && (quote != '\'')) { throw parseException("Expected string."); } if ((currentToken.length() < 2) || (currentToken.charAt(currentToken.length() - 1) != quote)) { throw parseException("String missing ending quote."); }*/ try { String escaped = currentToken; //.substring(1, currentToken.length() - 1); ByteString result = unescapeBytes(escaped); nextToken(); return result; } catch (InvalidEscapeSequence e) { throw parseException(e.getMessage()); } } /** * Returns a {@link ParseException} with the current line and column numbers in the * description, suitable for throwing. */ public ParseException parseException(String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException((line + 1) + ":" + (column + 1) + ": " + description); } /** * Returns a {@link ParseException} with the line and column numbers of the previous token * in the description, suitable for throwing. */ public ParseException parseExceptionPreviousToken(String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); } /** * Constructs an appropriate {@link ParseException} for the given {@code * NumberFormatException} when trying to parse an integer. */ private ParseException integerParseException(NumberFormatException e) { return parseException("Couldn't parse integer: " + e.getMessage()); } /** * Constructs an appropriate {@link ParseException} for the given {@code * NumberFormatException} when trying to parse a float or double. */ private ParseException floatParseException(NumberFormatException e) { return parseException("Couldn't parse number: " + e.getMessage()); } } /** * Parse a text-format message from {@code input} and merge the contents into {@code builder}. * Extensions will be recognized if they are registered in {@code extensionRegistry}. */ public void merge(CharSequence input, ExtensionRegistry extensionRegistry, Message.Builder builder) throws ParseException { Tokenizer tokenizer = new Tokenizer(input); // Need to first consume the outer object name element consumeOpeningElement(tokenizer); while (!tokenizer.tryConsume(""); return openingElement; } private void consumeClosingElement(Tokenizer tokenizer) throws ParseException { tokenizer.tryConsume(""); } private String consumeExtensionIdentifier(Tokenizer tokenizer) throws ParseException { tokenizer.consume("type"); tokenizer.consume("="); return tokenizer.consumeIdentifier(); } /** * Parse a single field from {@code tokenizer} and merge it into {@code builder}. If a ',' is * detected after the field ends, the next field will be parsed automatically */ private void mergeField(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder) throws ParseException { FieldDescriptor field; Descriptors.Descriptor type = builder.getDescriptorForType(); ExtensionRegistry.ExtensionInfo extension = null; tokenizer.consume("<"); // Needs to happen when the object starts. if (tokenizer.tryConsume("extension")) { // An extension. StringBuilder name = new StringBuilder(consumeExtensionIdentifier(tokenizer)); while (tokenizer.tryConsume(".")) { name.append("."); name.append(tokenizer.consumeIdentifier()); } extension = extensionRegistry.findExtensionByName(name.toString()); if (extension == null) { throw tokenizer.parseExceptionPreviousToken("Extension \"" + name + "\" not found in the ExtensionRegistry."); } else if (extension.descriptor.getContainingType() != type) { throw tokenizer.parseExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" + type.getFullName() + "\"."); } field = extension.descriptor; } else { String name = tokenizer.consumeIdentifier(); field = type.findFieldByName(name); // Group names are expected to be capitalized as they appear in the // .proto file, which actually matches their type names, not their field // names. if (field == null) { // Explicitly specify US locale so that this code does not break when // executing in Turkey. String lowerName = name.toLowerCase(Locale.US); field = type.findFieldByName(lowerName); // If the case-insensitive match worked but the field is NOT a group, if ((field != null) && (field.getType() != FieldDescriptor.Type.GROUP)) { field = null; } } // Again, special-case group names as described above. if ((field != null) && (field.getType() == FieldDescriptor.Type.GROUP) && !field.getMessageType().getName().equals(name)) { field = null; } if (field == null) { throw tokenizer.parseExceptionPreviousToken("Message type \"" + type.getFullName() + "\" has no field named \"" + name + "\"."); } } tokenizer.consume(">"); Object value = handleValue(tokenizer, extensionRegistry, builder, field, extension); if (field.isRepeated()) { builder.addRepeatedField(field, value); } else { builder.setField(field, value); } // Need to consume the closing field element - consumeClosingElement(tokenizer); } private Object handleValue(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder, FieldDescriptor field, ExtensionRegistry.ExtensionInfo extension) throws ParseException { Object value = null; if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { value = handleObject(tokenizer, extensionRegistry, builder, field, extension); } else { value = handlePrimitive(tokenizer, field); } return value; } private Object handlePrimitive(Tokenizer tokenizer, FieldDescriptor field) throws ParseException { Object value = null; switch (field.getType()) { case INT32: case SINT32: case SFIXED32: value = tokenizer.consumeInt32(); break; case INT64: case SINT64: case SFIXED64: value = tokenizer.consumeInt64(); break; case UINT32: case FIXED32: value = tokenizer.consumeUInt32(); break; case UINT64: case FIXED64: value = tokenizer.consumeUInt64(); break; case FLOAT: value = tokenizer.consumeFloat(); break; case DOUBLE: value = tokenizer.consumeDouble(); break; case BOOL: value = tokenizer.consumeBoolean(); break; case STRING: value = tokenizer.consumeString(); break; case BYTES: value = tokenizer.consumeByteString(); break; case ENUM: { Descriptors.EnumDescriptor enumType = field.getEnumType(); if (tokenizer.lookingAtInteger()) { int number = tokenizer.consumeInt32(); value = enumType.findValueByNumber(number); if (value == null) { throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() + "\" has no value with number " + number + "."); } } else { String id = tokenizer.consumeIdentifier(); value = enumType.findValueByName(id); if (value == null) { throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() + "\" has no value named \"" + id + "\"."); } } break; } case MESSAGE: case GROUP: throw new RuntimeException("Can't get here."); } return value; } private Object handleObject(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder, FieldDescriptor field, ExtensionRegistry.ExtensionInfo extension) throws ParseException { Object value; Message.Builder subBuilder; if (extension == null) { subBuilder = builder.newBuilderForField(field); } else { subBuilder = extension.defaultInstance.newBuilderForType(); } //tokenizer.consume("<"); String endToken = "= 0x20) { builder.append((char) b); } else { builder.append('\\'); builder.append((char) ('0' + ((b >>> 6) & 3))); builder.append((char) ('0' + ((b >>> 3) & 7))); builder.append((char) ('0' + (b & 7))); } break; } } return builder.toString(); } /** * Un-escape a byte sequence as escaped using * {@link #escapeBytes(com.googlecode.protobuf.format.ByteString)}. Two-digit hex escapes (starting with * "\x") are also recognized. */ static ByteString unescapeBytes(CharSequence input) throws InvalidEscapeSequence { byte[] result = new byte[input.length()]; int pos = 0; for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (c == '\\') { if (i + 1 < input.length()) { ++i; c = input.charAt(i); if (isOctal(c)) { // Octal escape. int code = digitValue(c); if ((i + 1 < input.length()) && isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + digitValue(input.charAt(i)); } if ((i + 1 < input.length()) && isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + digitValue(input.charAt(i)); } result[pos++] = (byte) code; } else { switch (c) { case 'a': result[pos++] = 0x07; break; case 'b': result[pos++] = '\b'; break; case 'f': result[pos++] = '\f'; break; case 'n': result[pos++] = '\n'; break; case 'r': result[pos++] = '\r'; break; case 't': result[pos++] = '\t'; break; case 'v': result[pos++] = 0x0b; break; case '\\': result[pos++] = '\\'; break; case '\'': result[pos++] = '\''; break; case '"': result[pos++] = '\"'; break; case 'x': // hex escape int code = 0; if ((i + 1 < input.length()) && isHex(input.charAt(i + 1))) { ++i; code = digitValue(input.charAt(i)); } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\x' with no digits"); } if ((i + 1 < input.length()) && isHex(input.charAt(i + 1))) { ++i; code = code * 16 + digitValue(input.charAt(i)); } result[pos++] = (byte) code; break; default: throw new InvalidEscapeSequence("Invalid escape sequence: '\\" + c + "'"); } } } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\' at end of string."); } } else { result[pos++] = (byte) c; } } return ByteString.copyFrom(result, 0, pos); } /** * Thrown by {@link XmlFormat#unescapeBytes} and {@link XmlFormat#unescapeText} when an * invalid escape sequence is seen. */ static class InvalidEscapeSequence extends IOException { private static final long serialVersionUID = 1L; public InvalidEscapeSequence(String description) { super(description); } } /** * Like {@link #escapeBytes(com.googlecode.protobuf.format.ByteString)}, but escapes a text string. * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped individually as a * 3-digit octal escape. Yes, it's weird. */ static String escapeText(String input) { return escapeBytes(ByteString.copyFromUtf8(input)); } /** * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes * (starting with "\x") are also recognized. */ static String unescapeText(String input) throws InvalidEscapeSequence { return unescapeBytes(input).toStringUtf8(); } /** * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. */ static int parseInt32(String text) throws NumberFormatException { return (int) parseInteger(text, true, false); } /** * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code int} * when returned since Java has no unsigned integer type. */ static int parseUInt32(String text) throws NumberFormatException { return (int) parseInteger(text, false, false); } /** * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. */ static long parseInt64(String text) throws NumberFormatException { return parseInteger(text, true, true); } /** * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code long} * when returned since Java has no unsigned long type. */ static long parseUInt64(String text) throws NumberFormatException { return parseInteger(text, false, true); } private static long parseInteger(String text, boolean isSigned, boolean isLong) throws NumberFormatException { int pos = 0; boolean negative = false; if (text.startsWith("-", pos)) { if (!isSigned) { throw new NumberFormatException("Number must be positive: " + text); } ++pos; negative = true; } int radix = 10; if (text.startsWith("0x", pos)) { pos += 2; radix = 16; } else if (text.startsWith("0", pos)) { radix = 8; } String numberText = text.substring(pos); long result = 0; if (numberText.length() < 16) { // Can safely assume no overflow. result = Long.parseLong(numberText, radix); if (negative) { result = -result; } // Check bounds. // No need to check for 64-bit numbers since they'd have to be 16 // chars // or longer to overflow. if (!isLong) { if (isSigned) { if ((result > Integer.MAX_VALUE) || (result < Integer.MIN_VALUE)) { throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text); } } else { if ((result >= (1L << 32)) || (result < 0)) { throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text); } } } } else { BigInteger bigValue = new BigInteger(numberText, radix); if (negative) { bigValue = bigValue.negate(); } // Check bounds. if (!isLong) { if (isSigned) { if (bigValue.bitLength() > 31) { throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 32) { throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text); } } } else { if (isSigned) { if (bigValue.bitLength() > 63) { throw new NumberFormatException("Number out of range for 64-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 64) { throw new NumberFormatException("Number out of range for 64-bit unsigned integer: " + text); } } } result = bigValue.longValue(); } return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy