// es.rickyepoderi.wbxml.document.WbXmlEncoder Maven / Gradle / Ivy
//
// Go to download
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *    
 * Linking this library statically or dynamically with other modules 
 * is making a combined work based on this library. Thus, the terms and
 * conditions of the GNU General Public License cover the whole
 * combination.
 *    
 * As a special exception, the copyright holders of this library give 
 * you permission to link this library with independent modules to 
 * produce an executable, regardless of the license terms of these 
 * independent modules, and to copy and distribute the resulting 
 * executable under terms of your choice, provided that you also meet, 
 * for each linked independent module, the terms and conditions of the 
 * license of that module.  An independent module is a module which 
 * is not derived from or based on this library.  If you modify this 
 * library, you may extend this exception to your version of the 
 * library, but you are not obligated to do so.  If you do not wish 
 * to do so, delete this exception statement from your version.
 *
 * Project: github.com/rickyepoderi/wbxml-stream
 * 
 */
package es.rickyepoderi.wbxml.document;

import es.rickyepoderi.wbxml.definition.IanaCharset;
import es.rickyepoderi.wbxml.definition.WbXmlAttributeDef;
import es.rickyepoderi.wbxml.definition.WbXmlAttributeValueDef;
import es.rickyepoderi.wbxml.definition.WbXmlDefinition;
import es.rickyepoderi.wbxml.definition.WbXmlExtensionDef;
import es.rickyepoderi.wbxml.definition.WbXmlTagDef;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * The encoder class is the object to encode a Java WbXmlDocument object
 * into a real WBXML stream. The encoder is the symmetric class to the parser,
 * the methods are more or less the same but for encoding and writing instead of
 * parsing and reading.
 * 
 * The encoder follows the WBXML format specification explained in the 
 * Open Mobile Alliance document.
 * This class has different methods to encode all the objects in this
 * package although it is intended to only call main method encode().
 * 
 * The WBXML specification defines a string table (strtbl) that is 
 * written at the beginning of the document but, if used, it is filled
 * during the later processing of elements. So if the strtbl is used two
 * passes are needed (one to encode everything and determine the strtbl
 * contents, and the second to really write the stream). For that reason
 * the encoder has three types of processing:
 * 
 * 
 * <ul>
 * <li>StrtblType=NO. This way no strtbl is used, the document is directly
 * written to the final stream and if (for some reason) the strtbl is needed,
 * it throws an exception. So here only one pass is needed.</li>
 * <li>StrtblType=IF_NEEDED. Second and more conservative way of processing
 * the document, the strtbl is not used (no STR_T strings are used) but if for
 * some reason it is needed (missing tag or whatever) it is done. The idea
 * is that the encoder uses another ByteArrayOutputStream in the first pass, and
 * if the strtbl is not used the whole bytes are dumped into the real stream; 
 * if it was used, a second real encoding process is performed.</li>
 * <li>StrtblType=ALWAYS. The strtbl is always used (instead of normal STR_I
 * the STR_T strings are used). The processing is exactly the same as the
 * previous type but here almost always the second pass will be needed.</li>
 * </ul>
 * 
 * 
 * @author ricky
 */
public class WbXmlEncoder {
    
    /**
     * Class-wide logger, named after this class.
     */
    protected static final Logger log = Logger.getLogger(WbXmlEncoder.class.getName());

    /**
     * Initial capacity, in bytes, of the auxiliary ByteArrayOutputStream
     * that receives the first encoding pass.
     */
    private static final int BYTE_ARRAY_INITIAL_LENGTH = 1024;

    /**
     * Largest value that can be written as a mb_u_int32 (2^32 - 1).
     */
    private static final long MAX_UNSIGNED_INT = 0xFFFFFFFFL;
    
    /**
     * Strategy followed by the encoder with regard to the use of the
     * string table (strtbl).
     */
    public enum StrtblType {

        /**
         * The strtbl is never used and strings are written inline (STR_I).
         * If the strtbl turns out to be needed an exception is thrown.
         * A single pass is performed.
         */
        NO,

        /**
         * Strings are still written inline (STR_I) and the strtbl is used
         * only when it cannot be avoided. The first pass is written into an
         * auxiliary ByteArrayOutputStream.
         */
        IF_NEEDED,

        /**
         * Every string is written as a strtbl reference (STR_T). As with
         * {@link #IF_NEEDED}, the first pass is written into an auxiliary
         * ByteArrayOutputStream.
         */
        ALWAYS
    }
    
    /**
     * Stream that currently receives the encoded bytes. It is either the
     * real stream or the temporary byte array used during a first pass.
     */
    private OutputStream os;

    /**
     * Destination stream received in the constructor. It is kept apart from
     * {@link #os} because a first pass (IF_NEEDED and ALWAYS) temporarily
     * redirects the output to a byte array.
     */
    private final OutputStream realOs;

    /**
     * Code page currently selected for attribute tokens.
     */
    private byte pageAttrState;

    /**
     * Code page currently selected for tag tokens.
     */
    private byte pageTagState;

    /**
     * The document that is encoded into the WBXML stream.
     */
    private final WbXmlDocument doc;

    /**
     * The strtbl strategy of the encoding process.
     */
    private final StrtblType type;

    /**
     * Flag that records whether the strtbl has been used. It decides if a
     * second pass is needed or the byte array of the first pass can be
     * dumped directly into the real stream.
     */
    private boolean strtblUsed;

    /**
     * Creates an encoder for the given document that writes into the given
     * stream, using the default strtbl strategy
     * ({@link StrtblType#IF_NEEDED}).
     *
     * @param os The output stream that receives the WBXML document
     * @param doc The document to encode
     */
    public WbXmlEncoder(OutputStream os, WbXmlDocument doc) {
        this(os, doc, StrtblType.IF_NEEDED);
    }
    
    /**
     * Creates an encoder from its three components: the destination stream,
     * the document and the strtbl strategy. Both page states start at page
     * zero and the strtbl is initially marked as not used.
     *
     * @param os The output stream that receives the WBXML document
     * @param doc The document to encode
     * @param type The strtbl strategy to use while encoding
     */
    public WbXmlEncoder(OutputStream os, WbXmlDocument doc, StrtblType type) {
        // collaborators
        this.doc = doc;
        this.type = type;
        this.realOs = os;
        this.os = os;
        // initial encoder state
        this.pageTagState = 0x00;
        this.pageAttrState = 0x00;
        this.strtblUsed = false;
    }
    
    /**
     * Getter for the Java charset used to convert strings into bytes.
     *
     * A document may carry no charset at all: in that case the header is
     * written with charset 0 (unknown) and the WBXML specification states
     * that UTF-8 is the default encoding, so UTF-8 is returned instead of
     * failing with a NullPointerException.
     *
     * @return The charset of the document, UTF-8 if the document has none
     */
    protected Charset getCharset() {
        IanaCharset iana = this.doc.getCharset();
        if (iana == null) {
            // unknown charset => default encoding defined by the specification
            return Charset.forName("UTF-8");
        }
        return iana.getCharset();
    }
    
    /**
     * Getter for the strtbl strategy this encoder was created with.
     *
     * @return The type or way of encoding
     */
    public StrtblType getType() {
        return type;
    }
    
    /**
     * Getter for the IANA charset of the document, exactly as the document
     * returns it.
     *
     * @return The IANA charset the document is using
     */
    public IanaCharset getIanaCharset() {
        return doc.getCharset();
    }

    /**
     * Getter for the string table (strtbl) of the document being encoded.
     *
     * @return The strtbl of the document
     */
    public WbXmlStrtbl getStrtbl() {
        return this.doc.getStrtbl();
    }

    /**
     * Getter for the language definition of the document being encoded.
     *
     * @return The definition of the document
     */
    public WbXmlDefinition getDefinition() {
        return this.doc.getDefinition();
    }

    /**
     * Tells whether the strtbl has been marked as used. The flag is checked
     * after the first pass to decide if a second pass is required.
     *
     * @return true if the strtbl has been marked as used
     */
    protected boolean isStrtblUsed() {
        return this.strtblUsed;
    }

    /**
     * Marks the strtbl as used. According to the original notes it is
     * called from the WbXmlStrtbl object when a string is added.
     */
    protected void setStrtblUsed() {
        strtblUsed = true;
    }
    
    /**
     * Resets the encoder state (strtbl-used flag and both page states) so a
     * second pass or a second encoding process can be performed. An encoder
     * is used only for one document; to encode another document create
     * another encoder.
     */
    public void reset() {
        strtblUsed = false;
        pageTagState = 0x0;
        pageAttrState = 0x0;
    }
    
    //
    // WRITE METHODS
    //
    
    /**
     * Writes a single byte into the stream. The byte is wrapped in a
     * one-element array and handed to {@link #write(byte[])}.
     *
     * @param b The byte to write
     * @throws IOException Some error writing to the stream
     */
    public void write(byte b) throws IOException {
        final byte[] single = { b };
        write(single);
    }

    /**
     * Writes a complete byte array into the stream.
     *
     * @param b The byte array to write
     * @throws IOException Some error writing to the stream
     */
    public void write(byte[] b) throws IOException {
        final int length = b.length;
        write(b, 0, length);
    }
    
    /**
     * Writes a slice of a byte array into the current output stream. The
     * other write methods of this class end up calling this one.
     *
     * @param b The byte array to write
     * @param start The index of the first byte to write
     * @param length The number of bytes to write
     * @throws IOException Some error writing to the stream
     */
    public void write(byte[] b, int start, int length) throws IOException {
        os.write(b, start, length);
    }
    
    /**
     * Writes a value as a WBXML multi-byte integer (mb_u_int32), the
     * space-saving format explained in the chapter <em>5.1. Multi-byte
     * Integers</em> of the specification.
     *
     * A multi-byte integer consists of a series of octets, where the most
     * significant bit is the continuation flag and the remaining seven bits
     * are a scalar value. The first N-1 octets have the continuation flag
     * set to one (1) and the final octet has it set to zero (0). The octets
     * are arranged in big-endian order, the most significant seven bits are
     * transmitted first, and unused bits of the initial octet are zero. For
     * example, the integer value 0xA0 is encoded with the two-byte sequence
     * 0x81 0x20 and the integer value 0x60 with the one-byte sequence 0x60.
     *
     * Values outside the range [0, 2^32 - 1] are rejected. In particular a
     * negative value is refused instead of being silently written as a
     * single, wrong octet.
     *
     * @param value The value to write as a multi-byte integer
     * @throws IOException If the value cannot be represented as a
     *         mb_u_int32 or there is some error writing to the stream
     */
    public void writeUnsignedInteger(long value) throws IOException {
        if (value < 0 || value > MAX_UNSIGNED_INT) {
            throw new IOException("Value out of range for a mb_u_int32: " + value);
        }
        // 32 bits in groups of 7 bits never need more than five octets
        byte[] octets = new byte[5];
        // the buffer is filled from the end: the last octet has no continuation flag
        int start = octets.length - 1;
        octets[start] = (byte) (value & 0x7f);
        for (long remaining = value >> 7; remaining > 0; remaining >>= 7) {
            start--;
            octets[start] = (byte) (0x80 | (remaining & 0x7f));
        }
        write(octets, start, octets.length - start);
    }
    
    /**
     * Emits a switch page for the attribute code space when it is needed.
     *
     * The WBXML format specification defines two independent states for a
     * parser/encoder machine, one holds the current code page for tags and
     * the other the current code page for attributes (chapter <em>5.8.1.
     * Parser State Machine</em>). A token that belongs to the current page
     * is written as it is, a token from another page must be preceded by a
     * switch page token that changes the state.
     *
     * This method is called whenever an attribute or attribute value token
     * is going to be written. If the page differs from the current attribute
     * page the state is updated and the switch page token followed by the
     * new page are written; otherwise nothing is written.
     *
     * @param page The page of the attribute token
     * @throws IOException Some error writing to the stream
     */
    public void writeSwitchPageAttribute(byte page) throws IOException {
        if (pageAttrState == page) {
            // already in the requested page
            return;
        }
        pageAttrState = page;
        write(WbXmlLiterals.SWTICH_PAGE);
        write(page);
    }
    
    /**
     * Emits a switch page for the tag code space when it is needed.
     *
     * The WBXML format specification defines two independent states for a
     * parser/encoder machine, one holds the current code page for tags and
     * the other the current code page for attributes (chapter <em>5.8.1.
     * Parser State Machine</em>). A token that belongs to the current page
     * is written as it is, a token from another page must be preceded by a
     * switch page token that changes the state.
     *
     * This method is called whenever a tag token is going to be written.
     * If the page differs from the current tag page the state is updated
     * and the switch page token followed by the new page are written;
     * otherwise nothing is written.
     *
     * @param page The page of the tag token
     * @throws IOException Some error writing to the stream
     */
    public void writeSwitchPageTag(byte page) throws IOException {
        if (pageTagState == page) {
            // already in the requested page
            return;
        }
        pageTagState = page;
        write(WbXmlLiterals.SWTICH_PAGE);
        write(page);
    }
    
    /**
     * Generic method to write a string into the stream. The strtbl strategy
     * decides the representation: with ALWAYS the string is written as a
     * reference (STR_T), with NO or IF_NEEDED it is written inline (STR_I).
     *
     * @param s The string to write to the stream
     * @throws IOException Some error writing the string
     */
    public void writeString(String s) throws IOException {
        if (type == StrtblType.ALWAYS) {
            writeReferenceString(s);
            return;
        }
        writeInlineString(s);
    }
    
    /**
     * Writes an inline string (STR_I), a string that is embedded in the
     * document itself (as part of an attribute value or of a content
     * string). The specification defines it as follows:
     *
     * <pre>
     * inline = STR_I termstr
     * termstr = charset-dependent string with termination
     * </pre>
     *
     * The string formats are specified in the chapter <em>5.8.4.1.
     * Strings</em>. This method writes the STR_I token and then delegates
     * to {@link #writeTableString(String)} for the charset dependent bytes
     * and the final 0x00.
     *
     * @param s The string to write in the stream as STR_I
     * @throws IOException Some error writing to the stream
     */
    public void writeInlineString(String s) throws IOException {
        final byte token = WbXmlLiterals.STR_I;
        write(token);
        writeTableString(s);
    }
    
    /**
     * Writes a reference string (STR_T), a string that is stored in the
     * string table and is referenced from some part of the document. The
     * specification defines it as follows:
     *
     * <pre>
     * tableref = STR_T index
     * index = mb_u_int32 // integer index into string table.
     * </pre>
     *
     * The string is first added to the strtbl of the document, which gives
     * its index; then the STR_T token and the index (as a multi-byte
     * integer) are written. According to the original notes addString()
     * throws an exception if the table cannot be used, in which case
     * nothing has been written yet.
     *
     * @param s The string to write as reference (STR_T) string
     * @throws IOException Some error writing to the stream or strtbl cannot be used
     */
    public void writeReferenceString(String s) throws IOException {
        // the index must be known before any byte is written
        final long index = doc.getStrtbl().addString(this, s);
        write(WbXmlLiterals.STR_T);
        writeUnsignedInteger(index);
    }
    
    /**
     * Writes the bytes of a terminated string into the stream. The string
     * is converted into a byte array using the document charset and a 0x00
     * mark is written after it. It is used both for the strings of the
     * strtbl and for inline (STR_I) strings. The specification defines the
     * string as follows:
     *
     * <pre>
     * termstr = charset-dependent string with termination (0x00)
     * </pre>
     *
     * @param s The string to write to the stream
     * @throws IOException Some error writing to the stream
     */
    public void writeTableString(String s) throws IOException {
        final byte[] encoded = s.getBytes(getCharset());
        write(encoded);
        // string terminator
        write((byte) 0x00);
    }
    
    /**
     * Writes an opaque data byte array. The specification defines the
     * opaque data as follows:
     *
     * <pre>
     * opaque = OPAQUE length *byte
     * </pre>
     *
     * That is, the OPAQUE token followed by the length of the array (as a
     * multi-byte integer) and the array itself. This method should be
     * called from opaque plugins once the data is calculated.
     *
     * @param data The opaque data byte array to write
     * @throws IOException Some error writing to the stream
     */
    public void writeOpaque(byte[] data) throws IOException {
        final int length = data.length;
        write(WbXmlLiterals.OPAQUE);
        writeUnsignedInteger(length);
        write(data);
    }
    
    /**
     * Marks a tag token with the attributes and content flags. It is
     * explained in the chapter <em>5.8.2. Tag Code Space</em>.
     *
     * The two most significant bits of a tag are defined by the
     * specification as follows:
     *
     * <ul>
     * <li>7 (most significant). Indicates whether attributes follow the tag
     * code. If this bit is zero, the tag contains no attributes. If this bit
     * is one, the tag is followed immediately by one or more attributes.
     * The attribute list is terminated by an END token.</li>
     * <li>6. Indicates whether this tag begins an element containing
     * content. If this bit is zero, the tag contains no content and no end
     * tag. If this bit is one, the tag is followed by any content it
     * contains and is terminated by an END token.</li>
     * <li>5 - 0. Indicates the tag identity.</li>
     * </ul>
     *
     * This method takes the tag as it comes from the language definition
     * and ORs the corresponding mask for each flag that applies.
     *
     * @param tag The tag read from the language definition (5-0 bits)
     * @param hasAttributes true if the element has attributes, false if not
     * @param hasContent true if the element has contents, false if not
     * @return The tag with 6 and 7 bit correctly set
     */
    public static byte processTag(byte tag, boolean hasAttributes, boolean hasContent) {
        int flagged = tag;
        if (hasAttributes) {
            flagged |= WbXmlLiterals.TAG_ATTRIBUTES_MASK;
        }
        if (hasContent) {
            flagged |= WbXmlLiterals.TAG_CONTENT_MASK;
        }
        // only the low eight bits are meaningful
        return (byte) flagged;
    }
    
    /**
     * Selects the literal tag token for an element whose tag has no
     * correspondence to any known token in the language definition. The
     * literal to use differs if the element has attributes and/or contents.
     *
     * The chapter <em>5.8.2. Tag Code Space</em> of the specification says
     * about those literals: The globally unique codes LITERAL, LITERAL_A,
     * LITERAL_C, and LITERAL_AC represent unknown tag names. (Note that the
     * tags LITERAL_A, LITERAL_C, and LITERAL_AC are the LITERAL tag with the
     * appropriate combinations of bits 6 and 7 set.) An XML tokeniser should
     * avoid the use of the literal or string representations of a tag when a
     * more compact form is available.
     *
     * @param hasAttributes true if the element has attributes, false if not
     * @param hasContent true if the element has contents, false if not
     * @return The appropriate literal to use
     */
    public static byte processLiteralTag(boolean hasAttributes, boolean hasContent) {
        if (hasContent) {
            return hasAttributes ? WbXmlLiterals.LITERAL_AC : WbXmlLiterals.LITERAL_C;
        }
        return hasAttributes ? WbXmlLiterals.LITERAL_A : WbXmlLiterals.LITERAL;
    }
    
    /**
     * Main method of the encoder object. This is the method that should be
     * called when encoding a document. The whole document is encoded into
     * the output stream passed in the constructor.
     *
     * Depending on the strtbl strategy one or two passes are performed:
     * with NO the document is written straight into the real stream; with
     * IF_NEEDED and ALWAYS a first pass is written into a byte array and,
     * if the strtbl was marked as used during that pass, the document is
     * encoded again into the real stream. Otherwise the bytes of the first
     * pass are copied into the real stream.
     *
     * When the method ends (normally or with an exception) the encoder
     * points again to the real stream, as it does right after construction.
     *
     * @throws IOException Some error writing the document to the stream
     */
    public void encode() throws IOException {
        if (StrtblType.NO.equals(this.type)) {
            // not use tblstr, strtbl generates an exception if used
            this.os = this.realOs;
            encode(doc);
            return;
        }
        // maybe it needs two passes, uses a byte array output stream
        ByteArrayOutputStream bos = new ByteArrayOutputStream(BYTE_ARRAY_INITIAL_LENGTH);
        try {
            log.log(Level.FINE, "Performing first pass using a byte array");
            this.os = bos;
            encode(doc);
            if (this.strtblUsed) {
                // second pass needed cos strtbl was used, now using real os
                // reset the encoder to restart with correct pages
                reset();
                log.log(Level.FINE, "Performing second pass into real stream cos strtbl used");
                this.os = this.realOs;
                encode(doc);
            } else {
                // second pass not needed => bulk write the buffered bytes,
                // writeTo avoids the extra copy of toByteArray
                log.log(Level.FINE, "Dumping byte arrays cos strtbl not used");
                bos.writeTo(this.realOs);
            }
        } finally {
            // drop the temporary buffer (closing a ByteArrayOutputStream has
            // no effect) and leave the encoder pointing to the real stream
            this.os = this.realOs;
        }
    }
    
    /**
     * Encodes the version of the document. The version is defined in the
     * specification as follows:
     *
     * <pre>
     * version = u_int8 // WBXML version number
     * </pre>
     *
     * The chapter <em>5.4. Version Number</em> explains it: all WBXML
     * documents contain a version number in their initial byte. The version
     * byte contains the major version minus one in the upper four bits and
     * the minor version in the lower four bits. For example, the version
     * number 1.3 would be encoded as 0x03, and version number 2.7 as 0x17.
     *
     * @param version The version to write
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlVersion version) throws IOException {
        // upper nibble: major - 1, lower nibble: minor
        write((byte) (((version.getMajor() - 1) << 4) | version.getMinor()));
    }
    
    /**
     * Encodes the string table into the stream. The string table is
     * defined as follows:
     *
     * <pre>
     * strtbl = length *byte
     * </pre>
     *
     * The chapter <em>5.7. String Table</em> explains how the table is used
     * and encoded. The table is just its own length followed by a byte
     * array with all the strings defined in the strtbl. The strings are
     * charset dependent byte arrays terminated with 0x00. Later references
     * in the document to the strings in the table are done using the
     * relative starting index of the string in the table.
     *
     * @param strtbl The string table to write
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlStrtbl strtbl) throws IOException {
        // the size always goes first, even for an empty table
        final long size = strtbl.getSize();
        writeUnsignedInteger(size);
        if (size <= 0) {
            return;
        }
        // the strings are written in the order the table returns its indexes
        // (the original notes say it is ordered cos it is stored in a TreeMap)
        for (long index : strtbl.getIndexes()) {
            writeTableString(strtbl.getString(index));
        }
    }
    
    /**
     * Encodes the whole document into the stream. The elements that
     * compound a WBXML document are, following the specification:
     *
     * <pre>
     * start = version publicid charset strtbl body
     *
     * publicid = mb_u_int32 | ( zero index )
     * zero = u_int8        // with a 0x0 value
     * index = mb_u_int32   // integer index into string table.
     *
     * charset = mb_u_int32
     * </pre>
     *
     * The method writes the five elements one after the other. When the
     * definition has an unknown WBXML public id, the strtbl may be used
     * (type is not NO) and the definition has an XML public id, that XML
     * public id is added to the strtbl and referenced by index; in any
     * other case the numeric public id is written. A document without
     * charset is written with charset 0.
     *
     * @param doc The document to encode
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlDocument doc) throws IOException {
        // version
        encode(doc.getVersion());
        // publicid
        final WbXmlDefinition definition = doc.getDefinition();
        final boolean publicIdAsString =
                definition.getPublicId() == WbXmlDefinition.PUBLIC_ID_UNKNOWN
                && type != StrtblType.NO
                && definition.getXmlPublicId() != null;
        if (publicIdAsString) {
            // unknown wbxml public id => write the xml public id using strtbl
            writeUnsignedInteger(WbXmlDefinition.PUBLIC_ID_STR_T);
            writeUnsignedInteger(doc.getStrtbl().addString(this, definition.getXmlPublicId()));
        } else {
            // write normal number or unknown if NO strtbl is used
            writeUnsignedInteger(definition.getPublicId());
        }
        // charset
        final IanaCharset charset = doc.getCharset();
        if (charset == null) {
            writeUnsignedInteger(0);
        } else {
            writeUnsignedInteger(charset.getMibEnum());
        }
        // strtbl
        encode(doc.getStrtbl());
        // the strtbl is already written: clear the page states and the used
        // flag so only strings added while encoding the body mark it as used
        reset();
        // body
        encode(doc.getBody());
    }
    
    /**
     * Encodes an element into the output stream. The element is defined in
     * the specification as follows:
     *
     * <pre>
     * element = ([switchPage] stag) [ 1*attribute END ] [ *content END ]
     * stag = TAG | (literalTag index)
     * literalTag = LITERAL | LITERAL_A | LITERAL_C | LITERAL_AC
     * </pre>
     *
     * The method writes the tag token (preceded by a switch page if needed)
     * when the tag is found in the language definition, or a literal tag
     * followed by the strtbl index of the tag name when it is not. Then the
     * attributes and the contents are encoded, each list terminated by an
     * END token. Contents are handed to the opaque plugin registered for
     * the tag, if there is one.
     *
     * @param element The element to encode to the stream
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlElement element) throws IOException {
        // get the tag for this element
        WbXmlTagDef def = getDefinition().locateTag(element.getTag());
        final boolean hasAttributes = !element.isAttributesEmpty();
        // NOTE(review): the content flag of the tag is calculated before
        // compact() is called below - confirm compact() never changes
        // whether the contents are empty
        final boolean hasContent = !element.isContentsEmpty();
        if (def != null) {
            // found stag => normal encoding
            // switchPage
            writeSwitchPageTag(def.getToken().getPageCode());
            // stag
            write(WbXmlEncoder.processTag(def.getToken().getToken(), hasAttributes, hasContent));
        } else {
            // unknown tag => literal used
            log.log(Level.WARNING, "Using literal TAG in element: {0}", element.getTag());
            write(WbXmlEncoder.processLiteralTag(hasAttributes, hasContent));
            long idx = getStrtbl().addString(this, element.getTag());
            writeUnsignedInteger(idx);
        }
        if (hasAttributes) {
            // 1*attributes
            for (WbXmlAttribute attr : element.getAttributes()) {
                encode(element, attr);
            }
            // END
            write(WbXmlLiterals.END);
        }
        if (!element.isCompacted()) {
            element.compact(this);
        }
        // checked again after compacting, as the original code does
        if (!element.isContentsEmpty()) {
            // the plugin depends only on the tag, so it is located once
            // instead of once per content
            OpaqueContentPlugin plugin = getDefinition().locateTagPlugin(element.getTag());
            // *content
            for (WbXmlContent content : element.getContents()) {
                if (plugin != null) {
                    plugin.encode(this, element, content);
                } else {
                    encode(content);
                }
            }
            // END
            write(WbXmlLiterals.END);
        }
    }
    
    /**
     * Encodes a content into the output stream. The content is defined in
     * the specification as follows:
     *
     * <pre>
     * content = element | string | extension | entity | pi | opaque
     *
     * string = inline | tableref
     * inline = STR_I termstr
     * tableref = STR_T index
     *
     * extension = [switchPage] (( EXT_I termstr ) | ( EXT_T index ) | EXT)
     *
     * entity = ENTITY entcode
     * entcode = mb_u_int32
     *
     * opaque = OPAQUE length *byte
     *
     * pi = PI attrStart *attrValue END
     * </pre>
     *
     * The opaque one is not treated here (see encode(WbXmlElement)). The
     * content is checked in this order: element, string and pi. A string is
     * written as an EXT_T_0 extension if the definition has an extension
     * for it, as an entity if the content is an entity, or as a normal
     * string otherwise. A content with none of the three is ignored.
     *
     * @param content The content to write to the output stream
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlContent content) throws IOException {
        // element
        final WbXmlElement child = content.getElement();
        if (child != null) {
            encode(child);
            return;
        }
        // string, extension or entity
        final String text = content.getString();
        if (text != null) {
            final WbXmlExtensionDef extension = getDefinition().locateExtension(text);
            if (extension != null) {
                write(WbXmlLiterals.EXT_T_0);
                writeUnsignedInteger(extension.getToken());
                return;
            }
            if (content.isEntity()) {
                write(WbXmlLiterals.ENTITY);
                writeUnsignedInteger(content.getEntityNumber());
                return;
            }
            writeString(text);
            return;
        }
        // pi
        final WbXmlAttribute pi = content.getPi();
        if (pi != null) {
            write(WbXmlLiterals.PI);
            encode(null, pi);
            write(WbXmlLiterals.END);
        }
    }
    
    /**
     * Encodes a single attribute value. The value is looked up in the
     * definition: if it is a known attribute value its token is written
     * (preceded by a switch page if needed); if it is a known extension it
     * is written as EXT_T_0 followed by the extension token; in any other
     * case it is written as a string.
     *
     * @param value The value to write
     * @throws IOException Some error writing to the stream
     */
    public void encodeAttributeValue(String value) throws IOException {
        final WbXmlAttributeValueDef valueDef = getDefinition().locateAttributeValue(value);
        if (valueDef != null) {
            // it is an attribute value
            writeSwitchPageAttribute(valueDef.getToken().getPageCode());
            write(valueDef.getToken().getToken());
            return;
        }
        final WbXmlExtensionDef extension = getDefinition().locateExtension(value);
        if (extension != null) {
            write(WbXmlLiterals.EXT_T_0);
            writeUnsignedInteger(extension.getToken());
            return;
        }
        writeString(value);
    }
    
    /**
     * Encodes a complete attribute into the output stream. An attribute is
     * defined in the specification as follows:
     *
     * <pre>
     * attribute = attrStart *attrValue
     *
     * attrStart = ([switchPage] ATTRSTART) | (LITERAL index )
     *
     * attrValue = ([switchPage] ATTRVALUE) | string | extension | entity | opaque
     *
     * string = inline | tableref
     * inline = STR_I termstr
     * tableref = STR_T index
     * index= mb_u_int32 // index in the attr table
     *
     * extension = [switchPage] (( EXT_I termstr ) | ( EXT_T index ) | EXT)
     *
     * entity = ENTITY entcode
     * entcode = mb_u_int32 // UCS-4 character code
     *
     * opaque = OPAQUE length *byte
     * </pre>
     *
     * The attribute definition is located using the name and the first
     * value. If it is found its token is written, if not a LITERAL followed
     * by the strtbl index of the name is used. Then the values are written:
     * when the definition carries a value prefix that prefix is removed
     * from the first value (it is already part of the attrStart token), and
     * every remaining value is handed to the opaque plugin registered for
     * the attribute or, if there is none, encoded as an attribute value.
     *
     * @param element The element the attr belongs to (null in case of PI)
     * @param attr The attribute to write into the stream
     * @throws IOException  Some error writing to the stream
     */
    public void encode(WbXmlElement element, WbXmlAttribute attr) throws IOException {
        // search the best attribute definition
        String firstValue = null;
        if (!attr.isValuesEmpty()) {
            firstValue = attr.getValue(0);
        }
        // write attrStart
        WbXmlAttributeDef def = getDefinition().locateAttribute(attr.getName(), firstValue);
        if (def != null) {
            // tag exists
            writeSwitchPageAttribute(def.getToken().getPageCode());
            write(def.getToken().getToken());
        } else {
            // literal
            log.log(Level.WARNING, "Using literal TAG in attribute: {0}", attr.getName());
            write(WbXmlLiterals.LITERAL);
            long idx = getStrtbl().addString(this, attr.getName());
            writeUnsignedInteger(idx);
        }
        // compact the value string using attribute values or extensions
        if (!attr.isCompacted()) {
            attr.compact(this, def);
        }
        // plugin and prefix depend only on the definition, so they are
        // obtained once instead of once per value
        OpaqueAttributePlugin plugin = null;
        String prefix = null;
        if (def != null) {
            plugin = getDefinition().locateAttrPlugin(def.getNameWithPrefix());
            prefix = def.getValue();
        }
        // write attrValues
        boolean first = true;
        for (String v : attr.getValues()) {
            if (first) {
                first = false;
                if (prefix != null) {
                    // the prefix is already encoded in the attrStart token
                    // NOTE(review): assumes the first value still starts
                    // with the prefix after compacting - confirm
                    v = v.substring(prefix.length());
                    if (v.isEmpty()) {
                        // the whole first value was covered by attrStart
                        continue;
                    }
                }
            }
            if (plugin != null) {
                plugin.encode(this, element, attr, v);
            } else {
                encodeAttributeValue(v);
            }
        }
    }
    
    /**
     * Encodes the body of a document. The body is defined as follows:
     *
     * <pre>
     * body = *pi element *pi
     * </pre>
     *
     * So the processing instructions that precede the element are written
     * first, then the element and finally the processing instructions that
     * follow it.
     *
     * @param body The body to write into the stream
     * @throws IOException Some error writing to the stream
     */
    public void encode(WbXmlBody body) throws IOException {
        for (WbXmlAttribute before : body.getPrePis()) {
            encodePi(before);
        }
        encode(body.getElement());
        for (WbXmlAttribute after : body.getPostPis()) {
            encodePi(after);
        }
    }

    /**
     * Writes one processing instruction: the PI token, the attribute that
     * represents the instruction (with no owner element) and the END token.
     *
     * @param pi The attribute that represents the processing instruction
     * @throws IOException Some error writing to the stream
     */
    private void encodePi(WbXmlAttribute pi) throws IOException {
        write(WbXmlLiterals.PI);
        encode(null, pi);
        write(WbXmlLiterals.END);
    }
}