All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.xml.bind.v2.runtime.output.Encoded Maven / Gradle / Ivy

There is a newer version: 4.0.5
Show newest version
/*
 * Copyright (c) 1997, 2021 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0, which is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package com.sun.xml.bind.v2.runtime.output;

import java.io.IOException;

/**
 * Buffer for UTF-8 encoded string.
 *
 * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
 *
 * @author Kohsuke Kawaguchi
 */
public final class Encoded {
    public byte[] buf;

    public int len;

    public Encoded() {}

    public Encoded(String text) {
        set(text);
    }

    public void ensureSize(int size) {
        if(buf==null || buf.length 0x7F) {
                if (chr > 0x7FF) {
                    if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
                        // surrogate
                        int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;

                        buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
                        buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
                        buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
                        buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
                        continue;
                    }
                    buf[ptr++] = (byte)(0xE0 + (chr >> 12));
                    buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
                } else {
                    buf[ptr++] = (byte)(0xC0 + (chr >> 6));
                }
                buf[ptr++] = (byte)(0x80 + (chr & 0x3F));
            } else {
                buf[ptr++] = (byte)chr;
            }
        }

        len = ptr;
    }

    /**
     * Fill in the buffer by encoding the specified characters
     * while escaping characters like <
     *
     * @param isAttribute
     *      if true, characters like \t, \r, and \n are also escaped.
     */
    public final void setEscape(String text, boolean isAttribute) {
        int length = text.length();
        ensureSize(length*6+1);     // in the worst case the text is like """""", so we need 6 bytes per char

        int ptr = 0;

        for (int i = 0; i < length; i++) {
            final char chr = text.charAt(i);

            int ptr1 = ptr;
            if (chr > 0x7F) {
                if (chr > 0x7FF) {
                    if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
                        // surrogate
                        int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;

                        buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
                        buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
                        buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
                        buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
                        continue;
                    }
                    buf[ptr1++] = (byte)(0xE0 + (chr >> 12));
                    buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
                } else {
                    buf[ptr1++] = (byte)(0xC0 + (chr >> 6));
                }
                buf[ptr1++] = (byte)(0x80 + (chr & 0x3F));
            } else {
                byte[] ent;

                if((ent=attributeEntities[chr])!=null) {
                    // the majority of the case is just printed as a char,
                    // so it's very important to reject them as quickly as possible

                    // check again to see if this really needs to be escaped
                    if(isAttribute || entities[chr]!=null)
                        ptr1 = writeEntity(ent,ptr1);
                    else
                        buf[ptr1++] = (byte)chr;
                } else
                    buf[ptr1++] = (byte)chr;
            }
            ptr = ptr1;
        }
        len = ptr;
    }

    private int writeEntity( byte[] entity, int ptr ) {
        System.arraycopy(entity,0,buf,ptr,entity.length);
        return ptr+entity.length;
    }

    /**
     * Writes the encoded bytes to the given output stream.
     */
    public final void write(UTF8XmlOutput out) throws IOException {
        out.write(buf,0,len);
    }

    /**
     * Appends a new character to the end of the buffer.
     * This assumes that you have enough space in the buffer.
     */
    public void append(char b) {
        buf[len++] = (byte)b;
    }

    /**
     * Reallocate the buffer to the exact size of the data
     * to reduce the memory footprint.
     */
    public void compact() {
        byte[] b = new byte[len];
        System.arraycopy(buf,0,b,0,len);
        buf = b;
    }

    /**
     * UTF-8 encoded entities keyed by their character code.
     * e.g., entities['&'] == AMP_ENTITY.
     *
     * In attributes we need to encode more characters.
     */
    private static final byte[][] entities = new byte[0x80][];
    private static final byte[][] attributeEntities = new byte[0x80][];

    static {
        add('&',"&",false);
        add('<',"<",false);
        add('>',">",false);
        add('"',""",true);
        add('\t',"	",true);
        add('\r',"
",false);
        add('\n',"
",true);
    }

    private static void add(char c, String s, boolean attOnly) {
        byte[] image = UTF8XmlOutput.toBytes(s);
        attributeEntities[c] = image;
        if(!attOnly)
            entities[c] = image;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy