com.sun.xml.stream.BufferManager Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sjsxp Show documentation
Sun Java Streaming XML Parser (SJSXP) is the implementation of JSR 173.
The newest version!
/*
 * $Id: BufferManager.java,v 1.3 2007-07-19 22:33:12 ofung Exp $
 */

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
 * 
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License. You can obtain
 * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
 * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 * 
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
 * Sun designates this particular file as subject to the "Classpath" exception
 * as provided by Sun in the GPL Version 2 section of the License file that
 * accompanied this code.  If applicable, add the following below the License
 * Header, with the fields enclosed by brackets [] replaced by your own
 * identifying information: "Portions Copyrighted [year]
 * [name of copyright owner]"
 * 
 * Contributor(s):
 * 
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */

package com.sun.xml.stream;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import java.nio.CharBuffer;
import com.sun.xml.stream.xerces.xni.parser.XMLInputSource;

/**
 * @author  Neeraj Bajaj, Sun Microsystems
 */
public abstract class BufferManager {
            
    protected boolean endOfStream = false;
    static boolean DEBUG = false;
    

    public static BufferManager getBufferManager(XMLInputSource inputSource) throws java.io.IOException{
        
        InputStream stream = inputSource.getByteStream();
        if(stream instanceof FileInputStream){
            if(DEBUG){
                System.out.println("Using FileBufferManager");
            }
            return new FileBufferManager((FileInputStream)stream,inputSource.getEncoding());
        }else{
            if(DEBUG){
                System.out.println("Using StreamBufferManager");
            }
            return new StreamBufferManager(stream, inputSource.getEncoding());
        }        
    }
    
    /**
     * This function returns true if some character data was loaded. Data is available via getCharBuffer().
     * If before calling this function CharBuffer had some data (i.e. remaining() > 0) then this function 
     * first calls CharBuffer.compact() and then it is filled with more data. After calling this function
     * CharBuffer.position() is always 'zero'.
     *
     * @see CharBuffer.compact()
     * @return true if some character data was loaded. False value can be assume to be end of current 
     * entity.
     */
    
    public abstract boolean getMore() throws java.io.IOException;
    
    public abstract CharBuffer getCharBuffer();
    
    /**
     *xxx: This should be an abstract method because in StreamBufferManager
     * CharBuffer capacity doesn't grow
     */
    public abstract boolean arrangeCapacity(int length) throws java.io.IOException;
    
    /**{
        if(getCharBuffer().limit() - getCharBuffer().position() >= length){
            return true;
        }
        while( (getCharBuffer().limit() - getCharBuffer().position()) < length){            
            if(endOfStream())break;
            getMore();
        }
        if(getCharBuffer().limit() - getCharBuffer().position() >= length){
            return true;
        }else{        
            return false;        
        }
    }*/
    
    /** This file signals the end of file
     * @return true/false signals the end of file.
     */
    public boolean endOfStream(){
        return endOfStream;
    }

    public abstract void close() throws java.io.IOException;
    
    public abstract void setEncoding(String encoding) throws java.io.IOException;
    
    /**
     * Returns the IANA encoding name that is auto-detected from
     * the bytes specified, with the endian-ness of that encoding where appropriate.
     *
     * @param b4    The first four bytes of the input.
     * @param count The number of bytes actually read.
     * @return a 2-element array:  the first element, an IANA-encoding string,
     *  the second element a Boolean which is true iff the document is big endian, false
     *  if it's little-endian, and null if the distinction isn't relevant.
     */
    protected Object[] getEncodingName(byte[] b4, int count) {
        
        if (count < 2) {
            return new Object[]{"UTF-8", null};
        }
        
        // UTF-16, with BOM
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            // UTF-16, big-endian
            return new Object [] {"UTF-16BE", new Boolean(true)};
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            // UTF-16, little-endian
            return new Object [] {"UTF-16LE", new Boolean(false)};
        }
        
        // default to UTF-8 if we don't have enough bytes to make a
        // good determination of the encoding
        if (count < 3) {
            return new Object [] {"UTF-8", null};
        }
        
        // UTF-8 with a BOM
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return new Object [] {"UTF-8", null};
        }
        
        // default to UTF-8 if we don't have enough bytes to make a
        // good determination of the encoding
        if (count < 4) {
            return new Object [] {"UTF-8", null};
        }
        
        // other encodings
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            // UCS-4, big endian (1234)
            return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, little endian (4321)
            return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            // UCS-4, unusual octet order (2143)
            // REVISIT: What should this be?
            return new Object [] {"ISO-10646-UCS-4", null};
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, unusual octect order (3412)
            // REVISIT: What should this be?
            return new Object [] {"ISO-10646-UCS-4", null};
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            // UTF-16, big-endian, no BOM
            // (or could turn out to be UCS-2...
            // REVISIT: What should this be?
            return new Object [] {"UTF-16BE", new Boolean(true)};
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            // UTF-16, little-endian, no BOM
            // (or could turn out to be UCS-2...
            return new Object [] {"UTF-16LE", new Boolean(false)};
        }
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            // EBCDIC
            // a la xerces1, return CP037 instead of EBCDIC here
            return new Object [] {"CP037", null};
        }
        
        // default encoding
        return new Object [] {"UTF-8", null};
        
    } // getEncodingName(byte[],int):Object[]
    
    public static void main(String [] args){
        try{
            File file = new File(args[0]);
            System.out.println("url parameter = " + file.toURI().toString());
            URL url = new URL(file.toURI().toString());
            XMLInputSource inputSource = new XMLInputSource(null,null,null,new FileInputStream(file),"UTF-8");
            BufferManager sb = BufferManager.getBufferManager(inputSource);
            CharBuffer cb = sb.getCharBuffer();
            int i = 0 ;
            while(sb.getMore()){
                System.out.println("Loop " + i++ + " = " + sb.getCharBuffer());
            }
            System.out.println("End of stream reached = " + sb.endOfStream() );        
            System.out.println("Total no. of loops required = " + i);
        }
        catch(Exception ex){
            ex.printStackTrace();
        }
    }
}