All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.atlas.io.OutStreamUTF8 Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.atlas.io;

import java.io.IOException ;
import java.io.OutputStream ;
import java.io.Writer ;

/** Output UTF-8 encoded data.
 *  This class implements the "Modified UTF8" encoding rules (null -> C0 80)
 *  It will encode any 16 bit value.  
 *  It can be used as a pure UTf-8 encoder. 
 * 
 *  @see InStreamUTF8
 */
public final class OutStreamUTF8 extends Writer
{
    private OutputStream out ;

    public OutStreamUTF8(OutputStream out)
    {
        // Buffer?
        this.out = out ;
    }
    
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException
    {
        for ( int i = 0 ; i < len; i++ )
            write(cbuf[off+i]) ;
    }
    
    @Override
    public void write(int ch) throws IOException
    { output(out, ch) ; }
    
    @Override
    public void write(char[] b) throws IOException
    { write(b, 0, b.length) ; }
    
    @Override
    public void write(String str) throws IOException
    { write(str,0, str.length()) ; }
    
    @Override
    public void write(String str, int idx, int len) throws IOException
    {
        for ( int i = 0 ; i < len; i++ )
            write(str.charAt(idx+i)) ;
    }

    public void output(int x)
    { 
        try { 
            output(out, x) ;
        } catch (IOException ex) { IO.exception(ex) ; }
    }
    
    /*
     * Bits 
     * 7    U+007F      1 to 127              0xxxxxxx 
     * 11   U+07FF      128 to 2,047          110xxxxx 10xxxxxx
     * 16   U+FFFF      2,048 to 65,535       1110xxxx 10xxxxxx 10xxxxxx
     * 21   U+1FFFFF    65,536 to 1,114,111   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 26   U+3FFFFFF                         111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 31   U+7FFFFFFF                        1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     */
    public static void output(OutputStream out, int ch) throws IOException
    {
        if ( ch != 0 && ch <= 127 )
        {
            // 7 bits
            out.write(ch) ;
            return ;
        }
        
        if ( ch == 0 )
        {
            // Modified UTF-8.
            out.write(0xC0) ;
            out.write(0x80) ;
            return ;
        }
        
        // Better? output(int HiMask, int byte length, int value) 
        
        if ( ch <= 0x07FF )
        {
            // 11 bits : 110yyyyy 10xxxxxx
            // int x1 = ( ((ch>>(11-5))&0x7) | 0xC0 ) ; outputBytes(out, x1, 2, ch) ; return ;
            int x1 = ( ((ch>>(11-5))&0x01F ) | 0xC0 ) ; 
            int x2 = ( (ch&0x3F)  | 0x80 ) ;
            out.write(x1) ;
            out.write(x2) ;
            return ;
        }
        if ( ch <= 0xFFFF )
        {
            // 16 bits : 1110aaaa  10bbbbbb  10cccccc
            // int x1 = ( ((ch>>(16-4))&0x7) | 0xE0 ) ; outputBytes(out, x1, 3, ch) ; return ;
            int x1 = ( ((ch>>(16-4))&0x0F) | 0xE0 ) ;
            int x2 = ( ((ch>>6)&0x3F) | 0x80 ) ;
            int x3 = ( (ch&0x3F) | 0x80 ) ;
            out.write(x1) ;
            out.write(x2) ;
            out.write(x3) ;
            return ;
        }
        
//        if ( Character.isDefined(ch) )
//            throw new AtlasException("not a character") ;
        
        //if ( true ) throw new InternalErrorException("Valid code point for Java but not encodable") ;
        
        // Not java, where chars are 16 bit.
        if ( ch <= 0x1FFFFF )
        {
            // 21 bits : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            int x1 = ( ((ch>>(21-3))&0x7) | 0xF0 ) ;
            outputBytes(out, x1, 4, ch) ;
            return ;
        }
        if ( ch <= 0x3FFFFFF )
        {
            // 26 bits : 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            int x1 = ( ((ch>>(26-2))&0x3) | 0xF8 ) ;
            outputBytes(out, x1, 5, ch) ;
            return ;
        }

        if ( ch <= 0x7FFFFFFF )
        {
            // 32 bits : 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            int x1 = ( ((ch>>(32-1))&0x1) | 0xFC ) ;
            outputBytes(out, x1, 6, ch) ;
            return ;
        }
    }
    
    private static void outputBytes(OutputStream out, int x1, int byteLength, int ch) throws IOException
    {
        // ByteLength = 3 => 2 byteLenth => shift=6 and shift=0  
        out.write(x1) ;
        byteLength-- ; // remaining bytes
        for ( int i = 0 ; i < byteLength ; i++ )
        {
            // 6 Bits, loop from high to low  
            int shift = 6*(byteLength-i-1) ;
            int x =  (ch>>shift) & 0x3F ;
            x = x | 0x80 ;  // 10xxxxxx
            out.write(x) ;
        }
    }

    @Override
    public void flush() throws IOException
    { out.flush(); }

    @Override
    public void close() throws IOException
    { out.close() ; }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy