All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.atlas.lib.Chars Maven / Gradle / Ivy

There is a newer version: 5.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.atlas.lib;

import java.nio.charset.*;

public class Chars
{
    private Chars() {}

 // So also Bytes.hexDigits to get bytes.
    final public static char[] digits10 = {
        '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9'
    };

    /** Hex digits : upper case **/
    final public static char[] hexDigitsUC = {
        '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' ,
        '9' , 'A' , 'B' , 'C' , 'D' , 'E' , 'F' };

    /** Hex digits : lower case **/
    final public static char[] hexDigitsLC = {
        '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' ,
        '9' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' };


//         , 'g' , 'h' ,
//        'i' , 'j' , 'k' , 'l' , 'm' , 'n' ,
//        'o' , 'p' , 'q' , 'r' , 's' , 't' ,
//        'u' , 'v' , 'w' , 'x' , 'y' , 'z'

    public static final Charset charsetUTF8 = StandardCharsets.UTF_8;
    public static final Charset charsetASCII = StandardCharsets.US_ASCII;

    // Pools for encoders/decoder.
    // Better? use a ThreadLocal.
    // Initial pool size. Any additional encoder/decoder are later
    // placed in the pool - it's an infinite, reusing, growing pool.

    // Better?  If so, use these!

    private static final ThreadLocal threadCharsetEncoder =
        new ThreadLocal() {
        @Override protected CharsetEncoder initialValue() {
            return createEncoder();
        }
    };

    private static final ThreadLocal threadCharsetDecoder =
        new ThreadLocal() {
        @Override protected CharsetDecoder initialValue() {
            return createDecoder();
        }
    };

    /** Return a per-thread CharsetEncoder */
    public static CharsetEncoder getThreadEncoder() { return threadCharsetEncoder.get(); }

    /** Return a per-thread CharsetDecoder */
    public static CharsetDecoder getThreadDecoder() { return threadCharsetDecoder.get(); }

    private static final int PoolSize = 2;
    private static Pool encoders = PoolSync.create(new PoolBase());
    private static Pool decoders = PoolSync.create(new PoolBase());

    static {
        // Fill the pool.
        for ( int i = 0 ; i < PoolSize ; i++ )
        {
            putEncoder(createEncoder());
            putDecoder(createDecoder());
        }
    }

    /** Create a UTF-8 encoder */
    public static CharsetEncoder createEncoder()    { return charsetUTF8.newEncoder(); }
    /** Create a UTF-8 decoder */
    public static CharsetDecoder createDecoder()    { return charsetUTF8.newDecoder(); }

    /** Get a UTF-8 encoder from the pool (null if pool empty) */
    public static CharsetEncoder getEncoder()       { return encoders.get(); }
    /** Add/return a UTF-8 encoder to the pool */
    public static void putEncoder(CharsetEncoder encoder) { encoders.put(encoder); }

    /** Get a UTF-8 decoder from the pool (null if pool empty) */
    public static CharsetDecoder getDecoder()       { return decoders.get(); }
    /** Add/return a UTF-8 decoder to the pool */
    public static void putDecoder(CharsetDecoder decoder) { decoders.put(decoder); }

    /** Allocate a CharsetEncoder, creating as necessary */
    public static CharsetEncoder allocEncoder() {
        CharsetEncoder enc = Chars.getEncoder();
        // Blocking finite Pool - does not happen.
        // Plain Pool (sync wrapped) - might - allocate an extra one.
        if ( enc == null )
            enc = Chars.createEncoder();
        enc
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();

        return enc;
    }
    /** Deallocate a CharsetEncoder, may increase pool size */
    public static void deallocEncoder(CharsetEncoder enc) { putEncoder(enc); }

    /** Allocate a CharsetDecoder, creating as necessary */
    public static CharsetDecoder allocDecoder() {
        CharsetDecoder dec = Chars.getDecoder();
        // Blocking finite Pool - does not happen.
        // Plain Pool (sync wrapped) - might - allocate an extra one.
        if ( dec == null )
            dec = Chars.createDecoder();
        dec
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();

        return dec;
    }
    /** Deallocate a CharsetDecoder, may increase pool size */
    public static void deallocDecoder(CharsetDecoder dec) { putDecoder(dec); }

    /** Is char in the array? */
    public static boolean charInArray(int ch, char[] chars) {
        for ( int xch : chars ) {
            if ( ch == xch )
                return true;
        }
        return false;
    }

    public static void encodeAsHex(StringBuilder buff, char marker, char ch) {
        if ( ch < 256 ) {
            buff.append(marker);
            int lo = ch & 0xF;
            int hi = (ch >> 4) & 0xF;
            buff.append(Chars.hexDigitsUC[hi]);
            buff.append(Chars.hexDigitsUC[lo]);
            return;
        }
        int n4 = ch & 0xF;
        int n3 = (ch >> 4) & 0xF;
        int n2 = (ch >> 8) & 0xF;
        int n1 = (ch >> 12) & 0xF;
        buff.append(marker);
        buff.append(Chars.hexDigitsUC[n1]);
        buff.append(Chars.hexDigitsUC[n2]);
        buff.append(marker);
        buff.append(Chars.hexDigitsUC[n3]);
        buff.append(Chars.hexDigitsUC[n4]);
    }

    /** End of file - not a Unicode codepoint */
    public static final int EOF              = -1 ;

    // BOM  : U+FEFF encoded in UTF-8 as xEF,0xBB,0xBF
    public static final char BOM             = 0xFEFF ;

    // Specials block:
    //   https://www.unicode.org/charts/PDF/UFFF0.pdf
    //   https://en.wikipedia.org/wiki/Specials_(Unicode_block)
    // OBJECT REPLACEMENT CHARACTER
    public static final char OBJECT_REPLACEMENT = 0xFFFC ;

    // REPLACEMENT CHARACTER
    public static final char REPLACEMENT     = 0xFFFD ;

    // "Not a character" - a Unicode noncharacter codepoint that is not legal in UTF-8.
    public static final char NotACharacter   = 0xFFFF ;
    // Detect byte order by contrast (BOM reversed) - a Unicode noncharacter codepoint that is not legal in UTF-8.
    public static final char ReverseOrderBOM = 0xFFFE ;

    /** Undefined character (exact meaning depends on use) - not a Unicode codepoint */
    public static final int  UNSET           =  -2 ;
    public static final char NL              = '\n' ;
    public static final char LF              = NL ;     // Alt name.
    public static final char CR              = '\r' ;
    public static final char TAB             = '\t' ;
    public static final char FF              = '\f' ;   // Form feed
    public static final char SPC             = ' ' ;
    public static final char BSPACE          = '\b' ;

    public static final char CH_ZERO         =  (char)0 ;

    public static final char CH_LBRACKET     = '[' ;
    public static final char CH_RBRACKET     = ']' ;

    public static final char CH_LBRACE       = '{' ;
    public static final char CH_RBRACE       = '}' ;

    public static final char CH_LPAREN       = '(' ;
    public static final char CH_RPAREN       = ')' ;

    public static final char CH_LT           = '<' ;
    public static final char CH_GT           = '>' ;
    public static final char CH_UNDERSCORE   = '_' ;

    public static final char CH_QUOTE1       = '\'' ;
    public static final char CH_QUOTE2       = '"' ;

    public static final char CH_EQUALS       = '=' ;
    public static final char CH_STAR         = '*' ;
    public static final char CH_DOT          = '.' ;
    public static final char CH_COMMA        = ',' ;
    public static final char CH_SEMICOLON    = ';' ;
    public static final char CH_COLON        = ':' ;
    public static final char CH_AMPHERSAND   = '&' ;
    public static final char CH_AT           = '@' ;
    public static final char CH_QMARK        = '?' ;
    public static final char CH_EMARK        = '!' ;
    public static final char CH_DOLLAR       = '$' ;
    public static final char CH_HASH         = '#' ;
    public static final char CH_PLUS         = '+' ;
    public static final char CH_MINUS        = '-' ;
    public static final char CH_DASH         = '-' ; // Alt name
    public static final char CH_SLASH        = '/' ;
    public static final char CH_RSLASH       = '\\' ;
    public static final char CH_PERCENT      = '%' ;
    public static final char CH_VBAR         = '|' ;

    // Byte versions of the above
    public static final byte B_NL            = NL ;
    public static final byte B_CR            = CR ;

    public static final byte B_LBRACKET      = '[' ;
    public static final byte B_RBRACKET      = ']' ;

    public static final byte B_LBRACE        = '{' ;
    public static final byte B_RBRACE        = '}' ;

    public static final byte B_LPAREN        = '(' ;
    public static final byte B_RPAREN        = ')' ;

    public static final byte B_LT            = '<' ;
    public static final byte B_GT            = '>' ;
    public static final byte B_UNDERSCORE    = '_' ;

    public static final byte B_QUOTE1        = '\'' ;
    public static final byte B_QUOTE2        = '"' ;

    public static final byte B_EQUALS        = '=' ;
    public static final byte B_STAR          = '*' ;
    public static final byte B_DOT           = '.' ;
    public static final byte B_COMMA         = ',' ;
    public static final byte B_SEMICOLON     = ';' ;
    public static final byte B_COLON         = ':' ;
    public static final byte B_AT            = '@' ;
    public static final byte B_AMPHERSAND    = '&' ;
    public static final byte B_QMARK         = '?' ;
    public static final byte B_HASH          = '#' ;
    public static final byte B_PLUS          = '+' ;
    public static final byte B_MINUS         = '-' ;
    public static final byte B_DASH          = '-' ; // Alt name
    public static final byte B_SLASH         = '/' ;
    public static final byte B_RSLASH        = '\\' ;
    public static final byte B_PERCENT       = '%' ;
    public static final byte B_VBAR          = '|' ;

    // String versions - a few compound "chars" as well.
    public static final String S_LBRACKET     = "[" ;
    public static final String S_RBRACKET     = "]" ;

    public static final String S_LBRACE       = "{" ;
    public static final String S_RBRACE       = "}" ;

    public static final String S_LPAREN       = "(" ;
    public static final String S_RPAREN       = ")" ;

    public static final String S_LT           = "<" ;
    public static final String S_LE           = "<=" ;
    public static final String S_GT           = ">" ;
    public static final String S_GE           = ">=" ;
    public static final String S_UNDERSCORE   = "_" ;

    public static final String S_QUOTE1       = "'" ;
    public static final String S_QUOTE2       = "\"" ;

    public static final String S_EQUALS       = "=" ;
    public static final String S_STAR         = "*" ;
    public static final String S_DOT          = "." ;
    public static final String S_COMMA        = "," ;
    public static final String S_SEMICOLON    = ";" ;
    public static final String S_COLON        = ":" ;
    public static final String S_AMPHERSAND   = "&" ;
    public static final String S_AT           = "@" ;
    public static final String S_QMARK        = "?" ;
    public static final String S_HASH         = "#" ;
    public static final String S_PLUS         = "+" ;
    public static final String S_MINUS        = "-" ;
    public static final String S_DASH         = "-" ; // Alt name
    public static final String S_SLASH        = "/" ;
    public static final String S_RSLASH       = "\\" ;
    public static final String S_PERCENT      = "%" ;
    public static final String S_VBAR         = "|" ;

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy