All downloads are free. The search and download functionality uses the official Maven repository.

io.airlift.jcodings.unicode.UnicodeEncoding Maven / Gradle / Ivy

Go to download

Java port of Oniguruma (http://www.geocities.jp/kosako3/oniguruma) that uses byte arrays directly instead of Java Strings and chars.

There is a newer version: 2.1.5.2
Show newest version
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package io.airlift.jcodings.unicode;

import static io.airlift.jcodings.util.ArrayReader.readIntArray;
import static io.airlift.jcodings.util.ArrayReader.readNestedIntArray;

import io.airlift.jcodings.ApplyAllCaseFoldFunction;
import io.airlift.jcodings.CaseFoldCodeItem;
import io.airlift.jcodings.CodeRange;
import io.airlift.jcodings.Config;
import io.airlift.jcodings.IntHolder;
import io.airlift.jcodings.MultiByteEncoding;
import io.airlift.jcodings.constants.CharacterType;
import io.airlift.jcodings.exception.CharacterPropertyException;
import io.airlift.jcodings.exception.ErrorMessages;
import io.airlift.jcodings.util.ArrayReader;
import io.airlift.jcodings.util.CaseInsensitiveBytesHash;
import io.airlift.jcodings.util.IntArrayHash;
import io.airlift.jcodings.util.IntHash;


public abstract class UnicodeEncoding extends MultiByteEncoding {
    private static final int PROPERTY_NAME_MAX_SIZE = 41;

    /**
     * Creates a Unicode encoding with no {@code Trans} table.
     *
     * <p>Equivalent to calling the five-argument constructor with {@code null}
     * as the {@code Trans} argument, which in turn hands
     * {@code UNICODE_ISO_8859_1_CTypeTable} to the superclass.
     *
     * @param name      encoding name, passed through to the superclass
     * @param minLength passed through to the superclass
     * @param maxLength passed through to the superclass
     * @param EncLen    passed through to the superclass
     */
    protected UnicodeEncoding(String name, int minLength, int maxLength, int[]EncLen) {
        // The cast keeps overload resolution unambiguous for the null argument.
        this(name, minLength, maxLength, EncLen, (int[][]) null);
    }

    /**
     * Creates a Unicode encoding, forwarding every argument to the superclass
     * constructor and appending {@code UNICODE_ISO_8859_1_CTypeTable} as the
     * final argument.
     *
     * @param name      encoding name, passed through to the superclass
     * @param minLength passed through to the superclass
     * @param maxLength passed through to the superclass
     * @param EncLen    passed through to the superclass
     * @param Trans     passed through to the superclass
     */
    protected UnicodeEncoding(String name, int minLength, int maxLength, int[]EncLen, int[][]Trans) {
        super(
            name,
            minLength,
            maxLength,
            EncLen,
            Trans,
            UNICODE_ISO_8859_1_CTypeTable); // one type table shared by all Unicode encodings
    }

    /**
     * Returns the charset name for this encoding.
     *
     * @return a new {@code String} constructed from the value of {@code getName()}
     */
    @Override
    public String getCharsetName() {
        final String charsetName = new String(getName());
        return charsetName;
    }

    // onigenc_unicode_is_code_ctype
    /**
     * Tests whether a code point belongs to the given character type.
     *
     * <p>Code points below 256 are answered by {@code isCodeCTypeInternal}:
     * always when {@code Config.USE_UNICODE_PROPERTIES} is off, and only for
     * ctypes up to {@code CharacterType.MAX_STD_CTYPE} when it is on. Every
     * other query is resolved against the range stored at index {@code ctype}
     * of {@code UnicodeProperties.CodeRangeTable}.
     *
     * @param code  the code point to test
     * @param ctype the character type; used as an index into
     *              {@code UnicodeProperties.CodeRangeTable} on the table path
     * @return {@code true} if {@code code} is of type {@code ctype}
     * @throws InternalError if {@code ctype} is at or beyond the end of
     *                       {@code UnicodeProperties.CodeRangeTable}
     */
    @Override
    public boolean isCodeCType(int code, int ctype) {
        if (Config.USE_UNICODE_PROPERTIES) {
            if (ctype <= CharacterType.MAX_STD_CTYPE && code < 256) {
                return isCodeCTypeInternal(code, ctype);
            }
        } else {
            if (code < 256) {
                return isCodeCTypeInternal(code, ctype);
            }
        }

        // ctype is an array index, so ctype == length is already out of range;
        // report it as a type bug rather than an ArrayIndexOutOfBoundsException.
        if (ctype >= UnicodeProperties.CodeRangeTable.length) {
            throw new InternalError(ErrorMessages.ERR_TYPE_BUG);
        }

        return CodeRange.isInCodeRange(UnicodeProperties.CodeRangeTable[ctype].getRange(), code);
    }

    // onigenc_unicode_ctype_code_range
    /**
     * Looks up the code range stored for a character type.
     *
     * @param ctype index into {@code UnicodeProperties.CodeRangeTable}
     * @return the result of {@code getRange()} on the table entry at {@code ctype}
     * @throws InternalError if {@code ctype} is at or beyond the end of the table
     */
    protected final int[]ctypeCodeRange(int ctype) {
        if (ctype < UnicodeProperties.CodeRangeTable.length) {
            final int[] range = UnicodeProperties.CodeRangeTable[ctype].getRange();
            return range;
        }

        throw new InternalError(ErrorMessages.ERR_TYPE_BUG);
    }

    // onigenc_unicode_property_name_to_ctype
    /**
     * Resolves a character property name to its ctype value.
     *
     * <p>The characters in {@code name[p..end)} are decoded one at a time with
     * {@code mbcToCode} and copied, one byte per character, into a scratch
     * buffer of {@code PROPERTY_NAME_MAX_SIZE} bytes. The buffer is then looked
     * up in {@code CTypeName.CTypeNameHash}.
     *
     * @param name bytes containing the property name in this encoding
     * @param p    offset of the first byte of the name
     * @param end  offset just past the last byte of the name
     * @return the ctype registered for the name
     * @throws CharacterPropertyException if the name contains a code point of
     *         0x80 or above, is {@code PROPERTY_NAME_MAX_SIZE} characters or
     *         longer, or is not present in the lookup table
     */
    @Override
    public int propertyNameToCType(byte[]name, int p, int end) {
        byte[]buf = new byte[PROPERTY_NAME_MAX_SIZE];
        int len = 0;

        for (int p_ = p; p_ < end; p_ += length(name, p_, end)) {
            int code = mbcToCode(name, p_, end);
            // Only 7-bit code points are accepted. The offending name is passed
            // along so this failure is reported like the other two below.
            if (code >= 0x80) {
                throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
            }
            buf[len++] = (byte)code;
            // Checked after the store: a name that fills the whole buffer is rejected.
            if (len >= PROPERTY_NAME_MAX_SIZE) {
                throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
            }
        }

        Integer ctype = CTypeName.CTypeNameHash.get(buf, 0, len);
        if (ctype == null) {
            throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
        }
        return ctype;
    }

    // onigenc_unicode_mbc_case_fold
    @Override
    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
        int p = pp.value;
        int foldP = 0;

        int code = mbcToCode(bytes, p, end);
        int len = length(bytes, p, end);
        pp.value += len;

        if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
            if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
                if (code == 0x0049) {
                    return codeToMbc(0x0131, fold, foldP);
                } else if (code == 0x0130) {
                    return codeToMbc(0x0069, fold, foldP);
                }
            }
        }

        int to[] = CaseFold.FoldHash.get(code);
        if (to != null) {
            if (to.length == 1) {
                return codeToMbc(to[0], fold, foldP);
            } else {
                int rlen = 0;
                for (int i=0; i CTypeNameHash = initializeCTypeNameTable();

        /**
         * Builds the property-name lookup table: the {@code name} of every
         * {@code UnicodeProperties.CodeRangeTable} entry is mapped to that
         * entry's index in the table.
         *
         * @return the populated hash
         */
        private static CaseInsensitiveBytesHash initializeCTypeNameTable() {
            final CaseInsensitiveBytesHash byName = new CaseInsensitiveBytesHash();
            int index = 0;
            while (index < UnicodeProperties.CodeRangeTable.length) {
                byName.putDirect(UnicodeProperties.CodeRangeTable[index].name, index);
                index++;
            }
            return byName;
        }
    }

    private static class CaseFold {
        private static final int CaseFold_From[] = readIntArray("CaseFold_From");
        private static final int CaseFold_To[][] = readNestedIntArray("CaseFold_To");
        private static final int CaseFold_Locale_From[] = readIntArray("CaseFold_Locale_From");
        private static final int CaseFold_Locale_To[][] = readNestedIntArray("CaseFold_Locale_To");

        private static IntHash initializeFoldHash() {
            IntHash fold = new IntHash(1200);
            for (int i = 0; i < CaseFold_From.length; i++)
                fold.putDirect(CaseFold_From[i], CaseFold_To[i]);
            for (int i = 0; i < CaseFold_Locale_From.length; i++)
                fold.putDirect(CaseFold_Locale_From[i], CaseFold_Locale_To[i]);
            return fold;
        }

        static final IntHashFoldHash = initializeFoldHash();
    }

    /**
     * Holder for the table built from the "CaseUnfold_11_*" and
     * "CaseUnfold_11_Locale_*" arrays: {@code Unfold1Hash} maps each
     * {@code *_From[i]} code to the matching {@code *_To[i]} array.
     */
    private static class CaseFold11 {
        private static final int CaseUnfold_11_From[] = readIntArray("CaseUnfold_11_From");
        private static final int CaseUnfold_11_To[][] = readNestedIntArray("CaseUnfold_11_To");
        private static final int CaseUnfold_11_Locale_From[] = readIntArray("CaseUnfold_11_Locale_From");
        private static final int CaseUnfold_11_Locale_To[][] = readNestedIntArray("CaseUnfold_11_Locale_To");

        // Must stay below the arrays: static fields initialize in textual order.
        static final IntHash Unfold1Hash = initializeUnfold1Hash();

        /** Creates the hash and loads the general entries, then the locale entries. */
        private static IntHash initializeUnfold1Hash() {
            final IntHash table = new IntHash(1000);
            addAll(table, CaseUnfold_11_From, CaseUnfold_11_To);
            addAll(table, CaseUnfold_11_Locale_From, CaseUnfold_11_Locale_To);
            return table;
        }

        /** Stores {@code keys[k] -> values[k]} into {@code target} for every index of {@code keys}. */
        private static void addAll(IntHash target, int[] keys, int[][] values) {
            for (int k = 0; k < keys.length; k++) {
                target.putDirect(keys[k], values[k]);
            }
        }
    }

    /**
     * Holder for the table built from the "CaseUnfold_12" and
     * "CaseUnfold_12_Locale" arrays. Each array is read as consecutive pairs:
     * the row at an even index is the key and the row after it is the value.
     */
    private static class CaseFold12 {
        private static final int CaseUnfold_12[][] = readNestedIntArray("CaseUnfold_12");
        private static final int CaseUnfold_12_Locale[][] = readNestedIntArray("CaseUnfold_12_Locale");

        // Must stay below the arrays: static fields initialize in textual order.
        static final IntArrayHash Unfold2Hash = initializeUnfold2Hash();

        /** Creates the hash and loads the general pairs, then the locale pairs. */
        private static IntArrayHash initializeUnfold2Hash() {
            final IntArrayHash table = new IntArrayHash(200);

            int key = 0;
            while (key < CaseUnfold_12.length) {
                table.putDirect(CaseUnfold_12[key], CaseUnfold_12[key + 1]);
                key += 2;
            }

            key = 0;
            while (key < CaseUnfold_12_Locale.length) {
                table.putDirect(CaseUnfold_12_Locale[key], CaseUnfold_12_Locale[key + 1]);
                key += 2;
            }

            return table;
        }
    }

    /**
     * Holder for the table built from the "CaseUnfold_13" array, which is read
     * as consecutive pairs: the row at an even index is the key and the row
     * after it is the value.
     */
    private static class CaseFold13 {
        private static final int CaseUnfold_13[][] = readNestedIntArray("CaseUnfold_13");

        // Must stay below the array: static fields initialize in textual order.
        static final IntArrayHash Unfold3Hash = initializeUnfold3Hash();

        /** Creates the hash and loads every key/value pair from the array. */
        private static IntArrayHash initializeUnfold3Hash() {
            final IntArrayHash table = new IntArrayHash(30);
            int key = 0;
            while (key < CaseUnfold_13.length) {
                table.putDirect(CaseUnfold_13[key], CaseUnfold_13[key + 1]);
                key += 2;
            }
            return table;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy