All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.microsoft.sqlserver.jdbc.SQLCollation Maven / Gradle / Ivy

There is a newer version: 12.8.1.jre11
Show newest version
/*
 * Microsoft JDBC Driver for SQL Server Copyright(c) Microsoft Corporation All rights reserved. This program is made
 * available under the terms of the MIT License. See the LICENSE file in the project root for more information.
 */

package com.microsoft.sqlserver.jdbc;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;


/**
 * SQLCollation is a helper class used to read a TDS collation from a TDS stream. The collation is in the following
 * BNF format (see the TDS spec for full details):
 * 
 * LCID := 20 * BIT; fIgnoreCase := BIT; fIgnoreAccent := BIT; fIgnoreWidth := BIT; fIgnoreKana := BIT; fBinary := BIT;
 * ColFlags := fIgnoreCase, fIgnoreAccent, fIgnoreWidth, fIgnoreKana, fBinary, FRESERVEDBIT, FRESERVEDBIT, FRESERVEDBIT;
 * Version := 4 * BIT; SortId := BYTE;
 * 
 * COLLATION := LCID, ColFlags, Version, SortId;
 * 
 */
final class SQLCollation implements java.io.Serializable {
    /**
     * Serialization version identifier; this class implements {@code java.io.Serializable}.
     */
    private static final long serialVersionUID = 6748833280721312349L;

    // Read by the constructor as a single int; per the constructor's note it carries LCID, ColFlags and Version.
    private final int info; // First 4 bytes of TDS collation.

    /**
     * Extracts the language ID from the collation info by keeping only its low-order 16 bits.
     *
     * @return the low 16 bits of {@code info}
     */
    private int langID() {
        final int lowWordMask = 0x0000FFFF;
        return info & lowWordMask;
    }

    private final int sortId; // 5th byte of TDS collation.
    // Encoding chosen by the constructor: UTF8 when the UTF-8 flag is set in info, otherwise looked up
    // from the language ID (sortId == 0) or from the sort ID (sortId != 0).
    private final Encoding encoding;
    // Flag bit tested against info in the constructor; when set, Encoding.UTF8 is selected.
    private static final int UTF8_IN_TDSCOLLATION = 0x4000000;

    // Utility methods for getting details of this collation's encoding
    /**
     * Returns the charset of this collation's encoding.
     *
     * @return the {@link Charset} supplied by the encoding
     * @throws SQLServerException
     *         if the encoding fails to provide its charset
     */
    final Charset getCharset() throws SQLServerException {
        final Charset collationCharset = encoding.charset();
        return collationCharset;
    }

    /**
     * Reports whether this collation's encoding supports conversion to ASCII.
     *
     * @return the value reported by the encoding
     */
    final boolean supportsAsciiConversion() {
        final boolean asciiConvertible = encoding.supportsAsciiConversion();
        return asciiConvertible;
    }

    /**
     * Reports whether this collation's encoding is an ASCII-compatible single-byte character set.
     *
     * @return the value reported by the encoding
     */
    final boolean hasAsciiCompatibleSBCS() {
        final boolean asciiCompatibleSbcs = encoding.hasAsciiCompatibleSBCS();
        return asciiCompatibleSbcs;
    }

    /**
     * Returns the length of a collation in TDS.
     *
     * @return the collation length in bytes, which is always 5
     */
    static final int tdsLength() {
        final int collationByteCount = 5;
        return collationByteCount;
    }

    /**
     * Returns the collation info, i.e. the first 4 bytes of the TDS collation as read by the constructor.
     *
     * @return the collation info value
     */
    int getCollationInfo() {
        return info;
    }

    /**
     * Returns the sort ID, i.e. the 5th byte of the TDS collation as read by the constructor.
     *
     * @return the collation sort ID
     */
    int getCollationSortID() {
        return sortId;
    }

    /**
     * Compares this collation with another one by their collation info and sort ID.
     *
     * @param col
     *        the collation to compare against; may be {@code null}
     * @return {@code true} when {@code col} is non-null and has the same info and sort ID, {@code false} otherwise
     */
    boolean isEqual(SQLCollation col) {
        if (col == null) {
            return false;
        }
        return col.info == info && col.sortId == sortId;
    }

    /**
     * Reads TDS collation from TDS buffer into SQLCollation class.
     * 
     * @param tdsReader
     */
    SQLCollation(TDSReader tdsReader) throws UnsupportedEncodingException, SQLServerException {
        /*
         * TDS rule for collation: COLLATION = LCID ColFlags Version SortId
         */
        info = tdsReader.readInt(); // 4 bytes, contains: LCID ColFlags Version
        sortId = tdsReader.readUnsignedByte(); // 1 byte, contains: SortId
        if (UTF8_IN_TDSCOLLATION == (info & UTF8_IN_TDSCOLLATION)) {
            encoding = Encoding.UTF8;
        } else {
            // For a SortId==0 collation, the LCID bits correspond to a LocaleId
            encoding = (0 == sortId) ? encodingFromLCID() : encodingFromSortId();
        }
    }

    /**
     * Writes this collation to a TDS writer: the 4-byte info value first, then the sort ID as a single byte.
     *
     * @param tdsWriter
     *        TDS writer to write collation to.
     * @throws SQLServerException
     *         declared by this method; presumably raised by the writer on a write failure
     */
    void writeCollation(TDSWriter tdsWriter) throws SQLServerException {
        final byte sortIdByte = (byte) (sortId & 0xFF);
        tdsWriter.writeInt(info);
        tdsWriter.writeByte(sortIdByte);
    }

    /**
     * Enumeration of Windows locales recognized by SQL Server.
     *
     * For our purposes in the driver, locales are only described by their LangID and character encodings. Each
     * constant pairs a LangID with the encoding used for that locale.
     *
     * The set of locales is derived from the following resources:
     *
     * http://download.microsoft.com/download/9/5/e/95ef66af-9026-4bb0-a41d-a4f81802d92c/[MS-LCID].pdf Lists LCID values
     * and their corresponding meanings (in RFC 3066 format). Used to derive the names for the various enumeration
     * constants.
     *
     * x_rgLocaleMap and x_rgLcidOrdMap in sql\common\include\localemap.h in Katmai source tree Collectively, these two
     * tables provide a mapping of collation-version specific encodings for every locale supported by SQL Server. Lang
     * IDs are derived from locales' LCIDs.
     */
    enum WindowsLocale {
        ar_SA(0x0401, Encoding.CP1256),
        bg_BG(0x0402, Encoding.CP1251),
        ca_ES(0x0403, Encoding.CP1252),
        zh_TW(0x0404, Encoding.CP950),
        cs_CZ(0x0405, Encoding.CP1250),
        da_DK(0x0406, Encoding.CP1252),
        de_DE(0x0407, Encoding.CP1252),
        el_GR(0x0408, Encoding.CP1253),
        en_US(0x0409, Encoding.CP1252),
        es_ES_tradnl(0x040a, Encoding.CP1252),
        fi_FI(0x040b, Encoding.CP1252),
        fr_FR(0x040c, Encoding.CP1252),
        he_IL(0x040d, Encoding.CP1255),
        hu_HU(0x040e, Encoding.CP1250),
        is_IS(0x040f, Encoding.CP1252),
        it_IT(0x0410, Encoding.CP1252),
        ja_JP(0x0411, Encoding.CP932),
        ko_KR(0x0412, Encoding.CP949),
        nl_NL(0x0413, Encoding.CP1252),
        nb_NO(0x0414, Encoding.CP1252),
        pl_PL(0x0415, Encoding.CP1250),
        pt_BR(0x0416, Encoding.CP1252),
        rm_CH(0x0417, Encoding.CP1252),
        ro_RO(0x0418, Encoding.CP1250),
        ru_RU(0x0419, Encoding.CP1251),
        hr_HR(0x041a, Encoding.CP1250),
        sk_SK(0x041b, Encoding.CP1250),
        sq_AL(0x041c, Encoding.CP1250),
        sv_SE(0x041d, Encoding.CP1252),
        th_TH(0x041e, Encoding.CP874),
        tr_TR(0x041f, Encoding.CP1254),
        ur_PK(0x0420, Encoding.CP1256),
        id_ID(0x0421, Encoding.CP1252),
        uk_UA(0x0422, Encoding.CP1251),
        be_BY(0x0423, Encoding.CP1251),
        sl_SI(0x0424, Encoding.CP1250),
        et_EE(0x0425, Encoding.CP1257),
        lv_LV(0x0426, Encoding.CP1257),
        lt_LT(0x0427, Encoding.CP1257),
        tg_Cyrl_TJ(0x0428, Encoding.CP1251),
        fa_IR(0x0429, Encoding.CP1256),
        vi_VN(0x042a, Encoding.CP1258),
        hy_AM(0x042b, Encoding.CP1252),
        az_Latn_AZ(0x042c, Encoding.CP1254),
        eu_ES(0x042d, Encoding.CP1252),
        wen_DE(0x042e, Encoding.CP1252),
        mk_MK(0x042f, Encoding.CP1251),
        tn_ZA(0x0432, Encoding.CP1252),
        xh_ZA(0x0434, Encoding.CP1252),
        zu_ZA(0x0435, Encoding.CP1252),
        Af_ZA(0x0436, Encoding.CP1252),
        ka_GE(0x0437, Encoding.CP1252),
        fo_FO(0x0438, Encoding.CP1252),
        hi_IN(0x0439, Encoding.UNICODE),
        mt_MT(0x043a, Encoding.UNICODE),
        se_NO(0x043b, Encoding.CP1252),
        ms_MY(0x043e, Encoding.CP1252),
        kk_KZ(0x043f, Encoding.CP1251),
        ky_KG(0x0440, Encoding.CP1251),
        sw_KE(0x0441, Encoding.CP1252),
        tk_TM(0x0442, Encoding.CP1250),
        uz_Latn_UZ(0x0443, Encoding.CP1254),
        tt_RU(0x0444, Encoding.CP1251),
        bn_IN(0x0445, Encoding.UNICODE),
        pa_IN(0x0446, Encoding.UNICODE),
        gu_IN(0x0447, Encoding.UNICODE),
        or_IN(0x0448, Encoding.UNICODE),
        ta_IN(0x0449, Encoding.UNICODE),
        te_IN(0x044a, Encoding.UNICODE),
        kn_IN(0x044b, Encoding.UNICODE),
        ml_IN(0x044c, Encoding.UNICODE),
        as_IN(0x044d, Encoding.UNICODE),
        mr_IN(0x044e, Encoding.UNICODE),
        sa_IN(0x044f, Encoding.UNICODE),
        mn_MN(0x0450, Encoding.CP1251),
        bo_CN(0x0451, Encoding.UNICODE),
        cy_GB(0x0452, Encoding.CP1252),
        km_KH(0x0453, Encoding.UNICODE),
        lo_LA(0x0454, Encoding.UNICODE),
        gl_ES(0x0456, Encoding.CP1252),
        kok_IN(0x0457, Encoding.UNICODE),
        syr_SY(0x045a, Encoding.UNICODE),
        si_LK(0x045b, Encoding.UNICODE),
        iu_Cans_CA(0x045d, Encoding.CP1252),
        am_ET(0x045e, Encoding.CP1252),
        ne_NP(0x0461, Encoding.UNICODE),
        fy_NL(0x0462, Encoding.CP1252),
        ps_AF(0x0463, Encoding.UNICODE),
        fil_PH(0x0464, Encoding.CP1252),
        dv_MV(0x0465, Encoding.UNICODE),
        ha_Latn_NG(0x0468, Encoding.CP1252),
        yo_NG(0x046a, Encoding.CP1252),
        quz_BO(0x046b, Encoding.CP1252),
        nso_ZA(0x046c, Encoding.CP1252),
        ba_RU(0x046d, Encoding.CP1251),
        lb_LU(0x046e, Encoding.CP1252),
        kl_GL(0x046f, Encoding.CP1252),
        ig_NG(0x0470, Encoding.CP1252),
        ii_CN(0x0478, Encoding.CP1252),
        arn_CL(0x047a, Encoding.CP1252),
        moh_CA(0x047c, Encoding.CP1252),
        br_FR(0x047e, Encoding.CP1252),
        ug_CN(0x0480, Encoding.CP1256),
        mi_NZ(0x0481, Encoding.UNICODE),
        oc_FR(0x0482, Encoding.CP1252),
        co_FR(0x0483, Encoding.CP1252),
        gsw_FR(0x0484, Encoding.CP1252),
        sah_RU(0x0485, Encoding.CP1251),
        qut_GT(0x0486, Encoding.CP1252),
        rw_RW(0x0487, Encoding.CP1252),
        wo_SN(0x0488, Encoding.CP1252),
        prs_AF(0x048c, Encoding.CP1256),
        ar_IQ(0x0801, Encoding.CP1256),
        zh_CN(0x0804, Encoding.CP936),
        de_CH(0x0807, Encoding.CP1252),
        en_GB(0x0809, Encoding.CP1252),
        es_MX(0x080a, Encoding.CP1252),
        fr_BE(0x080c, Encoding.CP1252),
        it_CH(0x0810, Encoding.CP1252),
        nl_BE(0x0813, Encoding.CP1252),
        nn_NO(0x0814, Encoding.CP1252),
        pt_PT(0x0816, Encoding.CP1252),
        sr_Latn_CS(0x081a, Encoding.CP1250),
        sv_FI(0x081d, Encoding.CP1252),
        Lithuanian_Classic(0x0827, Encoding.CP1257),
        az_Cyrl_AZ(0x082c, Encoding.CP1251),
        dsb_DE(0x082e, Encoding.CP1252),
        se_SE(0x083b, Encoding.CP1252),
        ga_IE(0x083c, Encoding.CP1252),
        ms_BN(0x083e, Encoding.CP1252),
        uz_Cyrl_UZ(0x0843, Encoding.CP1251),
        bn_BD(0x0845, Encoding.UNICODE),
        mn_Mong_CN(0x0850, Encoding.CP1251),
        iu_Latn_CA(0x085d, Encoding.CP1252),
        tzm_Latn_DZ(0x085f, Encoding.CP1252),
        quz_EC(0x086b, Encoding.CP1252),
        ar_EG(0x0c01, Encoding.CP1256),
        zh_HK(0x0c04, Encoding.CP950),
        de_AT(0x0c07, Encoding.CP1252),
        en_AU(0x0c09, Encoding.CP1252),
        es_ES(0x0c0a, Encoding.CP1252),
        fr_CA(0x0c0c, Encoding.CP1252),
        sr_Cyrl_CS(0x0c1a, Encoding.CP1251),
        se_FI(0x0c3b, Encoding.CP1252),
        quz_PE(0x0c6b, Encoding.CP1252),
        ar_LY(0x1001, Encoding.CP1256),
        zh_SG(0x1004, Encoding.CP936),
        de_LU(0x1007, Encoding.CP1252),
        en_CA(0x1009, Encoding.CP1252),
        es_GT(0x100a, Encoding.CP1252),
        fr_CH(0x100c, Encoding.CP1252),
        hr_BA(0x101a, Encoding.CP1250),
        smj_NO(0x103b, Encoding.CP1252),
        ar_DZ(0x1401, Encoding.CP1256),
        zh_MO(0x1404, Encoding.CP950),
        de_LI(0x1407, Encoding.CP1252),
        en_NZ(0x1409, Encoding.CP1252),
        es_CR(0x140a, Encoding.CP1252),
        fr_LU(0x140c, Encoding.CP1252),
        bs_Latn_BA(0x141a, Encoding.CP1250),
        smj_SE(0x143b, Encoding.CP1252),
        ar_MA(0x1801, Encoding.CP1256),
        en_IE(0x1809, Encoding.CP1252),
        es_PA(0x180a, Encoding.CP1252),
        fr_MC(0x180c, Encoding.CP1252),
        sr_Latn_BA(0x181a, Encoding.CP1250),
        sma_NO(0x183b, Encoding.CP1252),
        ar_TN(0x1c01, Encoding.CP1256),
        en_ZA(0x1c09, Encoding.CP1252),
        es_DO(0x1c0a, Encoding.CP1252),
        sr_Cyrl_BA(0x1c1a, Encoding.CP1251),
        sma_SB(0x1c3b, Encoding.CP1252),
        ar_OM(0x2001, Encoding.CP1256),
        en_JM(0x2009, Encoding.CP1252),
        es_VE(0x200a, Encoding.CP1252),
        bs_Cyrl_BA(0x201a, Encoding.CP1251),
        sms_FI(0x203b, Encoding.CP1252),
        ar_YE(0x2401, Encoding.CP1256),
        en_CB(0x2409, Encoding.CP1252),
        es_CO(0x240a, Encoding.CP1252),
        smn_FI(0x243b, Encoding.CP1252),
        ar_SY(0x2801, Encoding.CP1256),
        en_BZ(0x2809, Encoding.CP1252),
        es_PE(0x280a, Encoding.CP1252),
        ar_JO(0x2c01, Encoding.CP1256),
        en_TT(0x2c09, Encoding.CP1252),
        es_AR(0x2c0a, Encoding.CP1252),
        ar_LB(0x3001, Encoding.CP1256),
        en_ZW(0x3009, Encoding.CP1252),
        es_EC(0x300a, Encoding.CP1252),
        ar_KW(0x3401, Encoding.CP1256),
        en_PH(0x3409, Encoding.CP1252),
        es_CL(0x340a, Encoding.CP1252),
        ar_AE(0x3801, Encoding.CP1256),
        es_UY(0x380a, Encoding.CP1252),
        ar_BH(0x3c01, Encoding.CP1256),
        es_PY(0x3c0a, Encoding.CP1252),
        ar_QA(0x4001, Encoding.CP1256),
        en_IN(0x4009, Encoding.CP1252),
        es_BO(0x400a, Encoding.CP1252),
        en_MY(0x4409, Encoding.CP1252),
        es_SV(0x440a, Encoding.CP1252),
        en_SG(0x4809, Encoding.CP1252),
        es_HN(0x480a, Encoding.CP1252),
        es_NI(0x4c0a, Encoding.CP1252),
        es_PR(0x500a, Encoding.CP1252),
        es_US(0x540a, Encoding.CP1252);

        // LangID of the locale; used as the lookup key when the enclosing class indexes the locales.
        private final int langID;
        // Character encoding associated with the locale.
        private final Encoding encoding;

        /**
         * Creates a locale entry.
         *
         * @param langID
         *        LangID of the locale
         * @param encoding
         *        encoding associated with the locale
         */
        WindowsLocale(int langID, Encoding encoding) {
            this.langID = langID;
            this.encoding = encoding;
        }

        /**
         * Returns this locale's encoding after running the encoding's own support check.
         *
         * @return the encoding of this locale
         * @throws UnsupportedEncodingException
         *         if the encoding's support check fails
         */
        final Encoding getEncoding() throws UnsupportedEncodingException {
            return encoding.checkSupported();
        }
    }

    // Index of Windows locales by their LangIDs for fast lookup
    // of encodings associated with various SQL collations
    private static final Map<Integer, WindowsLocale> localeIndex;

    /**
     * Determines the encoding of this collation from its language ID.
     *
     * The language ID is looked up in the locale index; the matching locale then supplies its encoding.
     *
     * @return the encoding of the locale registered for this collation's language ID
     * @throws UnsupportedEncodingException
     *         if no locale is registered for the language ID, or if the locale's encoding fails its support check
     *         (the original failure is attached as the cause in that case)
     */
    private Encoding encodingFromLCID() throws UnsupportedEncodingException {
        WindowsLocale locale = localeIndex.get(langID());

        if (null == locale) {
            MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_unknownLCID"));
            Object[] msgArgs = {Integer.toHexString(langID()).toUpperCase()};
            throw new UnsupportedEncodingException(form.format(msgArgs));
        }

        try {
            return locale.getEncoding();
        } catch (UnsupportedEncodingException inner) {
            MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_unknownLCID"));
            Object[] msgArgs = {locale};
            // UnsupportedEncodingException has no cause-accepting constructor, so chain the cause explicitly
            UnsupportedEncodingException e = new UnsupportedEncodingException(form.format(msgArgs));
            e.initCause(inner);
            throw e;
        }
    }

    /**
     * Enumeration of original SQL Server sort orders recognized by SQL Server.
     *
     * If SQL collation has a non-zero sortId, then use this enum to determine the encoding. From
     * sql_main\sql\common\src\sqlscol.cpp (SQLServer code base).
     *
     * Each constant carries a sort ID, a display name and the encoding used for that sort order.
     */
    enum SortOrder {
        BIN_CP437(30, "SQL_Latin1_General_CP437_BIN", Encoding.CP437),
        DICTIONARY_437(31, "SQL_Latin1_General_CP437_CS_AS", Encoding.CP437),
        NOCASE_437(32, "SQL_Latin1_General_CP437_CI_AS", Encoding.CP437),
        NOCASEPREF_437(33, "SQL_Latin1_General_Pref_CP437_CI_AS", Encoding.CP437),
        NOACCENTS_437(34, "SQL_Latin1_General_CP437_CI_AI", Encoding.CP437),
        BIN2_CP437(35, "SQL_Latin1_General_CP437_BIN2", Encoding.CP437),

        BIN_CP850(40, "SQL_Latin1_General_CP850_BIN", Encoding.CP850),
        DICTIONARY_850(41, "SQL_Latin1_General_CP850_CS_AS", Encoding.CP850),
        NOCASE_850(42, "SQL_Latin1_General_CP850_CI_AS", Encoding.CP850),
        NOCASEPREF_850(43, "SQL_Latin1_General_Pref_CP850_CI_AS", Encoding.CP850),
        NOACCENTS_850(44, "SQL_Latin1_General_CP850_CI_AI", Encoding.CP850),
        BIN2_CP850(45, "SQL_Latin1_General_CP850_BIN2", Encoding.CP850),

        CASELESS_34(49, "SQL_1xCompat_CP850_CI_AS", Encoding.CP850),
        BIN_ISO_1(50, "bin_iso_1", Encoding.CP1252),
        DICTIONARY_ISO(51, "SQL_Latin1_General_CP1_CS_AS", Encoding.CP1252),
        NOCASE_ISO(52, "SQL_Latin1_General_CP1_CI_AS", Encoding.CP1252),
        NOCASEPREF_ISO(53, "SQL_Latin1_General_Pref_CP1_CI_AS", Encoding.CP1252),
        NOACCENTS_ISO(54, "SQL_Latin1_General_CP1_CI_AI", Encoding.CP1252),
        ALT_DICTIONARY(55, "SQL_AltDiction_CP850_CS_AS", Encoding.CP850),
        ALT_NOCASEPREF(56, "SQL_AltDiction_Pref_CP850_CI_AS", Encoding.CP850),
        ALT_NOACCENTS(57, "SQL_AltDiction_CP850_CI_AI", Encoding.CP850),
        SCAND_NOCASEPREF(58, "SQL_Scandinavian_Pref_CP850_CI_AS", Encoding.CP850),
        SCAND_DICTIONARY(59, "SQL_Scandinavian_CP850_CS_AS", Encoding.CP850),
        SCAND_NOCASE(60, "SQL_Scandinavian_CP850_CI_AS", Encoding.CP850),
        ALT_NOCASE(61, "SQL_AltDiction_CP850_CI_AS", Encoding.CP850),

        DICTIONARY_1252(71, "dictionary_1252", Encoding.CP1252),
        NOCASE_1252(72, "nocase_1252", Encoding.CP1252),
        DNK_NOR_DICTIONARY(73, "dnk_nor_dictionary", Encoding.CP1252),
        FIN_SWE_DICTIONARY(74, "fin_swe_dictionary", Encoding.CP1252),
        ISL_DICTIONARY(75, "isl_dictionary", Encoding.CP1252),

        BIN_CP1250(80, "bin_cp1250", Encoding.CP1250),
        DICTIONARY_1250(81, "SQL_Latin1_General_CP1250_CS_AS", Encoding.CP1250),
        NOCASE_1250(82, "SQL_Latin1_General_CP1250_CI_AS", Encoding.CP1250),
        CSYDIC(83, "SQL_Czech_CP1250_CS_AS", Encoding.CP1250),
        CSYNC(84, "SQL_Czech_CP1250_CI_AS", Encoding.CP1250),
        HUNDIC(85, "SQL_Hungarian_CP1250_CS_AS", Encoding.CP1250),
        HUNNC(86, "SQL_Hungarian_CP1250_CI_AS", Encoding.CP1250),
        PLKDIC(87, "SQL_Polish_CP1250_CS_AS", Encoding.CP1250),
        PLKNC(88, "SQL_Polish_CP1250_CI_AS", Encoding.CP1250),
        ROMDIC(89, "SQL_Romanian_CP1250_CS_AS", Encoding.CP1250),
        ROMNC(90, "SQL_Romanian_CP1250_CI_AS", Encoding.CP1250),
        SHLDIC(91, "SQL_Croatian_CP1250_CS_AS", Encoding.CP1250),
        SHLNC(92, "SQL_Croatian_CP1250_CI_AS", Encoding.CP1250),
        SKYDIC(93, "SQL_Slovak_CP1250_CS_AS", Encoding.CP1250),
        SKYNC(94, "SQL_Slovak_CP1250_CI_AS", Encoding.CP1250),
        SLVDIC(95, "SQL_Slovenian_CP1250_CS_AS", Encoding.CP1250),
        SLVNC(96, "SQL_Slovenian_CP1250_CI_AS", Encoding.CP1250),
        POLISH_CS(97, "polish_cs", Encoding.CP1250),
        POLISH_CI(98, "polish_ci", Encoding.CP1250),

        BIN_CP1251(104, "bin_cp1251", Encoding.CP1251),
        DICTIONARY_1251(105, "SQL_Latin1_General_CP1251_CS_AS", Encoding.CP1251),
        NOCASE_1251(106, "SQL_Latin1_General_CP1251_CI_AS", Encoding.CP1251),
        UKRDIC(107, "SQL_Ukrainian_CP1251_CS_AS", Encoding.CP1251),
        UKRNC(108, "SQL_Ukrainian_CP1251_CI_AS", Encoding.CP1251),

        BIN_CP1253(112, "bin_cp1253", Encoding.CP1253),
        DICTIONARY_1253(113, "SQL_Latin1_General_CP1253_CS_AS", Encoding.CP1253),
        NOCASE_1253(114, "SQL_Latin1_General_CP1253_CI_AS", Encoding.CP1253),

        GREEK_MIXEDDICTIONARY(120, "SQL_MixDiction_CP1253_CS_AS", Encoding.CP1253),
        GREEK_ALTDICTIONARY(121, "SQL_AltDiction_CP1253_CS_AS", Encoding.CP1253),
        GREEK_ALTDICTIONARY2(122, "SQL_AltDiction2_CP1253_CS_AS", Encoding.CP1253),
        GREEK_NOCASEDICT(124, "SQL_Latin1_General_CP1253_CI_AI", Encoding.CP1253),
        BIN_CP1254(128, "bin_cp1254", Encoding.CP1254),
        DICTIONARY_1254(129, "SQL_Latin1_General_CP1254_CS_AS", Encoding.CP1254),
        NOCASE_1254(130, "SQL_Latin1_General_CP1254_CI_AS", Encoding.CP1254),

        BIN_CP1255(136, "bin_cp1255", Encoding.CP1255),
        DICTIONARY_1255(137, "SQL_Latin1_General_CP1255_CS_AS", Encoding.CP1255),
        NOCASE_1255(138, "SQL_Latin1_General_CP1255_CI_AS", Encoding.CP1255),

        BIN_CP1256(144, "bin_cp1256", Encoding.CP1256),
        DICTIONARY_1256(145, "SQL_Latin1_General_CP1256_CS_AS", Encoding.CP1256),
        NOCASE_1256(146, "SQL_Latin1_General_CP1256_CI_AS", Encoding.CP1256),

        BIN_CP1257(152, "bin_cp1257", Encoding.CP1257),
        DICTIONARY_1257(153, "SQL_Latin1_General_CP1257_CS_AS", Encoding.CP1257),
        NOCASE_1257(154, "SQL_Latin1_General_CP1257_CI_AS", Encoding.CP1257),
        ETIDIC(155, "SQL_Estonian_CP1257_CS_AS", Encoding.CP1257),
        ETINC(156, "SQL_Estonian_CP1257_CI_AS", Encoding.CP1257),
        LVIDIC(157, "SQL_Latvian_CP1257_CS_AS", Encoding.CP1257),
        LVINC(158, "SQL_Latvian_CP1257_CI_AS", Encoding.CP1257),
        LTHDIC(159, "SQL_Lithuanian_CP1257_CS_AS", Encoding.CP1257),
        LTHNC(160, "SQL_Lithuanian_CP1257_CI_AS", Encoding.CP1257),

        DANNO_NOCASEPREF(183, "SQL_Danish_Pref_CP1_CI_AS", Encoding.CP1252),
        SVFI1_NOCASEPREF(184, "SQL_SwedishPhone_Pref_CP1_CI_AS", Encoding.CP1252),
        SVFI2_NOCASEPREF(185, "SQL_SwedishStd_Pref_CP1_CI_AS", Encoding.CP1252),
        ISLAN_NOCASEPREF(186, "SQL_Icelandic_Pref_CP1_CI_AS", Encoding.CP1252),

        BIN_CP932(192, "bin_cp932", Encoding.CP932),
        NLS_CP932(193, "nls_cp932", Encoding.CP932),
        BIN_CP949(194, "bin_cp949", Encoding.CP949),
        NLS_CP949(195, "nls_cp949", Encoding.CP949),
        BIN_CP950(196, "bin_cp950", Encoding.CP950),
        NLS_CP950(197, "nls_cp950", Encoding.CP950),
        BIN_CP936(198, "bin_cp936", Encoding.CP936),
        NLS_CP936(199, "nls_cp936", Encoding.CP936),
        NLS_CP932_CS(200, "nls_cp932_cs", Encoding.CP932),
        NLS_CP949_CS(201, "nls_cp949_cs", Encoding.CP949),
        NLS_CP950_CS(202, "nls_cp950_cs", Encoding.CP950),
        NLS_CP936_CS(203, "nls_cp936_cs", Encoding.CP936),
        BIN_CP874(204, "bin_cp874", Encoding.CP874),
        NLS_CP874(205, "nls_cp874", Encoding.CP874),
        NLS_CP874_CS(206, "nls_cp874_cs", Encoding.CP874),

        EBCDIC_037(210, "SQL_EBCDIC037_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_273(211, "SQL_EBCDIC273_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_277(212, "SQL_EBCDIC277_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_278(213, "SQL_EBCDIC278_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_280(214, "SQL_EBCDIC280_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_284(215, "SQL_EBCDIC284_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_285(216, "SQL_EBCDIC285_CP1_CS_AS", Encoding.CP1252),
        EBCDIC_297(217, "SQL_EBCDIC297_CP1_CS_AS", Encoding.CP1252);

        // Sort ID; used as the lookup key when the enclosing class indexes the sort orders.
        private final int sortId;
        // Display name, returned by toString().
        private final String name;
        // Character encoding associated with the sort order.
        private final Encoding encoding;

        /**
         * Returns this sort order's encoding after running the encoding's own support check.
         *
         * @return the encoding of this sort order
         * @throws UnsupportedEncodingException
         *         if the encoding's support check fails
         */
        final Encoding getEncoding() throws UnsupportedEncodingException {
            return encoding.checkSupported();
        }

        /**
         * Creates a sort order entry.
         *
         * @param sortId
         *        sort ID of the sort order
         * @param name
         *        display name of the sort order
         * @param encoding
         *        encoding associated with the sort order
         */
        SortOrder(int sortId, String name, Encoding encoding) {
            this.sortId = sortId;
            this.name = name;
            this.encoding = encoding;
        }

        /**
         * Returns the display name of this sort order rather than the enum constant name.
         */
        public final String toString() {
            return name;
        }
    }

    // Index of sort orders by their sort IDs for fast lookup
    // of encodings associated with SQL collations that have a non-zero sort ID
    private static final Map<Integer, SortOrder> sortOrderIndex;

    /**
     * Determines the encoding of this collation from its sort ID.
     *
     * The sort ID is looked up in the sort order index; the matching sort order then supplies its encoding.
     *
     * @return the encoding of the sort order registered for this collation's sort ID
     * @throws UnsupportedEncodingException
     *         if no sort order is registered for the sort ID, or if the sort order's encoding fails its support
     *         check (the original failure is attached as the cause in that case)
     */
    private Encoding encodingFromSortId() throws UnsupportedEncodingException {
        SortOrder sortOrder = sortOrderIndex.get(sortId);

        if (null == sortOrder) {
            MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_unknownSortId"));
            Object[] msgArgs = {sortId};
            throw new UnsupportedEncodingException(form.format(msgArgs));
        }

        try {
            return sortOrder.getEncoding();
        } catch (UnsupportedEncodingException inner) {
            MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_unknownSortId"));
            Object[] msgArgs = {sortOrder};
            // UnsupportedEncodingException has no cause-accepting constructor, so chain the cause explicitly
            UnsupportedEncodingException e = new UnsupportedEncodingException(form.format(msgArgs));
            e.initCause(inner);
            throw e;
        }
    }

    static {
        // Populate the windows locale and sort order indices

        localeIndex = new HashMap<>();
        for (WindowsLocale locale : EnumSet.allOf(WindowsLocale.class))
            localeIndex.put(locale.langID, locale);

        sortOrderIndex = new HashMap<>();
        for (SortOrder sortOrder : EnumSet.allOf(SortOrder.class))
            sortOrderIndex.put(sortOrder.sortId, sortOrder);
    }
}


/**
 * Enumeration of encodings that are supported by SQL Server (and hopefully the JVM).
 *
 * See, for example, https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html for a complete list
 * of supported encodings with their canonical names.
 *
 * Each constant carries the Java charset name plus two flags describing its ASCII compatibility. JVM support and the
 * resolved {@link Charset} are determined lazily and cached on the constant.
 */
enum Encoding {
    UNICODE("UTF-16LE", true, false),
    UTF8("UTF-8", true, false),
    CP437("Cp437", false, false),
    CP850("Cp850", false, false),
    CP874("MS874", true, true),
    CP932("MS932", true, false),
    CP936("MS936", true, false),
    CP949("MS949", true, false),
    CP950("MS950", true, false),
    CP1250("Cp1250", true, true),
    CP1251("Cp1251", true, true),
    CP1252("Cp1252", true, true),
    CP1253("Cp1253", true, true),
    CP1254("Cp1254", true, true),
    CP1255("Cp1255", true, true),
    CP1256("Cp1256", true, true),
    CP1257("Cp1257", true, true),
    CP1258("Cp1258", true, true);

    private final String charsetName;
    private final boolean supportsAsciiConversion;
    private final boolean hasAsciiCompatibleSBCS;

    // Lazily populated caches. Enum constants are shared by every thread in the JVM, so these fields are volatile:
    // without it, a thread that reads the cached charset twice could legally observe null on the second read.
    private volatile boolean jvmSupportConfirmed = false;
    private volatile Charset charset;

    /**
     * Creates an encoding entry.
     *
     * @param charsetName
     *        Java charset name of the encoding
     * @param supportsAsciiConversion
     *        whether the encoding supports conversion to ASCII
     * @param hasAsciiCompatibleSBCS
     *        whether the encoding is an ASCII-compatible single-byte character set
     */
    private Encoding(String charsetName, boolean supportsAsciiConversion, boolean hasAsciiCompatibleSBCS) {
        this.charsetName = charsetName;
        this.supportsAsciiConversion = supportsAsciiConversion;
        this.hasAsciiCompatibleSBCS = hasAsciiCompatibleSBCS;
    }

    /**
     * Verifies that the JVM supports this encoding's charset. A successful check is remembered, so the JVM is only
     * queried until the first success.
     *
     * @return this encoding, to allow call chaining
     * @throws UnsupportedEncodingException
     *         if the JVM does not support the charset
     */
    final Encoding checkSupported() throws UnsupportedEncodingException {
        if (!jvmSupportConfirmed) {
            // Ask the JVM whether a charset with this name is available.
            if (!Charset.isSupported(charsetName)) {
                MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_codePageNotSupported"));
                Object[] msgArgs = {charsetName};
                throw new UnsupportedEncodingException(form.format(msgArgs));
            }

            jvmSupportConfirmed = true;
        }

        return this;
    }

    /**
     * Returns the {@link Charset} for this encoding, resolving and caching it on first use.
     *
     * @return the charset of this encoding
     * @throws SQLServerException
     *         if the JVM does not support the charset
     */
    final Charset charset() throws SQLServerException {
        // Read the cache exactly once into a local so the value that was null-checked is the value returned.
        Charset resolved = charset;
        if (resolved == null) {
            try {
                checkSupported();
                resolved = Charset.forName(charsetName);
                charset = resolved;
            } catch (UnsupportedEncodingException e) {
                MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_codePageNotSupported"));
                Object[] msgArgs = {charsetName};
                throw new SQLServerException(form.format(msgArgs), e);
            }
        }
        return resolved;
    }

    /**
     * Returns the Java charset name of this encoding.
     *
     * @return the charset name
     */
    String getCharsetName() {
        return charsetName;
    }

    /**
     * Returns true if the collation supports conversion to ascii.
     *
     * Per discussions with richards and michkap on UNICODE alias -> ASCII range is 0x00 to 0x7F. The range of 0x00 to
     * 0x7F of 1250-1258, 874, 932, 936, 949, and 950 are identical to ASCII. See also ->
     * http://blogs.msdn.com/michkap/archive/2005/11/23/495193.aspx
     */
    boolean supportsAsciiConversion() {
        return supportsAsciiConversion;
    }

    /**
     * Returns true if the collation supports conversion to ascii AND it uses a single-byte character set.
     *
     * Per discussions with richards and michkap on UNICODE alias -> ASCII range is 0x00 to 0x7F. The range of 0x00 to
     * 0x7F of 1250-1258 and 874 are identical to ASCII for these SBCS character sets. See also ->
     * http://blogs.msdn.com/michkap/archive/2005/11/23/495193.aspx
     */
    boolean hasAsciiCompatibleSBCS() {
        return hasAsciiCompatibleSBCS;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy