All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.text.CharsetRecog_2022 Maven / Gradle / Ivy

There is a newer version: 4.15.0-HBase-1.5
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2005 - 2012, International Business Machines Corporation and  *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/
package com.ibm.icu.text;

/**
 *  class CharsetRecog_2022  part of the ICU charset detection imlementation.
 *                           This is a superclass for the individual detectors for
 *                           each of the detectable members of the ISO 2022 family
 *                           of encodings.
 *
 *                           The separate classes are nested within this class.
 */
abstract class CharsetRecog_2022 extends CharsetRecognizer {


    /**
     * Matching function shared among the 2022 detectors JP, CN and KR
     * Counts up the number of legal an unrecognized escape sequences in
     * the sample of text, and computes a score based on the total number &
     * the proportion that fit the encoding.
     *
     *
     * @param text the byte buffer containing text to analyse
     * @param textLen  the size of the text in the byte.
     * @param escapeSequences the byte escape sequences to test for.
     * @return match quality, in the range of 0-100.
     */
    int   match(byte [] text, int textLen, byte [][] escapeSequences) {
        int     i, j;
        int     escN;
        int     hits   = 0;
        int     misses = 0;
        int     shifts = 0;
        int     quality;
        scanInput:
            for (i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy