
// ext.test4j.mozilla.intl.chardet.NsPSMDetector Maven / Gradle / Ivy
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
package ext.test4j.mozilla.intl.chardet;
/**
 * Parallel state machine charset detector.
 *
 * <p>Every input byte is fed to each remaining {@link NsVerifier}. A verifier
 * that reaches {@code NsVerifier.eError} is dropped from the candidate list; a
 * verifier that reaches {@code NsVerifier.eItsMe} wins and its charset is
 * passed to {@link #report(String)}. Detection also finishes as soon as exactly
 * one remaining candidate answers {@code false} from {@code isUCS2()}.
 *
 * <p>The candidate list is stored as three parallel structures: {@link #mItemIdx}
 * holds indices into {@link #mVerifier}, {@link #mState} holds the current state
 * of the verifier in the same slot, and {@link #mItems} is the number of live
 * slots.
 *
 * <p>Note: both constructors call {@link #initVerifiers(int)} and
 * {@link #reset()}, which subclasses can override; an override therefore runs
 * before the subclass's own constructor body.
 */
public abstract class NsPSMDetector {

    public static final int ALL = 0;
    public static final int JAPANESE = 1;
    public static final int CHINESE = 2;
    public static final int SIMPLIFIED_CHINESE = 3;
    public static final int TRADITIONAL_CHINESE = 4;
    public static final int KOREAN = 5;

    /** Number of language flags defined above. */
    public static final int NO_OF_LANGUAGES = 6;

    /** Capacity of the per-candidate state and index arrays. */
    public static final int MAX_VERIFIERS = 16;

    /** Verifier set chosen by {@link #initVerifiers(int)}. */
    NsVerifier[] mVerifier;

    /** Current state of the candidate in each live slot. */
    byte[] mState = new byte[MAX_VERIFIERS];

    /** For each live slot, the index of its verifier in {@link #mVerifier}. */
    int[] mItemIdx = new int[MAX_VERIFIERS];

    /** Number of candidates that have not been eliminated yet. */
    int mItems;

    /** Size of the verifier set; {@link #reset()} restores {@link #mItems} to it. */
    int mClassItems;

    /** Set once a charset has been reported. */
    boolean mDone;

    /** Copied from {@link #mClassRunSampler} by {@link #reset()}; not read in this class. */
    boolean mRunSampler;

    /** Never assigned in this class, so it keeps its default unless set elsewhere in the package. */
    boolean mClassRunSampler;

    /** Creates a detector for the {@link #ALL} language flag. */
    public NsPSMDetector() {
        this(NsPSMDetector.ALL);
    }

    /**
     * Creates a detector for the given language flag.
     *
     * @param langFlag one of the language constants of this class; any other
     *                 value selects the same verifier set as {@link #ALL}
     */
    public NsPSMDetector(int langFlag) {
        initVerifiers(langFlag);
        reset();
    }

    /**
     * Restores the detector to its initial state so that it can be fed a new
     * stream: all verifiers become candidates again, each in state {@code 0}.
     */
    public void reset() {
        mRunSampler = mClassRunSampler;
        mDone = false;
        mItems = mClassItems;
        for (int slot = 0; slot < mItems; slot++) {
            mState[slot] = 0;
            mItemIdx[slot] = slot;
        }
    }

    /**
     * Selects the verifier set for a language flag and records its size in
     * {@link #mClassItems}.
     *
     * <p>{@link #TRADITIONAL_CHINESE} and {@link #SIMPLIFIED_CHINESE} each get a
     * dedicated four-verifier set. Every other value, including out-of-range
     * ones, gets the same five-verifier default set, so no separate range
     * check is needed.
     *
     * @param currVerSet language flag
     */
    protected void initVerifiers(int currVerSet) {
        switch (currVerSet) {
            case NsPSMDetector.TRADITIONAL_CHINESE:
                mVerifier = new NsVerifier[] {
                    new NsUTF8Verifier(),
                    new NsBIG5Verifier(),
                    new NsISO2022CNVerifier(),
                    new NsGB2312Verifier()
                };
                break;
            case NsPSMDetector.SIMPLIFIED_CHINESE:
                mVerifier = new NsVerifier[] {
                    new NsUTF8Verifier(),
                    new NsGB2312Verifier(),
                    new NsISO2022CNVerifier(),
                    new NsHZVerifier()
                };
                break;
            default:
                mVerifier = new NsVerifier[] {
                    new NsUTF8Verifier(),
                    new NsGB2312Verifier(),
                    new NsBIG5Verifier(),
                    new NsISO2022CNVerifier(),
                    new NsHZVerifier()
                };
                break;
        }
        mClassItems = mVerifier.length;
    }

    /**
     * Callback invoked with the name of the detected charset.
     *
     * @param charset value returned by the winning verifier's {@code charset()}
     */
    public abstract void report(String charset);

    /**
     * Feeds a chunk of bytes to the remaining candidate verifiers.
     *
     * <p>This method does not check {@link #mDone} on entry: calling it again
     * after it has returned {@code true} keeps processing and may invoke
     * {@link #report(String)} again.
     *
     * @param aBuf buffer holding the data; {@code null} is treated as no data
     * @param len  number of bytes of {@code aBuf} to examine; values larger than
     *             {@code aBuf.length} are limited to the buffer length, and
     *             values {@code <= 0} examine nothing
     * @return {@code true} once a charset has been reported
     */
    public boolean HandleData(byte[] aBuf, int len) {
        if (aBuf == null) {
            return mDone;
        }
        // Never read past the buffer: failing half-way would leave the
        // candidate list partially updated.
        final int limit = Math.min(len, aBuf.length);

        for (int i = 0; i < limit; i++) {
            final byte b = aBuf[i];

            int j = 0;
            while (j < mItems) {
                final NsVerifier verifier = mVerifier[mItemIdx[j]];
                final byte st = NsVerifier.getNextState(verifier, b, mState[j]);
                if (st == NsVerifier.eItsMe) {
                    report(verifier.charset());
                    mDone = true;
                    return mDone;
                } else if (st == NsVerifier.eError) {
                    // Drop this candidate by moving the last live slot into its
                    // place. j is not advanced so the moved candidate is also
                    // fed the current byte.
                    mItems--;
                    if (j < mItems) {
                        mItemIdx[j] = mItemIdx[mItems];
                        mState[j] = mState[mItems];
                    }
                } else {
                    mState[j] = st;
                    j++;
                }
            }

            // If exactly one remaining candidate is not UCS2, it wins.
            int nonUCS2Num = 0;
            int nonUCS2Idx = 0;
            for (j = 0; j < mItems; j++) {
                if (!mVerifier[mItemIdx[j]].isUCS2()) {
                    nonUCS2Num++;
                    nonUCS2Idx = j;
                }
            }
            if (nonUCS2Num == 1) {
                report(mVerifier[mItemIdx[nonUCS2Idx]].charset());
                mDone = true;
                return mDone;
            }
        }
        return mDone;
    }

    /**
     * Signals the end of the input. If nothing has been reported yet and
     * exactly two candidates remain, one of which is named {@code "GB18030"},
     * the other candidate is reported.
     */
    public void dataEnd() {
        if (mDone) {
            return;
        }
        if (mItems == 2) {
            final String first = mVerifier[mItemIdx[0]].charset();
            if (first.equals("GB18030")) {
                report(mVerifier[mItemIdx[1]].charset());
                mDone = true;
            } else if (mVerifier[mItemIdx[1]].charset().equals("GB18030")) {
                report(mVerifier[mItemIdx[0]].charset());
                mDone = true;
            }
        }
    }

    /**
     * Returns the charset names of all candidates that have not been
     * eliminated, in their current slot order.
     *
     * @return a new array of length {@link #mItems}; empty when no candidate is left
     */
    public String[] getProbableCharsets() {
        final String[] charsets = new String[mItems];
        for (int slot = 0; slot < mItems; slot++) {
            charsets[slot] = mVerifier[mItemIdx[slot]].charset();
        }
        return charsets;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy