All downloads are free. The search and download functionality uses the official Maven repository.

panda.util.chardet.nsEUCSampler Maven / Gradle / Ivy

Go to download

Panda Core is the core module of the Panda Framework; it contains commonly used utility classes similar to Apache Commons.

There is a newer version: 1.8.0
Show newest version
package panda.util.chardet ;

import java.io.IOException;
import java.io.InputStream;

import panda.io.Streams;

/**
 * Samples a byte stream and counts how often each byte value in the range
 * {@code 0xA1..0xFE} occurs as the first or the second byte of a two-byte
 * sequence. The counts can be turned into relative frequencies with
 * {@link #CalFreq()} and compared against reference frequency tables with
 * {@link #GetScore(float[], float, float[], float)}.
 *
 * <p>The sampler is a small state machine driven by {@link #Sample(InputStream)}:
 * it alternates between expecting a first byte and expecting a second byte, and
 * enters a permanent error state (until {@link #Reset()}) when it meets a
 * high-bit byte outside {@code 0xA1..0xFE}, or a low-bit byte where a second
 * byte was expected.
 *
 * <p>Instances hold mutable state and perform no synchronization.
 */
public class nsEUCSampler {

	/** Number of tracked byte values per position: 0xA1..0xFE inclusive. */
	private static final int BYTE_RANGE = 94;

	/** Smallest byte value that is counted; used as the table index offset. */
	private static final int LOWEST_BYTE = 0xa1;

	/** Value returned by {@link InputStream#read()} at end of stream. */
	private static final int END_OF_STREAM = -1;

	/** {@link #mState} value: the next high-bit byte is a first byte. */
	private static final int STATE_FIRST = 0;

	/** {@link #mState} value: invalid input was seen; sampling has stopped. */
	private static final int STATE_ERROR = 1;

	/** {@link #mState} value: the next byte must be a second byte. */
	private static final int STATE_SECOND = 2;

	/** Total number of counted bytes (first bytes plus second bytes). */
	int mTotal = 0;

	/** {@link #EnoughData()} reports true once {@link #mTotal} exceeds this. */
	int mThreshold = 200;

	/** Current state of the sampling state machine (0, 1 or 2). */
	int mState = 0;

	/** Occurrence count of each first byte, indexed by {@code byte - 0xA1}. */
	public int mFirstByteCnt[] = new int[BYTE_RANGE];

	/** Occurrence count of each second byte, indexed by {@code byte - 0xA1}. */
	public int mSecondByteCnt[] = new int[BYTE_RANGE];

	/** Relative first-byte frequencies, filled in by {@link #CalFreq()}. */
	public float mFirstByteFreq[] = new float[BYTE_RANGE];

	/** Relative second-byte frequencies, filled in by {@link #CalFreq()}. */
	public float mSecondByteFreq[] = new float[BYTE_RANGE];

	/** Creates a sampler with all counters cleared. */
	public nsEUCSampler() {
		Reset();
	}

	/**
	 * Clears the byte counters and the total, and returns the state machine to
	 * its initial state. The frequency tables are left untouched; call
	 * {@link #CalFreq()} to recompute them.
	 */
	public void Reset() {
		mTotal = 0;
		mState = STATE_FIRST;
		for (int i = 0; i < BYTE_RANGE; i++) {
			mFirstByteCnt[i] = 0;
			mSecondByteCnt[i] = 0;
		}
	}

	/**
	 * @return {@code true} when more than {@link #mThreshold} bytes have been counted
	 */
	boolean EnoughData() {
		return mTotal > mThreshold;
	}

	/**
	 * @return {@code true} when more than one byte has been counted
	 */
	boolean GetSomeData() {
		return mTotal > 1;
	}

	/**
	 * Reads bytes from {@code ins} and updates the counters until the stream
	 * ends or invalid input puts the sampler in the error state.
	 *
	 * <p>The state is tested <em>before</em> each read, so the byte that causes
	 * the error is the last byte consumed from the stream, and a sampler that is
	 * already in the error state consumes nothing.
	 *
	 * @param ins stream to sample; it is not closed by this method
	 * @return {@code false} if the sampler is in the error state, {@code true} otherwise
	 * @throws IOException if reading from {@code ins} fails
	 */
	boolean Sample(InputStream ins) throws IOException {
		int b;
		while (mState != STATE_ERROR && (b = ins.read()) != END_OF_STREAM) {
			switch (mState) {
			case STATE_FIRST:
				// Bytes without the high bit are skipped while waiting for a first byte.
				if ((b & 0x80) != 0) {
					if (isCountedByte(b)) {
						mTotal++;
						mFirstByteCnt[(0xff & b) - LOWEST_BYTE]++;
						mState = STATE_SECOND;
					} else {
						mState = STATE_ERROR;
					}
				}
				break;
			case STATE_SECOND:
				// A second byte is mandatory here: anything else is an error.
				if ((b & 0x80) != 0 && isCountedByte(b)) {
					mTotal++;
					mSecondByteCnt[(0xff & b) - LOWEST_BYTE]++;
					mState = STATE_FIRST;
				} else {
					mState = STATE_ERROR;
				}
				break;
			default:
				// Unknown state value: treat as an error.
				mState = STATE_ERROR;
			}
		}
		return mState != STATE_ERROR;
	}

	/**
	 * Tells whether the low eight bits of {@code b} lie in 0xA1..0xFE, i.e. map
	 * to a valid index of the counter tables.
	 */
	private static boolean isCountedByte(int b) {
		int value = 0xff & b;
		return value >= LOWEST_BYTE && value != 0xff;
	}

	/**
	 * Converts the byte counters into relative frequencies by dividing each
	 * count by {@link #mTotal}. When nothing has been counted, all frequencies
	 * are set to 0 rather than the NaN that {@code 0.0f / 0.0f} would produce.
	 */
	void CalFreq() {
		if (mTotal == 0) {
			for (int i = 0; i < BYTE_RANGE; i++) {
				mFirstByteFreq[i] = 0.0f;
				mSecondByteFreq[i] = 0.0f;
			}
			return;
		}
		float total = (float) mTotal;
		for (int i = 0; i < BYTE_RANGE; i++) {
			mFirstByteFreq[i] = (float) mFirstByteCnt[i] / total;
			mSecondByteFreq[i] = (float) mSecondByteCnt[i] / total;
		}
	}

	/**
	 * Computes a weighted distance between the given reference frequency tables
	 * and the frequencies last computed by {@link #CalFreq()}. Smaller values
	 * mean the tables are closer.
	 *
	 * @param aFirstByteFreq    reference first-byte frequencies (at least 94 entries)
	 * @param aFirstByteWeight  weight applied to the first-byte distance
	 * @param aSecondByteFreq   reference second-byte frequencies (at least 94 entries)
	 * @param aSecondByteWeight weight applied to the second-byte distance
	 * @return the weighted sum of the two distances
	 */
	float GetScore(float[] aFirstByteFreq, float aFirstByteWeight,
			float[] aSecondByteFreq, float aSecondByteWeight) {
		return aFirstByteWeight * GetScore(aFirstByteFreq, mFirstByteFreq)
				+ aSecondByteWeight * GetScore(aSecondByteFreq, mSecondByteFreq);
	}

	/**
	 * Computes the Euclidean distance between the first 94 entries of two
	 * tables, divided by 94.
	 *
	 * @param array1 first table (at least 94 entries)
	 * @param array2 second table (at least 94 entries)
	 * @return {@code sqrt(sum((array1[i] - array2[i])^2)) / 94}
	 */
	float GetScore(float[] array1, float[] array2) {
		float sum = 0.0f;
		for (int i = 0; i < BYTE_RANGE; i++) {
			float diff = array1[i] - array2[i];
			sum += diff * diff;
		}
		return (float) java.lang.Math.sqrt((double) sum) / 94.0f;
	}
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy