
org.marketcetera.util.test.UnicodeData Maven / Gradle / Ivy
The newest version!
package org.marketcetera.util.test;
import java.io.ByteArrayOutputStream;
import java.io.CharArrayWriter;
import java.io.IOException;
import static org.junit.Assert.*;
/**
* Unicode test data. Some were obtained by applying an online
* converter onto the results of Google translation.
*
* @author [email protected]
* @since 0.6.0
* @version $Id$
*/
/* $License$ */
public final class UnicodeData
{
// CLASS DATA.
/**
* The space character.
*/
public static final String SPACE=
" "; //$NON-NLS-1$
/**
* The space character, as a character array.
*/
public static final char[] SPACE_CHARS=new char[]
{' '};
/**
* The space character, as a Unicode code point array.
*/
public static final int[] SPACE_UCPS=new int[]
{0x00020};
/**
* The space character, in the default encoding.
*/
public static final byte[] SPACE_NAT=
SPACE.getBytes();
/**
* The space, in UTF-8.
*/
public static final byte[] SPACE_UTF8=new byte[]
{(byte)0x20};
/**
* The space, in UTF-16BE.
*/
public static final byte[] SPACE_UTF16BE=new byte[]
{(byte)0x00,(byte)0x20};
/**
* The space, in UTF-16LE.
*/
public static final byte[] SPACE_UTF16LE=new byte[]
{(byte)0x20,(byte)0x00};
/**
* The space, in UTF-32BE.
*/
public static final byte[] SPACE_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x20};
/**
* The space, in UTF-32LE.
*/
public static final byte[] SPACE_UTF32LE=new byte[]
{(byte)0x20,(byte)0x00,(byte)0x00,(byte)0x00};
/**
* "Hello" in English.
*/
public static final String HELLO_EN=
"Hello"; //$NON-NLS-1$
/**
* "Hello" in English, as a character array.
*/
public static final char[] HELLO_EN_CHARS=new char[]
{'H','e','l','l','o'};
/**
* "Hello" in English, as a Unicode code point array.
*/
public static final int[] HELLO_EN_UCPS=new int[]
{0x00048,
0x00065,
0x0006C,
0x0006C,
0x0006F};
/**
* "Hello" in English, in the default encoding.
*/
public static final byte[] HELLO_EN_NAT=
HELLO_EN.getBytes();
/**
* "Hello" in English, in UTF-8.
*/
public static final byte[] HELLO_EN_UTF8=new byte[]
{(byte)0x48,
(byte)0x65,
(byte)0x6C,
(byte)0x6C,
(byte)0x6F};
/**
* "Hello" in English, in UTF-16BE.
*/
public static final byte[] HELLO_EN_UTF16BE=new byte[]
{(byte)0x00,(byte)0x48,
(byte)0x00,(byte)0x65,
(byte)0x00,(byte)0x6C,
(byte)0x00,(byte)0x6C,
(byte)0x00,(byte)0x6F};
/**
* "Hello" in English, in UTF-16LE.
*/
public static final byte[] HELLO_EN_UTF16LE=new byte[]
{(byte)0x48,(byte)0x00,
(byte)0x65,(byte)0x00,
(byte)0x6C,(byte)0x00,
(byte)0x6C,(byte)0x00,
(byte)0x6F,(byte)0x00};
/**
* "Hello" in English, in UTF-32BE.
*/
public static final byte[] HELLO_EN_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x48,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x65,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x6C,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x6C,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x6F};
/**
* "Hello" in English, in UTF-32LE.
*/
public static final byte[] HELLO_EN_UTF32LE=new byte[]
{(byte)0x48,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x65,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x6C,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x6C,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x6F,(byte)0x00,(byte)0x00,(byte)0x00};
/**
* "Language" (pronounced "sprook") in Norwegian: this is the word
* "language" in Norwegian, with the first letter capitalized.
*/
public static final String LANGUAGE_NO=
"Spr\u00E5k"; //$NON-NLS-1$
/**
* "Language" in Norwegian, as a character array.
*/
public static final char[] LANGUAGE_NO_CHARS=new char[]
{'S','p','r','\u00E5','k'};
/**
* "Language" in Norwegian, as a Unicode code point array.
*/
public static final int[] LANGUAGE_NO_UCPS=new int[]
{0x00053,
0x00070,
0x00072,
0x000E5,
0x0006B};
/**
* "Language" in Norwegian, in the default encoding.
*/
public static final byte[] LANGUAGE_NO_NAT=
LANGUAGE_NO.getBytes();
/**
* "Language" in Norwegian, in UTF-8.
*/
public static final byte[] LANGUAGE_NO_UTF8=new byte[]
{(byte)0x53,
(byte)0x70,
(byte)0x72,
(byte)0xC3,
(byte)0xA5,
(byte)0x6B};
/**
* "Language" in Norwegian, in UTF-16BE.
*/
public static final byte[] LANGUAGE_NO_UTF16BE=new byte[]
{(byte)0x00,(byte)0x53,
(byte)0x00,(byte)0x70,
(byte)0x00,(byte)0x72,
(byte)0x00,(byte)0xE5,
(byte)0x00,(byte)0x6B};
/**
* "Language" in Norwegian, in UTF-16LE.
*/
public static final byte[] LANGUAGE_NO_UTF16LE=new byte[]
{(byte)0x53,(byte)0x00,
(byte)0x70,(byte)0x00,
(byte)0x72,(byte)0x00,
(byte)0xE5,(byte)0x00,
(byte)0x6B,(byte)0x00};
/**
* "Language" in Norwegian, in UTF-32BE.
*/
public static final byte[] LANGUAGE_NO_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x53,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x70,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x72,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0xE5,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x6B};
/**
* "Language" in Norwegian, in UTF-32LE.
*/
public static final byte[] LANGUAGE_NO_UTF32LE=new byte[]
{(byte)0x53,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x70,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x72,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0xE5,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x6B,(byte)0x00,(byte)0x00,(byte)0x00};
/**
* "HELLO" (pronounced "yassou") in Greek: this is the word
* "hello" in all uppercase Greek letters (it is, in fact, two
* Greek words, separated by a space).
*/
public static final String HELLO_GR=
"\u0393\u0395\u0399\u0391 \u03A3\u039F\u03A5"; //$NON-NLS-1$
/**
* "HELLO" in Greek, as a character array.
*/
public static final char[] HELLO_GR_CHARS=new char[]
{'\u0393','\u0395','\u0399','\u0391',
' ',
'\u03A3','\u039F','\u03A5'};
/**
* "HELLO" in Greek, as a Unicode code point array.
*/
public static final int[] HELLO_GR_UCPS=new int[]
{0x00393,
0x00395,
0x00399,
0x00391,
0x00020,
0x003A3,
0x0039F,
0x003A5};
/**
* "HELLO" in Greek, in the default encoding.
*/
public static final byte[] HELLO_GR_NAT=
HELLO_GR.getBytes();
/**
* "HELLO" in Greek, in UTF-8.
*/
public static final byte[] HELLO_GR_UTF8=new byte[]
{(byte)0xCE,(byte)0x93,
(byte)0xCE,(byte)0x95,
(byte)0xCE,(byte)0x99,
(byte)0xCE,(byte)0x91,
(byte)0x20,
(byte)0xCE,(byte)0xA3,
(byte)0xCE,(byte)0x9F,
(byte)0xCE,(byte)0xA5};
/**
* "HELLO" in Greek, in UTF-16BE.
*/
public static final byte[] HELLO_GR_UTF16BE=new byte[]
{(byte)0x03,(byte)0x93,
(byte)0x03,(byte)0x95,
(byte)0x03,(byte)0x99,
(byte)0x03,(byte)0x91,
(byte)0x00,(byte)0x20,
(byte)0x03,(byte)0xA3,
(byte)0x03,(byte)0x9F,
(byte)0x03,(byte)0xA5};
/**
* "HELLO" in Greek, in UTF-16LE.
*/
public static final byte[] HELLO_GR_UTF16LE=new byte[]
{(byte)0x93,(byte)0x03,
(byte)0x95,(byte)0x03,
(byte)0x99,(byte)0x03,
(byte)0x91,(byte)0x03,
(byte)0x20,(byte)0x00,
(byte)0xA3,(byte)0x03,
(byte)0x9F,(byte)0x03,
(byte)0xA5,(byte)0x03};
/**
* "HELLO" in Greek, in UTF-32BE.
*/
public static final byte[] HELLO_GR_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x03,(byte)0x93,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0x95,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0x99,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0x91,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x20,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0xA3,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0x9F,
(byte)0x00,(byte)0x00,(byte)0x03,(byte)0xA5};
/**
* "HELLO" in Greek, in UTF-32LE.
*/
public static final byte[] HELLO_GR_UTF32LE=new byte[]
{(byte)0x93,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0x95,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0x99,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0x91,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0x20,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0xA3,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0x9F,(byte)0x03,(byte)0x00,(byte)0x00,
(byte)0xA5,(byte)0x03,(byte)0x00,(byte)0x00};
/**
* "house" (pronounced "manzil") in Arabic.
*/
public static final String HOUSE_AR=
"\u0645\u0646\u0632\u0644"; //$NON-NLS-1$
/**
* "house" in Arabic, as a character array.
*/
public static final char[] HOUSE_AR_CHARS=new char[]
{'\u0645','\u0646','\u0632','\u0644'};
/**
* "house" in Arabic, as a Unicode code point array.
*/
public static final int[] HOUSE_AR_UCPS=new int[]
{0x00645,
0x00646,
0x00632,
0x00644};
/**
* "house" in Arabic, in the default encoding.
*/
public static final byte[] HOUSE_AR_NAT=
HOUSE_AR.getBytes();
/**
* "house" in Arabic, in UTF-8.
*/
public static final byte[] HOUSE_AR_UTF8=new byte[]
{(byte)0xD9,(byte)0x85,
(byte)0xD9,(byte)0x86,
(byte)0xD8,(byte)0xB2,
(byte)0xD9,(byte)0x84};
/**
* "house" in Arabic, in UTF-16BE.
*/
public static final byte[] HOUSE_AR_UTF16BE=new byte[]
{(byte)0x06,(byte)0x45,
(byte)0x06,(byte)0x46,
(byte)0x06,(byte)0x32,
(byte)0x06,(byte)0x44};
/**
* "house" in Arabic, in UTF-16LE.
*/
public static final byte[] HOUSE_AR_UTF16LE=new byte[]
{(byte)0x45,(byte)0x06,
(byte)0x46,(byte)0x06,
(byte)0x32,(byte)0x06,
(byte)0x44,(byte)0x06};
/**
* "house" in Arabic, in UTF-32BE.
*/
public static final byte[] HOUSE_AR_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x06,(byte)0x45,
(byte)0x00,(byte)0x00,(byte)0x06,(byte)0x46,
(byte)0x00,(byte)0x00,(byte)0x06,(byte)0x32,
(byte)0x00,(byte)0x00,(byte)0x06,(byte)0x44};
/**
* "house" in Arabic, in UTF-32LE.
*/
public static final byte[] HOUSE_AR_UTF32LE=new byte[]
{(byte)0x45,(byte)0x06,(byte)0x00,(byte)0x00,
(byte)0x46,(byte)0x06,(byte)0x00,(byte)0x00,
(byte)0x32,(byte)0x06,(byte)0x00,(byte)0x00,
(byte)0x44,(byte)0x06,(byte)0x00,(byte)0x00};
/**
* "goodbye" (pronounced "sayonara") in Japanese, in the Hiragana
* writing system.
*/
public static final String GOODBYE_JA=
"\u3055\u3088\u3046\u306A\u3089"; //$NON-NLS-1$
/**
* "goodbye" in Japanese, as a character array.
*/
public static final char[] GOODBYE_JA_CHARS=new char[]
{'\u3055','\u3088','\u3046','\u306A','\u3089'};
/**
* "goodbye" in Japanese, as a Unicode code point array.
*/
public static final int[] GOODBYE_JA_UCPS=new int[]
{0x03055,
0x03088,
0x03046,
0x0306A,
0x03089};
/**
* "goodbye" in Japanese, in the default encoding.
*/
public static final byte[] GOODBYE_JA_NAT=
GOODBYE_JA.getBytes();
/**
* "goodbye" in Japanese, in UTF-8.
*/
public static final byte[] GOODBYE_JA_UTF8=new byte[]
{(byte)0xE3,(byte)0x81,(byte)0x95,
(byte)0xE3,(byte)0x82,(byte)0x88,
(byte)0xE3,(byte)0x81,(byte)0x86,
(byte)0xE3,(byte)0x81,(byte)0xAA,
(byte)0xE3,(byte)0x82,(byte)0x89};
/**
* "goodbye" in Japanese, in UTF-16BE.
*/
public static final byte[] GOODBYE_JA_UTF16BE=new byte[]
{(byte)0x30,(byte)0x55,
(byte)0x30,(byte)0x88,
(byte)0x30,(byte)0x46,
(byte)0x30,(byte)0x6A,
(byte)0x30,(byte)0x89};
/**
* "goodbye" in Japanese, in UTF-16LE.
*/
public static final byte[] GOODBYE_JA_UTF16LE=new byte[]
{(byte)0x55,(byte)0x30,
(byte)0x88,(byte)0x30,
(byte)0x46,(byte)0x30,
(byte)0x6A,(byte)0x30,
(byte)0x89,(byte)0x30};
/**
* "goodbye" in Japanese, in UTF-32BE.
*/
public static final byte[] GOODBYE_JA_UTF32BE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x30,(byte)0x55,
(byte)0x00,(byte)0x00,(byte)0x30,(byte)0x88,
(byte)0x00,(byte)0x00,(byte)0x30,(byte)0x46,
(byte)0x00,(byte)0x00,(byte)0x30,(byte)0x6A,
(byte)0x00,(byte)0x00,(byte)0x30,(byte)0x89};
/**
* "goodbye" in Japanese, in UTF-32LE.
*/
public static final byte[] GOODBYE_JA_UTF32LE=new byte[]
{(byte)0x55,(byte)0x30,(byte)0x00,(byte)0x00,
(byte)0x88,(byte)0x30,(byte)0x00,(byte)0x00,
(byte)0x46,(byte)0x30,(byte)0x00,(byte)0x00,
(byte)0x6A,(byte)0x30,(byte)0x00,(byte)0x00,
(byte)0x89,(byte)0x30,(byte)0x00,(byte)0x00};
/**
* The Linear B ideograms for she-goat and he-goat (in this order
* and separated by a space).
*/
public static final String GOATS_LNB=
"\uD800\uDC88 \uD800\uDC89"; //$NON-NLS-1$
/**
* The Linear B goat ideograms, as a character array.
*/
public static final char[] GOATS_LNB_CHARS=new char[]
{'\uD800','\uDC88',
' ',
'\uD800','\uDC89'};
/**
* The Linear B goat ideograms, as a Unicode code point array.
*/
public static final int[] GOATS_LNB_UCPS=new int[]
{0x10088,
0x00020,
0x10089};
/**
* The Linear B goat ideograms, in the default encoding.
*/
public static final byte[] GOATS_LNB_NAT=
GOATS_LNB.getBytes();
/**
* The Linear B goat ideograms, in UTF-8.
*/
public static final byte[] GOATS_LNB_UTF8=new byte[]
{(byte)0xF0,(byte)0x90,(byte)0x82,(byte)0x88,
(byte)0x20,
(byte)0xF0,(byte)0x90,(byte)0x82,(byte)0x89};
/**
* The Linear B goat ideograms, in UTF-16BE.
*/
public static final byte[] GOATS_LNB_UTF16BE=new byte[]
{(byte)0xD8,(byte)0x00,(byte)0xDC,(byte)0x88,
(byte)0x00,(byte)0x20,
(byte)0xD8,(byte)0x00,(byte)0xDC,(byte)0x89};
/**
* The Linear B goat ideograms, in UTF-16LE.
*/
public static final byte[] GOATS_LNB_UTF16LE=new byte[]
{(byte)0x00,(byte)0xD8,(byte)0x88,(byte)0xDC,
(byte)0x20,(byte)0x00,
(byte)0x00,(byte)0xD8,(byte)0x89,(byte)0xDC};
/**
* The Linear B goat ideograms, in UTF-32BE.
*/
public static final byte[] GOATS_LNB_UTF32BE=new byte[]
{(byte)0x00,(byte)0x01,(byte)0x00,(byte)0x88,
(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x20,
(byte)0x00,(byte)0x01,(byte)0x00,(byte)0x89};
/**
* The Linear B goat ideograms, in UTF-32LE.
*/
public static final byte[] GOATS_LNB_UTF32LE=new byte[]
{(byte)0x88,(byte)0x00,(byte)0x01,(byte)0x00,
(byte)0x20,(byte)0x00,(byte)0x00,(byte)0x00,
(byte)0x89,(byte)0x00,(byte)0x01,(byte)0x00};
/**
* The musical symbol G-clef.
*/
public static final String G_CLEF_MSC=
"\uD834\uDD1E"; //$NON-NLS-1$
/**
* The G-clef, as a character array.
*/
public static final char[] G_CLEF_MSC_CHARS=new char[]
{'\uD834','\uDD1E'};
/**
* The G-clef, as a Unicode code point array.
*/
public static final int[] G_CLEF_MSC_UCPS=new int[]
{0x1D11E};
/**
* The G-clef, in the default encoding.
*/
public static final byte[] G_CLEF_MSC_NAT=
G_CLEF_MSC.getBytes();
/**
* The G-clef, in UTF-8.
*/
public static final byte[] G_CLEF_MSC_UTF8=new byte[]
{(byte)0xF0,(byte)0x9D,(byte)0x84,(byte)0x9E};
/**
* The G-clef, in UTF-16BE.
*/
public static final byte[] G_CLEF_MSC_UTF16BE=new byte[]
{(byte)0xD8,(byte)0x34,(byte)0xDD,(byte)0x1E};
/**
* The G-clef, in UTF-16LE.
*/
public static final byte[] G_CLEF_MSC_UTF16LE=new byte[]
{(byte)0x34,(byte)0xD8,(byte)0x1E,(byte)0xDD};
/**
* The G-clef, in UTF-32BE.
*/
public static final byte[] G_CLEF_MSC_UTF32BE=new byte[]
{(byte)0x00,(byte)0x01,(byte)0xD1,(byte)0x1E};
/**
* The G-clef, in UTF-32LE.
*/
public static final byte[] G_CLEF_MSC_UTF32LE=new byte[]
{(byte)0x1E,(byte)0xD1,(byte)0x01,(byte)0x00};
/**
* A combo string that includes "Hello" in English, "Language" in
* Norwegian, "HELLO" in Greek, "house" in Arabic, "goodbye" in
* Japanese, and the G-clef, each successive pair separated by
* exactly one space.
*/
public static final String COMBO=
HELLO_EN+SPACE+
LANGUAGE_NO+SPACE+
HELLO_GR+SPACE+
HOUSE_AR+SPACE+
GOODBYE_JA+SPACE+
GOATS_LNB+SPACE+
G_CLEF_MSC;
/**
* The combo string, as a character array.
*/
public static final char[] COMBO_CHARS=concat
(HELLO_EN_CHARS,SPACE_CHARS,
LANGUAGE_NO_CHARS,SPACE_CHARS,
HELLO_GR_CHARS,SPACE_CHARS,
HOUSE_AR_CHARS,SPACE_CHARS,
GOODBYE_JA_CHARS,SPACE_CHARS,
GOATS_LNB_CHARS,SPACE_CHARS,
G_CLEF_MSC_CHARS);
/**
* The combo string, as a Unicode code point array.
*/
public static final int[] COMBO_UCPS=concat
(HELLO_EN_UCPS,SPACE_UCPS,
LANGUAGE_NO_UCPS,SPACE_UCPS,
HELLO_GR_UCPS,SPACE_UCPS,
HOUSE_AR_UCPS,SPACE_UCPS,
GOODBYE_JA_UCPS,SPACE_UCPS,
GOATS_LNB_UCPS,SPACE_UCPS,
G_CLEF_MSC_UCPS);
/**
* The combo string, in the default encoding.
*/
public static final byte[] COMBO_NAT=concat
(HELLO_EN_NAT,SPACE_NAT,
LANGUAGE_NO_NAT,SPACE_NAT,
HELLO_GR_NAT,SPACE_NAT,
HOUSE_AR_NAT,SPACE_NAT,
GOODBYE_JA_NAT,SPACE_NAT,
GOATS_LNB_NAT,SPACE_NAT,
G_CLEF_MSC_NAT);
/**
* The combo string, in UTF-8.
*/
public static final byte[] COMBO_UTF8=concat
(HELLO_EN_UTF8,SPACE_UTF8,
LANGUAGE_NO_UTF8,SPACE_UTF8,
HELLO_GR_UTF8,SPACE_UTF8,
HOUSE_AR_UTF8,SPACE_UTF8,
GOODBYE_JA_UTF8,SPACE_UTF8,
GOATS_LNB_UTF8,SPACE_UTF8,
G_CLEF_MSC_UTF8);
/**
* The combo string, in UTF-16BE.
*/
public static final byte[] COMBO_UTF16BE=concat
(HELLO_EN_UTF16BE,SPACE_UTF16BE,
LANGUAGE_NO_UTF16BE,SPACE_UTF16BE,
HELLO_GR_UTF16BE,SPACE_UTF16BE,
HOUSE_AR_UTF16BE,SPACE_UTF16BE,
GOODBYE_JA_UTF16BE,SPACE_UTF16BE,
GOATS_LNB_UTF16BE,SPACE_UTF16BE,
G_CLEF_MSC_UTF16BE);
/**
* The combo string, in UTF-16LE.
*/
public static final byte[] COMBO_UTF16LE=concat
(HELLO_EN_UTF16LE,SPACE_UTF16LE,
LANGUAGE_NO_UTF16LE,SPACE_UTF16LE,
HELLO_GR_UTF16LE,SPACE_UTF16LE,
HOUSE_AR_UTF16LE,SPACE_UTF16LE,
GOODBYE_JA_UTF16LE,SPACE_UTF16LE,
GOATS_LNB_UTF16LE,SPACE_UTF16LE,
G_CLEF_MSC_UTF16LE);
/**
* The combo string, in UTF-32BE.
*/
public static final byte[] COMBO_UTF32BE=concat
(HELLO_EN_UTF32BE,SPACE_UTF32BE,
LANGUAGE_NO_UTF32BE,SPACE_UTF32BE,
HELLO_GR_UTF32BE,SPACE_UTF32BE,
HOUSE_AR_UTF32BE,SPACE_UTF32BE,
GOODBYE_JA_UTF32BE,SPACE_UTF32BE,
GOATS_LNB_UTF32BE,SPACE_UTF32BE,
G_CLEF_MSC_UTF32BE);
/**
* The combo string, in UTF-32LE.
*/
public static final byte[] COMBO_UTF32LE=concat
(HELLO_EN_UTF32LE,SPACE_UTF32LE,
LANGUAGE_NO_UTF32LE,SPACE_UTF32LE,
HELLO_GR_UTF32LE,SPACE_UTF32LE,
HOUSE_AR_UTF32LE,SPACE_UTF32LE,
GOODBYE_JA_UTF32LE,SPACE_UTF32LE,
GOATS_LNB_UTF32LE,SPACE_UTF32LE,
G_CLEF_MSC_UTF32LE);
/**
* An invalid string, comprising an isolated 16-bit surrogate.
*/
public static final String INVALID=
"\uD800"; //$NON-NLS-1$
/**
* An invalid string, comprising an isolated 16-bit surrogate, as
* a character array.
*/
public static final char[] INVALID_CHARS=new char[]
{'\uD800'};
/**
* A Unicode code point comprising an isolated surrogate code
* point.
*/
public static final int[] INVALID_UCPS=new int[]
{0xD800};
/**
* A byte array comprising an invalid UTF-8 byte sequence (the
* first 3 bytes of a 4-byte sequence).
*/
public static final byte[] INVALID_UTF8=new byte[]
{(byte)0xF0,(byte)0x90,(byte)0x82};
/**
* A byte array comprising an invalid UTF-16BE byte sequence (an
* isolated 16-bit surrogate).
*/
public static final byte[] INVALID_UTF16BE=new byte[]
{(byte)0xD8,(byte)0x00};
/**
* A byte array comprising an invalid UTF-16LE byte sequence (an
* isolated 16-bit surrogate).
*/
public static final byte[] INVALID_UTF16LE=new byte[]
{(byte)0x00,(byte)0xD8};
/**
* A byte array comprising an invalid UTF-32BE byte sequence (a
* 32-bit value outside the valid range for Unicode scalar values).
*/
public static final byte[] INVALID_UTF32BE=new byte[]
{(byte)0x10,(byte)0x00,(byte)0x00,(byte)0x00};
/**
* A byte array comprising an invalid UTF-32LE byte sequence (a
* 32-bit value outside the valid range for Unicode scalar values).
*/
public static final byte[] INVALID_UTF32LE=new byte[]
{(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x10};
// CLASS METHODS.
/**
* Concatenates the given byte arrays and returns the result.
*
* @param arrays The arrays.
*
* @return The concatenated arrays.
*/
private static byte[] concat
(byte[]... arrays)
{
ByteArrayOutputStream out=new ByteArrayOutputStream();
try {
for (byte[] array:arrays) {
out.write(array);
}
out.close();
} catch (IOException ex) {
fail("Cannot merge arrays"); //$NON-NLS-1$
}
return out.toByteArray();
}
/**
* Concatenates the given integer arrays and returns the result.
*
* @param arrays The arrays.
*
* @return The concatenated arrays.
*/
private static int[] concat
(int[]... arrays)
{
int len=0;
for (int[] array:arrays) {
len+=array.length;
}
int[] result=new int[len];
int i=0;
for (int[] array:arrays) {
for (int c:array) {
result[i++]=c;
}
}
return result;
}
/**
* Concatenates the given character arrays and returns the result.
*
* @param arrays The arrays.
*
* @return The concatenated arrays.
*/
private static char[] concat
(char[]... arrays)
{
CharArrayWriter out=new CharArrayWriter();
try {
for (char[] array:arrays) {
out.write(array);
}
out.close();
} catch (IOException ex) {
fail("Cannot merge arrays"); //$NON-NLS-1$
}
return out.toCharArray();
}
// CONSTRUCTOR.
/**
* Constructor. It is private so that no instances can be created.
*/
private UnicodeData() {}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy