src.test.main.TestUtf8Parser Maven / Gradle / Ivy
The newest version!
package main;
import org.codehaus.jackson.*;
import java.io.*;
import java.util.Random;
/**
 * Set of unit tests for verifying that JSON content read as UTF-8 encoded
 * input is decoded as expected: empty names and values, field names and
 * String values containing 2-byte and 3-byte characters, and a long
 * randomly generated String value that also contains surrogate pairs.
 *<p>
 * Most tests build their parser via <code>createParserUsingStream</code>,
 * inherited from {@link BaseTest}; presumably it encodes the document using
 * given encoding and parses from the resulting byte stream (confirm against
 * <code>BaseTest</code>).
 */
public class TestUtf8Parser
    extends BaseTest
{
    /**
     * Strings consisting of ASCII and Latin-1 characters; latter are
     * the ones that take 2 bytes each when encoded as UTF-8.
     */
    final static String[] UTF8_2BYTE_STRINGS = new String[] {
        /* This may look funny, but UTF8 scanner has fairly
         * elaborate decoding machinery, and it is indeed
         * necessary to try out various combinations...
         */
        "b", "A\u00D8", "abc", "c3p0",
        "12345", "......", "Long\u00FAer",
        "Latin1-fully-\u00BE-develop\u00A8d",
        "Some very long name, ridiculously long actually to see that buffer expansion works: \u00BF?"
    };

    /**
     * Strings that each contain a character from range U+0800 - U+FFFF
     * (outside of surrogate range), which takes 3 bytes when encoded as UTF-8.
     */
    final static String[] UTF8_3BYTE_STRINGS = new String[] {
        "\uC823?", "A\u400F", "1\u1234?",
        "Ab123\u4034",
        "Even-longer:\uC023"
    };

    /**
     * Verifies that an empty field name and an empty String value are
     * both reported as empty Strings.
     */
    public void testEmptyName()
        throws Exception
    {
        final String DOC = "{ \"\" : \"\" }";
        JsonParser jp = createParserUsingStream(DOC, "UTF-8");
        assertToken(JsonToken.START_OBJECT, jp.nextToken());
        assertToken(JsonToken.FIELD_NAME, jp.nextToken());
        assertEquals("", jp.getCurrentName());
        assertToken(JsonToken.VALUE_STRING, jp.nextToken());
        assertEquals("", jp.getText());
        assertToken(JsonToken.END_OBJECT, jp.nextToken());
        jp.close();
    }

    /**
     * Verifies that each of {@link #UTF8_2BYTE_STRINGS} is decoded
     * correctly when used as a field name (with an integer value).
     */
    public void testUtf8Name2Bytes()
        throws Exception
    {
        final String[] NAMES = UTF8_2BYTE_STRINGS;

        for (int i = 0; i < NAMES.length; ++i) {
            String NAME = NAMES[i];
            String DOC = "{ \""+NAME+"\" : 0 }";
            JsonParser jp = createParserUsingStream(DOC, "UTF-8");
            assertToken(JsonToken.START_OBJECT, jp.nextToken());
            assertToken(JsonToken.FIELD_NAME, jp.nextToken());
            assertEquals(NAME, jp.getCurrentName());
            assertToken(JsonToken.VALUE_NUMBER_INT, jp.nextToken());
            // should retain name during value entry, too
            assertEquals(NAME, jp.getCurrentName());
            assertToken(JsonToken.END_OBJECT, jp.nextToken());
            jp.close();
        }
    }

    /**
     * Verifies that each of {@link #UTF8_3BYTE_STRINGS} is decoded
     * correctly when used as a field name (with a boolean value).
     */
    public void testUtf8Name3Bytes() throws Exception
    {
        final String[] NAMES = UTF8_3BYTE_STRINGS;

        for (int i = 0; i < NAMES.length; ++i) {
            String NAME = NAMES[i];
            String DOC = "{ \""+NAME+"\" : true }";
            JsonParser jp = createParserUsingStream(DOC, "UTF-8");
            assertToken(JsonToken.START_OBJECT, jp.nextToken());
            assertToken(JsonToken.FIELD_NAME, jp.nextToken());
            assertEquals(NAME, jp.getCurrentName());
            assertToken(JsonToken.VALUE_TRUE, jp.nextToken());
            // name must still be available while positioned at the value
            assertEquals(NAME, jp.getCurrentName());
            assertToken(JsonToken.END_OBJECT, jp.nextToken());
            jp.close();
        }
    }

    // How about tests for Surrogate-Pairs? (field names have none so far;
    // String values get them via testUtf8StringValue)

    /**
     * Verifies that each of the 2-byte and 3-byte sample Strings is
     * decoded correctly when used as a String value.
     */
    public void testUtf8StringTrivial() throws Exception
    {
        verifyStringValues(UTF8_2BYTE_STRINGS);
        verifyStringValues(UTF8_3BYTE_STRINGS);
    }

    /**
     * Verifies that a long String value, randomly (but repeatably, due to
     * fixed seed) mixing ASCII, 2-byte, 3-byte and surrogate pair (4-byte)
     * characters, survives a round trip through explicit UTF-8 encoding
     * and parsing.
     */
    public void testUtf8StringValue() throws Exception
    {
        Random r = new Random(13);
        //int LEN = 72000;
        int LEN = 720;
        StringBuilder sb = new StringBuilder(LEN + 20);
        while (sb.length() < LEN) {
            int c;
            if (r.nextBoolean()) { // ascii
                c = 32 + (r.nextInt() & 0x3F);
                // quote and backslash would need escaping within JSON String
                if (c == '"' || c == '\\') {
                    c = ' ';
                }
            } else if (r.nextBoolean()) { // 2-byte
                // 160 - 1183: all below 0x800, the first 3-byte code point
                c = 160 + (r.nextInt() & 0x3FF);
            } else if (r.nextBoolean()) { // 3-byte (non-surrogate)
                // 8000 - 40767: at or above 0x800, below surrogates (0xD800)
                c = 8000 + (r.nextInt() & 0x7FFF);
            } else { // surrogates (2 chars)
                // 20 bits, ~ 1 million: covers all of U+10000 - U+10FFFF
                int value = r.nextInt() & 0xFFFFF;
                // upper 10 bits go in the first (high) surrogate...
                sb.append((char) (0xD800 + (value >> 10)));
                // ... and lower 10 bits in the second (low) one
                c = (0xDC00 + (value & 0x3FF));
            }
            sb.append((char) c);
        }

        // Encode explicitly, so that parser gets to do all of the decoding
        ByteArrayOutputStream bout = new ByteArrayOutputStream(LEN);
        OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
        out.write("[\"");
        String VALUE = sb.toString();
        out.write(VALUE);
        out.write("\"]");
        out.close();

        byte[] data = bout.toByteArray();
        JsonParser jp = new JsonFactory().createJsonParser(new ByteArrayInputStream(data));
        assertToken(JsonToken.START_ARRAY, jp.nextToken());
        assertToken(JsonToken.VALUE_STRING, jp.nextToken());
        String act = jp.getText();

        /* Let's pinpoint the first difference explicitly: with a value
         * this long, knowing the offset is much more useful than a
         * failure message that just includes both Strings.
         */
        int shared = Math.min(VALUE.length(), act.length());
        for (int i = 0; i < shared; ++i) {
            char c1 = act.charAt(i);
            char c2 = VALUE.charAt(i);
            if (c1 != c2) {
                fail("Differ at "+i+", "+((int) c1)+" vs "+((int) c2));
            }
        }
        assertEquals(VALUE.length(), act.length());
        assertEquals(VALUE, act);
        // and the array should get properly closed, as with other tests
        assertToken(JsonToken.END_ARRAY, jp.nextToken());
        jp.close();
    }

    /*
    /////////////////////////////////////////////////
    // Helper methods
    /////////////////////////////////////////////////
     */

    /**
     * Parses each of given values as the only element of a JSON array,
     * and verifies that the parser returns the value unchanged.
     * Length is checked separately first, so that a failure identifies
     * which entry was truncated or expanded.
     *
     * @param values String values to embed in documents; must not contain
     *   characters that would need escaping, since they are embedded as is
     */
    private void verifyStringValues(String[] values) throws Exception
    {
        for (int i = 0; i < values.length; ++i) {
            String value = values[i];
            String doc = "[ \""+value+"\" ]";
            JsonParser jp = createParserUsingStream(doc, "UTF-8");
            assertToken(JsonToken.START_ARRAY, jp.nextToken());
            assertToken(JsonToken.VALUE_STRING, jp.nextToken());
            String act = getAndVerifyText(jp);
            if (act.length() != value.length()) {
                fail("Failed for value #"+(i+1)+"/"+values.length+": length was "+act.length()+", should be "+value.length());
            }
            assertEquals(value, act);
            assertToken(JsonToken.END_ARRAY, jp.nextToken());
            jp.close();
        }
    }
}