org.codehaus.jackson.impl.ReaderBasedParser Maven / Gradle / Ivy
The newest version!
package org.codehaus.jackson.impl;
import java.io.*;
import org.codehaus.jackson.*;
import org.codehaus.jackson.io.IOContext;
import org.codehaus.jackson.sym.CharsToNameCanonicalizer;
import org.codehaus.jackson.util.*;
/**
* This is a concrete implementation of {@link JsonParser}, which is
* based on a {@link java.io.Reader} to handle low-level character
* conversion tasks.
*/
public final class ReaderBasedParser
extends ReaderBasedNumericParser
{
/*
/***************************************************
/* Configuration, state
/***************************************************
*/
protected ObjectCodec _objectCodec;
final protected CharsToNameCanonicalizer _symbols;
/*
/***************************************************
/* Life-cycle
/***************************************************
*/
public ReaderBasedParser(IOContext ioCtxt, int features, Reader r,
ObjectCodec codec, CharsToNameCanonicalizer st)
{
super(ioCtxt, features, r);
_objectCodec = codec;
_symbols = st;
}
public ObjectCodec getCodec() {
return _objectCodec;
}
public void setCodec(ObjectCodec c) {
_objectCodec = c;
}
/*
/***************************************************
/* Public API, traversal
/***************************************************
*/
/**
* @return Next token from the stream, if any found, or null
* to indicate end-of-input
*/
@Override
public JsonToken nextToken()
throws IOException, JsonParseException
{
/* First: field names are special -- we will always tokenize
* (part of) value along with field name to simplify
* state handling. If so, can and need to use secondary token:
*/
if (_currToken == JsonToken.FIELD_NAME) {
return _nextAfterName();
}
if (_tokenIncomplete) {
_skipString(); // only strings can be partial
}
int i = _skipWSOrEnd();
if (i < 0) { // end-of-input
/* 19-Feb-2009, tatu: Should actually close/release things
* like input source, symbol table and recyclable buffers now.
*/
close();
return (_currToken = null);
}
/* First, need to ensure we know the starting location of token
* after skipping leading white space
*/
_tokenInputTotal = _currInputProcessed + _inputPtr - 1;
_tokenInputRow = _currInputRow;
_tokenInputCol = _inputPtr - _currInputRowStart - 1;
// finally: clear any data retained so far
_binaryValue = null;
// Closing scope?
if (i == INT_RBRACKET) {
if (!_parsingContext.inArray()) {
_reportMismatchedEndMarker(i, '}');
}
_parsingContext = _parsingContext.getParent();
return (_currToken = JsonToken.END_ARRAY);
}
if (i == INT_RCURLY) {
if (!_parsingContext.inObject()) {
_reportMismatchedEndMarker(i, ']');
}
_parsingContext = _parsingContext.getParent();
return (_currToken = JsonToken.END_OBJECT);
}
// Nope: do we then expect a comma?
if (_parsingContext.expectComma()) {
if (i != INT_COMMA) {
_reportUnexpectedChar(i, "was expecting comma to separate "+_parsingContext.getTypeDesc()+" entries");
}
i = _skipWS();
}
/* And should we now have a name? Always true for
* Object contexts, since the intermediate 'expect-value'
* state is never retained.
*/
boolean inObject = _parsingContext.inObject();
if (inObject) {
// First, field name itself:
String name = _parseFieldName(i);
_parsingContext.setCurrentName(name);
_currToken = JsonToken.FIELD_NAME;
i = _skipWS();
if (i != INT_COLON) {
_reportUnexpectedChar(i, "was expecting a colon to separate field name and value");
}
i = _skipWS();
}
// Ok: we must have a value... what is it?
JsonToken t;
switch (i) {
case INT_QUOTE:
_tokenIncomplete = true;
t = JsonToken.VALUE_STRING;
break;
case INT_LBRACKET:
if (!inObject) {
_parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol);
}
t = JsonToken.START_ARRAY;
break;
case INT_LCURLY:
if (!inObject) {
_parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
}
t = JsonToken.START_OBJECT;
break;
case INT_RBRACKET:
case INT_RCURLY:
// Error: neither is valid at this point; valid closers have
// been handled earlier
_reportUnexpectedChar(i, "expected a value");
case INT_t:
_matchToken(JsonToken.VALUE_TRUE);
t = JsonToken.VALUE_TRUE;
break;
case INT_f:
_matchToken(JsonToken.VALUE_FALSE);
t = JsonToken.VALUE_FALSE;
break;
case INT_n:
_matchToken(JsonToken.VALUE_NULL);
t = JsonToken.VALUE_NULL;
break;
case INT_MINUS:
/* Should we have separate handling for plus? Although
* it is not allowed per se, it may be erroneously used,
* and could be indicate by a more specific error message.
*/
case INT_0:
case INT_1:
case INT_2:
case INT_3:
case INT_4:
case INT_5:
case INT_6:
case INT_7:
case INT_8:
case INT_9:
t = parseNumberText(i);
break;
default:
t = _handleUnexpectedValue(i);
break;
}
if (inObject) {
_nextToken = t;
return _currToken;
}
_currToken = t;
return t;
}
private final JsonToken _nextAfterName()
{
_nameCopied = false; // need to invalidate if it was copied
JsonToken t = _nextToken;
_nextToken = null;
// Also: may need to start new context?
if (t == JsonToken.START_ARRAY) {
_parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol);
} else if (t == JsonToken.START_OBJECT) {
_parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
}
return (_currToken = t);
}
@Override
public void close() throws IOException
{
super.close();
_symbols.release();
}
/*
/***************************************************
/* Internal methods, secondary parsing
/***************************************************
*/
protected final String _parseFieldName(int i)
throws IOException, JsonParseException
{
if (i != INT_QUOTE) {
return _handleUnusualFieldName(i);
}
/* First: let's try to see if we have a simple name: one that does
* not cross input buffer boundary, and does not contain escape
* sequences.
*/
int ptr = _inputPtr;
int hash = 0;
final int inputLen = _inputEnd;
if (ptr < inputLen) {
final int[] codes = CharTypes.getInputCodeLatin1();
final int maxCode = codes.length;
do {
int ch = _inputBuffer[ptr];
if (ch < maxCode && codes[ch] != 0) {
if (ch == '"') {
int start = _inputPtr;
_inputPtr = ptr+1; // to skip the quote
return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash);
}
break;
}
hash = (hash * 31) + ch;
++ptr;
} while (ptr < inputLen);
}
int start = _inputPtr;
_inputPtr = ptr;
return _parseFieldName2(start, hash, INT_QUOTE);
}
private String _parseFieldName2(int startPtr, int hash, int endChar)
throws IOException, JsonParseException
{
_textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr));
/* Output pointers; calls will also ensure that the buffer is
* not shared and has room for at least one more char.
*/
char[] outBuf = _textBuffer.getCurrentSegment();
int outPtr = _textBuffer.getCurrentSegmentSize();
while (true) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(": was expecting closing '"+((char) endChar)+"' for name");
}
}
char c = _inputBuffer[_inputPtr++];
int i = (int) c;
if (i <= INT_BACKSLASH) {
if (i == INT_BACKSLASH) {
/* Although chars outside of BMP are to be escaped as
* an UTF-16 surrogate pair, does that affect decoding?
* For now let's assume it does not.
*/
c = _decodeEscaped();
} else if (i <= endChar) {
if (i == endChar) {
break;
}
if (i < INT_SPACE) {
_throwUnquotedSpace(i, "name");
}
}
}
hash = (hash * 31) + i;
// Ok, let's add char to output:
outBuf[outPtr++] = c;
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
}
_textBuffer.setCurrentLength(outPtr);
{
TextBuffer tb = _textBuffer;
char[] buf = tb.getTextBuffer();
int start = tb.getTextOffset();
int len = tb.size();
return _symbols.findSymbol(buf, start, len, hash);
}
}
/**
* Method called when we see non-white space character other
* than double quote, when expecting a field name.
* In standard mode will just throw an expection; but
* in non-standard modes may be able to parse name.
*
* @since 1.2
*/
protected final String _handleUnusualFieldName(int i)
throws IOException, JsonParseException
{
// [JACKSON-173]: allow single quotes
if (i == INT_APOSTROPHE && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) {
return _parseApostropheFieldName();
}
// [JACKSON-69]: allow unquoted names if feature enabled:
if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) {
_reportUnexpectedChar(i, "was expecting double-quote to start field name");
}
final int[] codes = CharTypes.getInputCodeLatin1JsNames();
final int maxCode = codes.length;
// Also: first char must be a valid name char, but NOT be number
boolean firstOk;
if (i < maxCode) { // identifier, and not a number
firstOk = (codes[i] == 0) && (i < INT_0 || i > INT_9);
} else {
firstOk = Character.isJavaIdentifierPart((char) i);
}
if (!firstOk) {
_reportUnexpectedChar(i, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name");
}
int ptr = _inputPtr;
int hash = 0;
final int inputLen = _inputEnd;
if (ptr < inputLen) {
do {
int ch = _inputBuffer[ptr];
if (ch < maxCode) {
if (codes[ch] != 0) {
int start = _inputPtr-1; // -1 to bring back first char
_inputPtr = ptr;
return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash);
}
} else if (!Character.isJavaIdentifierPart((char) ch)) {
int start = _inputPtr-1; // -1 to bring back first char
_inputPtr = ptr;
return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash);
}
hash = (hash * 31) + ch;
++ptr;
} while (ptr < inputLen);
}
int start = _inputPtr-1;
_inputPtr = ptr;
return _parseUnusualFieldName2(start, hash, codes);
}
protected final String _parseApostropheFieldName()
throws IOException, JsonParseException
{
// Note: mostly copy of_parseFieldName
int ptr = _inputPtr;
int hash = 0;
final int inputLen = _inputEnd;
if (ptr < inputLen) {
final int[] codes = CharTypes.getInputCodeLatin1();
final int maxCode = codes.length;
do {
int ch = _inputBuffer[ptr];
if (ch == '\'') {
int start = _inputPtr;
_inputPtr = ptr+1; // to skip the quote
return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash);
}
if (ch < maxCode && codes[ch] != 0) {
break;
}
hash = (hash * 31) + ch;
++ptr;
} while (ptr < inputLen);
}
int start = _inputPtr;
_inputPtr = ptr;
return _parseFieldName2(start, hash, INT_APOSTROPHE);
}
/**
* Method for handling cases where first non-space character
* of an expected value token is not legal for standard JSON content.
*
* @since 1.3
*/
protected final JsonToken _handleUnexpectedValue(int i)
throws IOException, JsonParseException
{
// Most likely an error, unless we are to allow single-quote-strings
if (i != INT_APOSTROPHE || !isEnabled(Feature.ALLOW_SINGLE_QUOTES)) {
_reportUnexpectedChar(i, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')");
}
/* [JACKSON-173]: allow single quotes. Unlike with regular
* Strings, we'll eagerly parse contents; this so that there's
* no need to store information on quote char used.
*
* Also, no separation to fast/slow parsing; we'll just do
* one regular (~= slow) parsing, to keep code simple
*/
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
int outPtr = _textBuffer.getCurrentSegmentSize();
while (true) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(": was expecting closing quote for a string value");
}
}
char c = _inputBuffer[_inputPtr++];
i = (int) c;
if (i <= INT_BACKSLASH) {
if (i == INT_BACKSLASH) {
/* Although chars outside of BMP are to be escaped as
* an UTF-16 surrogate pair, does that affect decoding?
* For now let's assume it does not.
*/
c = _decodeEscaped();
} else if (i <= INT_APOSTROPHE) {
if (i == INT_APOSTROPHE) {
break;
}
if (i < INT_SPACE) {
_throwUnquotedSpace(i, "string value");
}
}
}
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
// Ok, let's add char to output:
outBuf[outPtr++] = c;
}
_textBuffer.setCurrentLength(outPtr);
return JsonToken.VALUE_STRING;
}
/**
* @since 1.2
*/
private String _parseUnusualFieldName2(int startPtr, int hash, int[] codes)
throws IOException, JsonParseException
{
_textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr));
char[] outBuf = _textBuffer.getCurrentSegment();
int outPtr = _textBuffer.getCurrentSegmentSize();
final int maxCode = codes.length;
while (true) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) { // acceptable for now (will error out later)
break;
}
}
char c = _inputBuffer[_inputPtr];
int i = (int) c;
if (i <= maxCode) {
if (codes[i] != 0) {
break;
}
} else if (!Character.isJavaIdentifierPart(c)) {
break;
}
++_inputPtr;
hash = (hash * 31) + i;
// Ok, let's add char to output:
outBuf[outPtr++] = c;
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
}
_textBuffer.setCurrentLength(outPtr);
{
TextBuffer tb = _textBuffer;
char[] buf = tb.getTextBuffer();
int start = tb.getTextOffset();
int len = tb.size();
return _symbols.findSymbol(buf, start, len, hash);
}
}
@Override
protected void _finishString()
throws IOException, JsonParseException
{
/* First: let's try to see if we have simple String value: one
* that does not cross input buffer boundary, and does not
* contain escape sequences.
*/
int ptr = _inputPtr;
final int inputLen = _inputEnd;
if (ptr < inputLen) {
final int[] codes = CharTypes.getInputCodeLatin1();
final int maxCode = codes.length;
do {
int ch = _inputBuffer[ptr];
if (ch < maxCode && codes[ch] != 0) {
if (ch == '"') {
_textBuffer.resetWithShared(_inputBuffer, _inputPtr, (ptr-_inputPtr));
_inputPtr = ptr+1;
// Yes, we got it all
return;
}
break;
}
++ptr;
} while (ptr < inputLen);
}
/* Either ran out of input, or bumped into an escape
* sequence...
*/
_textBuffer.resetWithCopy(_inputBuffer, _inputPtr, (ptr-_inputPtr));
_inputPtr = ptr;
_finishString2();
}
protected void _finishString2()
throws IOException, JsonParseException
{
char[] outBuf = _textBuffer.getCurrentSegment();
int outPtr = _textBuffer.getCurrentSegmentSize();
while (true) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(": was expecting closing quote for a string value");
}
}
char c = _inputBuffer[_inputPtr++];
int i = (int) c;
if (i <= INT_BACKSLASH) {
if (i == INT_BACKSLASH) {
/* Although chars outside of BMP are to be escaped as
* an UTF-16 surrogate pair, does that affect decoding?
* For now let's assume it does not.
*/
c = _decodeEscaped();
} else if (i <= INT_QUOTE) {
if (i == INT_QUOTE) {
break;
}
if (i < INT_SPACE) {
_throwUnquotedSpace(i, "string value");
}
}
}
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
// Ok, let's add char to output:
outBuf[outPtr++] = c;
}
_textBuffer.setCurrentLength(outPtr);
}
/**
* Method called to skim through rest of unparsed String value,
* if it is not needed. This can be done bit faster if contents
* need not be stored for future access.
*/
protected void _skipString()
throws IOException, JsonParseException
{
_tokenIncomplete = false;
int inputPtr = _inputPtr;
int inputLen = _inputEnd;
char[] inputBuffer = _inputBuffer;
while (true) {
if (inputPtr >= inputLen) {
_inputPtr = inputPtr;
if (!loadMore()) {
_reportInvalidEOF(": was expecting closing quote for a string value");
}
inputPtr = _inputPtr;
inputLen = _inputEnd;
}
char c = inputBuffer[inputPtr++];
int i = (int) c;
if (i <= INT_BACKSLASH) {
if (i == INT_BACKSLASH) {
/* Although chars outside of BMP are to be escaped as
* an UTF-16 surrogate pair, does that affect decoding?
* For now let's assume it does not.
*/
_inputPtr = inputPtr;
c = _decodeEscaped();
inputPtr = _inputPtr;
inputLen = _inputEnd;
} else if (i <= INT_QUOTE) {
if (i == INT_QUOTE) {
_inputPtr = inputPtr;
break;
}
if (i < INT_SPACE) {
_inputPtr = inputPtr;
_throwUnquotedSpace(i, "string value");
}
}
}
}
}
/**
* Method called to much one of literal tokens we may expect
*/
protected void _matchToken(JsonToken token)
throws IOException, JsonParseException
{
// First char is already matched, need to check the rest
String matchStr = token.asString();
int i = 1;
for (int len = matchStr.length(); i < len; ++i) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(" in a value");
}
}
char c = _inputBuffer[_inputPtr];
if (c != matchStr.charAt(i)) {
_reportInvalidToken(matchStr.substring(0, i));
}
++_inputPtr;
}
/* Ok, fine; let's not bother checking anything beyond keyword.
* If there's something wrong there, it'll cause a parsing
* error later on.
*/
return;
}
private void _reportInvalidToken(String matchedPart)
throws IOException, JsonParseException
{
StringBuilder sb = new StringBuilder(matchedPart);
/* Let's just try to find what appears to be the token, using
* regular Java identifier character rules. It's just a heuristic,
* nothing fancy here.
*/
while (true) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
break;
}
}
char c = _inputBuffer[_inputPtr];
if (!Character.isJavaIdentifierPart(c)) {
break;
}
++_inputPtr;
sb.append(c);
}
_reportError("Unrecognized token '"+sb.toString()+"': was expecting 'null', 'true' or 'false'");
}
/*
/***************************************************
/* Internal methods, other parsing
/***************************************************
*/
/**
* We actually need to check the character value here
* (to see if we have \n following \r).
*/
protected final void _skipCR() throws IOException
{
if (_inputPtr < _inputEnd || loadMore()) {
if (_inputBuffer[_inputPtr] == '\n') {
++_inputPtr;
}
}
++_currInputRow;
_currInputRowStart = _inputPtr;
}
protected final void _skipLF() throws IOException
{
++_currInputRow;
_currInputRowStart = _inputPtr;
}
private final int _skipWS()
throws IOException, JsonParseException
{
while (_inputPtr < _inputEnd || loadMore()) {
int i = (int) _inputBuffer[_inputPtr++];
if (i > INT_SPACE) {
if (i != INT_SLASH) {
return i;
}
_skipComment();
} else if (i != INT_SPACE) {
if (i == INT_LF) {
_skipLF();
} else if (i == INT_CR) {
_skipCR();
} else if (i != INT_TAB) {
_throwInvalidSpace(i);
}
}
}
throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries");
}
private final int _skipWSOrEnd()
throws IOException, JsonParseException
{
while ((_inputPtr < _inputEnd) || loadMore()) {
int i = (int) _inputBuffer[_inputPtr++];
if (i > INT_SPACE) {
if (i != INT_SLASH) {
return i;
}
_skipComment();
} else if (i != INT_SPACE) {
if (i == INT_LF) {
_skipLF();
} else if (i == INT_CR) {
_skipCR();
} else if (i != INT_TAB) {
_throwInvalidSpace(i);
}
}
}
// We ran out of input...
_handleEOF();
return -1;
}
private final void _skipComment()
throws IOException, JsonParseException
{
if (!isEnabled(Feature.ALLOW_COMMENTS)) {
_reportUnexpectedChar('/', "maybe a (non-standard) comment? (not recognized as one since Feature 'ALLOW_COMMENTS' not enabled for parser)");
}
// First: check which comment (if either) it is:
if (_inputPtr >= _inputEnd && !loadMore()) {
_reportInvalidEOF(" in a comment");
}
char c = _inputBuffer[_inputPtr++];
if (c == '/') {
_skipCppComment();
} else if (c == '*') {
_skipCComment();
} else {
_reportUnexpectedChar(c, "was expecting either '*' or '/' for a comment");
}
}
private final void _skipCComment()
throws IOException, JsonParseException
{
// Ok: need the matching '*/'
main_loop:
while ((_inputPtr < _inputEnd) || loadMore()) {
int i = (int) _inputBuffer[_inputPtr++];
if (i <= INT_ASTERISK) {
if (i == INT_ASTERISK) { // end?
if ((_inputPtr >= _inputEnd) && !loadMore()) {
break main_loop;
}
if (_inputBuffer[_inputPtr] == INT_SLASH) {
++_inputPtr;
return;
}
continue;
}
if (i < INT_SPACE) {
if (i == INT_LF) {
_skipLF();
} else if (i == INT_CR) {
_skipCR();
} else if (i != INT_TAB) {
_throwInvalidSpace(i);
}
}
}
}
_reportInvalidEOF(" in a comment");
}
private final void _skipCppComment()
throws IOException, JsonParseException
{
// Ok: need to find EOF or linefeed
while ((_inputPtr < _inputEnd) || loadMore()) {
int i = (int) _inputBuffer[_inputPtr++];
if (i < INT_SPACE) {
if (i == INT_LF) {
_skipLF();
break;
} else if (i == INT_CR) {
_skipCR();
break;
} else if (i != INT_TAB) {
_throwInvalidSpace(i);
}
}
}
}
protected final char _decodeEscaped()
throws IOException, JsonParseException
{
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(" in character escape sequence");
}
}
char c = _inputBuffer[_inputPtr++];
switch ((int) c) {
// First, ones that are mapped
case INT_b:
return '\b';
case INT_t:
return '\t';
case INT_n:
return '\n';
case INT_f:
return '\f';
case INT_r:
return '\r';
// And these are to be returned as they are
case INT_QUOTE:
case INT_SLASH:
case INT_BACKSLASH:
return c;
case INT_u: // and finally hex-escaped
break;
default:
_reportError("Unrecognized character escape "+_getCharDesc(c));
}
// Ok, a hex escape. Need 4 characters
int value = 0;
for (int i = 0; i < 4; ++i) {
if (_inputPtr >= _inputEnd) {
if (!loadMore()) {
_reportInvalidEOF(" in character escape sequence");
}
}
int ch = (int) _inputBuffer[_inputPtr++];
int digit = CharTypes.charToHex(ch);
if (digit < 0) {
_reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence");
}
value = (value << 4) | digit;
}
return (char) value;
}
/*
/***************************************************
/* Binary access
/***************************************************
*/
@Override
protected byte[] _decodeBase64(Base64Variant b64variant)
throws IOException, JsonParseException
{
ByteArrayBuilder builder = _getByteArrayBuilder();
/* !!! 23-Jan-2009, tatu: There are some potential problems
* with this:
*
* - Escaped chars are not handled. Should they?
*/
//main_loop:
while (true) {
// first, we'll skip preceding white space, if any
char ch;
do {
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
ch = _inputBuffer[_inputPtr++];
} while (ch <= INT_SPACE);
int bits = b64variant.decodeBase64Char(ch);
if (bits < 0) { // reached the end, fair and square?
if (ch == '"') {
return builder.toByteArray();
}
throw reportInvalidChar(b64variant, ch, 0);
}
int decodedData = bits;
// then second base64 char; can't get padding yet, nor ws
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
ch = _inputBuffer[_inputPtr++];
bits = b64variant.decodeBase64Char(ch);
if (bits < 0) {
throw reportInvalidChar(b64variant, ch, 1);
}
decodedData = (decodedData << 6) | bits;
// third base64 char; can be padding, but not ws
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
ch = _inputBuffer[_inputPtr++];
bits = b64variant.decodeBase64Char(ch);
// First branch: can get padding (-> 1 byte)
if (bits < 0) {
if (bits != Base64Variant.BASE64_VALUE_PADDING) {
throw reportInvalidChar(b64variant, ch, 2);
}
// Ok, must get padding
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
ch = _inputBuffer[_inputPtr++];
if (!b64variant.usesPaddingChar(ch)) {
throw reportInvalidChar(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'");
}
// Got 12 bits, only need 8, need to shift
decodedData >>= 4;
builder.append(decodedData);
continue;
}
// Nope, 2 or 3 bytes
decodedData = (decodedData << 6) | bits;
// fourth and last base64 char; can be padding, but not ws
if (_inputPtr >= _inputEnd) {
loadMoreGuaranteed();
}
ch = _inputBuffer[_inputPtr++];
bits = b64variant.decodeBase64Char(ch);
if (bits < 0) {
if (bits != Base64Variant.BASE64_VALUE_PADDING) {
throw reportInvalidChar(b64variant, ch, 3);
}
/* With padding we only get 2 bytes; but we have
* to shift it a bit so it is identical to triplet
* case with partial output.
* 3 chars gives 3x6 == 18 bits, of which 2 are
* dummies, need to discard:
*/
decodedData >>= 2;
builder.appendTwoBytes(decodedData);
} else {
// otherwise, our triple is now complete
decodedData = (decodedData << 6) | bits;
builder.appendThreeBytes(decodedData);
}
}
}
protected IllegalArgumentException reportInvalidChar(Base64Variant b64variant, char ch, int bindex)
throws IllegalArgumentException
{
return reportInvalidChar(b64variant, ch, bindex, null);
}
/**
* @param bindex Relative index within base64 character unit; between 0
* and 3 (as unit has exactly 4 characters)
*/
protected IllegalArgumentException reportInvalidChar(Base64Variant b64variant, char ch, int bindex, String msg)
throws IllegalArgumentException
{
String base;
if (ch <= INT_SPACE) {
base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units";
} else if (b64variant.usesPaddingChar(ch)) {
base = "Unexpected padding character ('"+b64variant.getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character";
} else if (!Character.isDefined(ch) || Character.isISOControl(ch)) {
// Not sure if we can really get here... ? (most illegal xml chars are caught at lower level)
base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content";
} else {
base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content";
}
if (msg != null) {
base = base + ": " + msg;
}
return new IllegalArgumentException(base);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy