com.fasterxml.jackson.dataformat.smile.SmileParserBootstrapper Maven / Gradle / Ivy
Show all versions of jackson-dataformat-smile Show documentation
package com.fasterxml.jackson.dataformat.smile;
import java.io.*;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.format.InputAccessor;
import com.fasterxml.jackson.core.format.MatchStrength;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import static com.fasterxml.jackson.dataformat.smile.SmileConstants.*;
/**
* Simple bootstrapper version used with Smile format parser.
*/
public class SmileParserBootstrapper
{
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected final IOContext _context;
protected final InputStream _in;
/*
/**********************************************************************
/* Input buffering
/**********************************************************************
*/
protected final byte[] _inputBuffer;
protected int _inputPtr;
protected int _inputEnd;
/**
* Flag that indicates whether buffer above is to be recycled
* after being used or not.
*/
protected final boolean _bufferRecyclable;
/*
/**********************************************************************
/* Input location
/**********************************************************************
*/
/**
* Current number of input units (bytes or chars) that were processed in
* previous blocks,
* before contents of current input buffer.
*
* Note: includes possible BOMs, if those were part of the input.
*/
protected int _inputProcessed;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
public SmileParserBootstrapper(IOContext ctxt, InputStream in)
{
_context = ctxt;
_in = in;
_inputBuffer = ctxt.allocReadIOBuffer();
_inputEnd = _inputPtr = 0;
_inputProcessed = 0;
_bufferRecyclable = true;
}
public SmileParserBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen)
{
_context = ctxt;
_in = null;
_inputBuffer = inputBuffer;
_inputPtr = inputStart;
_inputEnd = (inputStart + inputLen);
// Need to offset this for correct location info
_inputProcessed = -inputStart;
_bufferRecyclable = false;
}
public SmileParser constructParser(int factoryFeatures,
int generalParserFeatures, int smileFeatures,
ObjectCodec codec, ByteQuadsCanonicalizer rootByteSymbols)
throws IOException, JsonParseException
{
// 13-Mar-2021, tatu: [dataformats-binary#252] Create canonicalizing OR
// placeholder, depending on settings
ByteQuadsCanonicalizer can = rootByteSymbols.makeChildOrPlaceholder(factoryFeatures);
// We just need a single byte, really, to know if it starts with header
int end = _inputEnd;
if ((_inputPtr < end) && (_in != null)) {
int count = _in.read(_inputBuffer, end, _inputBuffer.length - end);
if (count > 0) {
_inputEnd += count;
}
}
SmileParser p = new SmileParser(_context, generalParserFeatures, smileFeatures,
codec, can,
_in, _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
boolean hadSig = false;
if (_inputPtr >= _inputEnd) { // only the case for empty doc
// 11-Oct-2012, tatu: Actually, let's allow empty documents even if
// header signature would otherwise be needed. This is useful for
// JAX-RS provider, empty PUT/POST payloads.
return p;
}
final byte firstByte = _inputBuffer[_inputPtr];
if (firstByte == SmileConstants.HEADER_BYTE_1) {
// need to ensure it gets properly handled so caller won't see the signature
hadSig = p.handleSignature(true, true);
}
if (!hadSig && SmileParser.Feature.REQUIRE_HEADER.enabledIn(smileFeatures)) {
// Ok, first, let's see if it looks like plain JSON...
String msg;
if (firstByte == '{' || firstByte == '[') {
msg = "Input does not start with Smile format header (first byte = 0x"
+Integer.toHexString(firstByte & 0xFF)+") -- rather, it starts with '"+((char) firstByte)
+"' (plain JSON input?) -- can not parse";
} else {
msg = "Input does not start with Smile format header (first byte = 0x"
+Integer.toHexString(firstByte & 0xFF)+") and parser has REQUIRE_HEADER enabled: can not parse";
}
throw new JsonParseException(p, msg);
}
return p;
}
/*
/**********************************************************************
/* Encoding detection for data format auto-detection
/**********************************************************************
*/
public static MatchStrength hasSmileFormat(InputAccessor acc) throws IOException
{
// Ok: ideally we start with the header -- if so, we are golden
if (!acc.hasMoreBytes()) {
return MatchStrength.INCONCLUSIVE;
}
// We always need at least two bytes to determine, so
byte b1 = acc.nextByte();
if (!acc.hasMoreBytes()) {
return MatchStrength.INCONCLUSIVE;
}
byte b2 = acc.nextByte();
// First: do we see 3 "magic bytes"? If so, we are golden
if (b1 == SmileConstants.HEADER_BYTE_1) { // yeah, looks like marker
if (b2 != SmileConstants.HEADER_BYTE_2) {
return MatchStrength.NO_MATCH;
}
if (!acc.hasMoreBytes()) {
return MatchStrength.INCONCLUSIVE;
}
return (acc.nextByte() == SmileConstants.HEADER_BYTE_3) ?
MatchStrength.FULL_MATCH : MatchStrength.NO_MATCH;
}
// Otherwise: ideally either Object or Array:
if (b1 == SmileConstants.TOKEN_LITERAL_START_OBJECT) {
/* Object is bit easier, because now we need to get new name; i.e. can
* rule out name back-refs
*/
if (b2 == SmileConstants.TOKEN_KEY_LONG_STRING) {
return MatchStrength.SOLID_MATCH;
}
int ch = (int) b2 & 0xFF;
if (ch >= 0x80 && ch < 0xF8) {
return MatchStrength.SOLID_MATCH;
}
return MatchStrength.NO_MATCH;
}
// Array bit trickier
if (b1 == SmileConstants.TOKEN_LITERAL_START_ARRAY) {
if (!acc.hasMoreBytes()) {
return MatchStrength.INCONCLUSIVE;
}
/* For arrays, we will actually accept much wider range of values (including
* things that could otherwise collide)
*/
if (likelySmileValue(b2) || possibleSmileValue(b2, true)) {
return MatchStrength.SOLID_MATCH;
}
return MatchStrength.NO_MATCH;
}
// Scalar values are pretty weak, albeit possible; require more certain match, consider it weak:
if (likelySmileValue(b1) || possibleSmileValue(b2, false)) {
return MatchStrength.SOLID_MATCH;
}
return MatchStrength.NO_MATCH;
}
private static boolean likelySmileValue(byte b)
{
if ( (b == TOKEN_MISC_LONG_TEXT_ASCII) // 0xE0
|| (b == TOKEN_MISC_LONG_TEXT_UNICODE) // 0xE4
|| (b == TOKEN_MISC_BINARY_7BIT) // 0xE8
|| (b == TOKEN_LITERAL_START_ARRAY) // 0xF8
|| (b == TOKEN_LITERAL_START_OBJECT) // 0xFA
) {
return true;
}
int ch = b & 0xFF;
// ASCII ctrl char range is pretty good match too
if (ch >= 0x80 && ch <= 0x9F) {
return true;
}
return false;
}
/**
* @param lenient Whether to consider more speculative matches or not
* (typically true when there is context like start-array)
*/
private static boolean possibleSmileValue(byte b, boolean lenient)
{
int ch = (int) b & 0xFF;
// note: we know that likely matches have been handled already, so...
if (ch >= 0x80) {
return (ch <= 0xE0);
}
if (lenient) {
if (ch >= 0x40) { // tiny/short ASCII
return true;
}
if (ch >= 0x20) { // various constants
return (ch < 0x2C); // many reserved bytes that can't be seen
}
}
return false;
}
}