All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.jackson.dataformat.smile.SmileParserBootstrapper Maven / Gradle / Ivy

package com.fasterxml.jackson.dataformat.smile;

import java.io.*;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.format.InputAccessor;
import com.fasterxml.jackson.core.format.MatchStrength;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.BytesToNameCanonicalizer;

import static com.fasterxml.jackson.dataformat.smile.SmileConstants.*;

/**
 * Simple bootstrapper version used with Smile format parser.
 */
public class SmileParserBootstrapper
{
    /*
    /**********************************************************
    /* Configuration
    /**********************************************************
     */

    final IOContext _context;

    final InputStream _in;

    /*
    /**********************************************************
    /* Input buffering
    /**********************************************************
     */

    final byte[] _inputBuffer;

    private int _inputPtr;

    private int _inputEnd;

    /**
     * Flag that indicates whether buffer above is to be recycled
     * after being used or not.
     */
    private final boolean _bufferRecyclable;

    /*
    /**********************************************************
    /* Input location
    /**********************************************************
     */

    /**
     * Current number of input units (bytes or chars) that were processed in
     * previous blocks,
     * before contents of current input buffer.
     *

* Note: includes possible BOMs, if those were part of the input. */ protected int _inputProcessed; /* /********************************************************** /* Data gathered /********************************************************** */ /* /********************************************************** /* Life-cycle /********************************************************** */ public SmileParserBootstrapper(IOContext ctxt, InputStream in) { _context = ctxt; _in = in; _inputBuffer = ctxt.allocReadIOBuffer(); _inputEnd = _inputPtr = 0; _inputProcessed = 0; _bufferRecyclable = true; } public SmileParserBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen) { _context = ctxt; _in = null; _inputBuffer = inputBuffer; _inputPtr = inputStart; _inputEnd = (inputStart + inputLen); // Need to offset this for correct location info _inputProcessed = -inputStart; _bufferRecyclable = false; } public SmileParser constructParser(int generalParserFeatures, int smileFeatures, boolean internNames, ObjectCodec codec, BytesToNameCanonicalizer rootByteSymbols) throws IOException, JsonParseException { BytesToNameCanonicalizer can = rootByteSymbols.makeChild(true, internNames); // We just need a single byte, really, to know if it starts with header ensureLoaded(1); SmileParser p = new SmileParser(_context, generalParserFeatures, smileFeatures, codec, can, _in, _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable); boolean hadSig = false; if (_inputPtr < _inputEnd) { // only false for empty doc if (_inputBuffer[_inputPtr] == SmileConstants.HEADER_BYTE_1) { // need to ensure it gets properly handled so caller won't see the signature hadSig = p.handleSignature(true, true); } } if (!hadSig && (smileFeatures & SmileParser.Feature.REQUIRE_HEADER.getMask()) != 0) { // Ok, first, let's see if it looks like plain JSON... String msg; byte firstByte = (_inputPtr < _inputEnd) ? _inputBuffer[_inputPtr] : 0; if (firstByte == '{' || firstByte == '[') { msg = "Input does not start with Smile format header (first byte = 0x" +Integer.toHexString(firstByte & 0xFF)+") -- rather, it starts with '"+((char) firstByte) +"' (plain JSON input?) -- can not parse"; } else { msg = "Input does not start with Smile format header (first byte = 0x" +Integer.toHexString(firstByte & 0xFF)+") and parser has REQUIRE_HEADER enabled: can not parse"; } throw new JsonParseException(msg, JsonLocation.NA); } return p; } /* /********************************************************** /* Encoding detection for data format auto-detection /********************************************************** */ /** * Helper * * @since 1.8 */ public static MatchStrength hasSmileFormat(InputAccessor acc) throws IOException { // Ok: ideally we start with the header -- if so, we are golden if (!acc.hasMoreBytes()) { return MatchStrength.INCONCLUSIVE; } // We always need at least two bytes to determine, so byte b1 = acc.nextByte(); if (!acc.hasMoreBytes()) { return MatchStrength.INCONCLUSIVE; } byte b2 = acc.nextByte(); // First: do we see 3 "magic bytes"? If so, we are golden if (b1 == SmileConstants.HEADER_BYTE_1) { // yeah, looks like marker if (b2 != SmileConstants.HEADER_BYTE_2) { return MatchStrength.NO_MATCH; } if (!acc.hasMoreBytes()) { return MatchStrength.INCONCLUSIVE; } return (acc.nextByte() == SmileConstants.HEADER_BYTE_3) ? MatchStrength.FULL_MATCH : MatchStrength.NO_MATCH; } // Otherwise: ideally either Object or Array: if (b1 == SmileConstants.TOKEN_LITERAL_START_OBJECT) { /* Object is bit easier, because now we need to get new name; i.e. can * rule out name back-refs */ if (b2 == SmileConstants.TOKEN_KEY_LONG_STRING) { return MatchStrength.SOLID_MATCH; } int ch = (int) b2 & 0xFF; if (ch >= 0x80 && ch < 0xF8) { return MatchStrength.SOLID_MATCH; } return MatchStrength.NO_MATCH; } // Array bit trickier if (b1 == SmileConstants.TOKEN_LITERAL_START_ARRAY) { if (!acc.hasMoreBytes()) { return MatchStrength.INCONCLUSIVE; } /* For arrays, we will actually accept much wider range of values (including * things that could otherwise collide) */ if (likelySmileValue(b2) || possibleSmileValue(b2, true)) { return MatchStrength.SOLID_MATCH; } return MatchStrength.NO_MATCH; } // Scalar values are pretty weak, albeit possible; require more certain match, consider it weak: if (likelySmileValue(b1) || possibleSmileValue(b2, false)) { return MatchStrength.SOLID_MATCH; } return MatchStrength.NO_MATCH; } private static boolean likelySmileValue(byte b) { int ch = (int) b & 0xFF; if (ch >= 0xE0) { // good range for known values switch (ch) { case TOKEN_MISC_LONG_TEXT_ASCII: // 0xE0 case TOKEN_MISC_LONG_TEXT_UNICODE: // 0xE4 case TOKEN_MISC_BINARY_7BIT: // 0xE8 case TOKEN_LITERAL_START_ARRAY: // 0xF8 case TOKEN_LITERAL_START_OBJECT: // 0xFA return true; } // Others will not work (end object/array; reserved; shared strings) return false; } // ASCII ctrl char range is pretty good match too if (ch >= 0x80 && ch <= 0x9F) { return true; } return false; } /** * @param lenient Whether to consider more speculative matches or not * (typically true when there is context like start-array) */ private static boolean possibleSmileValue(byte b, boolean lenient) { int ch = (int) b & 0xFF; // note: we know that likely matches have been handled already, so... if (ch >= 0x80) { return (ch <= 0xE0); } if (lenient) { if (ch >= 0x40) { // tiny/short ASCII return true; } if (ch >- 0x20) { // various constants return (ch < 0x2C); // many reserved bytes that can't be seen } } return false; } /* /********************************************************** /* Internal methods, raw input access /********************************************************** */ protected boolean ensureLoaded(int minimum) throws IOException { if (_in == null) { // block source; nothing more to load return false; } /* Let's assume here buffer has enough room -- this will always * be true for the limited used this method gets */ int gotten = (_inputEnd - _inputPtr); while (gotten < minimum) { int count = _in.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd); if (count < 1) { return false; } _inputEnd += count; gotten += count; } return true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy