All downloads are free. Search and download functionality uses the official Maven repository.

org.semarglproject.jsonld.JsonLdParser Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.semarglproject.jsonld;

import org.semarglproject.rdf.core.ParseException;
import org.semarglproject.rdf.core.ProcessorGraphHandler;
import org.semarglproject.sink.CharSink;
import org.semarglproject.sink.Pipe;
import org.semarglproject.sink.QuadSink;
import org.semarglproject.sink.TripleSink;
import org.semarglproject.source.StreamProcessor;

import java.util.BitSet;
import java.util.Deque;
import java.util.LinkedList;

/**
 * Implementation of streaming JSON-LD parser.
 * Parser requires @id properties to be declared before predicates for each non-blank JSON-LD node.
 * 
* List of supported options: *
    *
  • {@link StreamProcessor#PROCESSOR_GRAPH_HANDLER_PROPERTY}
  • *
  • {@link StreamProcessor#ENABLE_ERROR_RECOVERY}
  • *
*/ public final class JsonLdParser extends Pipe implements CharSink { /** * Class URI for errors produced by a parser */ public static final String ERROR = "http://semarglproject.org/json-ld/Error"; /** * Class URI for warnings produced by a parser */ public static final String WARNING = "http://semarglproject.org/json-ld/Warning"; private static final short PARSING_ARRAY_BEFORE_VALUE = 1; private static final short PARSING_OBJECT_BEFORE_KEY = 2; private static final short PARSING_OBJECT_BEFORE_VALUE = 3; private static final short PARSING_STRING = 4; private static final short PARSING_NUMBER = 5; private static final short PARSING_NAMED_LITERAL = 6; private static final short PARSING_OBJECT_BEFORE_COLON = 7; private static final short PARSING_OBJECT_BEFORE_COMMA = 8; private static final short PARSING_ARRAY_BEFORE_COMMA = 9; private static final BitSet WHITESPACE = new BitSet(); private static final BitSet NAMED_LITERAL_CHAR = new BitSet(); static { WHITESPACE.set('\t'); WHITESPACE.set(' '); WHITESPACE.set('\r'); WHITESPACE.set('\n'); NAMED_LITERAL_CHAR.set('t'); NAMED_LITERAL_CHAR.set('r'); NAMED_LITERAL_CHAR.set('u'); NAMED_LITERAL_CHAR.set('e'); NAMED_LITERAL_CHAR.set('f'); NAMED_LITERAL_CHAR.set('a'); NAMED_LITERAL_CHAR.set('l'); NAMED_LITERAL_CHAR.set('s'); NAMED_LITERAL_CHAR.set('n'); NAMED_LITERAL_CHAR.set('0'); NAMED_LITERAL_CHAR.set('1'); NAMED_LITERAL_CHAR.set('2'); NAMED_LITERAL_CHAR.set('3'); NAMED_LITERAL_CHAR.set('4'); NAMED_LITERAL_CHAR.set('5'); NAMED_LITERAL_CHAR.set('6'); NAMED_LITERAL_CHAR.set('7'); NAMED_LITERAL_CHAR.set('8'); NAMED_LITERAL_CHAR.set('9'); NAMED_LITERAL_CHAR.set('.'); NAMED_LITERAL_CHAR.set('-'); NAMED_LITERAL_CHAR.set('E'); NAMED_LITERAL_CHAR.set('+'); } private JsonLdContentHandler contentHandler; private ProcessorGraphHandler processorGraphHandler = null; private boolean ignoreErrors = false; private Deque stateStack = new LinkedList(); private short parsingState; private int tokenStartPos; private short charsToEscape = 0; 
private StringBuilder addBuffer = null; private JsonLdParser(QuadSink sink) { super(sink); contentHandler = new JsonLdContentHandler(sink); } /** * Creates instance of JsonLdParser connected to specified sink. * @param sink sink to be connected to * @return instance of JsonLdParser */ public static CharSink connect(QuadSink sink) { return new JsonLdParser(sink); } public void warning(String warningClass, String msg) { if (processorGraphHandler != null) { processorGraphHandler.warning(warningClass, msg); } } private void error(String msg) throws ParseException { if (processorGraphHandler != null) { processorGraphHandler.error(ERROR, msg); } if (!ignoreErrors) { throw new ParseException(msg); } } @Override public JsonLdParser process(String str) throws ParseException { return process(str.toCharArray(), 0, str.length()); } @Override public JsonLdParser process(char ch) throws ParseException { char[] buffer = new char[1]; buffer[0] = ch; return process(buffer, 0, 1); } @Override public JsonLdParser process(char[] buffer, int start, int count) throws ParseException { if (tokenStartPos != -1) { tokenStartPos = start; } int end = start + count; for (int pos = start; pos < end; pos++) { if (parsingState == PARSING_ARRAY_BEFORE_VALUE || parsingState == PARSING_OBJECT_BEFORE_VALUE || parsingState == PARSING_OBJECT_BEFORE_KEY) { processValueChar(buffer, pos); } else if (parsingState == PARSING_STRING) { processStringChar(buffer, pos); } else if (parsingState == PARSING_OBJECT_BEFORE_COMMA) { if (buffer[pos] == ',') { parsingState = PARSING_OBJECT_BEFORE_KEY; } else if (buffer[pos] == '}') { parsingState = stateStack.pop(); contentHandler.onObjectEnd(); onValue(); } else if (!WHITESPACE.get(buffer[pos])) { error("Unexpected character '" + buffer[pos] + "'"); } } else if (parsingState == PARSING_ARRAY_BEFORE_COMMA) { if (buffer[pos] == ',') { parsingState = PARSING_ARRAY_BEFORE_VALUE; } else if (buffer[pos] == ']') { parsingState = stateStack.pop(); contentHandler.onArrayEnd(); 
onValue(); } else if (!WHITESPACE.get(buffer[pos])) { error("Unexpected character '" + buffer[pos] + "'"); } } else if (parsingState == PARSING_OBJECT_BEFORE_COLON) { if (buffer[pos] == ':') { parsingState = PARSING_OBJECT_BEFORE_VALUE; } else if (!WHITESPACE.get(buffer[pos])) { error("Unexpected character '" + buffer[pos] + "'"); } } else if (parsingState == PARSING_NAMED_LITERAL || parsingState == PARSING_NUMBER) { if (!NAMED_LITERAL_CHAR.get(buffer[pos])) { String value = unescape(extractToken(buffer, pos - 1, 0)); if (parsingState == PARSING_NAMED_LITERAL) { if ("true".equals(value)) { contentHandler.onBoolean(true); } else if ("false".equals(value)) { contentHandler.onBoolean(false); } else if ("null".equals(value)) { contentHandler.onNull(); } else { error("Unexpected value '" + value + "'"); } } else { if (value.contains(".") || value.contains("E") || value.contains("e")) { contentHandler.onNumber(Double.valueOf(value)); } else { contentHandler.onNumber(Integer.valueOf(value)); } } parsingState = stateStack.pop(); if (parsingState == PARSING_ARRAY_BEFORE_VALUE) { parsingState = PARSING_ARRAY_BEFORE_COMMA; } else if (parsingState == PARSING_OBJECT_BEFORE_VALUE) { parsingState = PARSING_OBJECT_BEFORE_COMMA; } pos--; } } } if (tokenStartPos != -1) { if (addBuffer == null) { addBuffer = new StringBuilder(); } addBuffer.append(buffer, tokenStartPos, end - tokenStartPos); } return this; } private void processStringChar(char[] buffer, int pos) throws ParseException { if (charsToEscape > 0) { charsToEscape--; } else { if (buffer[pos] == '\"') { parsingState = stateStack.pop(); String value = unescape(extractToken(buffer, pos, 1)); if (parsingState == PARSING_OBJECT_BEFORE_KEY) { contentHandler.onKey(value); parsingState = PARSING_OBJECT_BEFORE_COLON; } else if (parsingState == PARSING_ARRAY_BEFORE_VALUE) { contentHandler.onString(value); parsingState = PARSING_ARRAY_BEFORE_COMMA; } else if (parsingState == PARSING_OBJECT_BEFORE_VALUE) { 
contentHandler.onString(value); parsingState = PARSING_OBJECT_BEFORE_COMMA; } } else if (buffer[pos] == '\\') { charsToEscape = 1; } } } private void processValueChar(char[] buffer, int pos) throws ParseException { switch (buffer[pos]) { case '{': stateStack.push(parsingState); parsingState = PARSING_OBJECT_BEFORE_KEY; contentHandler.onObjectStart(); break; case '}': if (parsingState == PARSING_OBJECT_BEFORE_VALUE) { error("Unexpected object end"); } parsingState = stateStack.pop(); contentHandler.onObjectEnd(); onValue(); break; case '[': stateStack.push(parsingState); parsingState = PARSING_ARRAY_BEFORE_VALUE; contentHandler.onArrayStart(); break; case ']': parsingState = stateStack.pop(); contentHandler.onArrayEnd(); onValue(); break; case 't': case 'f': case 'n': stateStack.push(parsingState); parsingState = PARSING_NAMED_LITERAL; tokenStartPos = pos; break; case '\"': stateStack.push(parsingState); parsingState = PARSING_STRING; tokenStartPos = pos; break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': stateStack.push(parsingState); parsingState = PARSING_NUMBER; tokenStartPos = pos; break; default: if (!WHITESPACE.get(buffer[pos])) { error("Unexpected character '" + buffer[pos] + "'"); } } } private void onValue() { if (parsingState == PARSING_ARRAY_BEFORE_VALUE) { parsingState = PARSING_ARRAY_BEFORE_COMMA; } else if (parsingState == PARSING_OBJECT_BEFORE_VALUE) { parsingState = PARSING_OBJECT_BEFORE_COMMA; } } @Override public void setBaseUri(String baseUri) { contentHandler.setBaseUri(baseUri); } @Override protected boolean setPropertyInternal(String key, Object value) { if (StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY.equals(key) && value instanceof ProcessorGraphHandler) { processorGraphHandler = (ProcessorGraphHandler) value; } else if (StreamProcessor.ENABLE_ERROR_RECOVERY.equals(key) && value instanceof Boolean) { ignoreErrors = (Boolean) value; } return false; } private String 
extractToken(char[] buffer, int tokenEndPos, int trimSize) throws ParseException { String saved; if (addBuffer != null) { if (tokenEndPos - trimSize >= tokenStartPos) { addBuffer.append(buffer, tokenStartPos, tokenEndPos - tokenStartPos - trimSize + 1); } addBuffer.delete(0, trimSize); saved = addBuffer.toString(); addBuffer = null; } else { saved = String.valueOf(buffer, tokenStartPos + trimSize, tokenEndPos - tokenStartPos + 1 - 2 * trimSize); } tokenStartPos = -1; return saved; } @Override public void startStream() throws ParseException { super.startStream(); parsingState = PARSING_ARRAY_BEFORE_VALUE; contentHandler.onDocumentStart(); } @Override public void endStream() throws ParseException { super.endStream(); contentHandler.onDocumentEnd(); if (tokenStartPos != -1 || !stateStack.isEmpty()) { error("Unexpected end of stream"); } } private String unescape(String str) throws ParseException { int limit = str.length(); StringBuilder result = new StringBuilder(limit); for (int i = 0; i < limit; i++) { char ch = str.charAt(i); if (ch != '\\') { result.append(ch); continue; } i++; if (i == limit) { break; } ch = str.charAt(i); switch (ch) { case '\\': case '/': case '\"': result.append(ch); break; case 'b': result.append('\b'); break; case 'f': result.append('\f'); break; case 'n': result.append('\n'); break; case 'r': result.append('\r'); break; case 't': result.append('\t'); break; case 'u': int sequenceLength = 4; if (i + sequenceLength >= limit) { error("Error parsing escape sequence '\\" + ch + "'"); } String code = str.substring(i + 1, i + 1 + sequenceLength); i += sequenceLength; try { int value = Integer.parseInt(code, 16); result.append((char) value); } catch (NumberFormatException nfe) { error("Error parsing escape sequence '\\" + ch + "'"); } break; default: result.append(ch); break; } } return result.toString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy