All downloads are free. The search and download functionality uses the official Maven repository.

io.konig.rio.turtle.TurtleParser Maven / Gradle / Ivy

There is a newer version: 2.11.0
Show newest version
package io.konig.rio.turtle;

/*
 * #%L
 * Konig Core
 * %%
 * Copyright (C) 2015 - 2017 Gregory McFall
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */


import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;

import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.helpers.RDFParserBase;

import io.konig.core.util.IriTemplate;

public class TurtleParser extends RDFParserBase {

	// Size of the pushback buffer handed to every PushbackReader this parser creates.
	private static final int PUSH_BACK_SIZE = 20;
	
	// Character source; read() sets it to null once end of input has been reached.
	protected PushbackReader reader;
	// Scratch buffer shared by the token readers and by err(); buffer() clears it.
	protected StringBuilder buffer = new StringBuilder();
	// Base URI supplied to parse(); stored by initParse.
	private String baseURI;
	// Line counter reported in error messages; starts at 1.
	private int lineNumber=1;
	// Column counter reported in error messages; reset when a newline is read.
	private int columnNumber;
	// Prefix-to-namespace bindings; filled by prefixID and consulted by prefixedName.
	protected NamespaceMap namespaceMap;
	
	/**
	 * Creates a parser that starts with an empty {@link HashNamespaceMap}.
	 */
	public TurtleParser() {
		this.namespaceMap = new HashNamespaceMap();
	}
	
	/**
	 * Creates a parser that records prefixes in the supplied map.
	 *
	 * @param namespaceMap the map to use; when null a new {@link HashNamespaceMap} is created
	 */
	public TurtleParser(NamespaceMap namespaceMap) {
		if (namespaceMap == null) {
			this.namespaceMap = new HashNamespaceMap();
		} else {
			this.namespaceMap = namespaceMap;
		}
	}
	
	/**
	 * Creates a parser that records prefixes in the supplied map and builds
	 * values with the supplied factory.
	 *
	 * @param namespaceMap the map to use; when null a new {@link HashNamespaceMap} is
	 *        created, matching the single-argument constructor
	 * @param valueFactory the factory passed to the base parser
	 */
	public TurtleParser(NamespaceMap namespaceMap, ValueFactory valueFactory) {
		super(valueFactory);
		// Without this guard a null map would surface later as a NullPointerException
		// on the first @prefix directive or prefixed name.
		this.namespaceMap = namespaceMap==null ? new HashNamespaceMap() : namespaceMap;
	}
	
	/**
	 * Empties the shared scratch buffer and hands it back for reuse.
	 *
	 * @return the (now empty) shared buffer
	 */
	protected StringBuilder buffer() {
		buffer.delete(0, buffer.length());
		return buffer;
	}
	
	/**
	 * Skips leading whitespace and collects characters up to, but not including,
	 * the first character contained in {@code delim} (or end of input).
	 * The delimiter is pushed back onto the reader.
	 *
	 * @param delim the set of delimiter characters
	 * @return the collected text, possibly empty
	 * @throws IOException if reading fails
	 */
	protected String nextWord(String delim) throws IOException {
		skipSpace();
		StringBuilder word = buffer();
		for (int ch = read(); ch != -1; ch = read()) {
			if (delim.lastIndexOf(ch) >= 0) {
				unread(ch);
				break;
			}
			word.appendCodePoint(ch);
		}
		return word.toString();
	}
	
	/**
	 * @return the map in which this parser records prefix bindings
	 */
	public NamespaceMap getNamespaceMap() {
		return this.namespaceMap;
	}

	/**
	 * Parses a whole document: resets the position counters, notifies the handler
	 * (if any) of start and end, and reads statements until end of input.
	 *
	 * @throws IOException if reading fails
	 * @throws RDFParseException if the input is not well formed
	 * @throws RDFHandlerException if the handler rejects an event
	 */
	private void turtleDoc() throws IOException, RDFParseException, RDFHandlerException {
		lineNumber = 1;
		columnNumber = 0;
		if (rdfHandler != null) {
			rdfHandler.startRDF();
		}
		// Look ahead BEFORE the first statement so that an empty or
		// whitespace-only document is accepted instead of failing in statement().
		int c = next();
		unread(c);
		while (c != -1) {
			statement();
			c = next();
			unread(c);
		}
		
		if (rdfHandler != null) {
			rdfHandler.endRDF();
		}
		
	}
	
	/**
	 * Parses one statement: either a directive (introduced by '@') or a set of
	 * triples terminated by '.'.
	 */
	private void statement() throws IOException, RDFParseException, RDFHandlerException {
		final int first = next();
		if (first != '@') {
			triples(first);
			read('.');
			return;
		}
		directive(first);
	}

	/**
	 * Dispatches on the keyword that follows '@'. Only "prefix" and "base" are
	 * recognised; any other keyword is left unconsumed and nothing happens.
	 *
	 * @param c the '@' character that introduced the directive (not inspected here)
	 */
	protected void directive(int c) throws IOException, RDFParseException, RDFHandlerException {
		if (tryWord("prefix")) {
			prefixID();
			return;
		}
		if (tryWord("base")) {
			base();
		}
	}


	/**
	 * Handles an {@code @base} directive. Not implemented: always reports a parse error.
	 *
	 * @throws RDFParseException always
	 */
	protected void base() throws RDFParseException {
		// TODO: implement @base directive
		final String message = "@base directive is not supported";
		fail(message);
	}

	/**
	 * Parses the remainder of a prefix directive, stores the binding in the
	 * namespace map and reports it through {@link #namespace(String, String)}.
	 * <pre>
	 * prefixID	::=	'@prefix' PNAME_NS IRIREF '.'
	 * </pre>
	 * This method assumes we have already read '@prefix'.
	 */
	protected void prefixID() throws RDFParseException, IOException, RDFHandlerException {
		readSpace();
		final String prefix = pname_ns();
		final int opening = next();
		final String namespaceIri = iriRef(opening);
		read('.');
		namespaceMap.put(prefix, namespaceIri);
		namespace(prefix, namespaceIri);
	}

	/**
	 * Forwards a prefix binding to the RDF handler, when one is set.
	 *
	 * @param prefix the namespace prefix
	 * @param name the namespace IRI bound to the prefix
	 */
	protected void namespace(String prefix, String name) throws RDFHandlerException {
		if (rdfHandler == null) {
			return;
		}
		rdfHandler.handleNamespace(prefix, name);
	}

	/**
	 * PNAME_NS	::=	PN_PREFIX? ':'
	 * 
*/ protected String pname_ns() throws IOException, RDFParseException { StringBuilder builder = buffer(); int c = next(); pn_prefix(c); c = next(); if (c != ':') { builder = err(); builder.append("Expected ':' but found '"); appendCodePoint(builder, c); builder.append("'"); fail(builder); } return builder.toString(); } protected String pn_prefix() throws RDFParseException, IOException { buffer(); int c = next(); pn_prefix(c); return buffer.toString(); } /** * PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? */ protected void pn_prefix(int c) throws IOException, RDFParseException { if (pn_chars_base(c)) { buffer.appendCodePoint(c); boolean endsWithDot=false; for (;;) { c = read(); if (pn_chars(c)) { buffer.appendCodePoint(c); endsWithDot = false; } else if (c=='.') { buffer.appendCodePoint(c); endsWithDot = true; } else { break; } } if (endsWithDot) { throw new RDFParseException("Namespace prefix cannot end with '.'"); } } unread(c); } /** * PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] */ protected boolean pn_chars(int c) { return pn_chars_u(c) || (c == '-') || inRange(c, '0', '9') || (c == 0xB7) || inRange(c, 0x300, 0x36F) || inRange(c, 0x203F, 0x2040); } /** * PN_CHARS_U ::= PN_CHARS_BASE | '_' */ protected boolean pn_chars_u(int c) { return pn_chars_base(c) || (c == '_') ; } protected boolean pn_chars_base(int c) { return inRange(c, 'A', 'Z') || inRange(c, 'a', 'z') || inRange(c, 0xC0, 0xD6) || inRange(c, 0xD8, 0xF6) || inRange(c, 0xF8, 0x2FF) || inRange(c, 0x370, 0x37D) || inRange(c, 0x37F, 0x1FFF) || inRange(c, 0x200C, 0x200D) || inRange(c, 0x2070, 0x218F) || inRange(c, 0x2C00, 0x2FEF) || inRange(c, 0x3001, 0xD7FF) || inRange(c, 0xF900, 0xFDCF) || inRange(c, 0xFDF0, 0xFFFD) || inRange(c, 0x10000, 0xEFFFF) ; } protected boolean inRange(int c, int min, int max) { return c>=min && c<=max; } protected void readSpace() throws IOException, RDFParseException { int c = read(); if (!isWhitespace(c)) { StringBuilder err = 
err(); err.append("Expected whitespace but found '"); appendCodePoint(err, c); err.append("'"); fail(err); } skipSpace(); } protected boolean skipSpace() throws IOException { boolean result = false; int c = read(); while (isWhitespace(c)) { result = true; c = read(); } unread(c); return result; } protected void triples(int c) throws RDFParseException, IOException, RDFHandlerException { boolean done = false; if (c == '[') { BNode subject = tryBlankNodePropertyList(c); if (subject != null) { done = true; c = next(); unread(c); if (c != '.') { predicateObjectList(subject); } } } if (!done) { Resource subject = subject(c); predicateObjectList(subject); } } protected void predicateObjectList(Resource subject) throws IOException, RDFParseException, RDFHandlerException { URI predicate = verb(); objectList(subject, predicate); int c = next(); while (c == ';') { predicate = verb(); objectList(subject, predicate); c = next(); } unread(c); } private void objectList(Resource subject, URI predicate) throws IOException, RDFHandlerException, RDFParseException { int c = next(); Value object = object(c); statement(subject, predicate, object); c = next(); while (c == ',') { c = next(); object = object(c); statement(subject, predicate, object); c = next(); } unread(c); } protected void statement(Resource subject, URI predicate, Value object) throws RDFHandlerException { if (rdfHandler != null) { Statement st = valueFactory.createStatement(subject, predicate, object); rdfHandler.handleStatement(st); } } /** *
	 * object	::=	iri | BlankNode | collection | blankNodePropertyList | literal
	 * 
*/ protected Value object(int c) throws RDFParseException, IOException, RDFHandlerException { Value value = null; value = (value=tryCollection(c)) != null ? value : (value=tryLiteral(c)) != null ? value : (value=tryBlankNode(c)) != null ? value : (value=tryBlankNodePropertyList(c)) != null ? value : (value=tryIri(c)) != null ? value : null; if (value == null) { fail("Expected (iri | BlankNode | collection | blankNodePropertyList | literal)"); } return value; } private Value tryCollection(int c) { // TODO Auto-generated method stub return null; } protected BNode blankNodePropertyList() throws IOException, RDFParseException, RDFHandlerException { int c = read(); BNode result = tryBlankNodePropertyList(c); if (result == null) { unread(c); } return result; } /** *
	 * 	blankNodePropertyList	::=	'[' predicateObjectList ']'
	 * 
* @throws IOException * @throws RDFHandlerException * @throws RDFParseException */ protected BNode tryBlankNodePropertyList(int c) throws IOException, RDFParseException, RDFHandlerException { if (c != '[') { return null; } c = next(); unread(c); if (c == ']') { // No properties inside this BNode. return null; } BNode bnode = valueFactory.createBNode(); predicateObjectList(bnode); c = next(); if (c != ']') { StringBuilder builder = err(); builder.append("Expected ']' but found '"); appendCodePoint(builder, c); builder.append("'"); fail(builder); } return bnode; } protected void appendCodePoint(StringBuilder builder, int c) { if (c == -1) { builder.append("<>"); } else { builder.appendCodePoint(c); } } protected Literal tryLiteral() throws IOException, RDFParseException, RDFHandlerException { int c = read(); Literal literal = tryLiteral(c); if (literal == null) { unread(c); } return literal; } /** *
	 * literal	::=	RDFLiteral | NumericLiteral | BooleanLiteral
	 * 
* @throws IOException * @throws RDFParseException * @throws RDFHandlerException */ private Literal tryLiteral(int c) throws IOException, RDFParseException, RDFHandlerException { Literal value = null; value = (value=tryRDFLiteral(c)) != null ? value : (value=tryNumericLiteral(c)) != null ? value : (value=tryBooleanLiteral(c)) != null ? value : null; return value; } private Literal tryBooleanLiteral(int c) throws IOException { Literal result = null; if ((c=='t') && tryWord("rue")) { result = valueFactory.createLiteral(true); } else if ((c=='f') && tryWord("alse")) { result = valueFactory.createLiteral(false); } if (result != null) { c = read(); unread(c); if (!isWhitespace(c) && (c != ',') && (c!=';') && (c!='.') && (c!='/') && (c!='^') && (c!=']')) { boolean value = result.booleanValue(); if (value) { unread("rue"); } else { unread("alse"); } result = null; } } return result; } protected void unread(String text) throws IOException { for (int i=text.length()-1; i>=0; i--) { unread(text.charAt(i)); } } /** *
	 * NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
	 *                
	 * INTEGER ::= [+-]? [0-9]+
	 * 
	 * DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
	 * 
	 * DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
	 *      
	 *        
	 * 
* * We have the following possible parses *
	 * 
	 * 	[+-]? 
	 *     [0-9]+                      >>>> INTEGER
	 *        '.' [0-9]+               >>>> DECIMAL
	 *        '.' [0-9]+ EXPONENT      >>>> DOUBLE
	 *        '.' EXPONENT             >>>> DOUBLE
	 *        EXPONENT                 >>>> DOUBLE
	 *     '.' [0-9]+                  >>>> DECIMAL
	 *     '.' [0-9]+ EXPONENT         >>>> DOUBLE
	 *      
	 * 
* * Let's define the following production rules. * *
	 * 
	 *  NumericLiteral ::= UnsignedNumber | [+-] UnsignedNumber
	 *  
	 *  UnsignedNumber ::= NumberWithIntegerPart | DecimalPart ;
	 *  
	 *  NumberWithIntegerPart ::= [0-9]+ (DecimalPart | EXPONENT)?
	 *  
	 *  DecimalPart ::= '.' [0-9]+ EXPONENT?
	 * 
	 * 
*/ private Literal tryNumericLiteral(int c) throws IOException, RDFParseException { Literal result = null; boolean digit = isDigit(c); if ( !digit && c!='+' && c!='-' && c!= '.' ) { return null; } StringBuilder builder = buffer(); if (c=='+' || c=='-') { builder.appendCodePoint(c); c = read(); digit = isDigit(c); } if (digit) { result = numberWithIntegerPart(c); } else if (c=='.') { result = tryDecimalPart(c); } else { builder = err(); builder.append("Invalid numeric literal. Expected [0-9] or '.' but found '"); builder.appendCodePoint(c); builder.append("'"); fail(builder); } return result; } /** *
	 * DecimalPart ::= '.' [0-9]+ EXPONENT?
	 * 
* @throws IOException * @throws RDFParseException */ private Literal tryDecimalPart(int c) throws IOException, RDFParseException { Literal result = null; if (c=='.') { c = read(); if (c=='e' | c=='E') { buffer.append('.'); result = tryExponent(c); } else if (!isDigit(c)) { // Do nothing } else { buffer.append('.'); while (isDigit(c)) { buffer.appendCodePoint(c); c = read(); } result = tryExponent(c); if (result == null) { unread(c); result = valueFactory.createLiteral(buffer.toString(), XMLSchema.DECIMAL); } } if (result == null) { unread(c); } } return result; } /** *
	 * NumberWithIntegerPart ::= [0-9]+ (DecimalPart | EXPONENT)?
	 * 
* @return * @throws IOException * @throws RDFParseException */ private Literal numberWithIntegerPart(int c) throws RDFParseException, IOException { while (isDigit(c)) { buffer.appendCodePoint(c); c = read(); } Literal result = (result=tryDecimalPart(c)) != null ? result : (result=tryExponent(c)) != null ? result : integerLiteral(c); return result; } private Literal integerLiteral(int c) throws IOException { unread(c); return valueFactory.createLiteral(buffer.toString(), XMLSchema.INTEGER); } /** *
	 * EXPONENT ::= [eE] [+-]? [0-9]+
	 * 
* @throws IOException * @throws RDFParseException */ private Literal tryExponent(int c) throws IOException, RDFParseException { Literal result = null; if (c == 'e' || c=='E') { buffer.appendCodePoint(c); c = read(); if (c=='+' || c=='-') { buffer.appendCodePoint(c); c = read(); } if (!isDigit(c)) { unread(c); return null; } while (isDigit(c)) { buffer.appendCodePoint(c); c = read(); } unread(c); result = valueFactory.createLiteral(buffer.toString(), XMLSchema.DOUBLE); } return result; } protected boolean isLetter(int c) { return inRange(c, 'a', 'z') || inRange(c, 'A', 'Z'); } protected boolean isDigit(int c) { return c>='0' && c<='9'; } /** *
	 * 	RDFLiteral	::=	String (LANGTAG | '^^' iri)?
	 * 
* @throws IOException * @throws RDFParseException * @throws RDFHandlerException */ private Literal tryRDFLiteral(int c) throws RDFParseException, IOException, RDFHandlerException { Literal result = null; if (c=='\'' || c=='"') { return rdfLiteral(c); } return result; } /** *
	 * 	RDFLiteral	::=	String (LANGTAG | '^^' iri)?
	 * 
* @throws IOException * @throws RDFParseException * @throws RDFHandlerException */ private Literal rdfLiteral(int c) throws IOException, RDFParseException, RDFHandlerException { Literal result = null; string(c, buffer()); c = read(); String text = buffer.toString(); if (c == '@') { String lang = langtag(c); result = valueFactory.createLiteral(text, lang); } else if (c=='^') { c = read(); if (c != '^') { err(); buffer.append("Invalid string literal. Expected '^^' but found '^"); buffer.appendCodePoint(c); buffer.append("'"); fail(buffer); } c = read(); result = valueFactory.createLiteral(text, iri(c)); } else { unread(c); result = valueFactory.createLiteral(text, XMLSchema.STRING); } return result; } /** *
	 * LANGTAG	::=	'@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
	 * 
* @throws IOException */ private String langtag(int c) throws RDFParseException, IOException { if (c!='@') { err(); buffer.append("Expected '@' but found '"); buffer.appendCodePoint(c); buffer.append("'"); fail(buffer); } buffer(); c = read(); if (!inRange(c, 'a', 'z') && !inRange(c, 'A', 'Z')) { err(); buffer.append("Language tag must start with a letter, but found '"); buffer.appendCodePoint(c); buffer.append("'"); fail(buffer); } while ( inRange(c, 'a', 'z') || inRange(c, 'A', 'Z') || isDigit(c) || c=='-' ) { buffer.appendCodePoint(c); c = read(); } unread(c); return buffer.toString(); } /** *
	 * String	::=	STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
	 * 
*/ private void string(int c, StringBuilder builder) throws RDFParseException, IOException { if (c!='\'' && c!='"') { err(); buffer.append("Expected ['] or [\"] but found ["); buffer.appendCodePoint(c); buffer.append("'"); fail(buffer); } int d = read(); int e = read(); if (c=='\'') { if (d=='\'' && e=='\'') { string_literal_long_single_quote(c, d, e, builder); } else { unread(e); unread(d); string_literal_single_quote(c, builder); } } else { if (d=='"' && e=='"') { string_literal_long_quote(c, d, e, builder); } else { unread(e); unread(d); string_literal_quote(c, builder); } } } /** *
	 * STRING_LITERAL_QUOTE	::=	'"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"'
	 * 
* * where *
	 *   #x22=" 
	 *   #x5C=\ 
	 *   #xA=new line 
	 *   #xD=carriage return
	 * 
*/ private void string_literal_quote(int c, StringBuilder builder) throws RDFParseException, IOException { assertEquals('"', c); while ( stringQuoteChar(c=read(), builder)); assertEquals('"', c); } protected void assertEqualsIgnoreCase(int expected, int actual) throws RDFParseException { if (Character.toUpperCase(expected) != Character.toUpperCase(actual)) { StringBuilder err = err(); int lower = Character.toLowerCase(expected); int upper = Character.toUpperCase(expected); err.append("Expected '"); err.appendCodePoint(lower); err.append("' or '"); err.appendCodePoint(upper); err.append("' but found '"); appendCodePoint(err, actual); err.append("'"); fail(err); } } protected void assertIgnoreCase(String expected) throws IOException, RDFParseException { for (int i=0; i * STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' *
*/ private void string_literal_long_quote(int c, int d, int e, StringBuilder builder) throws RDFParseException, IOException { assertEquals('"', c); assertEquals('"', d); assertEquals('"', e); while ( stringLongQuoteChar(c=read(), builder)); assertEquals('"', c); read('"'); read('"'); } private boolean stringLongSingleQuoteChar(int c, StringBuilder builder) throws RDFParseException, IOException { if (c == '\'') { return quoteChar('\'', builder); } if (echar(c, builder) || uchar(c, builder)) { return true; } if (c!='\\' ) { builder.appendCodePoint(c); return true; } return false; } private boolean stringLongQuoteChar(int c, StringBuilder builder) throws RDFParseException, IOException { if (c == '"') { return quoteChar('"', builder); } if (echar(c, builder) || uchar(c, builder)) { return true; } if (c!='\\' ) { builder.appendCodePoint(c); return true; } return false; } private boolean quoteChar(char q, StringBuilder builder) throws IOException { int d = read(); if (d != q) { // Matched ["][^"] unread(d); builder.append(q); return true; } else { // Matched ["]["] int e = read(); if (e != q) { // Matched ["]["][^"] unread(e); builder.append(q); builder.append(q); return true; } else { // Matched ["]["]["] // This is the termination of the string, not characters within the string. unread(q); unread(q); return false; } } } private void string_literal_single_quote(int c, StringBuilder builder) throws RDFParseException, IOException { assertEquals('\'', c); while ( stringSingleQuoteChar(c=read(), builder)); assertEquals('\'', c); } /** *
	 * 	STRING_LITERAL_SINGLE_QUOTE	::=	"'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" 
	 * 
* * where * *
	 *  #x27='
	 *  #x5C=\ 
	 *  #xA=new line 
	 *  #xD=carriage return 
	 * 
*/ private boolean stringSingleQuoteChar(int c, StringBuilder builder) throws RDFParseException, IOException { if (echar(c, builder) || uchar(c, builder)) { return true; } if (c!='\'' && c!='\\' && c!='\n' && c!='\r') { builder.appendCodePoint(c); return true; } return false; } private void string_literal_long_single_quote(int c, int d, int e, StringBuilder builder) throws RDFParseException, IOException { final char q = '\''; assertEquals(q, c); assertEquals(q, d); assertEquals(q, e); while ( stringLongSingleQuoteChar(c=read(), builder)); assertEquals(q, c); read(q); read(q); } /** *
	 * BlankNode	::=	BLANK_NODE_LABEL | ANON
	 * 
* @param c * @return * @throws IOException * @throws RDFParseException */ private BNode tryBlankNode(int c) throws RDFParseException, IOException { BNode bnode = null; return (bnode=blank_node_label(c)) == null ? anon(c) : bnode; } /** *
	 * ANON	::=	'[' WS* ']'
	 * 
*/ private BNode anon(int c) throws IOException { if (c != '[') { return null; } c = next(); if (c != ']') { unread(c); return null; } return valueFactory.createBNode(); } /** *
	 * BLANK_NODE_LABEL	::=	'_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
	 * 
* @throws RDFParseException */ private BNode blank_node_label(int c) throws IOException, RDFParseException { if (c != '_') { return null; } c = read(); if (c != ':') { unread(c); return null; } c = read(); if (!(pn_chars_u(c) || inRange(c, '0', '9')) ) { StringBuilder builder = err(); builder.append("Invalid initial character for BNode label: '"); builder.appendCodePoint(c); builder.append("'"); fail(builder); } StringBuilder builder = buffer(); builder.appendCodePoint(c); int last = -1; for (;;) { c = read(); if (!(pn_chars(c) || (c=='.') )) { break; } builder.appendCodePoint(c); last = c; } unread(c); if (last == '.') { unread(last); } return valueFactory.createBNode(builder.toString()); } private URI tryIri(int c) { int saveLineNumber = lineNumber; int saveColumnNo = columnNumber; try { return iri(c); } catch (Throwable oops) { lineNumber = saveLineNumber; columnNumber = saveColumnNo; } return null; } protected URI verb() throws IOException, RDFParseException, RDFHandlerException { int c = next(); if (c == 'a') { int cc = read(); if (isWhitespace(cc)) { return RDF.TYPE; } unread(cc); } return iri(c); } /** *
	 * subject	::=	iri | BlankNode | collection
	 * 
* @throws RDFHandlerException */ protected Resource subject(int c) throws RDFParseException, IOException, RDFHandlerException { Resource result = null; if (c == '_' || c=='[') { result = tryBlankNode(c); if (result == null) { fail("Invalid BNode definition"); } } else if (c == '(') { fail("TODO: Implement collection"); } else { result = iri(c); } if (result == null) { StringBuilder msg = err(); msg.append("Found '"); msg.appendCodePoint(c); msg.append("' but expected (iri | BlankNode | collection)"); fail(msg); } return result; } protected URI iri() throws RDFParseException, IOException, RDFHandlerException { return iri(read()); } /** * iri ::= IRIREF | PrefixedName * @throws RDFHandlerException */ protected URI iri(int c) throws RDFParseException, IOException, RDFHandlerException { if (c == '<') { String text = iriRef(c); return valueFactory.createURI(text); } else { return prefixedName(c); } } /** *
	 * PrefixedName	::=	PNAME_LN | PNAME_NS
	 *              ::= (PNAME_NS PN_LOCAL) | PNAME_NS
	 *              ::= PNAME_NS PN_LOCAL?
	 * 
*/ protected URI prefixedName(int c) throws IOException, RDFParseException { unread(c); String prefix = pname_ns(); String localName = pn_local(); String namespace = namespaceMap.get(prefix); if (namespace == null) { fail("Namespace not defined for prefix '" + prefix + "'"); } return valueFactory.createURI(namespace + localName); } /** *
	 * Reads the local part of a prefixed name into the shared buffer.
	 * 
	 * PN_LOCAL	::=	(PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
	 * 
	 * A local name cannot end with an unescaped '.', so trailing dots are removed
	 * from the result and pushed back onto the input (typically the statement
	 * terminator). PLX sequences are appended to the buffer by plx() itself and
	 * are therefore not appended again here.
	 * 
	 * @return The string matching the PN_LOCAL rule, or an empty string if no match.
	 */
	public String pn_local() throws IOException, RDFParseException {
		StringBuilder builder = buffer();
		int c = read();
		if (pn_chars_u(c) || (c == ':') || inRange(c, '0', '9')) {
			builder.appendCodePoint(c);
		} else if (!plx(c)) {
			unread(c);
			return builder.toString();
		}
		int trailingDots = 0;
		for (;;) {
			c = read();
			if (pn_chars(c) || (c == ':')) {
				builder.appendCodePoint(c);
				trailingDots = 0;
			} else if (c == '.') {
				builder.appendCodePoint(c);
				trailingDots++;
			} else if (plx(c)) {
				// plx() already appended the sequence; an escaped '.' is not a trailing dot.
				trailingDots = 0;
			} else {
				break;
			}
		}
		// Push back the lookahead first, then the dots, so the dots are re-read first.
		unread(c);
		for (int i = 0; i < trailingDots; i++) {
			unread('.');
		}
		builder.setLength(builder.length() - trailingDots);
		return builder.toString();
	}

	/**
	 * Tests whether {@code c} starts a percent sequence or a local-name escape,
	 * consuming the rest of the sequence when it does.
	 * 
	 * PLX	::=	PERCENT | PN_LOCAL_ESC
	 * 
	 * @return true when a PLX sequence was consumed
	 * @throws IOException
	 * @throws RDFParseException
	 */
	protected boolean plx(int c) throws IOException, RDFParseException {
		if (percent(c)) {
			return true;
		}
		return pn_local_esc(c);
	}

	/**
	 * Tries to consume a local-name escape that starts with {@code c}.
	 * 
	 * pn_local_esc :== '\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')
	 * 
	 * The escape stands for the character after the backslash, so only that
	 * character is appended to the shared buffer. When the character after the
	 * backslash is not escapable it is pushed back and false is returned.
	 * 
	 * @return true when an escape was consumed
	 */
	private boolean pn_local_esc(int c) throws IOException {
		if (c != '\\') {
			return false;
		}
		final String escapable = "_~.-!$&'()*+,;=/?#@%";
		int next = read();
		// next may be -1 at end of input; it is then simply not escapable.
		if (next != -1 && escapable.indexOf(next) >= 0) {
			buffer.appendCodePoint(next);
			return true;
		}
		unread(next);
		return false;
	}

	/**
	 * PERCENT	::=	'%' HEX HEX
	 * 
* @throws IOException * @throws RDFParseException */ private boolean percent(int c) throws RDFParseException, IOException { if (c != '%') { return false; } char[] array = {hex(), hex()}; String text = new String(array); c = Integer.parseInt(text, 16); buffer.appendCodePoint(c); return true; } protected IriTemplate iriTemplate() throws RDFParseException, IOException { int c = next(); String text = iriRef(c); return new IriTemplate(text); } protected String iriRef() throws IOException, RDFParseException { skipSpace(); int c = read(); return iriRef(c); } protected String iriRef(int c) throws IOException, RDFParseException { assertEquals('<', c); StringBuilder builder = buffer(); c = read(); while (c != -1 && c!='>') { if (c == '\\') { uchar(); } else { builder.appendCodePoint(c); } c=read(); } assertEquals('>', c); return builder.toString(); } /** * Read UCHAR, assuming we have already consumed the leading backslash. * *
	 * UCHAR ::= '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
	 * 
* * @throws RDFParseException * @throws IOException */ private void uchar() throws RDFParseException, IOException { int c = read(); char[] array=null; if (c == 'u') { char[] tmp = {hex(), hex(), hex(), hex()}; array = tmp; } else if (c == 'U') { char[] tmp = {hex(), hex(), hex(), hex(), hex(), hex(), hex(), hex()}; array = tmp; } else { StringBuilder builder = err(); builder.append("Invalid escape sequence in IRI value. Expected (\\u HEX HEX HEX | \\U HEX HEX HEX HEX HEX HEX HEX HEX) but found: \\"); builder.appendCodePoint(c); fail(builder); } String value = new String(array); c = Integer.parseInt(value, 16); buffer.appendCodePoint(c); } protected char hex() throws IOException, RDFParseException { int c = read(); if (!inRange(c, 'a', 'f') && !inRange(c, 'A', 'F') && !inRange(c, '0', '9') ) { StringBuilder msg = err(); msg.append("Invalid HEX value. Expected [a-f] | [A-F] | [0-9] but found '"); msg.appendCodePoint(c); msg.append("'"); fail(msg); } return (char) c; } protected StringBuilder err() { buffer.setLength(0); buffer.append("Line "); buffer.append(lineNumber); if (columnNumber > 0) { buffer.append(':'); buffer.append(columnNumber-1); } buffer.append(' '); return buffer; } protected void fail(String msg) throws RDFParseException { StringBuilder builder = err(); builder.append(msg); fail(builder); } protected void fail(StringBuilder builder) throws RDFParseException { throw new RDFParseException(builder.toString()); } protected int next() throws IOException { int c = read(); while (isWhitespace(c)) { c = read(); } return c; } @Override public RDFFormat getRDFFormat() { return RDFFormat.TURTLE; } @Override public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException { InputStreamReader reader = new InputStreamReader(in); parse(reader, baseURI); } protected void initParse(Reader reader, String baseURI) { if (this.reader != reader) { this.reader = new PushbackReader(reader, PUSH_BACK_SIZE); } this.baseURI = 
baseURI; } @Override public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException { initParse(reader, baseURI); turtleDoc(); } protected int read() throws IOException { if (reader == null) { return -1; } int next = reader.read(); // System.out.println(" READ " + (char)next); if (next == -1) { reader = null; return -1; } columnNumber++; if (Character.isHighSurrogate((char) next)) { next = Character.toCodePoint((char)next, (char) reader.read()); columnNumber++; } if (next == '\n') { lineNumber++; columnNumber = 0; } return next; } protected void unread(int codePoint) throws IOException { if (codePoint != -1) { // System.out.println("UNREAD " + (char) codePoint); if (Character.isSupplementaryCodePoint(codePoint)) { final char[] surrogatePair = Character.toChars(codePoint); reader.unread(surrogatePair); columnNumber -= 2; } else { if (codePoint=='\n') { lineNumber--; columnNumber=Integer.MIN_VALUE; } else { columnNumber--; } if (reader == null) { reader = new PushbackReader(new StringReader(""), PUSH_BACK_SIZE); } reader.unread(codePoint); } } } protected boolean isWhitespace(int c) { return c==' ' || c=='\t' || c=='\r' || c=='\n'; } protected boolean tryWhitespace() throws IOException { int c = read(); if (isWhitespace(c)) { skipSpace(); return true; } unread(c); return false; } protected String tryCaseInsensitiveWord(String text) throws IOException { StringBuilder buffer = buffer(); String upper = text.toUpperCase(); for (int i=0; i=0; j--) { unread(buffer.charAt(j)); } return null; } } int next = read(); unread(next); if (next > 0 && Character.isAlphabetic(next)) { for (int j=text.length(); j>=0; j--) { unread(buffer.charAt(j)); } return null; } return buffer.toString(); } protected boolean tryWord(String text) throws IOException { for (int i=0; i=0; j--) { unread(text.charAt(j)); } return false; } } return true; } protected void read(char c) throws IOException, RDFParseException { int k = next(); assertEquals(c, k); } 
protected int peek() throws IOException { int c = read(); unread(c); return c; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy