org.openrdf.rio.turtle.TurtleParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sesame-rio-turtle Show documentation
Rio parser and writer implementation for the Turtle file format.
There is a newer version: 4.1.2
/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.rio.turtle;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.io.input.BOMInputStream;

import info.aduna.text.ASCIIUtil;

import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.IRI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.SimpleValueFactory;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RioSetting;
import org.openrdf.rio.helpers.BasicParserSettings;
import org.openrdf.rio.helpers.AbstractRDFParser;
import org.openrdf.rio.helpers.TurtleParserSettings;

/**
 * RDF parser for Turtle files. This parser is not thread-safe, therefore its
 * public methods are synchronized.
 * <p>
 * This implementation is based on the 2006/01/02 version of the Turtle
 * specification, with slight deviations:
 * <ul>
 * <li>Normalization of integer, floating point and boolean values is dependent
 * on the specified datatype handling. According to the specification, integers
 * and booleans should be normalized, but floats should not.</li>
 * <li>Comments can be used anywhere in the document, and extend to the end of
 * the line. The Turtle grammar doesn't allow comments to be used inside triple
 * constructs that extend over multiple lines, but the author's own parser
 * deviates from this too.</li>
 * <li>The local name part of a prefixed name is allowed to start with a number
 * (cf. the W3C Turtle Working Draft).</li>
 * </ul>
 * 
 * @author Arjohn Kampman
 */
public class TurtleParser extends AbstractRDFParser {

	/*-----------*
	 * Variables *
	 *-----------*/

	/**
	 * Character source for the current parse run. It is (re)created in
	 * {@code parse(Reader, String)} as a PushbackReader with room for 8
	 * pushed-back characters.
	 */
	private PushbackReader reader;

	/**
	 * Subject of the triple(s) currently being parsed; reset to null at the
	 * end of {@code parseTriples()}.
	 */
	protected Resource subject;

	/**
	 * Predicate of the triple(s) currently being parsed; reset to null at the
	 * end of {@code parseTriples()}.
	 */
	protected IRI predicate;

	/**
	 * Most recently parsed object value; reset to null at the end of
	 * {@code parseTriples()}.
	 */
	protected Value object;

	/**
	 * Current line in the input, starting at 1. It is reset at the start of
	 * every parse run and incremented by {@code skipWSC()} for each line feed
	 * that method consumes.
	 */
	private int lineNumber = 1;

	/*--------------*
	 * Constructors *
	 *--------------*/

	/**
	 * Creates a new TurtleParser that will use a {@link SimpleValueFactory} to
	 * create RDF model objects.
	 * <p>
	 * All initialisation is delegated to the no-argument constructor of the
	 * superclass.
	 */
	public TurtleParser() {
		super();
	}

	/**
	 * Creates a new TurtleParser that will use the supplied ValueFactory to
	 * create RDF model objects.
	 * <p>
	 * The factory is handed unchanged to the superclass constructor.
	 * 
	 * @param valueFactory
	 *        A ValueFactory.
	 */
	public TurtleParser(ValueFactory valueFactory) {
		super(valueFactory);
	}

	/*---------*
	 * Methods *
	 *---------*/

	/**
	 * Identifies the RDF serialization handled by this parser.
	 * 
	 * @return always {@link RDFFormat#TURTLE}.
	 */
	public RDFFormat getRDFFormat() {
		return RDFFormat.TURTLE;
	}

	@Override
	public Collection> getSupportedSettings() {
		Set> result = new HashSet>(super.getSupportedSettings());
		result.add(TurtleParserSettings.CASE_INSENSITIVE_DIRECTIVES);
		return result;
	}

	/**
	 * Parses Turtle data from a byte stream. The stream is wrapped in a
	 * {@link BOMInputStream}, decoded as UTF-8, and handed to
	 * {@link #parse(Reader, String)}, which performs the actual parsing and
	 * also validates the base URI.
	 * 
	 * @param in
	 *        The InputStream from which to read the data, must not be
	 *        null. The InputStream is supposed to contain UTF-8 encoded
	 *        Unicode characters, as per the Turtle specification.
	 * @param baseURI
	 *        The URI associated with the data in the InputStream, must not be
	 *        null.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the InputStream.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler encountered an unrecoverable
	 *         error.
	 * @throws IllegalArgumentException
	 *         If the supplied input stream or base URI is null.
	 */
	public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (in == null) {
			throw new IllegalArgumentException("Input stream must not be 'null'");
		}

		// The base URI is deliberately not checked here; the Reader variant
		// does that.
		try {
			final InputStream bomAware = new BOMInputStream(in, false);
			final Reader utf8Reader = new InputStreamReader(bomAware, "UTF-8");
			parse(utf8Reader, baseURI);
		}
		catch (UnsupportedEncodingException e) {
			// UTF-8 support is mandatory on every Java platform, so this
			// indicates a broken runtime rather than bad input.
			throw new RuntimeException(e);
		}
	}

	/**
	 * Parses Turtle data from a character stream. After validating its
	 * arguments the method notifies the handler that parsing starts, resets the
	 * line counter, parses statements until end of input, clears parser state
	 * (even when parsing fails) and finally notifies the handler that parsing
	 * has ended.
	 * 
	 * @param reader
	 *        The Reader from which to read the data, must not be null.
	 * @param baseURI
	 *        The URI associated with the data in the Reader, must not be
	 *        null.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the Reader.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler encountered an unrecoverable
	 *         error.
	 * @throws IllegalArgumentException
	 *         If the supplied reader or base URI is null.
	 */
	public synchronized void parse(Reader reader, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (reader == null) {
			throw new IllegalArgumentException("Reader must not be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("base URI must not be 'null'");
		}

		if (rdfHandler != null) {
			rdfHandler.startRDF();
		}

		// Up to 8 characters of look-ahead can be pushed back.
		this.reader = new PushbackReader(reader, 8);

		// Line numbering is 1-based.
		lineNumber = 1;

		// Store normalized base URI
		setBaseURI(baseURI);

		reportLocation();

		try {
			// skipWSC() yields -1 once only whitespace/comments are left.
			for (int next = skipWSC(); next != -1; next = skipWSC()) {
				parseStatement();
			}
		}
		finally {
			clear();
		}

		if (rdfHandler != null) {
			rdfHandler.endRDF();
		}
	}

	/**
	 * Parses one statement: either a directive (<code>@prefix</code>,
	 * <code>@base</code>, or the SPARQL-style <code>PREFIX</code>/
	 * <code>BASE</code>) or a set of triples.
	 * <p>
	 * Up to 8 characters are read ahead, stopping early at whitespace or end
	 * of input, to decide which of the two it is. Directives starting with '@'
	 * and triples must be terminated by a '.'; SPARQL-style directives are not.
	 */
	protected void parseStatement()
		throws IOException, RDFParseException, RDFHandlerException
	{
		final StringBuilder head = new StringBuilder(8);

		// "@prefix" is the longest directive keyword, so 8 chars of look-ahead
		// are sufficient.
		while (true) {
			final int cp = readCodePoint();
			if (cp == -1 || TurtleUtil.isWhitespace(cp)) {
				unread(cp);
				break;
			}
			head.append(Character.toChars(cp));
			if (head.length() >= 8) {
				break;
			}
		}

		final String directive = head.toString();
		final boolean startsWithAt = directive.startsWith("@");
		final boolean isDirective = startsWithAt || directive.equalsIgnoreCase("prefix")
				|| directive.equalsIgnoreCase("base");

		if (!isDirective) {
			// Not a directive: put everything back and parse it as triples.
			unread(directive);
			parseTriples();
			skipWSC();
			verifyCharacterOrFail(readCodePoint(), ".");
			return;
		}

		parseDirective(directive);
		skipWSC();
		// SPARQL BASE and PREFIX lines do not end in .
		if (startsWithAt) {
			verifyCharacterOrFail(readCodePoint(), ".");
		}
	}

	/**
	 * Dispatches on the directive keyword found at the start of
	 * <code>directive</code> and parses the rest of the directive.
	 * <p>
	 * Exact-case <code>@prefix</code>/<code>@base</code> and any-case
	 * <code>prefix</code>/<code>base</code> are always accepted. Other
	 * capitalisations of <code>@prefix</code>/<code>@base</code> are reported
	 * as a fatal error unless
	 * {@link TurtleParserSettings#CASE_INSENSITIVE_DIRECTIVES} is enabled.
	 * Characters read beyond the keyword are pushed back before parsing
	 * continues.
	 * 
	 * @param directive
	 *        the look-ahead text that starts with the directive keyword.
	 */
	protected void parseDirective(String directive)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (directive.startsWith("@prefix")) {
			unreadDirectiveRemainder(directive, 7);
			parsePrefixID();
			return;
		}
		if (directive.startsWith("@base")) {
			unreadDirectiveRemainder(directive, 5);
			parseBase();
			return;
		}
		if (directive.regionMatches(true, 0, "prefix", 0, 6)) {
			// SPARQL doesn't require whitespace after directive, so must unread
			// if we found part of the prefixID
			unreadDirectiveRemainder(directive, 6);
			parsePrefixID();
			return;
		}
		if (directive.regionMatches(true, 0, "base", 0, 4)) {
			unreadDirectiveRemainder(directive, 4);
			parseBase();
			return;
		}
		if (directive.regionMatches(true, 0, "@prefix", 0, 7)) {
			if (!this.getParserConfig().get(TurtleParserSettings.CASE_INSENSITIVE_DIRECTIVES)) {
				reportFatalError("Cannot strictly support case-insensitive @prefix directive in compliance mode.");
			}
			unreadDirectiveRemainder(directive, 7);
			parsePrefixID();
			return;
		}
		if (directive.regionMatches(true, 0, "@base", 0, 5)) {
			if (!this.getParserConfig().get(TurtleParserSettings.CASE_INSENSITIVE_DIRECTIVES)) {
				reportFatalError("Cannot strictly support case-insensitive @base directive in compliance mode.");
			}
			unreadDirectiveRemainder(directive, 5);
			parseBase();
			return;
		}

		if (directive.length() == 0) {
			reportFatalError("Directive name is missing, expected @prefix or @base");
		}
		else {
			reportFatalError("Unknown directive \"" + directive + "\"");
		}
	}

	/**
	 * Pushes back whatever follows the first <code>keywordLength</code>
	 * characters of <code>directive</code>, if anything.
	 */
	private void unreadDirectiveRemainder(String directive, int keywordLength)
		throws IOException
	{
		if (directive.length() > keywordLength) {
			unread(directive.substring(keywordLength));
		}
	}

	/**
	 * Parses the remainder of a prefix directive: the prefix label, the ':'
	 * separator and the namespace IRI. The resulting mapping is stored in the
	 * parser and reported to the handler, if one is set.
	 */
	protected void parsePrefixID()
		throws IOException, RDFParseException, RDFHandlerException
	{
		skipWSC();

		// Collect the prefix label (possibly empty), stopping at ':' or
		// whitespace.
		final StringBuilder label = new StringBuilder(8);

		int cp = readCodePoint();
		while (cp != ':' && !TurtleUtil.isWhitespace(cp)) {
			if (cp == -1) {
				throwEOFException();
			}
			label.append(Character.toChars(cp));
			cp = readCodePoint();
		}
		if (cp == ':') {
			// The separator is verified below, so hand it back.
			unread(cp);
		}

		skipWSC();

		verifyCharacterOrFail(readCodePoint(), ":");

		skipWSC();

		// Read the namespace URI
		final IRI namespace = parseURI();

		// Store and report this namespace mapping
		final String prefixStr = label.toString();
		final String namespaceStr = namespace.toString();

		setNamespace(prefixStr, namespaceStr);

		if (rdfHandler != null) {
			rdfHandler.handleNamespace(prefixStr, namespaceStr);
		}
	}

	/**
	 * Parses the remainder of a base directive: skips whitespace and comments,
	 * reads the IRI and installs it as the new base URI.
	 */
	protected void parseBase()
		throws IOException, RDFParseException, RDFHandlerException
	{
		skipWSC();

		final IRI newBase = parseURI();
		final String newBaseString = newBase.toString();

		setBaseURI(newBaseString);
	}

	/**
	 * Parses a subject followed by its predicate-object list and reports the
	 * resulting statements. A subject that starts with '[' is handled here so
	 * that the empty form <code>[]</code> (which must be followed by a
	 * predicate-object list) can be told apart from a blank node property
	 * list (for which a trailing predicate-object list is optional).
	 * <p>
	 * The current subject, predicate and object are cleared before returning.
	 */
	protected void parseTriples()
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (peekCodePoint() != '[') {
			parseSubject();
			skipWSC();
			parsePredicateObjectList();
		}
		else {
			// Consume the '[' and look past whitespace/comments to see which
			// of the two blank node forms this is.
			readCodePoint();
			skipWSC();

			if (peekCodePoint() == ']') {
				// "[]": anonymous subject
				readCodePoint();
				subject = createBNode();
				skipWSC();
				parsePredicateObjectList();
			}
			else {
				// "[ ... ]": restore the bracket and let parseImplicitBlank()
				// handle the nested predicate-object list
				unread('[');
				subject = parseImplicitBlank();
			}

			skipWSC();

			// Unless the statement ends here, more predicates and objects
			// follow for the subject parsed above.
			if (peekCodePoint() != '.') {
				parsePredicateObjectList();
			}
		}

		subject = null;
		predicate = null;
		object = null;
	}

	/**
	 * Parses a ';'-separated list of predicates, each followed by its object
	 * list. Repeated ';' separators are tolerated, and a trailing ';' directly
	 * before '.', ']' or '}' ends the list.
	 */
	protected void parsePredicateObjectList()
		throws IOException, RDFParseException, RDFHandlerException
	{
		predicate = parsePredicate();
		skipWSC();
		parseObjectList();

		while (skipWSC() == ';') {
			// consume the ';'
			readCodePoint();

			final int next = skipWSC();

			// end of triple, or end of a predicateObjectList inside a blank
			// node
			if (next == '.' || next == ']' || next == '}') {
				break;
			}

			// Another ';' means an empty predicateObjectList: just loop again.
			if (next != ';') {
				predicate = parsePredicate();
				skipWSC();
				parseObjectList();
			}
		}
	}

	/**
	 * Parses one or more objects separated by ','. Each object is reported as
	 * a statement by {@code parseObject()}.
	 */
	protected void parseObjectList()
		throws IOException, RDFParseException, RDFHandlerException
	{
		parseObject();

		for (int next = skipWSC(); next == ','; next = skipWSC()) {
			// consume the ',' and move on to the next object
			readCodePoint();
			skipWSC();
			parseObject();
		}
	}

	/**
	 * Parses the subject of a triple and stores it in {@code subject}. The
	 * subject may be a collection, a blank node in square brackets, or any
	 * other value that turns out to be a {@link Resource}; anything else is a
	 * fatal error.
	 */
	protected void parseSubject()
		throws IOException, RDFParseException, RDFHandlerException
	{
		final int first = peekCodePoint();

		if (first == '(') {
			subject = parseCollection();
			return;
		}
		if (first == '[') {
			subject = parseImplicitBlank();
			return;
		}

		final Value candidate = parseValue();
		if (!(candidate instanceof Resource)) {
			reportFatalError("Illegal subject value: " + candidate);
			return;
		}
		subject = (Resource)candidate;
	}

	/**
	 * Parses a predicate. The keyword <code>a</code> followed by whitespace is
	 * returned as {@link RDF#TYPE}; otherwise the next value is parsed and
	 * must be an IRI.
	 * 
	 * @return the predicate IRI, or null when a non-IRI value was found and
	 *         the fatal error report did not abort parsing.
	 */
	protected IRI parsePredicate()
		throws IOException, RDFParseException, RDFHandlerException
	{
		final int first = readCodePoint();

		if (first == 'a') {
			final int second = readCodePoint();

			if (TurtleUtil.isWhitespace(second)) {
				// The 'a' short-cut for rdf:type is used
				return RDF.TYPE;
			}

			// Just a name that happens to start with 'a': restore the
			// look-ahead
			unread(second);
		}
		unread(first);

		// Predicate is a normal resource
		final Value candidate = parseValue();
		if (!(candidate instanceof IRI)) {
			reportFatalError("Illegal predicate value: " + candidate);
			return null;
		}
		return (IRI)candidate;
	}

	/**
	 * Parses a single object (a collection, a blank node in square brackets
	 * or a plain value), stores it in {@code object} and reports the statement
	 * formed with the current subject and predicate.
	 */
	protected void parseObject()
		throws IOException, RDFParseException, RDFHandlerException
	{
		switch (peekCodePoint()) {
			case '(':
				object = parseCollection();
				break;
			case '[':
				object = parseImplicitBlank();
				break;
			default:
				object = parseValue();
				break;
		}

		reportStatement(subject, predicate, object);
	}

	/**
	 * Parses a collection, e.g. <code>( item1 item2 item3 )</code>, and reports
	 * the rdf:first/rdf:rest statements that encode it.
	 * 
	 * @return {@link RDF#NIL} for an empty collection, otherwise the blank
	 *         node at the head of the list.
	 */
	protected Resource parseCollection()
		throws IOException, RDFParseException, RDFHandlerException
	{
		verifyCharacterOrFail(readCodePoint(), "(");

		if (skipWSC() == ')') {
			// Empty list: consume ')' and we're done
			readCodePoint();
			return RDF.NIL;
		}

		final BNode listRoot = createBNode();

		// Items are reported through parseObject(), which uses the current
		// subject/predicate, so those are saved here and restored afterwards.
		final Resource savedSubject = subject;
		final IRI savedPredicate = predicate;

		BNode current = listRoot;
		subject = current;
		predicate = RDF.FIRST;

		parseObject();

		while (skipWSC() != ')') {
			// Append a fresh list node and make it the current one
			final BNode next = createBNode();
			reportStatement(current, RDF.REST, next);

			current = next;
			subject = current;

			parseObject();
		}

		// Skip ')'
		readCodePoint();

		// Terminate the list
		reportStatement(current, RDF.REST, RDF.NIL);

		subject = savedSubject;
		predicate = savedPredicate;

		return listRoot;
	}

	/**
	 * Parses an implicit blank node: either the token <code>[]</code> or a
	 * predicate-object list enclosed in square brackets. Statements found
	 * inside the brackets are reported with the new blank node as subject.
	 * 
	 * @return the blank node created for the bracketed expression.
	 */
	protected Resource parseImplicitBlank()
		throws IOException, RDFParseException, RDFHandlerException
	{
		verifyCharacterOrFail(readCodePoint(), "[");

		final BNode blank = createBNode();

		final int next = readCodePoint();
		if (next == ']') {
			// "[]" without properties
			return blank;
		}
		unread(next);

		// The nested list is parsed with the blank node as subject; the outer
		// subject and predicate are put back afterwards.
		final Resource savedSubject = subject;
		final IRI savedPredicate = predicate;

		subject = blank;

		skipWSC();
		parsePredicateObjectList();
		skipWSC();

		// Read closing bracket
		verifyCharacterOrFail(readCodePoint(), "]");

		subject = savedSubject;
		predicate = savedPredicate;

		return blank;
	}

	/**
	 * Parses an RDF value. The first character decides which kind of value is
	 * parsed: an IRI in angle brackets, a prefixed name or boolean, a blank
	 * node ID, a quoted literal, or a number.
	 * 
	 * @return the parsed value, or null when an unexpected character was found
	 *         and the fatal error report did not abort parsing.
	 */
	protected Value parseValue()
		throws IOException, RDFParseException, RDFHandlerException
	{
		final int first = peekCodePoint();

		if (first == '<') {
			// IRI reference in angle brackets
			return parseURI();
		}
		if (first == ':' || TurtleUtil.isPrefixStartChar(first)) {
			// qname or boolean
			return parseQNameOrBoolean();
		}
		if (first == '_') {
			// node ID, e.g. _:n1
			return parseNodeID();
		}
		if (first == '"' || first == '\'') {
			// quoted literal, e.g. "foo" or """foo""" or 'foo' or '''foo'''
			return parseQuotedLiteral();
		}
		if (ASCIIUtil.isNumber(first) || first == '.' || first == '+' || first == '-') {
			// integer or double, e.g. 123 or 1.2e3
			return parseNumber();
		}
		if (first == -1) {
			throwEOFException();
			return null;
		}

		reportFatalError("Expected an RDF value here, found '" + new String(Character.toChars(first)) + "'");
		return null;
	}

	/**
	 * Parses a quoted string, optionally followed by a language tag
	 * (<code>@lang</code>) or a datatype (<code>^^datatype</code>), and turns
	 * it into a literal.
	 * 
	 * @return the literal, or null when the datatype is not an IRI and the
	 *         fatal error report did not abort parsing.
	 */
	protected Literal parseQuotedLiteral()
		throws IOException, RDFParseException, RDFHandlerException
	{
		final String label = parseQuotedString();

		// The character right after the closing quote tells which form this is
		final int marker = peekCodePoint();

		if (marker == '^') {
			readCodePoint();

			// a datatype is introduced by two carets
			verifyCharacterOrFail(readCodePoint(), "^");

			skipWSC();

			final Value datatype = parseValue();
			if (!(datatype instanceof IRI)) {
				reportFatalError("Illegal datatype value: " + datatype);
				return null;
			}
			return createLiteral(label, null, (IRI)datatype, getLineNumber(), -1);
		}

		if (marker != '@') {
			// plain literal
			return createLiteral(label, null, null, getLineNumber(), -1);
		}

		// consume the '@'
		readCodePoint();

		final StringBuilder lang = new StringBuilder(8);

		int c = readCodePoint();
		if (c == -1) {
			throwEOFException();
		}

		final boolean verifyLanguageTag = getParserConfig().get(BasicParserSettings.VERIFY_LANGUAGE_TAGS);
		if (verifyLanguageTag && !TurtleUtil.isLanguageStartChar(c)) {
			reportError("Expected a letter, found '" + new String(Character.toChars(c)) + "'",
					BasicParserSettings.VERIFY_LANGUAGE_TAGS);
		}

		lang.append(Character.toChars(c));

		c = readCodePoint();
		while (true) {
			// SES-1887 : a language tag need not be followed by whitespace;
			// punctuation that can legitimately follow a literal (or end of
			// input) ends the tag as well.
			final boolean endsTag = c == '.' || c == ';' || c == ',' || c == ')' || c == ']' || c == -1;
			if (TurtleUtil.isWhitespace(c) || endsTag) {
				break;
			}
			if (verifyLanguageTag && !TurtleUtil.isLanguageChar(c)) {
				reportError("Illegal language tag char: '" + new String(Character.toChars(c)) + "'",
						BasicParserSettings.VERIFY_LANGUAGE_TAGS);
			}
			lang.append(Character.toChars(c));
			c = readCodePoint();
		}

		// the terminating character is not part of the tag
		unread(c);

		return createLiteral(label, lang.toString(), null, getLineNumber(), -1);
	}

	/**
	 * Parses a quoted string, which is either a "normal string" or a """long
	 * string""", delimited by double or single quotes, and decodes the escape
	 * sequences it contains.
	 * 
	 * @return the string contents; left undecoded when decoding failed and the
	 *         error report did not abort parsing.
	 */
	protected String parseQuotedString()
		throws IOException, RDFParseException
	{
		final int quote = readCodePoint();

		// First character should be '"' or "'"
		verifyCharacterOrFail(quote, "\"\'");

		// A long string opens with the same quote character three times in a
		// row
		final int second = readCodePoint();
		final int third = readCodePoint();

		final boolean isQuoteChar = quote == '"' || quote == '\'';
		final boolean isLongString = isQuoteChar && second == quote && third == quote;

		String result;
		if (isLongString) {
			result = parseLongString(second);
		}
		else {
			// Normal string: only the opening quote belongs to the delimiter
			unread(third);
			unread(second);

			result = parseString(quote);
		}

		// Unescape any escape sequences
		try {
			result = TurtleUtil.decodeString(result);
		}
		catch (IllegalArgumentException e) {
			reportError(e.getMessage(), BasicParserSettings.VERIFY_DATATYPE_VALUES);
		}

		return result;
	}

	/**
	 * Parses a "normal string". This method requires that the opening
	 * character has already been parsed. Backslash escapes are copied verbatim
	 * (backslash plus the following character) so that an escaped quote does
	 * not end the string.
	 * 
	 * @param closingCharacter
	 *        the quote character that terminates the string.
	 * @return the raw, still escaped, string contents without the quotes.
	 */
	protected String parseString(int closingCharacter)
		throws IOException, RDFParseException
	{
		final StringBuilder raw = new StringBuilder(32);

		int cp = readCodePoint();
		while (cp != closingCharacter) {
			if (cp == -1) {
				throwEOFException();
			}

			raw.append(Character.toChars(cp));

			if (cp == '\\') {
				// Copy the escaped character too; it might be a quote
				cp = readCodePoint();
				if (cp == -1) {
					throwEOFException();
				}
				raw.append(Character.toChars(cp));
			}

			cp = readCodePoint();
		}

		return raw.toString();
	}

	/**
	 * Parses a """long string""". This method requires that the first three
	 * characters have already been parsed. Reading continues until three
	 * consecutive closing characters have been seen; backslash escapes are
	 * copied verbatim.
	 * 
	 * @param closingCharacter
	 *        the quote character, three of which terminate the string.
	 * @return the raw, still escaped, string contents without the delimiters.
	 */
	protected String parseLongString(int closingCharacter)
		throws IOException, RDFParseException
	{
		final StringBuilder raw = new StringBuilder(1024);

		// number of closing characters seen in a row
		int quoteRun = 0;

		while (quoteRun < 3) {
			int cp = readCodePoint();

			if (cp == -1) {
				throwEOFException();
			}
			else {
				quoteRun = (cp == closingCharacter) ? quoteRun + 1 : 0;
			}

			raw.append(Character.toChars(cp));

			if (cp == '\\') {
				// Copy the escaped character too; it might be a quote
				cp = readCodePoint();
				if (cp == -1) {
					throwEOFException();
				}
				raw.append(Character.toChars(cp));
			}
		}

		// Drop the three closing quotes that were appended along the way
		raw.setLength(raw.length() - 3);
		return raw.toString();
	}

	/**
	 * Parses a numeric literal: an optional sign, digits, an optional
	 * fractional part and an optional exponent. The datatype of the result is
	 * xsd:integer by default, xsd:decimal when a fractional part is present
	 * and xsd:double when an exponent is present.
	 * <p>
	 * A '.' that is directly followed by whitespace is taken to be the end of
	 * the statement rather than a decimal point, and is left unread.
	 * 
	 * @return the typed literal for the characters that were read.
	 * @throws RDFParseException
	 *         if no digits were found where they are required, or if the
	 *         input ends inside an exponent.
	 */
	protected Literal parseNumber()
		throws IOException, RDFParseException
	{
		StringBuilder value = new StringBuilder(8);
		IRI datatype = XMLSchema.INTEGER;

		int c = readCodePoint();

		// read optional sign character
		if (c == '+' || c == '-') {
			value.append(Character.toChars(c));
			c = readCodePoint();
		}

		while (ASCIIUtil.isNumber(c)) {
			value.append(Character.toChars(c));
			c = readCodePoint();
		}

		if (c == '.' || c == 'e' || c == 'E') {

			// read optional fractional digits
			if (c == '.') {

				if (TurtleUtil.isWhitespace(peekCodePoint())) {
					// We're parsing an integer that did not have a space before the
					// period to end the statement
				}
				else {
					value.append(Character.toChars(c));

					c = readCodePoint();

					while (ASCIIUtil.isNumber(c)) {
						value.append(Character.toChars(c));
						c = readCodePoint();
					}

					if (value.length() == 1) {
						// We've only parsed a '.'
						reportFatalError("Object for statement missing");
					}

					// We're parsing a decimal or a double
					datatype = XMLSchema.DECIMAL;
				}
			}
			else {
				if (value.length() == 0) {
					// We've only parsed an 'e' or 'E'
					reportFatalError("Object for statement missing");
				}
			}

			// read optional exponent
			if (c == 'e' || c == 'E') {
				datatype = XMLSchema.DOUBLE;
				value.append(Character.toChars(c));

				c = readCodePoint();
				if (c == '+' || c == '-') {
					value.append(Character.toChars(c));
					c = readCodePoint();
				}

				if (!ASCIIUtil.isNumber(c)) {
					reportError("Exponent value missing", BasicParserSettings.VERIFY_DATATYPE_VALUES);

					if (c == -1) {
						// The error above may be non-fatal; appending EOF (-1)
						// below would then fail with an IllegalArgumentException
						// from Character.toChars instead of a parse error.
						throwEOFException();
					}
				}

				value.append(Character.toChars(c));

				c = readCodePoint();
				while (ASCIIUtil.isNumber(c)) {
					value.append(Character.toChars(c));
					c = readCodePoint();
				}
			}
		}

		// Unread last character, it isn't part of the number
		unread(c);

		// Return result as a typed literal
		return createLiteral(value.toString(), null, datatype, getLineNumber(), -1);
	}

	/**
	 * Parses an IRI reference enclosed in angle brackets, decodes the escape
	 * sequences in it and resolves it against the current base URI.
	 * <p>
	 * Unencoded spaces and backslash escapes other than <code>\\u</code> and
	 * <code>\\U</code> are reported as fatal errors.
	 * 
	 * @return the resolved IRI.
	 * @throws RDFParseException
	 *         if the IRI is malformed or the input ends before the closing
	 *         '&gt;'.
	 */
	protected IRI parseURI()
		throws IOException, RDFParseException
	{
		StringBuilder uriBuf = new StringBuilder(100);

		// First character should be '<'
		int c = readCodePoint();
		verifyCharacterOrFail(c, "<");

		// Read up to the next '>' character
		while (true) {
			c = readCodePoint();

			if (c == '>') {
				break;
			}
			else if (c == -1) {
				throwEOFException();
			}

			if (c == ' ') {
				// Render the character itself; concatenating the int would
				// print its numeric code instead.
				reportFatalError("IRI included an unencoded space: '" + new String(Character.toChars(c)) + "'");
			}

			uriBuf.append(Character.toChars(c));

			if (c == '\\') {
				// This escapes the next character, which might be a '>'
				c = readCodePoint();
				if (c == -1) {
					throwEOFException();
				}
				if (c != 'u' && c != 'U') {
					reportFatalError("IRI includes string escapes: '\\" + new String(Character.toChars(c)) + "'");
				}
				uriBuf.append(Character.toChars(c));
			}
		}

		// The loop above is only left with c == '>', so no further check on
		// the terminating character is needed here.

		String uri = uriBuf.toString();

		// Unescape any escape sequences
		try {
			// FIXME: The following decodes \n and similar in URIs, which should be
			// invalid according to the Turtle test suite
			uri = TurtleUtil.decodeString(uri);
		}
		catch (IllegalArgumentException e) {
			reportError(e.getMessage(), BasicParserSettings.VERIFY_DATATYPE_VALUES);
		}

		return super.resolveURI(uri);
	}

	/**
	 * Parses qnames and boolean values, which have equivalent starting
	 * characters.
	 * <p>
	 * The input is read as <code>prefix ':' localName</code>. If the text up to
	 * the first non-prefix character is exactly <code>true</code> or
	 * <code>false</code> and is not followed by ':', an xsd:boolean literal is
	 * returned instead. Otherwise the namespace registered for the prefix is
	 * looked up and concatenated with the local name.
	 * 
	 * @return an xsd:boolean literal, or the IRI built from the namespace and
	 *         the local name.
	 * @throws RDFParseException
	 *         if the input ends prematurely, the ':' separator is missing, the
	 *         prefix ends with '.', or the local name contains an incomplete
	 *         percent-encoded sequence.
	 */
	protected Value parseQNameOrBoolean()
		throws IOException, RDFParseException
	{
		// First character should be a ':' or a letter
		int c = readCodePoint();
		if (c == -1) {
			throwEOFException();
		}
		if (c != ':' && !TurtleUtil.isPrefixStartChar(c)) {
			reportError("Expected a ':' or a letter, found '" + new String(Character.toChars(c)) + "'",
					BasicParserSettings.VERIFY_RELATIVE_URIS);
		}

		String namespace = null;

		if (c == ':') {
			// qname using default namespace
			namespace = getNamespace("");
		}
		else {
			// c is the first letter of the prefix
			StringBuilder prefix = new StringBuilder(8);
			prefix.append(Character.toChars(c));

			// previousChar tracks the last character added to the prefix so a
			// trailing '.' can be detected below
			int previousChar = c;
			c = readCodePoint();
			while (TurtleUtil.isPrefixChar(c)) {
				prefix.append(Character.toChars(c));
				previousChar = c;
				c = readCodePoint();
			}

			if (c != ':') {
				// prefix may actually be a boolean value
				String value = prefix.toString();

				if (value.equals("true") || value.equals("false")) {
					unread(c);
					return createLiteral(value, null, XMLSchema.BOOLEAN, getLineNumber(), -1);
				}
			}
			else {
				if (previousChar == '.') {
					// '.' is a legal prefix name char, but can not appear at the end
					reportFatalError("prefix can not end with with '.'");
				}
			}

			// Reports a fatal error when the prefix is neither a boolean nor
			// followed by ':'
			verifyCharacterOrFail(c, ":");

			namespace = getNamespace(prefix.toString());
		}

		// c == ':', read optional local name
		StringBuilder localName = new StringBuilder(16);
		c = readCodePoint();
		if (TurtleUtil.isNameStartChar(c)) {
			if (c == '\\') {
				// A backslash introduces an escaped character; only the
				// escaped character itself is stored
				localName.append(readLocalEscapedChar());
			}
			else {
				localName.append(Character.toChars(c));
			}

			// previousChar holds the last character read as part of the name
			// (for an escape that is the backslash, not the escaped character)
			int previousChar = c;
			c = readCodePoint();
			while (TurtleUtil.isNameChar(c)) {
				if (c == '\\') {
					localName.append(readLocalEscapedChar());
				}
				else {
					localName.append(Character.toChars(c));
				}
				previousChar = c;
				c = readCodePoint();
			}

			// Unread last character
			unread(c);

			if (previousChar == '.') {
				// '.' is a legal name char, but can not appear at the end, so is
				// not actually part of the name
				unread(previousChar);
				localName.deleteCharAt(localName.length() - 1);
			}
		}
		else {
			// Unread last character
			unread(c);
		}

		String localNameString = localName.toString();

		// Every '%' in the local name must be followed by two hex digits
		for (int i = 0; i < localNameString.length(); i++) {
			if (localNameString.charAt(i) == '%') {
				if (i > localNameString.length() - 3 || !ASCIIUtil.isHex(localNameString.charAt(i + 1))
						|| !ASCIIUtil.isHex(localNameString.charAt(i + 2)))
				{
					reportFatalError("Found incomplete percent-encoded sequence: " + localNameString);
				}
			}
		}

		// if (c == '.') {
		// reportFatalError("Blank node identifier must not end in a '.'");
		// }

		// Note: namespace has already been resolved
		return createURI(namespace + localNameString);
	}

	/**
	 * Reads the character that follows a backslash inside a local name and
	 * checks that it is one of the characters that may be escaped there.
	 * 
	 * @return the escaped character, without the backslash.
	 * @throws RDFParseException
	 *         if the input ends after the backslash, or if the character is
	 *         not one of {@link TurtleUtil#LOCAL_ESCAPED_CHARS}.
	 */
	private char readLocalEscapedChar()
		throws RDFParseException, IOException
	{
		int c = readCodePoint();

		if (c == -1) {
			// Report end of input as a parse error; building the message
			// below with Character.toChars(-1) would throw an
			// IllegalArgumentException instead.
			throwEOFException();
		}

		if (TurtleUtil.isLocalEscapedChar(c)) {
			return (char)c;
		}
		else {
			throw new RDFParseException("found '" + new String(Character.toChars(c)) + "', expected one of: "
					+ Arrays.toString(TurtleUtil.LOCAL_ESCAPED_CHARS));
		}
	}

	/**
	 * Parses a blank node ID, e.g. <code>_:node1</code>.
	 * <p>
	 * A '.' is accepted inside the label, but a '.' that is followed by end of
	 * input, whitespace, '&lt;' or '_' is treated as the end of the statement
	 * and pushed back.
	 * 
	 * @return the blank node for the parsed label.
	 * @throws RDFParseException
	 *         if the "_:" marker is missing or the input ends right after it.
	 */
	protected BNode parseNodeID()
		throws IOException, RDFParseException
	{
		// Node ID should start with "_:"
		verifyCharacterOrFail(readCodePoint(), "_");
		verifyCharacterOrFail(readCodePoint(), ":");

		// Read the node ID
		int c = readCodePoint();
		if (c == -1) {
			throwEOFException();
		}
		else if (!TurtleUtil.isBLANK_NODE_LABEL_StartChar(c)) {
			// Render the full code point; a (char) cast would truncate
			// characters outside the Basic Multilingual Plane.
			reportError("Expected a letter, found '" + new String(Character.toChars(c)) + "'",
					BasicParserSettings.PRESERVE_BNODE_IDS);
		}

		StringBuilder name = new StringBuilder(32);
		name.append(Character.toChars(c));

		// Read all following letter and numbers, they are part of the name
		c = readCodePoint();

		// If we would never go into the loop we must unread now
		if (!TurtleUtil.isBLANK_NODE_LABEL_Char(c)) {
			unread(c);
		}

		while (TurtleUtil.isBLANK_NODE_LABEL_Char(c)) {
			int previous = c;
			c = readCodePoint();

			if (previous == '.' && (c == -1 || TurtleUtil.isWhitespace(c) || c == '<' || c == '_')) {
				// The '.' ends the statement and is not part of the label
				unread(c);
				unread(previous);
				break;
			}
			// Append as a code point so that supplementary characters keep
			// both of their surrogates.
			name.append(Character.toChars(previous));
			if (!TurtleUtil.isBLANK_NODE_LABEL_Char(c)) {
				unread(c);
			}
		}

		return createBNode(name.toString());
	}

	/**
	 * Creates a statement from the supplied subject, predicate and object and
	 * passes it to the handler, if one is set. The statement is created even
	 * when no handler is present.
	 */
	protected void reportStatement(Resource subj, IRI pred, Value obj)
		throws RDFParseException, RDFHandlerException
	{
		final Statement statement = createStatement(subj, pred, obj);
		if (rdfHandler == null) {
			return;
		}
		rdfHandler.handleStatement(statement);
	}

	/**
	 * Verifies that the supplied code point is one of the characters in
	 * <code>expected</code>. End of input (-1) is reported through
	 * {@link #throwEOFException()}; any other mismatch is reported as a fatal
	 * error that lists the expected characters and the one that was found.
	 * 
	 * @param codePoint
	 *        the code point that was read.
	 * @param expected
	 *        the acceptable characters.
	 */
	protected void verifyCharacterOrFail(int codePoint, String expected)
		throws RDFParseException
	{
		if (codePoint == -1) {
			throwEOFException();
		}

		final String supplied = new String(Character.toChars(codePoint));
		if (expected.contains(supplied)) {
			return;
		}

		final StringBuilder msg = new StringBuilder(32);
		msg.append("Expected ");
		String separator = "";
		for (int i = 0; i < expected.length(); i++) {
			msg.append(separator).append('\'').append(expected.charAt(i)).append('\'');
			separator = " or ";
		}
		msg.append(", found '").append(supplied).append("'");

		reportFatalError(msg.toString());
	}

	/**
	 * Consumes any whitespace characters and #-style comments from the reader.
	 * After this method has been called, the first character returned by the
	 * reader is either a non-ignorable character or EOF. For convenience, this
	 * character is also returned by this method.
	 * <p>
	 * Every line feed consumed here as whitespace increments the line counter.
	 * 
	 * @return The next character code point that will be returned by
	 *         reader.
	 */
	protected int skipWSC()
		throws IOException, RDFHandlerException
	{
		int cp;
		for (cp = readCodePoint(); TurtleUtil.isWhitespace(cp) || cp == '#'; cp = readCodePoint()) {
			if (cp == '#') {
				processComment();
			}
			else if (cp == '\n') {
				// Only LF is counted, not CR, as a CR is normally followed
				// directly by a LF.
				lineNumber++;
			}
		}

		// cp is the first character of interest (or EOF): hand it back
		unread(cp);

		return cp;
	}

	/**
	 * Consumes characters from reader until the first EOL has been read. This
	 * line of text is then passed to the {@link #rdfHandler} as a comment, and
	 * the new location is reported.
	 * <p>
	 * The line terminator is consumed here rather than by
	 * {@link #skipWSC()}, so the line counter is advanced here as well. As in
	 * {@link #skipWSC()}, only line feeds are counted.
	 */
	protected void processComment()
		throws IOException, RDFHandlerException
	{
		StringBuilder comment = new StringBuilder(64);
		int c = readCodePoint();
		while (c != -1 && c != 0xD && c != 0xA) {
			comment.append(Character.toChars(c));
			c = readCodePoint();
		}

		// c is equal to -1, \r or \n.
		if (c == 0xA) {
			lineNumber++;
		}
		else if (c == 0xD) {
			// In case c is equal to \r, we should also read a following \n.
			c = readCodePoint();

			if (c == 0xA) {
				lineNumber++;
			}
			else {
				unread(c);
			}
		}
		if (rdfHandler != null) {
			rdfHandler.handleComment(comment.toString());
		}
		reportLocation();
	}

	/**
	 * Reads the next Unicode code point. A high surrogate that is followed by
	 * a low surrogate is combined with it into one supplementary code point.
	 * A high surrogate that is followed by anything else (including end of
	 * input) is returned on its own, and the character after it is left in
	 * the reader.
	 * 
	 * @return the next Unicode code point, or -1 if the end of the stream has
	 *         been reached.
	 * @throws IOException
	 *         if reading from the underlying reader fails.
	 */
	protected int readCodePoint()
		throws IOException
	{
		int next = reader.read();
		if (next != -1 && Character.isHighSurrogate((char)next)) {
			final int low = reader.read();
			if (low != -1 && Character.isLowSurrogate((char)low)) {
				next = Character.toCodePoint((char)next, (char)low);
			}
			else if (low != -1) {
				// Not a valid pair: keep the following character for the next
				// read instead of folding it into a bogus code point.
				reader.unread(low);
			}
		}
		return next;
	}

	/**
	 * Pushes back a single code point by copying it to the front of the buffer.
	 * After this method returns, a call to {@link #readCodePoint()} will return
	 * the same code point again. The end-of-input marker (-1) is ignored.
	 * 
	 * @param codePoint
	 *        a single Unicode code point.
	 * @throws IOException
	 *         if the code point cannot be pushed back.
	 */
	protected void unread(int codePoint)
		throws IOException
	{
		if (codePoint == -1) {
			return;
		}

		if (Character.isSupplementaryCodePoint(codePoint)) {
			// Supplementary code points occupy two chars in the reader
			reader.unread(Character.toChars(codePoint));
		}
		else {
			reader.unread(codePoint);
		}
	}

	/**
	 * Pushes back the supplied string by copying it to the front of the buffer.
	 * After this method returns, successive calls to {@link #readCodePoint()}
	 * will return the code points in the supplied string again, starting at the
	 * first in the String.
	 * 
	 * @param string
	 *        the string to un-read.
	 * @throws IOException
	 *         if the string cannot be pushed back.
	 */
	protected void unread(String string)
		throws IOException
	{
		// Walk backwards over char indices, not code point counts:
		// codePointBefore() takes a char index, and a supplementary code point
		// spans two chars.
		int i = string.length();
		while (i > 0) {
			final int codePoint = string.codePointBefore(i);
			if (Character.isSupplementaryCodePoint(codePoint)) {
				final char[] surrogatePair = Character.toChars(codePoint);
				reader.unread(surrogatePair);
				i -= surrogatePair.length;
			}
			else {
				reader.unread(codePoint);
				i--;
			}
		}
	}

	/**
	 * Peeks at the next Unicode code point without advancing the reader: the
	 * code point is read and immediately pushed back.
	 * 
	 * @return the next Unicode code point, or -1 if the end of the stream has
	 *         been reached.
	 * @throws IOException
	 *         if reading or pushing back fails.
	 */
	protected int peekCodePoint()
		throws IOException
	{
		final int upcoming = readCodePoint();
		unread(upcoming);
		return upcoming;
	}

	/**
	 * Reports the current line number as the parser's location; the column is
	 * always passed as -1.
	 */
	protected void reportLocation() {
		final int currentLine = getLineNumber();
		reportLocation(currentLine, -1);
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportWarning(String)}, adding the
	 * current line number to the warning. The column is passed as -1.
	 */
	@Override
	protected void reportWarning(String msg) {
		final int currentLine = getLineNumber();
		reportWarning(msg, currentLine, -1);
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportError(String, RioSetting)}, adding
	 * line number information to the error. The column is passed as -1.
	 * 
	 * @param msg
	 *        the error message.
	 * @param setting
	 *        the boolean parser setting that is passed on together with the
	 *        error.
	 */
	@Override
	protected void reportError(String msg, RioSetting<Boolean> setting)
		throws RDFParseException
	{
		reportError(msg, getLineNumber(), -1, setting);
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportFatalError(String)}, adding the
	 * current line number to the error. The column is passed as -1.
	 */
	@Override
	protected void reportFatalError(String msg)
		throws RDFParseException
	{
		final int currentLine = getLineNumber();
		reportFatalError(msg, currentLine, -1);
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportFatalError(Exception)}, adding
	 * the current line number to the error. The column is passed as -1.
	 */
	@Override
	protected void reportFatalError(Exception e)
		throws RDFParseException
	{
		final int currentLine = getLineNumber();
		reportFatalError(e, currentLine, -1);
	}

	/**
	 * Signals that the input ended in the middle of a construct.
	 * 
	 * @throws RDFParseException
	 *         always, with the message "Unexpected end of file". No line
	 *         number is attached to the exception here.
	 */
	protected void throwEOFException()
		throws RDFParseException
	{
		throw new RDFParseException("Unexpected end of file");
	}

	/**
	 * Returns the current value of the line counter.
	 */
	private int getLineNumber() {
		return this.lineNumber;
	}
}