// com.bigdata.rdf.rio.turtle.BigdataTurtleParser (Maven / Gradle / Ivy)
/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
*
* Licensed under the Aduna BSD-style license.
*/
package com.bigdata.rdf.rio.turtle;
import info.aduna.text.ASCIIUtil;
import java.io.IOException;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.turtle.TurtleParser;
import org.openrdf.rio.turtle.TurtleUtil;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
/**
* RDF parser for Turtle
* files. This parser is not thread-safe, therefore its public methods are
* synchronized.
*
* This implementation is based on the 2006/01/02 version of the Turtle
* specification, with slight deviations:
*
* - Normalization of integer, floating point and boolean values is dependent
* on the specified datatype handling. According to the specification, integers
* and booleans should be normalized, but floats don't.
* - Comments can be used anywhere in the document, and extend to the end of
* the line. The Turtle grammar doesn't allow comments to be used inside triple
* constructs that extend over multiple lines, but the author's own parser
* deviates from this too.
*
*
* @author Arjohn Kampman
* @openrdf
*/
public class BigdataTurtleParser extends TurtleParser {
/**
* Standalone variant of the parser, should be used only
* when connection is not available.
*/
public BigdataTurtleParser() {
super(BigdataValueFactoryImpl.getInstance(""));
}
/**
* Parses an RDF value. This method parses uriref, qname, node ID, quoted
* literal, integer, double and boolean.
*/
protected Value parseValue()
throws IOException, RDFParseException
{
int c = peek();
if (c == '<') {
// uriref, e.g.
return parseURIOrSid();
}
else if (c == ':' || TurtleUtil.isPrefixStartChar(c)) {
// qname or boolean
return parseQNameOrBoolean();
}
else if (c == '_') {
// node ID, e.g. _:n1
return parseNodeID();
}
else if (c == '"' || c == '\'') {
// quoted literal, e.g. "foo" or """foo""" or 'foo' or '''foo'''
return parseQuotedLiteral();
}
else if (ASCIIUtil.isNumber(c) || c == '.' || c == '+' || c == '-') {
// integer or double, e.g. 123 or 1.2e3
return parseNumber();
}
else if (c == -1) {
throwEOFException();
return null;
}
else {
reportFatalError("Expected an RDF value here, found '" + (char)c + "'");
return null;
}
}
protected Value parseURIOrSid()
throws IOException, RDFParseException
{
// First character should be '<'
int c = read();
verifyCharacterOrFail(c, "<");
int n = peek();
if (n == '<') {
read();
if (this.valueFactory == null) {
reportFatalError("must use a BigdataValueFactory to use the RDR syntax");
}
return parseSid();
} else {
unread(c);
return parseURI();
}
}
protected Value parseSid()
throws IOException, RDFParseException
{
Resource s = (Resource) parseValue();
skipWS();
URI p = (URI) parseValue();
skipWS();
Value o = parseValue();
int i = read();
while (TurtleUtil.isWhitespace(i)) {
i = read();
}
if (i == '>' && read() == '>') {
if (valueFactory == null ||
valueFactory instanceof BigdataValueFactory == false) {
/*
* The BigdataValueFactory has an extension to create a BNode
* from a Statement. You need to specify that value factory
* when you create the parser using setValueFactory().
*/
throw new RDFParseException(
"You must use a BigdataValueFactory to use the RDR syntax");
}
try {
// write the RDR statement
reportStatement(s, p, o);
} catch (RDFHandlerException ex) {
throw new IOException(ex);
}
final BigdataValueFactory valueFactory = (BigdataValueFactory)
super.valueFactory;
return valueFactory.createBNode(
valueFactory.createStatement(s, p, o));
} else {
reportFatalError("expecting >> to close statement identifier");
throw new IOException("expecting >> to close statement identifier");
}
}
/**
* Consumes any white space characters (space, tab, line feed, newline) and
* comments (#-style) from reader. After this method has been
* called, the first character that is returned by reader is either
* a non-ignorable character, or EOF. For convenience, this character is also
* returned by this method.
*
* @return The next character that will be returned by reader.
*/
protected int skipWS()
throws IOException
{
int c = read();
while (TurtleUtil.isWhitespace(c)) {
c = read();
}
unread(c);
return c;
}
/**
* Parses a blank node ID, e.g. _:node1.
*/
protected BNode parseNodeID()
throws IOException, RDFParseException
{
// Node ID should start with "_:"
verifyCharacterOrFail(read(), "_");
verifyCharacterOrFail(read(), ":");
// Read the node ID
int c = read();
if (c == -1) {
throwEOFException();
}
// modified to allow fully numeric bnode ids
// else if (!TurtleUtil.isNameStartChar(c)) {
// reportError("Expected a letter, found '" + (char)c + "'", BasicParserSettings.PRESERVE_BNODE_IDS);
// }
StringBuilder name = new StringBuilder(32);
name.append((char)c);
// Read all following letter and numbers, they are part of the name
c = read();
// If we would never go into the loop we must unread now
if (!TurtleUtil.isNameChar(c)) {
unread(c);
}
while (TurtleUtil.isNameChar(c)) {
int previous = c;
c = read();
if (previous == '.' && (c == -1 || TurtleUtil.isWhitespace(c) || c == '<' || c == '_')) {
unread(c);
unread(previous);
break;
}
name.append((char)previous);
if(!TurtleUtil.isNameChar(c))
{
unread(c);
}
}
return createBNode(name.toString());
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy