
src.com.hp.hpl.jena.riot.lang.LangTurtle Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tdb Show documentation
Show all versions of tdb Show documentation
TDB is a storage subsystem for Jena and ARQ
/*
* (c) Copyright 2009 Hewlett-Packard Development Company, LP
* All rights reserved.
* [See end of file]
*/
package com.hp.hpl.jena.riot.lang;
import static com.hp.hpl.jena.riot.tokens.TokenType.* ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import atlas.event.Event ;
import atlas.event.EventManager ;
import atlas.lib.Sink ;
import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.graph.Triple ;
import com.hp.hpl.jena.iri.IRI ;
import com.hp.hpl.jena.riot.Checker ;
import com.hp.hpl.jena.riot.IRIResolver ;
import com.hp.hpl.jena.riot.PrefixMap ;
import com.hp.hpl.jena.riot.Prologue ;
import com.hp.hpl.jena.riot.RIOT ;
import com.hp.hpl.jena.riot.tokens.Token ;
import com.hp.hpl.jena.riot.tokens.TokenType ;
import com.hp.hpl.jena.riot.tokens.Tokenizer ;
import com.hp.hpl.jena.sparql.core.NodeConst ;
import com.hp.hpl.jena.vocabulary.OWL ;
public class LangTurtle extends LangBase
{
/* See http://www.w3.org/TeamSubmission/turtle/ */
/*
[1] turtleDoc ::= statement*
[2] statement ::= directive '.' | triples '.' | ws+
[3] directive ::= prefixID | base
[4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref
[5] base ::= '@base' ws+ uriref
[6] triples ::= subject predicateObjectList
[7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';')?
[8] objectList ::= object ( ',' object)*
[9] verb ::= predicate | 'a'
[10] comment ::= '#' ( [^#xA#xD] )*
[11] subject ::= resource | blank
[12] predicate ::= resource
[13] object ::= resource | blank | literal
[14] literal ::= quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
[15] datatypeString ::= quotedString '^^' resource
[16] integer ::= ('-' | '+')? [0-9]+
[17] double ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
[18] decimal ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
[19] exponent ::= [eE] ('-' | '+')? [0-9]+
[20] boolean ::= 'true' | 'false'
[21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
[22] itemList ::= object+
[23] collection ::= '(' itemList? ')'
[24] ws ::= #x9 | #xA | #xD | #x20 | comment
[25] resource ::= uriref | qname
[26] nodeID ::= '_:' name
[27] qname ::= prefixName? ':' name?
[28] uriref ::= '<' relativeURI '>'
[29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
[30] nameStartChar ::= [A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
[31] nameChar ::= nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
[32] name ::= nameStartChar nameChar*
[33] prefixName ::= ( nameStartChar - '_' ) nameChar*
[34] relativeURI ::= ucharacter*
[35] quotedString ::= string | longString
[36] string ::= #x22 scharacter* #x22
[37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
[38] character ::= '\' 'u' hex hex hex hex | '\' 'U' hex hex hex hex hex hex hex hex |
'\\' | [#x20-#x5B] | [#x5D-#x10FFFF]
[39] echaracter ::= character | '\t' | '\n' | '\r'
[40] hex ::= [#x30-#x39] | [#x41-#x46]
[41] ucharacter ::= ( character - #x3E ) | '\>'
[42] scharacter ::= ( echaracter - #x22 ) | '\"'
[43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
*/
private static Logger log = LoggerFactory.getLogger(LangTurtle.class) ;
// Predicates
private static String KW_A = "a" ;
private static String KW_SAME_AS = "=" ;
private static String KW_LOG_IMPLIES = "=>" ;
private static String KW_TRUE = "true" ;
private static String KW_FALSE = "false" ;
private static final boolean VERBOSE = false ;
private static final boolean CHECKING = true ;
private final boolean strict = false ;
private final Prologue prologue ;
/** Provide access to the prologue.
* Use with care.
*/
public Prologue getPrologue() { return prologue ; }
/** Provide access to the prefix map.
* Note this parser uses a custom, lightweight prefix mapping implementation.
* Use with care.
*/
public PrefixMap getPrefixMap() { return prologue.getPrefixMap() ; }
private final Sink sink ;
// public LangTurtle(Tokenizer tokens)
// {
// this("http://example/", tokens, new PrintingSink(log)) ;
// }
public LangTurtle(String baseURI, Tokenizer tokens, Sink sink)
{
super(new Checker(null), null, tokens) ;
this.sink = sink ;
this.prologue = new Prologue(new PrefixMap(), new IRIResolver(baseURI)) ;
}
public final void parse()
{
EventManager.send(sink, new Event(RIOT.startRead, null)) ;
while(moreTokens())
{
if ( lookingAt(DIRECTIVE) )
{
if ( VERBOSE ) log.info(">> directive") ;
directive() ;
if ( VERBOSE ) log.info("<< directive") ;
continue ;
}
// Triples node.
// TriplesSameSubject -> TriplesNode PropertyList?
if ( peekTriplesNodeCompound() )
{
if ( VERBOSE ) log.info(">> compound") ;
Node n = triplesNodeCompound() ;
if ( VERBOSE ) log.info("<< compound") ;
// May be followed by:
// A predicateObject list
// A DOT or EOF.
if ( lookingAt(EOF) )
break ;
if ( lookingAt(DOT) )
{
nextToken() ;
continue ;
}
if ( peekPredicate() )
{
predicateObjectList(n) ;
expectEndOfTriples() ;
continue ;
}
exception("Unexpected token : %s", tokenResolve()) ;
}
// TriplesSameSubject -> Term PropertyListNotEmpty
if ( lookingAt(NODE) )
{
if ( VERBOSE ) log.info(">> triples") ;
triples() ;
if ( VERBOSE ) log.info("<< triples") ;
continue ;
}
exception("Out of place: %s", tokenResolve()) ;
}
EventManager.send(sink, new Event(RIOT.finishRead, null)) ;
}
private void directive()
{
String x = tokenResolve().getImage() ;
nextToken() ;
if ( x.equals("base") )
{
if ( VERBOSE ) log.info("@base") ;
directiveBase() ;
return ;
}
if ( x.equals("prefix") )
{
if ( VERBOSE ) log.info("@prefix") ;
directivePrefix() ;
return ;
}
exception("Unregcognized directive: %s", x) ;
}
private void directivePrefix()
{
// Raw - unresolved prefix name.
if ( ! lookingAt(PREFIXED_NAME) )
exception("@prefix requires a prefix (found '"+peekToken()+"')") ;
if ( peekToken().getImage2().length() != 0 )
exception("@prefix requires a prefix and no suffix (found '"+peekToken()+"')") ;
String prefix = peekToken().getImage() ;
nextToken() ;
if ( ! lookingAt(IRI) )
exception("@prefix requires an IRI (found '"+tokenResolve()+"')") ;
String iriStr = peekToken().getImage() ;
// CHECK
IRI iri = prologue.getResolver().resolveSilent(iriStr) ;
if ( getChecker() != null ) getChecker().checkIRI(iri) ;
prologue.getPrefixMap().add(prefix, iri) ;
nextToken() ;
if ( VERBOSE ) log.info("@prefix "+prefix+": "+iri.toString()) ;
expect("Prefix directive not terminated by a dot", DOT) ;
}
private void directiveBase()
{
String baseStr = peekToken().getImage() ;
// CHECK
IRI baseIRI = prologue.getResolver().resolve(baseStr) ;
if ( getChecker() != null ) getChecker().checkIRI(baseIRI) ;
if ( VERBOSE ) log.info("@base <"+baseIRI+">") ;
nextToken() ;
expect("Base directive not terminated by a dot", DOT) ;
prologue.setBaseURI(new IRIResolver(baseIRI)) ;
}
// Must be at least one triple.
private void triples()
{
Node subject = node() ;
if ( subject == null )
exception("Not recognized: expected directive or triples: %s", tokenResolve().text()) ;
nextToken() ;
predicateObjectList(subject) ;
expectEndOfTriples() ;
}
private void expectEndOfTriples()
{
// The DOT is required by Turtle (strictly).
// It is not in N3 and SPARQL.
if ( strict )
expect("Triples not terminated by DOT", DOT) ;
else
expectOrEOF("Triples not terminated by DOT", DOT) ;
}
private void predicateObjectList(Node subject)
{
if ( VERBOSE ) log.info("predicateObjectList("+subject+")") ;
predicateObjectItem(subject) ;
for(;;)
{
if ( ! lookingAt(SEMICOLON) )
break ;
// list continues - move over the ";"
nextToken() ;
if ( ! peekPredicate() )
// Trailing (pointless) SEMICOLON, no following predicate/object list.
break ;
predicateObjectItem(subject) ;
}
}
private void predicateObjectItem(Node subject)
{
Node predicate = predicate() ;
nextToken() ;
objectList(subject, predicate) ;
}
static private Node nodeSameAs = OWL.sameAs.asNode() ;
static private Node nodeLogImplies = Node.createURI("http://www.w3.org/2000/10/swap/log#implies") ;
/** Get predicate - maybe null for "illegal" */
private Node predicate()
{
if ( lookingAt(TokenType.KEYWORD) )
{
String image = peekToken().getImage() ;
if ( image.equals(KW_A) )
return NodeConst.nodeRDFType ;
if ( !strict && image.equals(KW_SAME_AS) )
return nodeSameAs ;
if ( !strict && image.equals(KW_LOG_IMPLIES) )
return NodeConst.nodeRDFType ;
exception("Unrecognized: "+image) ;
}
// Maybe null
return node() ;
}
/** Check raw token to see if it might be a predciate */
private boolean peekPredicate()
{
if ( lookingAt(TokenType.KEYWORD) )
{
String image = peekToken().getImage() ;
if ( image.equals(KW_A) )
return true ;
if ( !strict && image.equals(KW_SAME_AS) )
return true ;
if ( !strict && image.equals(KW_LOG_IMPLIES) )
return true ;
return false ;
}
// if ( lookingAt(NODE) )
// return true ;
if ( lookingAt(TokenType.IRI) )
return true ;
if ( lookingAt(TokenType.PREFIXED_NAME) )
return true ;
return false ;
}
private Node node()
{
// Resolve
// CHECK
// This is the only place where Nodes are created for triples.
Node n = tokenResolve().asNode() ;
if ( getChecker() != null )
getChecker().check(n) ;
return n ;
}
private void objectList(Node subject, Node predicate)
{
if ( VERBOSE ) log.info("objectList("+subject+", "+predicate+")") ;
for(;;)
{
Node object = triplesNode() ;
emit(subject, predicate, object) ;
if ( ! moreTokens() )
break ;
if ( ! lookingAt(COMMA) )
break ;
// list continues - move over the ","
nextToken() ;
}
}
// A structure of triples that itself generates a node. [] and ().
private Node triplesNode()
{
if ( lookingAt(NODE) )
{
Node n = node() ;
nextToken() ;
return n ;
}
// Special words.
if ( lookingAt(TokenType.KEYWORD) )
{
// Location independent node words
String image = peekToken().getImage() ;
nextToken() ;
if ( image.equals(KW_TRUE) )
return NodeConst.nodeTrue ;
if ( image.equals(KW_FALSE) )
return NodeConst.nodeFalse ;
exception("Unrecognized keyword: "+image) ;
}
return triplesNodeCompound() ;
}
private boolean peekTriplesNodeCompound()
{
if ( lookingAt(LBRACKET) )
return true ;
if ( lookingAt(LBRACE) )
return true ;
if ( lookingAt(LPAREN) )
return true ;
return false ;
}
private Node triplesNodeCompound()
{
if ( lookingAt(LBRACKET) )
return triplesBlankNode() ;
if ( lookingAt(LBRACE) )
return triplesFormula() ;
if ( lookingAt(LPAREN) )
return triplesList() ;
exception("Unrecognized: "+tokenResolve()) ;
return null ;
}
private Node triplesBlankNode()
{
nextToken() ; // Skip [
Node subject = Node.createAnon() ;
if ( peekPredicate() )
predicateObjectList(subject) ;
expect("Triples not terminated properly in []-list", RBRACKET) ;
// Exit: after the ]
return subject ;
}
private Node triplesFormula()
{
exception("Not implemented") ;
return null ;
}
private Node triplesList()
{
nextToken() ;
Node lastCell = null ;
Node listHead = null ;
for ( ;; )
{
if ( eof() )
exception ("Unterminated list") ;
if ( lookingAt(RPAREN) )
{
nextToken();
break ;
}
// The value.
Node n = triplesNode() ;
if ( n == null )
exception("Malformed list") ;
// Node for the list structre.
Node nextCell = Node.createAnon() ;
if ( listHead == null )
listHead = nextCell ;
if ( lastCell != null )
emit(lastCell, NodeConst.nodeRest, nextCell) ;
lastCell = nextCell ;
emit(nextCell, NodeConst.nodeFirst, n) ;
if ( ! moreTokens() ) // Error.
break ;
}
// On exit, just after the RPARENS
if ( lastCell == null )
// Simple ()
return NodeConst.nodeNil ;
// Finish list.
emit(lastCell, NodeConst.nodeRest, NodeConst.nodeNil) ;
return listHead ;
}
private void emit(Node subject, Node predicate, Node object)
{
if ( CHECKING )
{
if ( subject == null || ( ! subject.isURI() && ! subject.isBlank() ) )
exception("Subject is not a URI or blank node") ;
if ( predicate == null || ( ! predicate.isURI() ) )
exception("Predicate not a URI") ;
if ( object == null || ( ! object.isURI() && ! object.isBlank() && ! object.isLiteral() ) )
exception("Object is not a URI, blank node or literal") ;
}
Triple t = new Triple(subject, predicate, object) ;
if ( VERBOSE )
log.info(PrintingSink.strForTriple(t)) ;
sink.send(new Triple(subject, predicate, object)) ;
}
private Token tokenResolve()
{
return convert(peekToken()) ;
}
private Token convert(Token token)
{
if ( token.hasType(PREFIXED_NAME) )
{
String prefix = token.getImage() ;
String suffix = token.getImage2() ;
String expansion = prologue.getPrefixMap().expand(prefix, suffix) ;
if ( expansion == null )
exceptionDirect("Undefined prefix: "+prefix, token.getLine(), token.getColumn()) ;
token.setType(IRI) ;
token.setImage(expansion) ;
token.setImage2(null) ;
}
else if ( token.hasType(IRI) )
{
token.setImage(prologue.getResolver().resolve(token.getImage()).toString()) ;
}
else if ( token.hasType(LITERAL_DT) )
{
Token t = token.getSubToken() ;
t = convert(t) ;
token.setSubToken(t) ;
}
return token ;
}
}
/*
* (c) Copyright 2009 Hewlett-Packard Development Company, LP
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
© 2015 - 2025 Weber Informatics LLC | Privacy Policy