All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.riot.lang.LangTurtleBase Maven / Gradle / Ivy

There is a newer version: 5.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.lang ;

import static org.apache.jena.riot.tokens.TokenType.COMMA ;
import static org.apache.jena.riot.tokens.TokenType.DIRECTIVE ;
import static org.apache.jena.riot.tokens.TokenType.DOT ;
import static org.apache.jena.riot.tokens.TokenType.EOF ;
import static org.apache.jena.riot.tokens.TokenType.IRI ;
import static org.apache.jena.riot.tokens.TokenType.KEYWORD ;
import static org.apache.jena.riot.tokens.TokenType.LBRACE ;
import static org.apache.jena.riot.tokens.TokenType.LBRACKET ;
import static org.apache.jena.riot.tokens.TokenType.LPAREN ;
import static org.apache.jena.riot.tokens.TokenType.NODE ;
import static org.apache.jena.riot.tokens.TokenType.PREFIXED_NAME ;
import static org.apache.jena.riot.tokens.TokenType.RBRACKET ;
import static org.apache.jena.riot.tokens.TokenType.RPAREN ;
import static org.apache.jena.riot.tokens.TokenType.SEMICOLON ;

import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.iri.IRI ;
import org.apache.jena.riot.system.*;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.TokenType ;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.apache.jena.sparql.graph.NodeConst ;

/** The main engine for all things Turtle-ish (Turtle, TriG). */
public abstract class LangTurtleBase extends LangBase {
    // See http://www.w3.org/TR/turtle/
    // Some predicates (if accepted)
    protected final static String  KW_A           = "a" ;
    protected final static String  KW_SAME_AS     = "=" ;
    //protected final static String  KW_LOG_IMPLIES = "=>" ;
    protected final static String  KW_TRUE        = "true" ;
    protected final static String  KW_FALSE       = "false" ;

    protected final static boolean VERBOSE        = false ;
    // protected final static boolean CHECKING = true ;
    // Current graph - null for default graph
    private Node                   currentGraph   = null ;
    
    protected final PrefixMap prefixMap;

    protected final Node getCurrentGraph() {
        return currentGraph ;
    }

    protected final void setCurrentGraph(Node graph) {
        this.currentGraph = graph ;
    }

    protected LangTurtleBase(Tokenizer tokens, ParserProfile profile, StreamRDF dest) {
        super(tokens, profile, dest) ;
        prefixMap = profile.getPrefixMap();
    }

    @Override
    protected final void runParser() {
        while (moreTokens()) {
            Token t = peekToken() ;
            if ( lookingAt(DIRECTIVE) ) {
                directive() ; // @form.
                continue ;
            }

            if ( lookingAt(KEYWORD) ) {
                if ( t.getImage().equalsIgnoreCase("PREFIX") || t.getImage().equalsIgnoreCase("BASE") ) {
                    directiveKeyword() ;
                    continue ;
                }
            }

            oneTopLevelElement() ;

            if ( lookingAt(EOF) )
                break ;
        }
    }

    // Do one top level item for the language.
    protected abstract void oneTopLevelElement() ;

    /**
     * Emit a triple - nodes have been checked as has legality of node type in
     * location
     */
    protected abstract void emit(Node subject, Node predicate, Node object) ;

    protected final void directiveKeyword() {
        Token t = peekToken() ;
        String x = t.getImage() ;
        nextToken() ;

        if ( x.equalsIgnoreCase("BASE") ) {
            directiveBase() ;
            return ;
        }

        if ( x.equalsIgnoreCase("PREFIX") ) {
            directivePrefix() ;
            return ;
        }
        exception(t, "Unrecognized keyword for directive: %s", x) ;
    }

    protected final void directive() {
        // It's a directive ...
        Token t = peekToken() ;
        String x = t.getImage() ;
        nextToken() ;

        if ( x.equals("base") ) {
            directiveBase() ;
            if ( isStrictMode )
                // The line number is probably one ahead due to the newline
                expect("Base directive not terminated by a dot", DOT) ;
            else
                skipIf(DOT) ;
            return ;
        }

        if ( x.equals("prefix") ) {
            directivePrefix() ;
            if ( isStrictMode )
                // The line number is probably one ahead due to the newline
                expect("Prefix directive not terminated by a dot", DOT) ;   
            else
                skipIf(DOT) ;
            return ;
        }
        exception(t, "Unrecognized directive: %s", x) ;
    }

    protected final void directivePrefix() {
        // Raw - unresolved prefix name.
        if ( !lookingAt(PREFIXED_NAME) )
            exception(peekToken(), "@prefix or PREFIX requires a prefix (found '" + peekToken() + "')") ;
        if ( peekToken().getImage2().length() != 0 )
            exception(peekToken(), "@prefix or PREFIX requires a prefix with no suffix (found '" + peekToken() + "')") ;
        String prefix = peekToken().getImage() ;
        nextToken() ;
        if ( !lookingAt(IRI) )
            exception(peekToken(), "@prefix requires an IRI (found '" + peekToken() + "')") ;
        String iriStr = peekToken().getImage() ;
        IRI iri = profile.makeIRI(iriStr, currLine, currCol) ;
        prefixMap.add(prefix, iri) ;
        emitPrefix(prefix, iri.toString()) ;
        nextToken() ;
    }

    protected final void directiveBase() {
        Token token = peekToken() ;
        if ( !lookingAt(IRI) )
            exception(token, "@base requires an IRI (found '" + token + "')") ;
        String baseStr = token.getImage() ;
        IRI baseIRI = profile.makeIRI(baseStr, currLine, currCol) ;
        emitBase(baseIRI.toString()) ;
        nextToken() ;
        IRIResolver newResolver = IRIResolver.create(baseIRI) ;
        profile.setIRIResolver(newResolver);
    }

    // Unlike many operations in this parser suite
    // this does not assume that we are definitely
    // entering this state. It does checks and may
    // signal a parse exception.

    protected final void triplesSameSubject() {
        // Either a IRI/prefixed name or a construct that generates triples

        // TriplesSameSubject -> Term PropertyListNotEmpty
        if ( lookingAt(NODE) ) {
            triples() ;
            return ;
        }

        boolean maybeList = lookingAt(LPAREN) ;
        
        // Turtle: TriplesSameSubject -> TriplesNode PropertyList?
        // TriG:   (blankNodePropertyList | collection) predicateObjectList? '.'
        //         labelOrSubject (wrappedGraph | predicateObjectList '.')
        if ( peekTriplesNodeCompound() ) {
            Node n = triplesNodeCompound() ;

            // May be followed by:
            // A predicateObject list
            // A DOT or EOF.
            // But if a DOT or EOF, then it can't have been () or [].

            // Turtle, as spec'ed does not allow
            // (1 2 3 4) .
            // There must be a predicate and object.

            // -- If strict turtle.
            if ( isStrictMode && maybeList ) {
                if ( peekPredicate() ) {
                    predicateObjectList(n) ;
                    expectEndOfTriples() ;
                    return ;
                }
                exception(peekToken(), "Predicate/object required after (...) - Unexpected token : %s",
                          peekToken()) ;
            }
            // ---
            // If we allow top-level lists and [...].
            // Should check if () and [].

            if ( lookingAt(EOF) )
                return ;
            if ( lookingAt(DOT) ) {
                nextToken() ;
                return ;
            }

            if ( peekPredicate() )
                predicateObjectList(n) ;
            expectEndOfTriples() ;
            //exception(peekToken(), "Unexpected token : %s", peekToken()) ;
            return ;
        }
        exception(peekToken(), "Out of place: %s", peekToken()) ;
    }

    // Must be at least one triple.
    protected final void triples() {
        // Looking at a node.
        Node subject = node() ;
        if ( subject == null )
            exception(peekToken(), "Not recognized: expected node: %s", peekToken().text()) ;

        nextToken() ;
        predicateObjectList(subject) ;
        expectEndOfTriples() ;
    }

    // Differs between Turtle and TriG.
    // TriG, inside {} does not need the trailing DOT
    protected abstract void expectEndOfTriples() ;
    
    // The DOT is required by Turtle (strictly).
    // It is not in N3 and SPARQL.
    
    protected void expectEndOfTriplesTurtle() {
        if ( isStrictMode )
            expect("Triples not terminated by DOT", DOT) ;
        else
            expectOrEOF("Triples not terminated by DOT", DOT) ;
    }
        
    protected final void predicateObjectList(Node subject) {
        predicateObjectItem(subject) ;

        for (;;) {
            if ( !lookingAt(SEMICOLON) )
                break ;
            // predicatelist continues - move over all ";"
            while (lookingAt(SEMICOLON))
                nextToken() ;
            if ( !peekPredicate() )
                // Trailing (pointless) SEMICOLONs, no following
                // predicate/object list.
                break ;
            predicateObjectItem(subject) ;
        }
    }

    protected final void predicateObjectItem(Node subject) {
        Node predicate = predicate() ;
        nextToken() ;
        objectList(subject, predicate) ;
    }

    static protected final Node nodeSameAs     = NodeConst.nodeOwlSameAs ;
    static protected final Node nodeLogImplies = NodeFactory.createURI("http://www.w3.org/2000/10/swap/log#implies") ;

    /** Get predicate - maybe null for "illegal" */
    protected final Node predicate() {
        Token t = peekToken() ;

        if ( t.hasType(TokenType.KEYWORD) ) {
            boolean strict = isStrictMode ;
            Token tErr = peekToken() ;
            String image = peekToken().getImage() ;
            if ( image.equals(KW_A) )
                return NodeConst.nodeRDFType ;
            // N3-isms
            if ( !strict && image.equals(KW_SAME_AS) )
                return nodeSameAs ;
            // Relationship between two formulae in N3.
//            if ( !strict && image.equals(KW_LOG_IMPLIES) )
//                return log:implies;
            exception(tErr, "Unrecognized keyword: " + image) ;
        }

        Node n = node() ;
        if ( n == null || !n.isURI() )
            exception(t, "Expected IRI for predicate: got: %s", t) ;
        return n ;
    }

    /** Check raw token to see if it might be a predicate */
    protected final boolean peekPredicate() {
        if ( lookingAt(TokenType.KEYWORD) ) {
            String image = peekToken().getImage() ;
            if ( image.equals(KW_A) )
                return true ;
            if ( !isStrictMode && image.equals(KW_SAME_AS) )
                return true ;
//            if ( !isStrictMode && image.equals(KW_LOG_IMPLIES) )
//                return true ;
            return false ;
        }
        // if ( lookingAt(NODE) )
        // return true ;
        if ( lookingAt(TokenType.IRI) )
            return true ;
        if ( lookingAt(TokenType.PREFIXED_NAME) )
            return true ;
        return false ;
    }

    /** Maybe "null" for not-a-node. */
    protected final Node node() {
        // Token to Node
        Node n = tokenAsNode(peekToken()) ;
        if ( n == null )
            return null ;
        return n ;
    }

    protected final void objectList(Node subject, Node predicate) {
        for (;;) {
            Node object = triplesNode() ;
            emitTriple(subject, predicate, object) ;

            if ( !moreTokens() )
                break ;
            if ( !lookingAt(COMMA) )
                break ;
            // list continues - move over the ","
            nextToken() ;
        }
    }

    // A structure of triples that itself generates a node.
    // Special checks for [] and ().

    protected final Node triplesNode() {
        if ( lookingAt(NODE) ) {
            Node n = node() ;
            nextToken() ;
            return n ;
        }

        // Special words.
        if ( lookingAt(TokenType.KEYWORD) ) {
            Token tErr = peekToken() ;
            // Location independent node words
            String image = peekToken().getImage() ;
            nextToken() ;
            if ( image.equals(KW_TRUE) )
                return NodeConst.nodeTrue ;
            if ( image.equals(KW_FALSE) )
                return NodeConst.nodeFalse ;
            if ( image.equals(KW_A) )
                exception(tErr, "Keyword 'a' not legal at this point") ;

            exception(tErr, "Unrecognized keyword: " + image) ;
        }

        return triplesNodeCompound() ;
    }

    protected final boolean peekTriplesNodeCompound() {
        if ( lookingAt(LBRACKET) )
            return true ;
        if ( lookingAt(LBRACE) )
            return true ;
        if ( lookingAt(LPAREN) )
            return true ;
        return false ;
    }

    protected final Node triplesNodeCompound() {
        if ( lookingAt(LBRACKET) )
            return triplesBlankNode() ;
        if ( lookingAt(LBRACE) )
            return triplesFormula() ;
        if ( lookingAt(LPAREN) )
            return triplesList() ;
        exception(peekToken(), "Unrecognized (expected an RDF Term): " + peekToken()) ;
        return null ;
    }

    protected final Node triplesBlankNode() {
        Token t = nextToken() ; // Skip [
        Node subject = profile.createBlankNode(currentGraph, t.getLine(), t.getColumn()) ;
        triplesBlankNode(subject) ;
        return subject ;
    }

    protected final void triplesBlankNode(Node subject) {
        if ( peekPredicate() )
            predicateObjectList(subject) ;
        expect("Triples not terminated properly in []-list", RBRACKET) ;
        // Exit: after the ]
    }

    protected final Node triplesFormula() {
        exception(peekToken(), "Not implemented (formulae, graph literals)") ;
        return null ;
    }

    protected final Node triplesList() {
        nextToken() ;
        Node lastCell = null ;
        Node listHead = null ;

        startList() ;

        for (;;) {
            Token errorToken = peekToken() ;
            if ( eof() )
                exception(peekToken(), "Unterminated list") ;

            if ( lookingAt(RPAREN) ) {
                nextToken() ;
                break ;
            }

            // The value.
            Node n = triplesNode() ;

            if ( n == null )
                exception(errorToken, "Malformed list") ;

            // Node for the list structre.
            Node nextCell = NodeFactory.createBlankNode() ;
            if ( listHead == null )
                listHead = nextCell ;
            if ( lastCell != null )
                emitTriple(lastCell, NodeConst.nodeRest, nextCell) ;
            lastCell = nextCell ;

            emitTriple(nextCell, NodeConst.nodeFirst, n) ;
        }
        // On exit, just after the RPARENS

        if ( lastCell == null ) {
            // Simple ()
            finishList();
            return NodeConst.nodeNil ;
        }

        // Finish list.
        emitTriple(lastCell, NodeConst.nodeRest, NodeConst.nodeNil) ;
        finishList() ;
        return listHead ;
    }

    // Signal start of a list
    protected void finishList() {}

    // Signal end of a list
    protected void startList() {}

    protected final void emitTriple(Node subject, Node predicate, Node object) {
        emit(subject, predicate, object) ;
    }

    private final void emitPrefix(String prefix, String iriStr) {
        dest.prefix(prefix, iriStr) ; 
    }

    private final void emitBase(String baseStr) { 
        dest.base(baseStr);
    }

    protected final Node tokenAsNode(Token token) {
        return profile.create(currentGraph, token) ;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy