org.apache.jena.riot.lang.LangNTuple Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-arq Show documentation
ARQ is a SPARQL 1.1 query engine for Apache Jena
There is a newer version: 5.1.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.lang;

import java.util.Iterator ;

import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.system.ParserProfile ;
import org.apache.jena.riot.system.StreamRDF ;
import org.apache.jena.riot.tokens.StringType;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.TokenType ;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** N-Quads, N-triples parser framework, with both push and pull interfaces.
 *
 * <ul>
 * <li>The {@link #parse} method processes the whole stream of tokens,
 *   sending each to a {@link org.apache.jena.atlas.lib.Sink} object.</li>
 * <li>The {@code Iterator<X>} interface yields triples one-by-one.</li>
 * </ul>
 *
 * <p>Normally, bad terms cause the parser to stop (i.e. they are treated as errors).
 * In addition, the NTuples subsystem allows triples/quads with "bad" terms
 * to be skipped.
 *
 * <p>Checking can be switched off completely. If the data is known to be correct,
 * switching checking off can be a large performance gain. Caveat emptor.
 */

/**
 * Base class for line-oriented tuple parsers, usable as an {@link Iterator}.
 *
 * @param <X> the kind of tuple produced by {@link #parseOne()} and handed out
 *            by {@link #next()}
 */
public abstract class LangNTuple<X> extends LangBase implements Iterator<X>
{
    private static final Logger log = LoggerFactory.getLogger(LangNTuple.class) ;

    /** When true, tuples with bad RDF terms are not output; see {@link #setSkipOnBadTerm(boolean)}. */
    protected boolean skipOnBadTerm = false ;

    /**
     * Create a parser; all three arguments are handed unchanged to the superclass.
     *
     * @param tokens  the token source
     * @param profile the parser profile (used here to create triples)
     * @param dest    the output stream of RDF
     */
    protected LangNTuple(Tokenizer tokens, ParserProfile profile, StreamRDF dest) {
        super(tokens, profile, dest);
    }

    /**
     * Whether there is more input to parse. Delegates to {@code moreTokens()},
     * so the answer is only meaningful if the remaining input has no syntax errors.
     */
    @Override
    public final boolean hasNext() {
        return super.moreTokens();
    }

    /**
     * Parse and return the next tuple by calling {@link #parseOne()}.
     *
     * @return the result of {@link #parseOne()}, which is documented as possibly {@code null}
     */
    @Override
    public final X next() {
        return parseOne();
    }

    /** Parse one tuple - return object to be sent to the sink or null for none */
    protected abstract X parseOne() ;

    /**
     * Note a tuple not being output. This implementation issues a warning
     * through the error handler; {@code object} itself is not used here.
     *
     * @param object    the tuple being skipped
     * @param printForm text used in the warning message
     * @param line      line number passed to the error handler
     * @param col       column number passed to the error handler
     */
    protected void skipOne(X object, String printForm, long line, long col) {
        errorHandler.warning("Skip: " + printForm, line, col);
    }

    /** Convert a token to the {@link Node} it denotes. */
    protected abstract Node tokenAsNode(Token token) ;

    /**
     * Parse one triple, not including the terminator.
     * <p>
     * Reads three terms in order: subject (IRI, blank node or {@code <<} triple term),
     * predicate (IRI) and object (any RDF term accepted by {@link #checkRDFTerm(Token)},
     * or a {@code <<} triple term). Hitting end of file in any position is reported
     * through {@code exception}.
     *
     * @return the triple built by the parser profile, located at the subject token's
     *         line and column
     */
    protected final Triple parseTriple() {
        // Subject
        Token sToken = nextToken();
        if ( sToken.isEOF() )
            exception(sToken, "Premature end of file: %s", sToken);
        Node s;
        if ( sToken.hasType(TokenType.LT2) )
            s = parseTripleTerm();
        else {
            checkIRIOrBNode(sToken);
            s = tokenAsNode(sToken);
        }

        // Predicate
        Token pToken = nextToken();
        if ( pToken.isEOF() )
            exception(pToken, "Premature end of file: %s", pToken);
        checkIRI(pToken);
        Node p = tokenAsNode(pToken);

        // Object
        Token oToken = nextToken();
        if ( oToken.isEOF() )
            exception(oToken, "Premature end of file: %s", oToken);
        Node o;
        if ( oToken.hasType(TokenType.LT2) )
            o = parseTripleTerm();
        else {
            checkRDFTerm(oToken);
            o = tokenAsNode(oToken);
        }
        // The triple's reported position is that of its subject token.
        return profile.createTriple(s, p, o, sToken.getLine(), sToken.getColumn());
    }

    /**
     * Parse the remainder of a triple term. The caller has already consumed the
     * opening {@code <<} (LT2) token; this reads the inner triple and then requires
     * a closing {@code >>} (GT2) token.
     *
     * @return a triple node wrapping the inner triple
     */
    final protected Node parseTripleTerm() {
        Triple t = parseTriple();
        Token x = nextToken();
        if ( x.getType() != TokenType.GT2 )
            exception(x, "Triple term not terminated by >>: %s", x);
        return NodeFactory.createTripleNode(t);
    }

    /** Accept an IRI or blank node token; anything else is reported through {@code exception}. */
    protected final void checkIRIOrBNode(Token token) {
        if ( token.hasType(TokenType.IRI) )
            return;
        if ( token.hasType(TokenType.BNODE) )
            return;
        exception(token, "Expected BNode or IRI: Got: %s", token);
    }

    /** Accept an IRI token; anything else is reported through {@code exception}. */
    protected final void checkIRI(Token token) {
        if ( token.hasType(TokenType.IRI) )
            return;
        exception(token, "Expected IRI: Got: %s", token);
    }

    /**
     * Check that a token is acceptable in the object position: an IRI, a blank node,
     * a plain string, or a string with a language tag or datatype. String forms are
     * additionally validated by {@code checkString}; any other token type is
     * reported through {@code exception}.
     */
    protected final void checkRDFTerm(Token token) {
        switch (token.getType()) {
            case IRI:
            case BNODE:
                return;
            case STRING:
                checkString(token);
                return ;
            case LITERAL_LANG:
            case LITERAL_DT:
                // The lexical form is carried in the first sub-token.
                checkString(token.getSubToken1());
                return ;
            default:
                exception(token, "Illegal object: %s", token) ;
        }
    }

    /**
     * Validate the quoting of a string token: long (triple-quoted) strings are
     * rejected, and in strict mode only the {@link StringType#STRING2} form
     * ({@code ""}-quoted) is accepted.
     */
    private void checkString(Token token) {
        if ( token.isLongString() )
            exception(token, "Triple quoted string not permitted: %s", token) ;
        if ( isStrictMode() && ! token.hasStringType(StringType.STRING2) )
            exception(token, "Not a \"\"-quoted string: %s", token);
    }

    /** SkipOnBadTerm - do not output tuples with bad RDF terms */
    public boolean  getSkipOnBadTerm()                      { return skipOnBadTerm ; }
    /** SkipOnBadTerm - do not output tuples with bad RDF terms */
    public void     setSkipOnBadTerm(boolean skipOnBadTerm) { this.skipOnBadTerm = skipOnBadTerm ; }
}