All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.riot.lang.LangNTuple Maven / Gradle / Ivy

There is a newer version: 5.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.lang;

import java.util.Iterator ;

import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.system.ParserProfile ;
import org.apache.jena.riot.system.StreamRDF ;
import org.apache.jena.riot.tokens.StringType;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.TokenType ;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** N-Quads, N-triples parser framework, with both push and pull interfaces.
 *
 * 
    *
  • The {@link #parse} method processes the whole stream of tokens, * sending each to a {@link org.apache.jena.atlas.lib.Sink} object.
  • *
  • The {@code Iterator<X>} interface yields triples one-by-one.
  • *
* * Normally, bad terms causes the parser to stop (i.e. treat them as errors). * In addition, the NTuples subsystem allows triples/quads with "bad" terms * to be skipped. * * Checking can be switched off completely. If the data is known to be correct, * no checking can be a large performance gain. Caveat emptor. */ public abstract class LangNTuple extends LangBase implements Iterator { private static Logger log = LoggerFactory.getLogger(LangNTuple.class) ; protected boolean skipOnBadTerm = false ; protected LangNTuple(Tokenizer tokens, ParserProfile profile, StreamRDF dest) { super(tokens, profile, dest); } // Assumes no syntax errors. @Override public final boolean hasNext() { return super.moreTokens(); } @Override public final X next() { return parseOne(); } /** Parse one tuple - return object to be sent to the sink or null for none */ protected abstract X parseOne() ; /** Note a tuple not being output */ protected void skipOne(X object, String printForm, long line, long col) { errorHandler.warning("Skip: " + printForm, line, col); } protected abstract Node tokenAsNode(Token token) ; // One triple, not including terminator. protected final Triple parseTriple() { Token sToken = nextToken(); if ( sToken.isEOF() ) exception(sToken, "Premature end of file: %s", sToken); Node s; if ( sToken.hasType(TokenType.LT2) ) s = parseTripleTerm(); else { checkIRIOrBNode(sToken); s = tokenAsNode(sToken); } Token pToken = nextToken(); if ( pToken.isEOF() ) exception(pToken, "Premature end of file: %s", pToken); checkIRI(pToken); Node p = tokenAsNode(pToken); Token oToken = nextToken(); if ( oToken.isEOF() ) exception(oToken, "Premature end of file: %s", oToken); Node o; if ( oToken.hasType(TokenType.LT2) ) o = parseTripleTerm(); else { checkRDFTerm(oToken); o = tokenAsNode(oToken); } return profile.createTriple(s, p, o, sToken.getLine(), sToken.getColumn()); } // Looking at "<<" (LT2) final protected Node parseTripleTerm() { Triple t = parseTriple(); Token x = nextToken(); if ( x.getType() != TokenType.GT2 ) exception(x, "Triple term not terminated by >>: %s", x); return NodeFactory.createTripleNode(t); } protected final void checkIRIOrBNode(Token token) { if ( token.hasType(TokenType.IRI) ) return; if ( token.hasType(TokenType.BNODE) ) return; exception(token, "Expected BNode or IRI: Got: %s", token); } protected final void checkIRI(Token token) { if ( token.hasType(TokenType.IRI) ) return; exception(token, "Expected IRI: Got: %s", token); } protected final void checkRDFTerm(Token token) { switch (token.getType()) { case IRI: case BNODE: return; case STRING: checkString(token); return ; case LITERAL_LANG: case LITERAL_DT: checkString(token.getSubToken1()); return ; default: exception(token, "Illegal object: %s", token) ; } } private void checkString(Token token) { if ( token.isLongString() ) exception(token, "Triple quoted string not permitted: %s", token) ; if ( isStrictMode() && ! token.hasStringType(StringType.STRING2) ) exception(token, "Not a \"\"-quoted string: %s", token); } /** SkipOnBadTerm - do not output tuples with bad RDF terms */ public boolean getSkipOnBadTerm() { return skipOnBadTerm ; } /** SkipOnBadTerm - do not output tuples with bad RDF terms */ public void setSkipOnBadTerm(boolean skipOnBadTerm) { this.skipOnBadTerm = skipOnBadTerm ; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy