org.apache.jena.riot.lang.ReaderRIOTRDFXML Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-arq Show documentation
Show all versions of jena-arq Show documentation
ARQ is a SPARQL 1.1 query engine for Apache Jena
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.lang;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.Reader ;
import java.util.Map;
import org.apache.jena.JenaRuntime;
import org.apache.jena.atlas.lib.Pair ;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.atlas.web.ContentType;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.TypeMapper ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.rdf.model.RDFErrorHandler ;
import org.apache.jena.rdfxml.xmlinput.* ;
import org.apache.jena.rdfxml.xmlinput.impl.ARPSaxErrorHandler ;
import org.apache.jena.riot.*;
import org.apache.jena.riot.checker.CheckerLiterals ;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.IRIResolver;
import org.apache.jena.riot.system.ParserProfile;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.sparql.util.Context;
import org.xml.sax.SAXException ;
import org.xml.sax.SAXParseException ;
/** RDF/XML.
*
* @see <a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDF/XML syntax specification</a>
*/
public class ReaderRIOTRDFXML implements ReaderRIOT
{
/** {@link ReaderRIOTFactory} implementation that creates {@link ReaderRIOTRDFXML} readers. */
public static class Factory implements ReaderRIOTFactory {
    /**
     * Create a new RDF/XML reader.
     *
     * @param language      not consulted by this factory
     * @param parserProfile only its error handler is used
     * @return a fresh {@link ReaderRIOTRDFXML}
     */
    @Override
    public ReaderRIOT create(Lang language, ParserProfile parserProfile) {
        // Apart from the error handler, the provided ParserProfile is ignored:
        // ARP predates RIOT and does many things internally already.
        // This includes IRI resolution.
        ErrorHandler profileErrorHandler = parserProfile.getErrorHandler() ;
        return new ReaderRIOTRDFXML(profileErrorHandler) ;
    }
}
/** The ARP parser instance used by {@link #parse()}. */
private final ARP arp = new ARP() ;

/** Byte source; set by the {@code read(InputStream, ...)} overload. */
private InputStream input = null ;

/** Character source; set by the {@code read(Reader, ...)} overload. */
private Reader reader = null ;

/** Base URI handed to ARP (the result of {@code baseURI_RDFXML}). */
private String xmlBase ;

/** The base URI exactly as supplied by the caller; used as a prefix in error messages. */
private String filename ;

/** Destination for the parsed triples and prefixes. */
private StreamRDF sink ;

/** RIOT error handler that receives parser warnings and errors. */
private final ErrorHandler errorHandler;

/** Context of the current read; may carry RDF/XML reader properties. */
private Context context;

/**
 * Create a reader that reports problems to the given handler.
 *
 * @param errorHandler the RIOT error handler to report to
 */
public ReaderRIOTRDFXML(ErrorHandler errorHandler) {
    this.errorHandler = errorHandler;
}
/**
 * Parse RDF/XML from a byte stream and send the result to {@code output}.
 *
 * @param in      the stream to parse
 * @param baseURI base URI for the document; may be {@code null}, in which case a
 *                default is chosen by {@code baseURI_RDFXML}
 * @param ct      content type; not used by this reader
 * @param output  destination for triples and prefixes
 * @param context read context; may be {@code null}
 */
@Override
public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) {
    this.input = in ;
    // parse() prefers a non-null reader over the input stream, so drop any reader
    // left over from an earlier read(Reader, ...) call on this same instance.
    this.reader = null ;
    this.xmlBase = baseURI_RDFXML(baseURI) ;
    this.filename = baseURI ;
    this.sink = output ;
    this.context = context;
    parse();
}
/**
 * Parse RDF/XML from a character stream and send the result to {@code output}.
 *
 * @param reader  the character source to parse
 * @param baseURI base URI for the document; may be {@code null}, in which case a
 *                default is chosen by {@code baseURI_RDFXML}
 * @param ct      content type; not used by this reader
 * @param output  destination for triples and prefixes
 * @param context read context; may be {@code null}
 */
@Override
public void read(Reader reader, String baseURI, ContentType ct, StreamRDF output, Context context) {
    this.reader = reader ;
    // Only one source is active per read: forget any stream kept from an earlier
    // read(InputStream, ...) call so this instance does not hold on to it.
    this.input = null ;
    this.xmlBase = baseURI_RDFXML(baseURI) ;
    this.filename = baseURI ;
    this.sink = output ;
    this.context = context;
    parse();
}
// RDF 1.1 is based on URIs/IRIs, where spaces are not allowed.
// RDF 1.0 (and RDF/XML) was based on "RDF URI References" which did allow spaces.
// Use with TDB requires this to be "true" - it is set by InitTDB.
// When true, parse() sets every ARP condition listed in additionalErrors to EM_ERROR.
public static boolean RiotUniformCompatibility = false ;
// Warnings in ARP that should be errors to be compatible with
// non-XML-based languages, e.g. language tags should be
// syntactically valid.
private static int[] additionalErrors = new int[] {
ARPErrorNumbers.WARN_MALFORMED_XMLLANG
//, ARPErrorNumbers.WARN_MALFORMED_URI
//, ARPErrorNumbers.WARN_STRING_NOT_NORMAL_FORM_C
} ;
// The special case of a space in a URI is handled in HandlerSink (below).
// This is instead of listing ARPErrorNumbers.WARN_MALFORMED_URI in additionalErrors[],
// which causes a WARN (from ARP, with line+column numbers) and then an ERROR from RIOT.
// It's a pragmatic compromise.
private static boolean errorForSpaceInURI = true;
// Extracted from org.apache.jena.rdfxml.xmlinput.JenaReader
/**
 * Apply one reader property to the ARP options as an error-mode setting.
 * <p>
 * Property names that do not start with {@code ERR_}, {@code IGN_} or {@code WARN_}
 * are ignored. The value is either a string starting with {@code EM_} or an
 * {@link Integer} equal to one of the {@code ARPErrorNumbers.EM_*} modes.
 *
 * @param options the ARP options to update
 * @param pName   the property name (an ARP condition name)
 * @param value   the requested error mode
 * @throws RiotException if the name or the value is not recognised, including a
 *         {@code null} value
 */
private void oneProperty(ARPOptions options, String pName, Object value) {
    if (! pName.startsWith("ERR_") && ! pName.startsWith("IGN_") && ! pName.startsWith("WARN_"))
        return ;
    int cond = ParseException.errorCode(pName);
    if (cond == -1)
        throw new RiotException("No such ARP property: '"+pName+"'");
    int val;
    if (value instanceof String) {
        String modeName = (String) value;
        if (!modeName.startsWith("EM_"))
            throw new RiotException("Value for ARP property does not start EM_: '"+pName+"' = '"+value+"'" );
        val = ParseException.errorCode(modeName);
        if (val == -1 )
            throw new RiotException("Illegal value for ARP property: '"+pName+"' = '"+value+"'" );
    } else if (value instanceof Integer) {
        val = ((Integer) value).intValue();
        switch (val) {
            case ARPErrorNumbers.EM_IGNORE:
            case ARPErrorNumbers.EM_WARNING:
            case ARPErrorNumbers.EM_ERROR:
            case ARPErrorNumbers.EM_FATAL:
                break;
            default:
                throw new RiotException("Illegal value for ARP property: '"+pName+"' = '"+value+"'" );
        }
    } else {
        // Plain concatenation is null-safe: a null value is reported as a
        // RiotException rather than failing with a NullPointerException.
        throw new RiotException("Property \"" + pName + "\" cannot have value: " + value);
    }
    options.setErrorMode(cond, val);
}
/**
 * Run ARP over the configured source and stream the result to the sink.
 * <p>
 * Steps: start the sink; install a {@code HandlerSink} as ARP's statement, error and
 * namespace handler; configure the ARP options (uniform-compatibility errors, the
 * RDF 1.1 IRI factory, and any reader properties found in the context); load from
 * the reader if one is set, otherwise from the input stream; finish the sink.
 * <p>
 * {@link IOException} and {@link SAXException} from loading are reported to the RIOT
 * error handler; a {@link SAXParseException} is dropped because it has already been
 * reported. {@code sink.finish()} is not reached if an unchecked exception escapes.
 */
public void parse() {
    // Hacked out of ARP because of all the "private" methods
    // JenaReader has reset the options since new ARP() was called.
    sink.start() ;
    HandlerSink rslt = new HandlerSink(sink, errorHandler) ;
    arp.getHandlers().setStatementHandler(rslt) ;
    arp.getHandlers().setErrorHandler(rslt) ;
    arp.getHandlers().setNamespaceHandler(rslt) ;
    // ARPOptions.
    ARPOptions arpOptions = arp.getOptions() ;
    if ( RiotUniformCompatibility ) {
        // Convert some warnings to errors for compatible behaviour for all parsers.
        for ( int code : additionalErrors )
            arpOptions.setErrorMode(code, ARPErrorNumbers.EM_ERROR) ;
    }
    if ( JenaRuntime.isRDF11 )
        arp.getOptions().setIRIFactory(IRIResolver.iriFactory());
    if ( context != null ) {
        // Typed as Map<String, Object> so the entries can be passed straight to
        // oneProperty(ARPOptions, String, Object).
        Map<String, Object> properties = null;
        try {
            @SuppressWarnings("unchecked")
            Map<String, Object> p = (Map<String, Object>)(context.get(SysRIOT.sysRdfReaderProperties)) ;
            properties = p;
        } catch(Throwable ex) {
            // Best effort: a bad properties object must not stop the parse.
            Log.warn(this, "Problem accessing the RDF/XML reader properties: properties ignored", ex);
        }
        if ( properties != null )
            properties.forEach((k,v) -> oneProperty(arpOptions, k, v)) ;
    }
    arp.setOptionsWith(arpOptions) ;
    try {
        if ( reader != null )
            arp.load(reader, xmlBase) ;
        else
            arp.load(input, xmlBase) ;
    }
    catch (IOException e) {
        errorHandler.error(filename + ": " + ParseException.formatMessage(e), -1, -1) ;
    }
    catch (SAXParseException e) {
        // already reported.
    }
    catch (SAXException sax) {
        errorHandler.error(filename + ": " + ParseException.formatMessage(sax), -1, -1) ;
    }
    sink.finish() ;
}
/**
 * Work out the base URI to use for RDF/XML parsing.
 *
 * @param baseIRI the caller-supplied base, or {@code null}
 * @return {@code SysRIOT.chooseBaseIRI()} when no base was supplied, otherwise
 *         {@code SysRIOT.chooseBaseIRI(baseIRI)}
 */
private static String baseURI_RDFXML(String baseIRI) {
    // The one-argument form normalizes the URI.
    return ( baseIRI != null )
        ? SysRIOT.chooseBaseIRI(baseIRI)
        : SysRIOT.chooseBaseIRI() ;
}
/**
 * Adapter that receives ARP callbacks (statements, namespace declarations and, via
 * the superclass, errors) and forwards them to a RIOT {@link StreamRDF}.
 */
private static class HandlerSink extends ARPSaxErrorHandler implements StatementHandler, NamespaceHandler {
    private final StreamRDF output ;
    private final ErrorHandler riotErrorHandler ;
    private final CheckerLiterals checker ;

    /**
     * @param output     destination for triples and prefixes
     * @param errHandler RIOT error handler; also wrapped for the ARP superclass and
     *                   used by the literal checker
     */
    HandlerSink(StreamRDF output, ErrorHandler errHandler) {
        super(new ErrorHandlerBridge(errHandler)) ;
        this.checker = new CheckerLiterals(errHandler) ;
        this.riotErrorHandler = errHandler ;
        this.output = output ;
    }

    /** Emit a triple whose object is a resource. */
    @Override
    public void statement(AResource subj, AResource pred, AResource obj) {
        Triple triple = convert(subj, pred, obj) ;
        output.triple(triple) ;
    }

    /** Emit a triple whose object is a literal. */
    @Override
    public void statement(AResource subj, AResource pred, ALiteral lit) {
        Triple triple = convert(subj, pred, lit) ;
        output.triple(triple) ;
    }

    // From JenaReader
    /** Turn an ARP literal into a Jena literal node. */
    private static Node convert(ALiteral lit) {
        String dtURI = lit.getDatatypeURI();
        if ( dtURI != null ) {
            // Datatyped: well-formed XML literals get the XML-literal form,
            // everything else is typed via the TypeMapper.
            if ( lit.isWellFormedXML() )
                return NodeFactory.createLiteral(lit.toString(), null, true);
            RDFDatatype dt = TypeMapper.getInstance().getSafeTypeByName(dtURI);
            return NodeFactory.createLiteral(lit.toString(), dt);
        }
        // No datatype: lexical form plus language.
        return NodeFactory.createLiteral(lit.toString(), lit.getLang());
    }

    /**
     * Turn an ARP resource into a Jena node.
     * Anonymous resources map to a blank node cached in the resource's user data, so
     * the same resource always yields the same node.
     *
     * @throws RiotParseException if space checking is enabled and the URI contains a space
     */
    private Node convert(AResource r) {
        if ( r.isAnonymous() ) {
            // String id = r.getAnonymousID();
            Node bnode = (Node) r.getUserData();
            if ( bnode == null ) {
                bnode = NodeFactory.createBlankNode();
                r.setUserData(bnode);
            }
            return bnode;
        }
        // URI.
        String uriStr = r.getURI() ;
        if ( errorForSpaceInURI ) {
            // Special check for spaces in a URI.
            // Convert to an error like TokenizerText.
            int spaceIdx = uriStr.indexOf(' ');
            if ( spaceIdx >= 0 ) {
                String upToSpace = uriStr.substring(0, spaceIdx);
                String msg = String.format("Bad character in IRI (space): <%s[space]...>", upToSpace);
                riotErrorHandler.error(msg, -1, -1);
                throw new RiotParseException(msg, -1, -1);
            }
        }
        return NodeFactory.createURI(uriStr);
    }

    /** Build a triple from three resources, converting subject, predicate, object in that order. */
    private Triple convert(AResource s, AResource p, AResource o) {
        Node subject = convert(s) ;
        Node predicate = convert(p) ;
        Node object = convert(o) ;
        return Triple.create(subject, predicate, object) ;
    }

    /** Build a triple with a literal object; the literal is converted and checked first. */
    private Triple convert(AResource s, AResource p, ALiteral o) {
        Node object = convert(o) ;
        checker.check(object, -1, -1) ;
        Node subject = convert(s) ;
        Node predicate = convert(p) ;
        return Triple.create(subject, predicate, object) ;
    }

    /** Pass a namespace declaration on to the output as a prefix mapping. */
    @Override
    public void startPrefixMapping(String prefix, String uri) {
        output.prefix(prefix, uri) ;
    }

    /** Nothing to do when a prefix mapping goes out of scope. */
    @Override
    public void endPrefixMapping(String prefix) {}
}
/**
 * Bridges Jena's {@link RDFErrorHandler} callbacks to a RIOT {@link ErrorHandler},
 * passing on the exception message and, where available, its line and column.
 */
private static class ErrorHandlerBridge implements RDFErrorHandler {
    private final ErrorHandler errorHandler ;

    /** @param handler the RIOT error handler that receives every report */
    ErrorHandlerBridge(ErrorHandler handler) {
        this.errorHandler = handler ;
    }

    /** Forward as a RIOT warning. */
    @Override
    public void warning(Exception e) {
        Pair<Integer, Integer> p = getLineCol(e) ;
        errorHandler.warning(e.getMessage(), p.getLeft(), p.getRight()) ;
    }

    /** Forward as a RIOT error. */
    @Override
    public void error(Exception e) {
        Pair<Integer, Integer> p = getLineCol(e) ;
        errorHandler.error(e.getMessage(), p.getLeft(), p.getRight()) ;
    }

    /** Forward as a RIOT fatal error. */
    @Override
    public void fatalError(Exception e) {
        Pair<Integer, Integer> p = getLineCol(e) ;
        errorHandler.fatal(e.getMessage(), p.getLeft(), p.getRight()) ;
    }

    /**
     * Extract the (line, column) position from an exception.
     *
     * @return the position reported by a {@link SAXParseException}, or (-1, -1) for
     *         any other exception type
     */
    private static Pair<Integer, Integer> getLineCol(Exception e) {
        if ( e instanceof SAXParseException ) {
            SAXParseException esax = (SAXParseException)e ;
            return Pair.create(esax.getLineNumber(), esax.getColumnNumber()) ;
        } else {
            return Pair.create(-1, -1) ;
        }
    }
}
}