org.semanticweb.yars.turtle.TurtleParserBase Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.semanticweb.yars.turtle;
import java.math.BigInteger;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.semanticweb.yars.nx.BNode;
import org.semanticweb.yars.nx.Literal;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Nodes;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.namespace.RDF;
import org.semanticweb.yars.nx.namespace.XSD;
import org.semanticweb.yars.nx.parser.Callback;
/** Base class parsers, mainly SPARQL related */
public class TurtleParserBase {
final static Logger _log = Logger.getLogger(TurtleParserBase.class.getName());
int _bnodeid = 0;
boolean strictTurtle = true;
// NodeConst
protected final Literal XSD_TRUE = new Literal("true", XSD.BOOLEAN);
protected final Literal XSD_FALSE = new Literal("false", XSD.BOOLEAN);
protected final Node nRDFtype = RDF.TYPE;
protected final Node nRDFnil = RDF.NIL;
protected final Node nRDFfirst = RDF.FIRST;
protected final Node nRDFrest = RDF.REST;
protected final Node nRDFsubject = RDF.SUBJECT;
protected final Node nRDFpredicate = RDF.PREDICATE;
protected final Node nRDFobject = RDF.OBJECT;
// This is the map used allocate blank node labels during parsing.
// 1/ It is different between CONSTRUCT and the query pattern
// 2/ Each BasicGraphPattern is a scope for blank node labels so each
// BGP causes the map to be cleared at the start of the BGP
//LabelToNodeMap listLabelMap = new LabelToNodeMap(true, new VarAlloc("L")) ;
// ----
Callback _callback;
public TurtleParserBase() { _prologue = new Prologue(); }
protected Prologue _prologue ;
public void setPrologue(Prologue prologue) { _prologue = prologue ; }
public Prologue getPrologue() { return _prologue ; }
protected Literal createLiteralInteger(String lexicalForm)
{
return new Literal(lexicalForm, XSD.INTEGER) ;
}
protected Literal createLiteralDouble(String lexicalForm)
{
return new Literal(lexicalForm, XSD.DOUBLE) ;
}
protected Literal createLiteralDecimal(String lexicalForm)
{
return new Literal(lexicalForm, XSD.DECIMAL) ;
}
protected Resource createResource(String s) {
if (s.equals("<>")) {
return new Resource(_prologue.getBaseURI().toString());
}
URI u = URI.create(s.substring(1, s.length()-1));
_log.log(Level.FINE, "Creating resource {0}, absolute? {1}", new Object[] { s, u.isAbsolute() });
if (!u.isAbsolute()) {
_log.log(Level.FINE, "Resolving {0} relative to {1}", new Object[] { u, _prologue.getBaseURI() } );
u = _prologue.getBaseURI().resolve(u);
}
return new Resource(u.toString());
}
protected Node stripSign(Node node)
{
if ( ! (node instanceof Literal) ) return node ;
// String lex = node.getLiteralLexicalForm() ;
// String lang = node.getLiteralLanguage() ;
// RDFDatatype dt = node.getLiteralDatatype() ;
//
// if ( ! lex.startsWith("-") && ! lex.startsWith("+") )
// throw new ARQInternalErrorException("Literal does not start with a sign: "+lex) ;
//
// lex = lex.substring(1) ;
// return NodeFactory.createLiteral(lex, lang, dt) ;
return node;
}
protected long integerValue(String s) throws TurtleParseException
{
try {
if ( s.startsWith("+") )
s = s.substring(1) ;
if ( s.startsWith("0x") )
{
// Hex
s = s.substring(2) ;
return Long.parseLong(s, 16) ;
}
return Long.parseLong(s) ;
} catch (NumberFormatException ex)
{
try {
// Possible too large for a long.
BigInteger integer = new BigInteger(s) ;
throw new TurtleParseException("Number '"+s+"' is a valid number but can't not be stored in a long") ;
} catch (NumberFormatException ex2) {}
throw new TurtleParseException(ex, -1, -1) ;
}
}
protected double doubleValue(String s)
{
if ( s.startsWith("+") )
s = s.substring(1) ;
double valDouble = Double.parseDouble(s) ;
return valDouble ;
}
/** Remove first and last characters (e.g. ' or "") from a string */
protected static String stripQuotes(String s)
{
return s.substring(1,s.length()-1) ;
}
/** Remove first 3 and last 3 characters (e.g. ''' or """) from a string */
protected static String stripQuotes3(String s)
{
return s.substring(3,s.length()-3) ;
}
/** remove the first n charcacters from the string*/
public static String stripChars(String s, int n)
{
return s.substring(n, s.length()) ;
}
// protected Variable createVariable(String s, int line, int column)
// {
// s = s.substring(1) ; // Drop the marker
//
// // This is done by the parser input stream nowadays.
// //s = unescapeCodePoint(s, line, column) ;
// // Check \ u did not put in any illegals.
// return new Variable(s) ;
// }
// ---- IRIs and Nodes
// See RiotLib re bNode IRIs.
// Merge sometime.
// public static final String ParserLoggerName = "SPARQL" ;
// private static Logger parserLog = LoggerFactory.getLogger(ParserLoggerName) ;
// private static ErrorHandler errorHandler = ErrorHandlerFactory.errorHandlerStd(parserLog) ;
/**
* Resolve relative URI references.
* Might be slow, due to creating URI objects.
*
* @param name
* @param line
* @param column
* @return
* @throws TurtleParseException
*/
// protected String resolveUriRef(String name, int line, int column) throws TurtleParseException {
// URI u = null;
// try {
// u = new URI(name);
// } catch (URISyntaxException e) {
// e.printStackTrace();
// throw new TurtleParseException(e + " line " + line + " column " + column);
// }
//
// if (!u.isAbsolute()) {
// u = _prologue.getBaseURI().resolve(u);
//
// return u.toString();
// }
//
// return name;
// }
protected String resolvePName(String qname, int line, int column) throws TurtleParseException {
// It's legal.
int idx = qname.indexOf(':') ;
// -- Escapes in local name
String prefix = qname.substring(0, idx) ;
String local = qname.substring(idx+1) ;
local = unescapePName(local, line, column) ;
_log.log(Level.FINE, "Expanding prefix {0}, localname {1}", new Object[] { prefix, local } );
String s = _prologue.expandPrefix(prefix) ;
if ( s == null ) {
throw new TurtleParseException("Unresolved prefix: "+qname, line, column) ;
}
return s+local ;
}
public BNode getBNode(String id) {
return new BNode(id);
}
// -------- Basic Graph Patterns and Blank Node label scopes
// A BasicGraphPattern is any sequence of TripleBlocks, separated by filters,
// but not by other graph patterns.
protected void startTriplesBlock()
{ }
protected void endTriplesBlock()
{ }
// --------
// BNode from a list
// protected Node createListNode()
// { return listLabelMap.allocNode() ; }
protected Node createListNode(int line, int column) { return createBNode() ; }
// Unlabelled bNode.
public BNode createBNode() {
String s = _prologue.getBaseURI().toString();
return BNode.createBNode(s, "b"+_bnodeid++);
}
// Labelled bNode.
protected Node createBNode(String label, int line, int column) {
String s = _prologue.getBaseURI().toString();
return BNode.createBNode(s, label);
}
protected String fixupPrefix(String prefix, int line, int column)
{
// \ u processing!
if ( prefix.endsWith(":") )
prefix = prefix.substring(0, prefix.length()-1) ;
return prefix ;
}
protected void setPrefix(int line, int column, String prefix, String uri) {
_prologue.setPrefix(prefix, uri);
}
protected void setBase(String base, int line, int column) {
_prologue.setBaseURI(new Resource(base.substring(1, base.length()-1)));
}
protected void emitTriple(int line, int column, Nodes triple) {
_callback.processStatement(triple.getNodeArray());
}
// Utilities to remove escapes in strings.
public static String unescapeStr(String s) throws TurtleParseException
{ return unescape(s, '\\', false, 1, 1) ; }
// public static String unescapeCodePoint(String s)
// { return unescape(s, '\\', true, 1, 1) ; }
//
// protected String unescapeCodePoint(String s, int line, int column)
// { return unescape(s, '\\', true, line, column) ; }
public static String unescapeStr(String s, int line, int column) throws TurtleParseException
{ return unescape(s, '\\', false, line, column) ; }
// Worker function
public static String unescape(String s, char escape, boolean pointCodeOnly, int line, int column) throws TurtleParseException
{
int i = s.indexOf(escape) ;
if ( i == -1 )
return s ;
// Dump the initial part straight into the string buffer
StringBuilder sb = new StringBuilder(s.substring(0,i)) ;
for ( ; i < s.length() ; i++ )
{
char ch = s.charAt(i) ;
// Keep line and column numbers.
switch (ch)
{
case '\n':
case '\r':
line++ ;
column = 1 ;
break ;
default:
column++ ;
break ;
}
if ( ch != escape )
{
sb.append(ch) ;
continue ;
}
// Escape
if ( i >= s.length()-1 )
throw new TurtleParseException("Illegal escape at end of string", line, column) ;
char ch2 = s.charAt(i+1) ;
column = column+1 ;
i = i + 1 ;
// \\u and \\U
if ( ch2 == 'u' )
{
// i points to the \ so i+6 is next character
if ( i+4 >= s.length() )
throw new TurtleParseException("\\u escape too short", line, column) ;
int x = hex(s, i+1, 4, line, column) ;
sb.append((char)x) ;
// Jump 1 2 3 4 -- already skipped \ and u
i = i+4 ;
column = column+4 ;
continue ;
}
if ( ch2 == 'U' )
{
// i points to the \ so i+6 is next character
if ( i+8 >= s.length() )
throw new TurtleParseException("\\U escape too short", line, column) ;
int x = hex(s, i+1, 8, line, column) ;
// Convert to UTF-16 codepoint pair.
sb.append((char)x) ;
// Jump 1 2 3 4 5 6 7 8 -- already skipped \ and u
i = i+8 ;
column = column+8 ;
continue ;
}
// Are we doing just point code escapes?
// If so, \X-anything else is legal as a literal "\" and "X"
if ( pointCodeOnly )
{
sb.append('\\') ;
sb.append(ch2) ;
i = i + 1 ;
continue ;
}
// Not just codepoints. Must be a legal escape.
char ch3 = 0 ;
switch (ch2)
{
case 'n': ch3 = '\n' ; break ;
case 't': ch3 = '\t' ; break ;
case 'r': ch3 = '\r' ; break ;
case 'b': ch3 = '\b' ; break ;
case 'f': ch3 = '\f' ; break ;
case '\'': ch3 = '\'' ; break ;
case '\"': ch3 = '\"' ; break ;
case '\\': ch3 = '\\' ; break ;
default:
throw new TurtleParseException("Unknown escape: \\"+ch2, line, column) ;
}
sb.append(ch3) ;
}
return sb.toString() ;
}
// Line and column that started the escape
public static int hex(String s, int i, int len, int line, int column) throws TurtleParseException
{
// if ( i+len >= s.length() )
// {
//
// }
int x = 0 ;
for ( int j = i ; j < i+len ; j++ )
{
char ch = s.charAt(j) ;
column++ ;
int k = 0 ;
switch (ch)
{
case '0': k = 0 ; break ;
case '1': k = 1 ; break ;
case '2': k = 2 ; break ;
case '3': k = 3 ; break ;
case '4': k = 4 ; break ;
case '5': k = 5 ; break ;
case '6': k = 6 ; break ;
case '7': k = 7 ; break ;
case '8': k = 8 ; break ;
case '9': k = 9 ; break ;
case 'A': case 'a': k = 10 ; break ;
case 'B': case 'b': k = 11 ; break ;
case 'C': case 'c': k = 12 ; break ;
case 'D': case 'd': k = 13 ; break ;
case 'E': case 'e': k = 14 ; break ;
case 'F': case 'f': k = 15 ; break ;
default:
throw new TurtleParseException("Illegal hex escape: "+ch, line, column) ;
}
x = (x<<4)+k ;
}
return x ;
}
public static String unescapePName(String s, int line, int column) throws TurtleParseException
{
char escape = '\\' ;
int idx = s.indexOf(escape) ;
if ( idx == -1 )
return s ;
int len = s.length() ;
StringBuilder sb = new StringBuilder() ;
for ( int i = 0 ; i < len ; i++ )
{
// Copied form unescape abobve - share!
char ch = s.charAt(i) ;
// Keep line and column numbers.
switch (ch)
{
case '\n':
case '\r':
line++ ;
column = 1 ;
break ;
default:
column++ ;
break ;
}
if ( ch != escape )
{
sb.append(ch) ;
continue ;
}
// Escape
if ( i >= s.length()-1 )
throw new TurtleParseException("Illegal escape at end of string", line, column) ;
char ch2 = s.charAt(i+1) ;
column = column+1 ;
i = i + 1 ;
switch (ch2)
{
case '~' :
case '.' :
case '-' :
case '!' :
case '$' :
case '&' :
case '\'' :
case '(' :
case ')' :
case '*' :
case '+' :
case ',' :
case ';' :
case '=' :
case ':' :
case '/' :
case '?' :
case '#' :
case '@' :
case '%' :
sb.append(ch2) ;
break ;
default:
throw new TurtleParseException("Illegal prefix name escape: "+ch2, line, column) ;
}
}
return sb.toString() ;
}
protected void warnDeprecation(String msg)
{
System.err.println(msg);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy