// org.semanticweb.owlapi.rdf.turtle.parser.TurtleParser.jj (Maven / Gradle / Ivy)
// JavaCC generator options for this grammar.
options {
// Process Java-style unicode escapes in the input before tokenizing.
JAVA_UNICODE_ESCAPE=true;
// Generate an instantiable (non-static) parser; the class below keeps per-instance state.
STATIC=false;
JAVA_TEMPLATE_TYPE = "modern";
// The generated ParseException extends the OWL API parser exception type.
EXCEPTIONS_SUPER_CLASS = "org.semanticweb.owlapi.io.OWLParserException";
// Keep the generated support classes out of the public API.
SUPPORT_CLASS_VISIBILITY_PUBLIC=false;
// Uncomment to trace productions while debugging the grammar.
//DEBUG_PARSER=true;
}
PARSER_BEGIN(TurtleParser)
package org.semanticweb.owlapi.rdf.turtle.parser;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.NodeID;
import org.semanticweb.owlapi.model.PrefixManager;
import org.semanticweb.owlapi.util.DefaultPrefixManager;
import org.semanticweb.owlapi.util.EscapeUtils;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
import org.semanticweb.owlapi.vocab.XSDVocabulary;
/**
 * Hand-written part of the JavaCC-generated Turtle parser.
 *
 * <p>Holds the state shared by the grammar productions: the current base IRI,
 * the prefix manager, the {@link TripleHandler} that receives parse events and
 * a cache of the IRIs created so far.
 */
@SuppressWarnings("all")
public class TurtleParser {
    /**
     * IRIs created so far, keyed by their full string form. Initialised here
     * so that it is available whichever constructor is used.
     */
    private final Map<String, IRI> string2IRI = new HashMap<String, IRI>();
    /** Base IRI against which relative IRIs are resolved; replaced by a base directive. */
    private IRI base;
    /** Receiver of triples and directives. */
    private TripleHandler handler;
    /** Prefix name to namespace mappings declared so far. */
    private PrefixManager pm = new DefaultPrefixManager();

    /**
     * Instantiates a new turtle parser reading from a character stream.
     *
     * @param reader
     *        the reader to parse from
     * @param handler
     *        the handler receiving triples and directives
     * @param base
     *        the initial base IRI
     * @throws IOException
     *         declared for callers; propagated from the stream provider
     */
    public TurtleParser(Reader reader, TripleHandler handler, IRI base) throws IOException {
        this(new StreamProvider(reader));
        initialise(handler, base);
    }

    /**
     * Instantiates a new turtle parser reading from a byte stream.
     *
     * @param is
     *        the input stream to parse from
     * @param handler
     *        the handler receiving triples and directives
     * @param base
     *        the initial base IRI
     * @throws IOException
     *         declared for callers; propagated from the stream provider
     */
    public TurtleParser(InputStream is, TripleHandler handler, IRI base) throws IOException {
        this(new StreamProvider(is));
        initialise(handler, base);
    }

    /** Stores the handler and base IRI and installs the default prefix. */
    private void initialise(TripleHandler tripleHandler, IRI baseIRI) {
        this.handler = tripleHandler;
        this.base = baseIRI;
        pm.setDefaultPrefix("http://www.semanticweb.org/owl/owlapi/turtle#");
    }

    /**
     * Gets the prefix manager.
     *
     * @return the prefix manager holding the prefixes declared so far
     */
    public PrefixManager getPrefixManager() {
        return pm;
    }

    /**
     * Sets the triple handler.
     *
     * @param handler
     *        the new triple handler
     */
    public void setTripleHandler(TripleHandler handler) {
        this.handler = handler;
    }

    /**
     * Gets the IRI for a blank node.
     *
     * @param id
     *        the node id from the document, or {@code null} to mint a fresh
     *        anonymous node
     * @return the IRI standing for the blank node; the same id always yields
     *         the same IRI within this parser
     */
    protected IRI getNextBlankNode(String id) {
        String string;
        if (id == null) {
            string = NodeID.nextAnonymousIRI();
        } else if (NodeID.isAnonymousNodeID(id)) {
            string = id;
        } else {
            string = NodeID.getIRIFromNodeID(id);
        }
        IRI iri = string2IRI.get(string);
        if (iri == null) {
            iri = IRI.create(string);
            string2IRI.put(string, iri);
        }
        return iri;
    }

    /**
     * Gets the IRI for a prefixed name.
     *
     * @param qname
     *        the prefixed name, including the colon
     * @return the IRI the name expands to
     * @throws ParseException
     *         if the name has no colon or its prefix has not been declared
     */
    protected IRI getIRIFromQName(String qname) throws ParseException {
        int colonIndex = qname.indexOf(':');
        if (colonIndex == -1) {
            throw new ParseException("Not a valid qname (missing ':') " + qname);
        }
        // The prefix manager stores prefix names with their trailing colon.
        String prefix = qname.substring(0, colonIndex + 1);
        if (prefix.equals("_:")) {
            return getIRI("genid" + qname.substring(colonIndex + 1));
        }
        if (!pm.containsPrefixMapping(prefix)) {
            throw new ParseException("Prefix not declared: " + prefix);
        }
        return pm.getIRI(qname);
    }

    /**
     * Gets the IRI for a string, resolving it against the current base when
     * it is not absolute.
     *
     * @param s
     *        the IRI text, optionally wrapped in angle brackets
     * @return the (possibly cached) IRI
     */
    public IRI getIRI(String s) {
        if (s.startsWith("<")) {
            s = s.substring(1, s.length() - 1);
        }
        IRI iri = string2IRI.get(s);
        if (iri != null) {
            return iri;
        }
        iri = IRI.create(s);
        if (iri.isAbsolute()) {
            string2IRI.put(s, iri);
            return iri;
        }
        // Relative reference: resolve against everything up to the last '/'
        // of the current base. The result is cached under the resolved string,
        // not the relative one, because a base directive can change the base
        // and the same relative text must then resolve differently.
        String namespace = base.getNamespace();
        String prefix = namespace.substring(0, namespace.lastIndexOf('/') + 1);
        String resolved = prefix + s;
        iri = string2IRI.get(resolved);
        if (iri == null) {
            iri = IRI.create(prefix, s);
            string2IRI.put(resolved, iri);
        }
        return iri;
    }
}
PARSER_END(TurtleParser)
// Whitespace between tokens is discarded.
SKIP: {" " | "\n" | "\t" | "\r"}
// NOTE(review): this SKIP rule has no body as shown; its pattern appears to
// have been lost — confirm against the original grammar.
SKIP: { }
/////////////////////////////////////////////////////////////////////////////////////////////
// String literals are lexed with MORE rules in dedicated lexical states: an
// opening rule switches state, the body rules accumulate characters (a
// backslash followed by any character is taken as a unit), and a closing
// TOKEN rule returns to DEFAULT.
// NOTE(review): the opening/closing patterns, token names and per-state
// prefixes are missing from the rules below — confirm against the original.

// Short string, state IN_STRING; the body accepts any character except '"'.
MORE : { : IN_STRING}
MORE : {<"\\"~[] >}
MORE : {<~["\""]>}
TOKEN: { : DEFAULT}
// Short string, state IN_SINGLESTRING; the body accepts any character except '\''.
MORE : { : IN_SINGLESTRING}
MORE : {<"\\"~[] >}
MORE : {<~["'"]>}
TOKEN: { : DEFAULT}
// Long string, state IN_LONG_STRING; the body accepts any character.
MORE : { : IN_LONG_STRING}
MORE : {< (~[]) >}
MORE : {<"\\"~[] >}
TOKEN :{ : DEFAULT}
// Long string, state IN_LONG_SINGLESTRING; the body accepts any character.
MORE : { : IN_LONG_SINGLESTRING}
MORE : {<(~[]) >}
MORE : {<"\\"~[] >}
TOKEN :{ : DEFAULT}
/////////////////////////////////////////////////////////////////////////////////////////////
// Token declarations for the DEFAULT state.
// NOTE(review): most token names and patterns below are missing (the
// angle-bracketed parts appear to have been lost); the remaining fragments
// are kept verbatim — restore from the original grammar before regenerating.
TOKEN: { }
TOKEN: { )+ > }
TOKEN: { )+ ("." ()*)? | "." ()+ ) > }
TOKEN: { )+ "." ()* | "." ()+ | ()+ )> }
TOKEN: { )+> }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { > }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
TOKEN: { }
////////////////////////////////////////////////////////////////////////////////////////////
//
// IRIs
TOKEN: { ", " "])*">"> }
TOKEN: { (":" )* )? ":"> }
// The alternatives listed here include backslash-escaped punctuation characters.
TOKEN: { | ) ((|"."|"\\/"|"\\~"|"\\."|"\\-"|"\\!"|"\\$"|"\\&"|"\\'"|"\\("|"\\)"|"\\*"|"\\+"|"\\,"|"\\;"|"\\="|"\\?"|"\\#"|"\\@"|"\\%"|"\\_")* )?> }
TOKEN: { > }
TOKEN: { ((|".")* )?> }
// Character class built from explicit Unicode ranges.
TOKEN: { | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFDCF"] | ["\uFDF0"-"\uFFFD"]> }
TOKEN: { | "-" | | "\u00B7" | ["\u0300"-"\u036F"]|["\u203F"-"\u2040"]> }
TOKEN: { | "_"> }
TOKEN: { > }
TOKEN: { }
/////////////////////////////////////////////////////////////////////////////////////////////
// Entry point: reads one or more directives or statements and then tells the
// handler that the document has ended.
void parseDocument() :
{}
{
    (
        parseDirective()
      | parseStatement()
    )+
    { handler.handleEnd(); }
}
// A directive is either a prefix declaration or a base declaration.
void parseDirective() :
{}
{
    parsePrefixDirective()
  | parseBaseDirective()
}
// Prefix declaration: records the mapping from the prefix token's image to
// the namespace IRI in the prefix manager and reports it to the handler.
// NOTE(review): the token reference after "t=" is missing here — confirm
// against the original grammar.
void parsePrefixDirective() :
{
Token t;
IRI ns;
}
{
t= ns=parseIRI() {
pm.setPrefix(t.image, ns.toString());
handler.handlePrefixDirective(t.image, ns.toString());
}
}
// Base declaration: replaces the parser's base IRI with the token image minus
// its first and last character, then reports the new base to the handler.
// NOTE(review): the token reference after "t=" is missing here — confirm
// against the original grammar.
void parseBaseDirective() : { Token t; } { t= {base = IRI.create(t.image.substring(1, t.image.length() - 1));} { handler.handleBaseDirective(base); } }
// A statement is a single triples block.
void parseStatement() :
{}
{
    parseTriples()
}
// A triples block: a subject, optionally followed by the predicates and
// objects that are stated about it.
void parseTriples() :
{
    IRI subj;
}
{
    subj = parseSubject()
    [ parsePredicateObjectList(subj) ]
}
// A subject is either a resource or a blank node; returns its IRI.
IRI parseSubject() :
{
    IRI node;
}
{
    (
        node = parseResource()
      | node = parseBlankNode()
    )
    { return node; }
}
// NOTE(review): in the three productions below the token reference after
// "t=" is missing — confirm against the original grammar.

// Expands the matched token's image as a prefixed name.
IRI parseLoneNS() : { Token t; } { t= { return getIRIFromQName(t.image); } }
// Expands the matched token's image as a prefixed name.
IRI parseAbbreviatedIRI() : { Token t; } { t= { return getIRIFromQName(t.image); } }
// Converts the matched token's image to an IRI via getIRI (which strips
// surrounding angle brackets and resolves relative references).
IRI parseIRI() : { Token t; } { t= {return getIRI(t.image);} }
// Blank node in one of four forms; returns the IRI standing for the node.
// NOTE(review): several alternatives have lost their token references
// (leaving empty alternatives and "()?") — confirm against the original.
IRI parseBlankNode() : { IRI iri = null; }
{
// Form 1: an explicit node id.
(iri=parseNodeID()
// Form 2: mints a fresh anonymous node.
| {if(iri==null){iri = getNextBlankNode(null);}}
// Form 3: a fresh anonymous node that may carry its own predicate-object list.
| ({if(iri==null){iri = getNextBlankNode(null);}} parsePredicateObjectList(iri) ()?)? {if (iri == null) {iri = getNextBlankNode(null); }}
// Form 4: a collection; the node is the value parseCollection returns.
| iri = parseCollection()) { return iri; }
}
// Returns the blank node IRI for the matched token's image, as computed by
// getNextBlankNode.
// NOTE(review): the token reference after "t=" is missing here — confirm.
IRI parseNodeID() : { Token t; } { t= { return getNextBlankNode(t.image); } }
// One verb with its object list, followed by any number of further
// verb/object-list pairs for the same subject; the repetition is guarded by a
// two-token lookahead.
// NOTE(review): the trailing "()?" and the repetition appear to have lost a
// token reference — confirm against the original grammar.
void parsePredicateObjectList(IRI subject) : { IRI predicate; } { (predicate=parseVerb() parseObjectList(subject, predicate) (LOOKAHEAD(2) predicate=parseVerb() parseObjectList(subject, predicate) )*) ()? }
// Returns the predicate IRI: rdf:type for the first alternative, otherwise
// the parsed predicate.
// NOTE(review): the first alternative has only an action; the token that
// selects it appears to be missing — confirm against the original grammar.
IRI parseVerb() : { IRI iri; } { ({iri = OWLRDFVocabulary.RDF_TYPE.getIRI();} | iri=parsePredicate()) { return iri; } }
// A predicate is any resource; returns its IRI.
IRI parsePredicate() :
{
    IRI predicate;
}
{
    predicate = parseResource()
    { return predicate; }
}
// A resource is a full IRI, an abbreviated IRI or a lone namespace prefix.
// The last two are chosen with a two-token lookahead.
IRI parseResource() :
{
    IRI resource;
}
{
    (
        resource = parseIRI()
      | LOOKAHEAD(2) resource = parseAbbreviatedIRI()
      | LOOKAHEAD(2) resource = parseLoneNS()
    )
    { return resource; }
}
// One object followed by zero or more further objects, all for the same
// subject and predicate.
void parseObjectList(IRI subject, IRI predicate) :
{}
{
    parseObject(subject, predicate)
    (
        parseObject(subject, predicate)
    )*
}
// An object is either a literal (reported by parseLiteral itself) or a
// resource / blank node, for which the triple is reported here.
void parseObject(IRI subject, IRI predicate) :
{
    IRI node;
}
{
    (
        parseLiteral(subject, predicate)
      |
        (
            (
                node = parseResource()
              | node = parseBlankNode()
            )
        )
        { handler.handleTriple(subject, predicate, node); }
    )
}
// A collection; returns the IRI of the head of the generated list.
IRI parseCollection() :
{
    IRI head;
}
{
    head = parseItemList()
    { return head; }
}
// Reads the items of a collection and emits the corresponding RDF list.
// Every item gets a fresh blank node _x with
//   _x rdf:type  rdf:List
//   _x rdf:first <the item>
//   _x rdf:rest  <the next cell, or rdf:nil for the last one>
// Returns the first cell, or rdf:nil when there are no items.
IRI parseItemList() :
{
    IRI head = OWLRDFVocabulary.RDF_NIL.getIRI();
    IRI cell = null;
    IRI rdfType = OWLRDFVocabulary.RDF_TYPE.getIRI();
    IRI rdfFirst = OWLRDFVocabulary.RDF_FIRST.getIRI();
    IRI rdfRest = OWLRDFVocabulary.RDF_REST.getIRI();
    IRI rdfList = OWLRDFVocabulary.RDF_LIST.getIRI();
    IRI rdfNil = OWLRDFVocabulary.RDF_NIL.getIRI();
}
{
    (
        {
            IRI previous = cell;
            cell = getNextBlankNode(null);
            if (previous == null) {
                // First item: this cell is the head of the list.
                head = cell;
            } else {
                // Link the previous cell to the new one.
                handler.handleTriple(previous, rdfRest, cell);
            }
            if (cell != null) {
                handler.handleTriple(cell, rdfType, rdfList);
            }
        }
        parseObject(cell, rdfFirst)
    )*
    {
        // Close the list after the last cell, if there was one.
        if (cell != null) {
            handler.handleTriple(cell, rdfRest, rdfNil);
        }
        return head;
    }
}
// Parses a literal object and reports the triple to the handler.
// NOTE(review): the token that introduces the datatype and the token
// reference after "t=" are missing here — confirm against the original.
void parseLiteral(IRI subject, IRI predicate) :
{
String literal;
String lang = null;
IRI datatype = null;
Token t;
}
{
// Quoted string, optionally followed by a datatype or an "@" language tag.
(literal=parseQuotedString() (( datatype=parseResource()) | (("@" (t=)){lang=t.image;}))?
{
// A datatype takes precedence, then a language tag, otherwise a plain literal.
if(datatype != null) { handler.handleTriple(subject, predicate, literal, datatype); }
else if(lang != null) { handler.handleTriple(subject, predicate, literal, lang); }
else { handler.handleTriple(subject, predicate, literal); }
}
)
// Unquoted literals are reported with the matching XSD datatype.
| (literal=parseInteger(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.INTEGER.getIRI());})
| (literal=parseDouble(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.DOUBLE.getIRI());})
| (literal=parseDecimal(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.DECIMAL.getIRI());})
| (literal=parseBoolean(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.BOOLEAN.getIRI());})
}
// Each production below returns the image of the token it matches.
// NOTE(review): the token references after "t=" are missing in all four —
// confirm against the original grammar.

// Integer literal; two alternative tokens are accepted.
String parseInteger() : { Token t; } { ( t=|t=) { return t.image; } }
// Double literal.
String parseDouble() : { Token t; } { t= { return t.image; } }
// Decimal literal.
String parseDecimal() : { Token t; } { t= { return t.image; } }
// Boolean literal; two alternative tokens are accepted.
String parseBoolean() : { Token t; } { (t= | t=) { return t.image; } }
// A quoted string; returns the value produced by parseString.
String parseQuotedString() :
{
    String text;
}
{
    text = parseString()
    { return text; }
}
// Matches one of four string token kinds, strips the delimiters and returns
// the unescaped content.
// NOTE(review): the token references after "t=" are missing here — confirm
// against the original grammar.
String parseString() :
{
Token t;
String rawString = "";
}
{
// First two alternatives: one delimiter character is removed from each end.
(t= { rawString = t.image.substring(1, t.image.length() - 1); }
| t= { rawString = t.image.substring(1, t.image.length() - 1); }
// Last two alternatives: three delimiter characters are removed from each end.
| t= { rawString = t.image.substring(3, t.image.length() - 3); }
| t= { rawString = t.image.substring(3, t.image.length() - 3); })
{ return EscapeUtils.unescapeString(rawString); }
}