/*
 * All downloads are FREE. Search and download functionality uses the official Maven repository.
 *
 * org.semanticweb.owlapi.rdf.turtle.parser.TurtleParser.jj Maven / Gradle / Ivy
 *
 * There is a newer version: 5.5.0
 * Show newest version
 */
options {
    // Process Java-style Unicode escape sequences in the input stream.
    JAVA_UNICODE_ESCAPE = true;
    // Generate an instance-based parser rather than one with static state.
    STATIC = false;
    // Default number of tokens examined at each choice point.
    LOOKAHEAD = 2;
    //DEBUG_TOKEN_MANAGER=true;
    //DEBUG_PARSER=true;
}

PARSER_BEGIN(TurtleParser)

package org.semanticweb.owlapi.rdf.turtle.parser;

import java.io.InputStream;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;

import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.NodeID;
import org.semanticweb.owlapi.model.PrefixManager;
import org.semanticweb.owlapi.util.DefaultPrefixManager;
import org.semanticweb.owlapi.util.EscapeUtils;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
import org.semanticweb.owlapi.vocab.XSDVocabulary;

/**
 * Hand-written part of the Turtle parser: the state and helper methods that the grammar actions
 * call to create IRIs, allocate blank nodes and expand prefixed names.
 */
public class TurtleParser {

    /**
     * IRIs already created, keyed by the string they were created from, so that repeated
     * references yield the same instance. Initialised at declaration so the cache exists no
     * matter which constructor was used to build the parser.
     */
    private final Map<String, IRI> string2IRI = new HashMap<String, IRI>();

    /** IRI against which relative IRI references are resolved. */
    private IRI base;

    /** Receiver of the events produced while parsing. */
    private TripleHandler handler;

    /** Prefix mappings known to this parser. */
    private PrefixManager pm = new DefaultPrefixManager();

    /** Instantiates a new turtle parser.
     * 
     * @param reader
     *            the reader supplying the document text
     * @param handler
     *            the handler that receives parse events
     * @param base
     *            the IRI used to resolve relative IRI references */
    public TurtleParser(Reader reader, TripleHandler handler, IRI base) {
        this(reader);
        init(handler, base);
    }

    /** Instantiates a new turtle parser.
     * 
     * @param is
     *            the input stream supplying the document
     * @param handler
     *            the handler that receives parse events
     * @param base
     *            the IRI used to resolve relative IRI references */
    public TurtleParser(InputStream is, TripleHandler handler, IRI base) {
        this(is);
        init(handler, base);
    }

    /** Shared set-up for the public constructors.
     * 
     * @param handler
     *            the handler that receives parse events
     * @param base
     *            the IRI used to resolve relative IRI references */
    private void init(TripleHandler handler, IRI base) {
        this.handler = handler;
        this.base = base;
        pm.setDefaultPrefix("http://www.semanticweb.org/owl/owlapi/turtle#");
    }

    /** Gets the prefix manager.
     * 
     * @return the prefix manager holding the mappings known to this parser */
    public PrefixManager getPrefixManager() {
        return pm;
    }

    /** Sets the triple handler.
     * 
     * @param handler
     *            the new triple handler */
    public void setTripleHandler(TripleHandler handler) {
        this.handler = handler;
    }

    /** Gets the IRI for a blank node, creating and caching it on first use.
     * 
     * @param id
     *            the node id as written in the document, or {@code null} to allocate a fresh
     *            anonymous node
     * @return the IRI standing for the blank node; the same id yields the same instance */
    protected IRI getNextBlankNode(String id) {
        String key;
        if (id == null) {
            key = NodeID.nextAnonymousIRI();
        } else if (NodeID.isAnonymousNodeID(id)) {
            key = id;
        } else {
            key = NodeID.getIRIFromNodeID(id);
        }
        IRI iri = string2IRI.get(key);
        if (iri == null) {
            iri = IRI.create(key);
            string2IRI.put(key, iri);
        }
        return iri;
    }

    /** Expands a prefixed name into an IRI.
     * 
     * @param qname
     *            the prefixed name, including the colon
     * @return the expanded IRI
     * @throws ParseException
     *             if the name contains no colon or its prefix has not been declared */
    protected IRI getIRIFromQName(String qname) throws ParseException  {
        int colonIndex = qname.indexOf(':');
        if (colonIndex == -1) {
            throw new ParseException("Not a valid qname (missing ':') " + qname);
        }
        String prefix = qname.substring(0, colonIndex + 1);
        if (prefix.equals("_:")) {
            // Cached here rather than in getIRI so the node keeps one identity for the whole
            // document, even if the base IRI changes between two mentions.
            String genId = "genid" + qname.substring(colonIndex + 1);
            IRI node = string2IRI.get(genId);
            if (node == null) {
                node = getIRI(genId);
                string2IRI.put(genId, node);
            }
            return node;
        }
        if (!pm.containsPrefixMapping(prefix)) {
            throw new ParseException("Prefix not declared: " + prefix);
        }
        return pm.getIRI(qname);
    }

    /** Gets the IRI for an IRI reference, resolving relative references against the base.
     * 
     * @param s
     *            the IRI text, optionally wrapped in angle brackets
     * @return the IRI; a relative reference is resolved against the part of the current base's
     *         namespace up to and including its last '/', so a base IRI must have been set */
    public IRI getIRI(String s) {
        String ref = s;
        // The length guard keeps an empty or single-character string from failing in charAt
        // or substring.
        if (ref.length() > 1 && ref.charAt(0) == '<') {
            ref = ref.substring(1, ref.length() - 1);
        }
        IRI cached = string2IRI.get(ref);
        if (cached != null) {
            return cached;
        }
        IRI iri = IRI.create(ref);
        if (iri.isAbsolute()) {
            string2IRI.put(ref, iri);
            return iri;
        }
        // Deliberately not cached under the relative text: the base can be reassigned while
        // parsing, and a cached result would still point at the previous base.
        String namespace = base.getNamespace();
        return IRI.create(namespace.substring(0, namespace.lastIndexOf('/') + 1), ref);
    }

}

PARSER_END(TurtleParser)

// Space, line feed, tab and carriage return between tokens are discarded.
SKIP: {" " | "\n" | "\t" | "\r"}

// NOTE(review): this SKIP rule has no regular expression in its body; the definition appears to
// have been lost (anything written inside angle brackets is missing throughout this copy of the
// grammar). Restore it from the upstream grammar before generating the parser.
SKIP:
{
    
}

/////////////////////////////////////////////////////////////////////////////////////////////

// Lexical rules for quoted strings. Two lexical states are named here: IN_STRING and
// IN_LONG_STRING; each is left again by a TOKEN rule that switches back to DEFAULT.
// NOTE(review): several rules below have lost their regular expressions, and the rules that start
// with a stray leading space presumably carried a lexical-state prefix that is now missing.
// Verify every rule against the upstream grammar.

// Switches to IN_STRING; the pattern that triggers the switch is missing.
MORE : {
     : IN_STRING
}

 MORE :
{
    
}

 MORE :
{
    
}

// Returns to DEFAULT; the token name and pattern are missing.
 TOKEN:
{
     : DEFAULT
}

// Three double quotes or three single quotes open a long string and switch to IN_LONG_STRING.
MORE : {
   < THREEQUOTES: ("\"\"\""|"'''") > : IN_LONG_STRING
}

// Accumulates any single character.
 MORE :
{
    <  (~[]) >
}
 MORE :
{
    
}


// Returns to DEFAULT; the token name and pattern are missing.
 TOKEN :
{
     : DEFAULT
}


/////////////////////////////////////////////////////////////////////////////////////////////

// Token definitions referenced by the productions further down.
// NOTE(review): every definition in this section is either empty or truncated to the tail of its
// regular expression; the token names and any nested token references are missing. The surviving
// fragments ("." with repetitions) look like numeric literal patterns, but that is an inference.
// Restore the whole section from the upstream grammar.
TOKEN:
{
    
}

TOKEN:
{
    )+ >
}

TOKEN:
{
    )+ ("." ()*)?  |  "." ()+ ) >
}

TOKEN:
{
    )+ "." ()* | "." ()+ | ()+ )>
}


TOKEN:
{
    )+>
}


TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}



TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}


TOKEN:
{
    
}


TOKEN:
{
    >
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}

TOKEN:
{
    
}



////////////////////////////////////////////////////////////////////////////////////////////
//
// IRIs

// Token definitions for IRIs and prefixed names.
// NOTE(review): these definitions are truncated; token names, opening delimiters and nested token
// references are missing, so none of the rules below is valid as written. Restore them from the
// upstream grammar.
TOKEN:
{
    ", " "])*">">
}

// Surviving tail: an optional part followed by a colon.
TOKEN:
{
    )? ":">
}


TOKEN:
{
     | ["0"-"9"] ) ((|".")* )?>
}

TOKEN:
{
     >
}

TOKEN:
{
    ((|".")* )?>
}

TOKEN:
{
    
}


// Surviving alternatives: hyphen, ASCII digits, the middle dot and two combining/connector ranges.
TOKEN:
{
    
               | "-"
               | ["0"-"9"]
               | "\u00B7"
               | ["\u0300"-"\u036F"]
               | ["\u203F"-"\u2040"]>
}

// Surviving alternative: underscore.
TOKEN:
{
     | "_">
}



TOKEN:
{
    >
}


TOKEN:
{
    
}

/////////////////////////////////////////////////////////////////////////////////////////////


//TOKEN:
//{
//    ~["\""])>
//}



// Entry production: consumes one or more directives or statements and then tells the handler
// that parsing has finished.
// NOTE(review): the blank first line of the expansion and the lack of any separator or
// end-of-input token suggest token references were lost here; compare with the upstream grammar.
void parseDocument() :
{
}
{
    
    (parseDirective() | parseStatement())+ {handler.handleEnd();}
}

// A directive is either a prefix declaration or a base declaration.
void parseDirective() :
{}
{
    (
        parsePrefixDirective()
      |
        parseBaseDirective()
    )
}

// Parses a prefix declaration: records the mapping in the prefix manager and reports it to the
// handler. The prefix is the image of the matched token; the namespace is the IRI that follows.
// NOTE(review): "t=" has no token on its right-hand side, and no keyword token introduces the
// directive; both were presumably lost. Restore from the upstream grammar.
void parsePrefixDirective() :
{
    Token t;
    String prefix = "";
    IRI ns;
}
{
     t={prefix=t.image;} ns=parseIRI() {
        pm.setPrefix(prefix, ns.toString());
        handler.handlePrefixDirective(prefix, ns.toString());
    }
}

// Parses a base declaration: replaces the parser's base IRI and reports the new base to the
// handler. The first and last characters of the token image are dropped before the IRI is created.
// NOTE(review): "t=" has no token on its right-hand side and no keyword token introduces the
// directive; both were presumably lost. Restore from the upstream grammar.
void parseBaseDirective() :
{
    Token t;
}
{
     t= {base = IRI.create(t.image.substring(1, t.image.length() - 1));} {
        handler.handleBaseDirective(base);
    }
}

// A statement is handled entirely by parseTriples.
void parseStatement() :
{}
{
    ( parseTriples() )
}

// Parses a subject followed by an optional predicate-object list attached to that subject.
void parseTriples() :
{
    IRI subjectIri;
}
{
    subjectIri = parseSubject()
    (
        parsePredicateObjectList(subjectIri)
    )?
}

// Parses the subject of a triple: a resource or a blank node. Returns its IRI.
IRI parseSubject() :
{
    IRI node;
}
{
    (
        node = parseResource()
      |
        node = parseBlankNode()
    )
    {
        return node;
    }
}

// Expands the image of the matched token through getIRIFromQName and returns the resulting IRI.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost. Restore from the upstream grammar.
IRI parseLoneNS() :
{
    Token t;
}
{
    t= {
        return getIRIFromQName(t.image);
    }
}
// Expands the image of the matched token through getIRIFromQName and returns the resulting IRI.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost, which also makes this production textually identical to parseLoneNS. Restore from the
// upstream grammar.
IRI parseAbbreviatedIRI() :
{
    Token t;
}
{
    t= {
        return getIRIFromQName(t.image);
    }
}

// Passes the image of the matched token to getIRI and returns the resulting IRI.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost. Restore from the upstream grammar.
IRI parseIRI() :
{
    Token t;
}
{
    t= {return getIRI(t.image);}
}

// Parses a blank node in one of four forms and returns its IRI: a labelled node (parseNodeID),
// two forms that allocate a fresh anonymous node via getNextBlankNode(null) (the second of which
// may carry a predicate-object list about the new node), or a collection (parseCollection).
// NOTE(review): the second and third alternatives contain no token references, only actions, and
// there is an empty "()" group; the delimiting tokens were presumably lost. The local "t" is
// declared but not used in what remains. Restore from the upstream grammar.
IRI parseBlankNode() :
{
    IRI iri = null;
    Token t;
}
{
    (iri=parseNodeID()
    |
    {if(iri==null){iri = getNextBlankNode(null);}}

    |
     ({if(iri==null){iri = getNextBlankNode(null);}} parsePredicateObjectList(iri) ()?)?  {if (iri == null) {iri = getNextBlankNode(null); }}    
    |
    iri = parseCollection()) {
        return iri;
    }
}

// Returns the blank-node IRI for the label held in the matched token's image.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost. Restore from the upstream grammar.
IRI parseNodeID() :
{
    Token t;
}
{
    t= {
        return getNextBlankNode(t.image);
    }
}

// Parses one or more verb / object-list pairs, all attached to the given subject.
// NOTE(review): nothing separates consecutive pairs and the trailing group is an empty "()";
// separator tokens were presumably lost. Restore from the upstream grammar.
void parsePredicateObjectList(IRI subject) :
{
    IRI predicate;
}
{
    (predicate=parseVerb() parseObjectList(subject, predicate) ( predicate=parseVerb() parseObjectList(subject, predicate) )*) ()?
}

// Parses the verb of a triple and returns its IRI: either rdf:type or a predicate parsed by
// parsePredicate.
// NOTE(review): the first alternative is an action with no token in front of it, so as written
// it matches nothing; presumably a keyword token (the Turtle "a" shorthand?) was lost. Restore
// from the upstream grammar.
IRI parseVerb() :
{
    IRI iri;
}
{
    ({iri = OWLRDFVocabulary.RDF_TYPE.getIRI();} | iri=parsePredicate()) {
        return iri;
    }
}

// A predicate is any resource; returns its IRI.
IRI parsePredicate() :
{
    IRI property;
}
{
    property = parseResource()
    {
        return property;
    }
}

// Parses a resource written as a full IRI, an abbreviated IRI or a lone namespace prefix, and
// returns its IRI. The last two alternatives each use an explicit lookahead of two tokens.
IRI parseResource() :
{
    IRI resource;
}
{
    (
        resource = parseIRI()
      |
        LOOKAHEAD(2) resource = parseAbbreviatedIRI()
      |
        LOOKAHEAD(2) resource = parseLoneNS()
    )
    {
        return resource;
    }
}


// Parses one or more objects for the given subject and predicate.
// NOTE(review): nothing separates consecutive objects, and the doubled space inside the repeated
// group suggests a separator token was lost. Restore from the upstream grammar.
void parseObjectList(IRI subject, IRI predicate) :
{
    
}
{
    parseObject(subject, predicate) (  parseObject(subject, predicate) )*
}

// Parses one object. A literal is reported by parseLiteral itself; a resource or blank node is
// reported to the handler here as a triple with the given subject and predicate.
void parseObject(IRI subject, IRI predicate) :
{
    IRI target;
}
{
    (
        parseLiteral(subject, predicate)
      |
        (
            (
                target = parseResource()
              |
                target = parseBlankNode()
            )
        )
        {
            handler.handleTriple(subject, predicate, target);
        }
    )
}

// Parses a collection and returns the IRI that parseItemList reports as its head.
// NOTE(review): the extra spaces on either side of the parseItemList call suggest that
// delimiter tokens were lost. Restore from the upstream grammar.
IRI parseCollection() :
{
    IRI iri;
}
{
     iri=parseItemList()  {
        return iri;
    }
}

// Parses zero or more items and emits the linked-list triples for them. Each item gets a fresh
// blank node typed rdf:List whose rdf:first is the item; consecutive nodes are chained with
// rdf:rest and the last one points to rdf:nil. Returns the first node, or rdf:nil when there are
// no items.
IRI parseItemList() :
{
    IRI rdfType = OWLRDFVocabulary.RDF_TYPE.getIRI();
    IRI rdfFirst = OWLRDFVocabulary.RDF_FIRST.getIRI();
    IRI rdfRest = OWLRDFVocabulary.RDF_REST.getIRI();
    IRI rdfList = OWLRDFVocabulary.RDF_LIST.getIRI();
    IRI rdfNil = OWLRDFVocabulary.RDF_NIL.getIRI();
    // Head of the list; stays rdf:nil when no item is parsed.
    IRI head = OWLRDFVocabulary.RDF_NIL.getIRI();
    // Node for the item currently being parsed; null until the first item.
    IRI current = null;
}
{
    (
        {
            IRI previous = current;
            current = getNextBlankNode(null);
            if (previous == null) {
                // First item: its node is the head of the list.
                head = current;
            } else {
                // Link the preceding node to the new one.
                handler.handleTriple(previous, rdfRest, current);
            }
            if (current != null) {
                handler.handleTriple(current, rdfType, rdfList);
            }
        }
        parseObject(current, rdfFirst)
    )*
    {
        // Close the list after the last item.
        if (current != null) {
            handler.handleTriple(current, rdfRest, rdfNil);
        }
        return head;
    }
}


// Parses a literal object and reports the triple to the handler. A quoted string may be followed
// by a datatype or by "@" and a language tag, and is reported with whichever is present (datatype
// takes precedence in the action). Integers, doubles, decimals and booleans are reported with the
// matching XSD datatype.
// NOTE(review): the datatype alternative has no marker token in front of parseResource, and the
// language alternative has "t=" with nothing on its right-hand side; token references were
// presumably lost. Restore from the upstream grammar.
void parseLiteral(IRI subject, IRI predicate) :
{
    String literal;
    String lang = null;
    IRI datatype = null;
    Token t;
}
{
    (literal=parseQuotedString() (( datatype=parseResource()) | (("@" (t=)){lang=t.image;}))?
        {
            if(datatype != null) {
                handler.handleTriple(subject, predicate, literal, datatype);
            }
            else if(lang != null) {
                handler.handleTriple(subject, predicate, literal, lang);
            }
            else {
                handler.handleTriple(subject, predicate, literal);
            }

        }
    )
    |
    (literal=parseInteger(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.INTEGER.getIRI());})
    |
    (literal=parseDouble(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.DOUBLE.getIRI());})
    |
    (literal=parseDecimal(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.DECIMAL.getIRI());})
    |
    (literal=parseBoolean(){handler.handleTriple(subject, predicate, literal, XSDVocabulary.BOOLEAN.getIRI());})
}


// Returns the image of the matched token; the caller reports it as an xsd:integer literal.
// NOTE(review): both alternatives read "t=" with nothing on the right-hand side, so the two
// token references (presumably different tokens) were lost. Restore from the upstream grammar.
String parseInteger() :
{
    Token t;
}
{
    t= {
        return t.image;
    }
    |
    t= {
        return t.image;
    }
}

// Returns the image of the matched token; the caller reports it as an xsd:double literal.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost. Restore from the upstream grammar.
String parseDouble() :
{
    Token t;
}
{
    t= {
        return t.image;
    }
}

// Returns the image of the matched token; the caller reports it as an xsd:decimal literal.
// NOTE(review): "t=" has no token on its right-hand side; the token reference was presumably
// lost. Restore from the upstream grammar.
String parseDecimal() :
{
    Token t;
}
{
    t= {
        return t.image;
    }
}

// Returns the image of whichever of two tokens matched; the caller reports it as an xsd:boolean
// literal.
// NOTE(review): both "t=" assignments have nothing on the right-hand side; the two token
// references were presumably lost. Restore from the upstream grammar.
String parseBoolean() :
{
    Token t;
}
{
    (t= | t=) {
        return t.image;
    }
}

// Returns the text produced by parseString.
String parseQuotedString() :
{
    String text;
}
{
    text = parseString()
    {
        return text;
    }
}


// Parses a string token, strips its delimiters and returns the text after unescaping it with
// EscapeUtils.unescapeString. The first alternative drops one character from each end of the
// token image, the second drops three from each end.
// NOTE(review): both "t=" assignments have nothing on the right-hand side; the two token
// references (presumably the short and long string tokens) were lost. Restore from the upstream
// grammar.
String parseString() :
{
    Token t;
    String rawString = "";
}
{
    (t= {
        rawString = t.image.substring(1, t.image.length() - 1);
    }
    | t= {
        rawString = t.image.substring(3, t.image.length() - 3);
     }) {
        return EscapeUtils.unescapeString(rawString);
     }
}





// © 2015 - 2024 Weber Informatics LLC | Privacy Policy