![JAR search and dependency download from the Maven repository](/logo.png)
org.odftoolkit.odfdom.pkg.rdfa.RDFaParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of odfdom-java Show documentation
Show all versions of odfdom-java Show documentation
ODFDOM is an OpenDocument Format (ODF) framework. Its purpose
is to provide an easy common way to create, access and
manipulate ODF files, without requiring detailed knowledge of
the ODF specification. It is designed to provide the ODF
developer community with an easy lightweight programming API
portable to any object-oriented language.
The current reference implementation is written in Java.
/************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
*
* Use is subject to license terms.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0. You can also
* obtain a copy of the License at http://odftoolkit.org/docs/license.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*
************************************************************************/
package org.odftoolkit.odfdom.pkg.rdfa;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import net.rootdev.javardfa.Constants;
import net.rootdev.javardfa.Setting;
import net.rootdev.javardfa.literal.LiteralCollector;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
/**
 * An RDFa parser adapted from {@code net.rootdev.javardfa.Parser}.
 *
 * <p>Element and text notifications arrive through
 * {@link #beginRDFaElement}, {@link #endRDFaElement} and
 * {@link #writeCharacters}. Each start element is turned into a StAX
 * {@link StartElement}, run through {@link #parse}, and the resulting triples
 * are handed to the {@link JenaSink}. Literal content is gathered by the
 * {@link LiteralCollector}.
 *
 * <p>Elements whose local name is {@code bookmark-start} are not parsed; text
 * reported between the begin and end notification of such an element is
 * dropped (see the {@code ignore} flag).
 */
class RDFaParser extends net.rootdev.javardfa.Parser {

    /** Local name of the element that is skipped instead of being parsed. */
    private static final String BOOKMARK_START = "bookmark-start";

    /** While {@code true}, {@link #writeCharacters} discards its input. */
    boolean ignore = false;
    protected XMLEventFactory eventFactory;
    protected JenaSink sink;
    protected Set<Setting> settings;
    protected LiteralCollector literalCollector;
    protected URIExtractor extractor;
    // NOTE(review): never assigned inside this class; it is only read as a
    // fallback in beginRDFaElement when no base has been set. Presumably a
    // collaborator sets it - verify, otherwise call setBase() first.
    protected Locator locator;
    /** Evaluation context of the element currently being processed. */
    protected EvalContext context;

    /**
     * Creates a parser that reports to {@code sink}.
     *
     * @param sink          receiver of prefixes, base URI and triples
     * @param outputFactory factory handed to the literal collector; it is
     *                      switched to namespace-repairing mode
     * @param eventFactory  factory used to build StAX events
     * @param extractor     resolver for URIs and CURIEs; receives this
     *                      parser's (initially empty) settings
     */
    protected RDFaParser(JenaSink sink, XMLOutputFactory outputFactory,
            XMLEventFactory eventFactory, URIExtractor extractor) {
        super(sink);
        this.sink = sink;
        this.eventFactory = eventFactory;
        this.settings = EnumSet.noneOf(Setting.class);
        this.extractor = extractor;
        this.literalCollector = new LiteralCollector(this, eventFactory,
                outputFactory);
        extractor.setSettings(settings);
        // Important, although I guess the caller doesn't get total control
        outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES,
                true);
    }

    /**
     * Handles the start of an element.
     *
     * @param arg0      namespace URI of the element
     * @param localname local name of the element
     * @param qname     qualified name of the element
     * @param arg3      attributes of the element
     * @throws RuntimeException wrapping any {@link XMLStreamException}
     */
    protected void beginRDFaElement(String arg0, String localname,
            String qname, Attributes arg3) {
        if (localname.equals(BOOKMARK_START)) {
            ignore = true;
            return;
        }
        try {
            // The base is set very late in some html5 cases (not even ready
            // by document start), so fall back to the locator here.
            if (context == null) {
                setBase(locator.getSystemId());
            }
            // Derived from the qname rather than from localname, which is
            // not reliable for every SAX source.
            String prefix = getPrefix(qname);
            if (settings.contains(Setting.ManualNamespaces)) {
                getNamespaces(arg3);
                if (prefix.length() != 0) {
                    arg0 = context.getNamespaceURI(prefix);
                    localname = localname.substring(prefix.length() + 1);
                }
            }
            StartElement e = eventFactory.createStartElement(prefix, arg0,
                    localname, fromAttributes(arg3), null, context);
            if (literalCollector.isCollecting()) {
                literalCollector.handleEvent(e);
            }
            // If we are gathering XML we stop parsing
            if (!literalCollector.isCollectingXML()) {
                context = parse(context, e);
            }
        } catch (XMLStreamException ex) {
            throw new RuntimeException("Streaming issue", ex);
        }
    }

    /**
     * Handles the end of an element.
     *
     * @param arg0      namespace URI of the element
     * @param localname local name of the element
     * @param qname     qualified name of the element
     */
    protected void endRDFaElement(String arg0, String localname, String qname) {
        if (localname.equals(BOOKMARK_START)) {
            ignore = false;
            return;
        }
        if (literalCollector.isCollecting()) {
            // A qname without a colon has no prefix even when it differs
            // from localname (e.g. an empty localname); slicing at
            // indexOf(':') == -1 would throw.
            int colon = qname.indexOf(':');
            String prefix = (localname.equals(qname) || colon == -1)
                    ? ""
                    : qname.substring(0, colon);
            XMLEvent e = eventFactory.createEndElement(prefix, arg0, localname);
            literalCollector.handleEvent(e);
        }
        // If we aren't collecting an XML literal keep parsing
        if (!literalCollector.isCollectingXML()) {
            context = context.parent;
        }
    }

    /**
     * Forwards character data to the literal collector while it is
     * collecting. Text is discarded while {@code ignore} is set.
     *
     * @param value the character data
     */
    protected void writeCharacters(String value) {
        if (ignore || !literalCollector.isCollecting()) {
            return;
        }
        XMLEvent e = eventFactory.createCharacters(value);
        literalCollector.handleEvent(e);
    }

    /**
     * Set the base uri of the DOM. This replaces the current evaluation
     * context with a fresh one and passes the base on to the sink.
     *
     * @param base the base URI
     */
    public void setBase(String base) {
        this.context = new EvalContext(base);
        sink.setBase(context.getBase());
    }

    /**
     * Processes one start element: determines the subject and object for the
     * element, emits the triples that can be completed now, starts literal
     * collection where needed, and builds the evaluation context for the
     * element's children.
     *
     * @param context the evaluation context of the parent
     * @param element the element being opened
     * @return the evaluation context to use for the element's children
     * @throws XMLStreamException declared for the StAX-based collaborators
     */
    protected EvalContext parse(EvalContext context, StartElement element)
            throws XMLStreamException {
        boolean skipElement = false;
        String newSubject = null;
        String currentObject = null;
        List<String> forwardProperties = new LinkedList<String>();
        List<String> backwardProperties = new LinkedList<String>();
        String currentLanguage = context.language;

        if (settings.contains(Setting.OnePointOne)) {
            Attribute vocabAttr = getAttributeByName(element, Constants.vocab);
            if (vocabAttr != null) {
                context.vocab = vocabAttr.getValue().trim();
            }
            Attribute prefixAttr = getAttributeByName(element, Constants.prefix);
            if (prefixAttr != null) {
                parsePrefixes(prefixAttr.getValue(), context);
            }
        }

        // The xml / html namespace matching is a bit ropey. I wonder if the
        // html 5 parser has a setting for this?
        Attribute langAttr;
        if (settings.contains(Setting.ManualNamespaces)) {
            // xml:lang wins over lang when both are present
            langAttr = findAttribute(element, Constants.xmllang, Constants.lang);
        } else {
            langAttr = getAttributeByName(element, Constants.xmllangNS);
        }
        if (langAttr != null) {
            currentLanguage = langAttr.getValue();
            // An empty language tag resets the language
            if (currentLanguage.length() == 0) {
                currentLanguage = null;
            }
        }

        if (Constants.base.equals(element.getName())) {
            Attribute baseHref = getAttributeByName(element, Constants.href);
            if (baseHref != null) {
                context.setBase(baseHref.getValue());
                sink.setBase(context.getBase());
            }
        }

        Attribute relAttr = getAttributeByName(element, Constants.rel);
        Attribute revAttr = getAttributeByName(element, Constants.rev);
        Attribute typeofAttr = getAttributeByName(element, Constants.typeof);
        Attribute propertyAttr = getAttributeByName(element, Constants.property);
        boolean headOrBody = Constants.head.equals(element.getName())
                || Constants.body.equals(element.getName());

        if (revAttr == null && relAttr == null) {
            Attribute nSubj = findAttribute(element, Constants.about);
            if (nSubj != null) {
                newSubject = extractor.getURI(element, nSubj, context);
            }
            if (newSubject == null) {
                if (headOrBody) {
                    newSubject = context.base;
                } else if (typeofAttr != null) {
                    newSubject = createBNode();
                } else {
                    if (context.parentObject != null) {
                        newSubject = context.parentObject;
                    }
                    if (propertyAttr == null) {
                        skipElement = true;
                    }
                }
            }
        } else {
            Attribute nSubj = findAttribute(element, Constants.about,
                    Constants.src);
            if (nSubj != null) {
                newSubject = extractor.getURI(element, nSubj, context);
            }
            if (newSubject == null) {
                // if element is head or body assume about=""
                if (headOrBody) {
                    newSubject = context.base;
                } else if (typeofAttr != null) {
                    newSubject = createBNode();
                } else if (context.parentObject != null) {
                    newSubject = context.parentObject;
                }
            }
            Attribute cObj = findAttribute(element, Constants.resource,
                    Constants.href);
            if (cObj != null) {
                currentObject = extractor.getURI(element, cObj, context);
            }
        }

        if (newSubject != null && typeofAttr != null) {
            List<String> types = extractor.getURIs(element, typeofAttr, context);
            for (String type : types) {
                emitTriples(newSubject, Constants.rdfType, type);
            }
        }

        if (currentObject != null) {
            if (relAttr != null) {
                emitTriples(newSubject,
                        extractor.getURIs(element, relAttr, context),
                        currentObject);
            }
            if (revAttr != null) {
                emitTriples(currentObject,
                        extractor.getURIs(element, revAttr, context),
                        newSubject);
            }
        } else {
            // No object yet: remember the predicates so that descendants
            // can complete the triples.
            if (relAttr != null) {
                forwardProperties.addAll(
                        extractor.getURIs(element, relAttr, context));
            }
            if (revAttr != null) {
                backwardProperties.addAll(
                        extractor.getURIs(element, revAttr, context));
            }
            if (!forwardProperties.isEmpty() || !backwardProperties.isEmpty()) {
                // if predicate present
                currentObject = createBNode();
            }
        }

        // Getting literal values. Complicated!
        if (propertyAttr != null) {
            List<String> props = extractor.getURIs(element, propertyAttr, context);
            String dt = getDatatype(element);
            Attribute contentAttr = getAttributeByName(element, Constants.content);
            if (contentAttr != null) {
                // The easy bit: the literal is given in the attribute
                String lex = contentAttr.getValue();
                if (dt == null || dt.length() == 0) {
                    emitTriplesPlainLiteral(newSubject, props, lex,
                            currentLanguage);
                } else {
                    emitTriplesDatatypeLiteral(newSubject, props, lex, dt);
                }
            } else {
                // The literal is the element content; gather it as it arrives
                literalCollector.collect(newSubject, props, dt, currentLanguage);
            }
        }

        // Complete the triples left open by the parent element
        if (!skipElement && newSubject != null) {
            emitTriples(context.parentSubject, context.forwardProperties,
                    newSubject);
            emitTriples(newSubject, context.backwardProperties,
                    context.parentSubject);
        }

        EvalContext ec = new EvalContext(context);
        ec.language = currentLanguage;
        if (!skipElement) {
            ec.parentSubject = (newSubject != null)
                    ? newSubject
                    : context.parentSubject;
            if (currentObject != null) {
                ec.parentObject = currentObject;
            } else if (newSubject != null) {
                ec.parentObject = newSubject;
            } else {
                ec.parentObject = context.parentSubject;
            }
            ec.forwardProperties = forwardProperties;
            ec.backwardProperties = backwardProperties;
        }
        return ec;
    }

    /**
     * Registers every {@code xmlns:prefix} declaration found in
     * {@code attrs} with the current context, the extractor and the sink.
     * A bare {@code xmlns} declaration has no prefix part and is not
     * registered.
     */
    private void getNamespaces(Attributes attrs) {
        for (int i = 0; i < attrs.getLength(); i++) {
            String qname = attrs.getQName(i);
            String prefix = getPrefix(qname);
            if (!"xmlns".equals(prefix)) {
                continue;
            }
            String pre = getLocal(prefix, qname);
            String uri = attrs.getValue(i);
            if (!settings.contains(Setting.ManualNamespaces)
                    && pre.contains("_")) {
                continue; // not permitted
            }
            context.setNamespaceURI(pre, uri);
            extractor.setNamespaceURI(pre, uri);
            sink.addPrefix(pre, uri);
        }
    }

    /**
     * Returns the part of {@code qname} before the first colon, or the empty
     * string when there is no colon.
     */
    private String getPrefix(String qname) {
        int colon = qname.indexOf(':');
        return (colon == -1) ? "" : qname.substring(0, colon);
    }

    /**
     * Returns {@code qname} with {@code prefix} and the following colon
     * removed, or {@code qname} itself when {@code prefix} is empty.
     */
    private String getLocal(String prefix, String qname) {
        if (prefix.length() == 0) {
            return qname;
        }
        return qname.substring(prefix.length() + 1);
    }

    /**
     * Converts SAX attributes into StAX attribute events, leaving out
     * namespace declarations ({@code xmlns} and {@code xmlns:*}).
     */
    private Iterator<Attribute> fromAttributes(Attributes attributes) {
        List<Attribute> toReturn = new LinkedList<Attribute>();
        for (int i = 0; i < attributes.getLength(); i++) {
            String qname = attributes.getQName(i);
            if (qname.equals("xmlns") || qname.startsWith("xmlns:")) {
                continue;
            }
            toReturn.add(eventFactory.createAttribute(getPrefix(qname),
                    attributes.getURI(i), attributes.getLocalName(i),
                    attributes.getValue(i)));
        }
        return toReturn.iterator();
    }

    /**
     * Returns the first attribute of {@code element} matching one of
     * {@code names}, tried in the given order, or {@code null} if none
     * matches.
     */
    private Attribute findAttribute(StartElement element, QName... names) {
        for (QName aName : names) {
            Attribute a = getAttributeByName(element, aName);
            if (a != null) {
                return a;
            }
        }
        return null;
    }

    /**
     * Parses a whitespace separated list of {@code prefix: IRI} pairs and
     * registers each pair with {@code context} and the sink. A pair whose
     * first token does not end in a colon, and a trailing prefix without an
     * IRI, are skipped.
     */
    private void parsePrefixes(String value, EvalContext context) {
        // Trim first: split() keeps a leading empty token when the value
        // starts with whitespace, which would shift every pair by one.
        String[] parts = value.trim().split("\\s+");
        for (int i = 0; i + 1 < parts.length; i += 2) {
            String prefix = parts[i];
            if (prefix.endsWith(":")) {
                String prefixFix = prefix.substring(0, prefix.length() - 1);
                context.setPrefix(prefixFix, parts[i + 1]);
                sink.addPrefix(prefixFix, parts[i + 1]);
            }
        }
    }

    /**
     * Looks up an attribute using {@code Util.qNameEquals} for the name
     * comparison.
     *
     * @return the matching attribute, or {@code null} when either argument
     *         is {@code null} or no attribute matches
     */
    private Attribute getAttributeByName(StartElement element, QName name) {
        if (name == null || element == null) {
            return null;
        }
        Iterator<?> it = element.getAttributes();
        while (it.hasNext()) {
            Attribute at = (Attribute) it.next();
            if (Util.qNameEquals(at.getName(), name)) {
                return at;
            }
        }
        return null;
    }

    /** Counter used to number the blank nodes created by this parser. */
    int bnodeId = 0;

    /**
     * Returns a new blank node label of the form {@code _:nodeN}.
     */
    private String createBNode() // TODO probably broken? Can you write bnodes
                                 // in rdfa directly?
    {
        return "_:node" + (bnodeId++);
    }

    /**
     * Returns the datatype declared on {@code element}: {@code null} when
     * there is no datatype attribute, the empty string when the attribute is
     * empty, otherwise the attribute value expanded as a CURIE.
     */
    private String getDatatype(StartElement element) {
        Attribute de = getAttributeByName(element, Constants.datatype);
        if (de == null) {
            return null;
        }
        String dt = de.getValue();
        if (dt.length() == 0) {
            return dt;
        }
        return extractor.expandCURIE(element, dt, context);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy