All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.odftoolkit.odfdom.pkg.rdfa.RDFaParser Maven / Gradle / Ivy

Go to download

ODFDOM is an OpenDocument Format (ODF) framework. Its purpose is to provide an easy, common way to create, access and manipulate ODF files, without requiring detailed knowledge of the ODF specification. It is designed to provide the ODF developer community with an easy, lightweight programming API portable to any object-oriented language. The current reference implementation is written in Java.

There is a newer version: 0.8.11-incubating
Show newest version
/************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
 * 
 * Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
 * 
 * Use is subject to license terms.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0. You can also
 * obtain a copy of the License at http://odftoolkit.org/docs/license.txt
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ************************************************************************/
package org.odftoolkit.odfdom.pkg.rdfa;

import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import net.rootdev.javardfa.Constants;
import net.rootdev.javardfa.Setting;
import net.rootdev.javardfa.literal.LiteralCollector;

import org.xml.sax.Attributes;
import org.xml.sax.Locator;

/**
 * An RDFa parser adapted from {@code net.rootdev.javardfa.Parser}, which it
 * also extends.
 */
class RDFaParser extends net.rootdev.javardfa.Parser {
	
	/**
	 * Set to {@code true} when a {@code bookmark-start} element begins and
	 * back to {@code false} when it ends; character data is dropped while it
	 * is set.
	 */
	boolean ignore = false;

	/** Factory used to build the StAX events handed to the literal collector and to {@code parse}. */
	protected XMLEventFactory eventFactory;
	/** Receiver of the base URI and prefix declarations discovered while parsing. */
	protected JenaSink sink;
	/** Active parser settings; starts out as an empty set. */
	protected Set<Setting> settings;
	/** Collects literal content for elements carrying a property without a content attribute. */
	protected LiteralCollector literalCollector;
	/** Resolves attribute values to URIs and expands CURIEs. */
	protected URIExtractor extractor;
	/**
	 * Supplies the system id used as the initial base when no context exists
	 * yet. NOTE(review): never assigned inside this class - presumably set by
	 * other code in this package; confirm it cannot be null when the first
	 * element arrives.
	 */
	protected Locator locator;
	/** Evaluation context of the element currently being processed. */
	protected EvalContext context;

	/**
	 * Creates a parser that reports to the given sink.
	 *
	 * @param sink          receiver passed to the superclass and kept for
	 *                      base/prefix notifications
	 * @param outputFactory factory handed to the literal collector; it is
	 *                      switched to namespace-repairing mode here
	 * @param eventFactory  factory used to create StAX events
	 * @param extractor     URI extractor; it receives this parser's settings
	 */
	protected RDFaParser(JenaSink sink, XMLOutputFactory outputFactory,
			XMLEventFactory eventFactory, URIExtractor extractor) {
		super(sink);
		this.extractor = extractor;
		this.settings = EnumSet.noneOf(Setting.class);
		this.eventFactory = eventFactory;
		this.sink = sink;

		this.literalCollector = new LiteralCollector(this, eventFactory,
				outputFactory);

		// The extractor shares the very same settings instance as the parser.
		this.extractor.setSettings(this.settings);

		// Namespace repairing is required here, even though it means the
		// caller does not keep full control over the supplied factory.
		outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES,
				Boolean.TRUE);
	}

	/**
	 * Handles the start of an element: converts the SAX-style arguments into
	 * a StAX {@link StartElement}, forwards it to the literal collector when
	 * one is active and, unless an XML literal is being gathered, evaluates
	 * it with {@code parse} to obtain the context for the element's content.
	 * <p>
	 * A {@code bookmark-start} element is not evaluated; it only switches on
	 * the {@code ignore} flag.
	 *
	 * @param arg0      namespace URI of the element
	 * @param localname local name of the element
	 * @param qname     qualified name of the element
	 * @param arg3      attributes of the element
	 * @throws RuntimeException wrapping any {@link XMLStreamException}
	 */
	protected void beginRDFaElement(String arg0, String localname,
			String qname, Attributes arg3) {
		if (localname.equals("bookmark-start")) {
			ignore = true;
			return;
		}
		try {
			// In some html5 cases the base only becomes available very late
			// (not even by document start), so the context is created lazily.
			if (context == null) {
				this.setBase(locator.getSystemId());
			}

			// The event factory wants the prefix separately, so it is cut
			// out of the qualified name.
			final int colonAt = qname.indexOf(':');
			final String prefix = (colonAt == -1) ? "" : qname.substring(0,
					colonAt);

			String namespaceUri = arg0;
			String elementName = localname;
			if (settings.contains(Setting.ManualNamespaces)) {
				getNamespaces(arg3);
				if (prefix.length() != 0) {
					namespaceUri = context.getNamespaceURI(prefix);
					elementName = elementName.substring(prefix.length() + 1);
				}
			}

			StartElement startEvent = eventFactory.createStartElement(prefix,
					namespaceUri, elementName, fromAttributes(arg3), null,
					context);

			if (literalCollector.isCollecting()) {
				literalCollector.handleEvent(startEvent);
			}

			// While an XML literal is being gathered, RDFa evaluation pauses.
			if (!literalCollector.isCollectingXML()) {
				context = parse(context, startEvent);
			}
		} catch (XMLStreamException ex) {
			throw new RuntimeException("Streaming issue", ex);
		}
	}
	
	/**
	 * Handles the end of an element: forwards a matching end event to the
	 * literal collector when one is active and, unless an XML literal is
	 * being gathered, steps back to the parent evaluation context.
	 * <p>
	 * A {@code bookmark-start} element only switches the {@code ignore} flag
	 * back off.
	 *
	 * @param arg0      namespace URI of the element
	 * @param localname local name of the element
	 * @param qname     qualified name of the element
	 */
	protected void endRDFaElement(String arg0, String localname, String qname) {
		if (localname.equals("bookmark-start")) {
			ignore = false;
			return;
		}
		if (literalCollector.isCollecting()) {
			// A qualified name without a colon carries no prefix. Checking
			// the colon position avoids substring(0, -1) when the local name
			// differs from an unprefixed qualified name.
			int colonAt = qname.indexOf(':');
			String prefix = (colonAt < 0 || localname.equals(qname)) ? ""
					: qname.substring(0, colonAt);
			XMLEvent e = eventFactory.createEndElement(prefix, arg0, localname);
			literalCollector.handleEvent(e);
		}
		// If we aren't collecting an XML literal keep parsing
		if (!literalCollector.isCollectingXML()) {
			context = context.parent;
		}
	}
	
	/**
	 * Passes character data to the literal collector as a characters event.
	 * Nothing happens while the {@code ignore} flag is set or while no
	 * literal is being collected.
	 *
	 * @param value the character data
	 */
	protected void writeCharacters(String value) {
		if (ignore || !literalCollector.isCollecting()) {
			return;
		}
		XMLEvent characters = eventFactory.createCharacters(value);
		literalCollector.handleEvent(characters);
	}
	
	/**
	 * Starts a fresh evaluation context for the given base URI and reports
	 * the context's base to the sink.
	 *
	 * @param base the base URI of the DOM
	 */
	public void setBase(String base) {
		EvalContext root = new EvalContext(base);
		this.context = root;
		sink.setBase(root.getBase());
	}

	/**
	 * Evaluates the RDFa attributes of one start element against the given
	 * context, emits the triples that can be completed immediately and
	 * returns the context to use for the element's content.
	 * <p>
	 * Steps, in order: optional {@code vocab}/{@code prefix} handling (only
	 * with {@code Setting.OnePointOne}), language resolution, base update,
	 * subject/object resolution, {@code typeof} triples, {@code rel}/
	 * {@code rev} triples or pending properties, {@code property} literals,
	 * and completion of the parent's pending properties.
	 *
	 * @param context the evaluation context of the enclosing element; its
	 *                vocab, prefixes and base may be updated
	 * @param element the start element to evaluate
	 * @return a new {@code EvalContext} constructed from {@code context}
	 * @throws XMLStreamException declared for callers; propagated from the
	 *                            helpers used here
	 */
	protected EvalContext parse(EvalContext context, StartElement element)
			throws XMLStreamException {
		boolean skipElement = false;
		String newSubject = null;
		String currentObject = null;
		List<String> forwardProperties = new LinkedList<String>();
		List<String> backwardProperties = new LinkedList<String>();
		String currentLanguage = context.language;

		if (settings.contains(Setting.OnePointOne)) {
			Attribute vocabAttr = getAttributeByName(element, Constants.vocab);
			if (vocabAttr != null) {
				context.vocab = vocabAttr.getValue().trim();
			}

			Attribute prefixAttr = getAttributeByName(element,
					Constants.prefix);
			if (prefixAttr != null) {
				parsePrefixes(prefixAttr.getValue(), context);
			}
		}

		currentLanguage = resolveLanguage(element, currentLanguage);

		if (Constants.base.equals(element.getName())) {
			Attribute baseHref = getAttributeByName(element, Constants.href);
			if (baseHref != null) {
				context.setBase(baseHref.getValue());
				sink.setBase(context.getBase());
			}
		}

		// The element's attributes do not change during evaluation, so each
		// one is looked up once instead of being re-scanned at every use.
		Attribute relAttr = getAttributeByName(element, Constants.rel);
		Attribute revAttr = getAttributeByName(element, Constants.rev);
		Attribute typeofAttr = getAttributeByName(element, Constants.typeof);
		Attribute propertyAttr = getAttributeByName(element,
				Constants.property);
		boolean headOrBody = Constants.body.equals(element.getName())
				|| Constants.head.equals(element.getName());

		if (revAttr == null && relAttr == null) {
			Attribute nSubj = findAttribute(element, Constants.about);
			if (nSubj != null) {
				newSubject = extractor.getURI(element, nSubj, context);
			}
			if (newSubject == null) {
				if (headOrBody) {
					newSubject = context.base;
				} else if (typeofAttr != null) {
					newSubject = createBNode();
				} else {
					if (context.parentObject != null) {
						newSubject = context.parentObject;
					}
					if (propertyAttr == null) {
						skipElement = true;
					}
				}
			}
		} else {
			Attribute nSubj = findAttribute(element, Constants.about,
					Constants.src);
			if (nSubj != null) {
				newSubject = extractor.getURI(element, nSubj, context);
			}
			if (newSubject == null) {
				// if element is head or body assume about=""
				if (headOrBody) {
					newSubject = context.base;
				} else if (typeofAttr != null) {
					newSubject = createBNode();
				} else if (context.parentObject != null) {
					newSubject = context.parentObject;
				}
			}
			Attribute cObj = findAttribute(element, Constants.resource,
					Constants.href);
			if (cObj != null) {
				currentObject = extractor.getURI(element, cObj, context);
			}
		}

		if (newSubject != null && typeofAttr != null) {
			List<String> types = extractor.getURIs(element, typeofAttr,
					context);
			for (String type : types) {
				emitTriples(newSubject, Constants.rdfType, type);
			}
		}

		if (currentObject != null) {
			// The object is known: rel/rev triples can be emitted right away.
			if (relAttr != null) {
				emitTriples(newSubject,
						extractor.getURIs(element, relAttr, context),
						currentObject);
			}
			if (revAttr != null) {
				emitTriples(currentObject,
						extractor.getURIs(element, revAttr, context),
						newSubject);
			}
		} else {
			// No object yet: remember the predicates for the child context.
			if (relAttr != null) {
				forwardProperties.addAll(extractor.getURIs(element, relAttr,
						context));
			}
			if (revAttr != null) {
				backwardProperties.addAll(extractor.getURIs(element, revAttr,
						context));
			}
			if (!forwardProperties.isEmpty() || !backwardProperties.isEmpty()) {
				// if predicate present
				currentObject = createBNode();
			}
		}

		// Literal values: taken from the content attribute when present,
		// otherwise gathered from the element's content by the collector.
		if (propertyAttr != null) {
			List<String> props = extractor.getURIs(element, propertyAttr,
					context);
			String dt = getDatatype(element);
			Attribute contentAttr = getAttributeByName(element,
					Constants.content);
			if (contentAttr != null) {
				String lex = contentAttr.getValue();
				if (dt == null || dt.length() == 0) {
					emitTriplesPlainLiteral(newSubject, props, lex,
							currentLanguage);
				} else {
					emitTriplesDatatypeLiteral(newSubject, props, lex, dt);
				}
			} else {
				literalCollector
						.collect(newSubject, props, dt, currentLanguage);
			}
		}

		// Complete the properties the parent left pending.
		if (!skipElement && newSubject != null) {
			emitTriples(context.parentSubject, context.forwardProperties,
					newSubject);

			emitTriples(newSubject, context.backwardProperties,
					context.parentSubject);
		}

		EvalContext ec = new EvalContext(context);
		ec.language = currentLanguage;
		if (!skipElement) {
			ec.parentSubject = (newSubject != null) ? newSubject
					: context.parentSubject;

			if (currentObject != null) {
				ec.parentObject = currentObject;
			} else if (newSubject != null) {
				ec.parentObject = newSubject;
			} else {
				ec.parentObject = context.parentSubject;
			}

			ec.forwardProperties = forwardProperties;
			ec.backwardProperties = backwardProperties;
		}
		return ec;
	}

	/**
	 * Determines the language in effect for an element. With
	 * {@code Setting.ManualNamespaces} the {@code Constants.xmllang}
	 * attribute is consulted first and {@code Constants.lang} second;
	 * otherwise only {@code Constants.xmllangNS} is consulted. An empty
	 * attribute value resets the language to {@code null}.
	 *
	 * @param element   the element being evaluated
	 * @param inherited the language to keep when no language attribute exists
	 * @return the language for the element, possibly {@code null}
	 */
	private String resolveLanguage(StartElement element, String inherited) {
		// The xml / html namespace matching is a bit ropey.
		Attribute langAttr;
		if (settings.contains(Setting.ManualNamespaces)) {
			langAttr = findAttribute(element, Constants.xmllang,
					Constants.lang);
		} else {
			langAttr = getAttributeByName(element, Constants.xmllangNS);
		}
		if (langAttr == null) {
			return inherited;
		}
		String declared = langAttr.getValue();
		return (declared.length() == 0) ? null : declared;
	}

	/**
	 * Registers every {@code xmlns:prefix} declaration found among the given
	 * attributes with the current context, the extractor and the sink.
	 * Unless {@code Setting.ManualNamespaces} is active, prefixes containing
	 * an underscore are skipped.
	 *
	 * @param attrs the attributes of the element being started
	 */
	private void getNamespaces(Attributes attrs) {
		final int count = attrs.getLength();
		for (int index = 0; index < count; index++) {
			String attrQName = attrs.getQName(index);
			String attrPrefix = getPrefix(attrQName);
			if (!"xmlns".equals(attrPrefix)) {
				continue;
			}
			String declaredPrefix = getLocal(attrPrefix, attrQName);
			String namespaceUri = attrs.getValue(index);
			boolean permitted = settings.contains(Setting.ManualNamespaces)
					|| !declaredPrefix.contains("_");
			if (permitted) {
				context.setNamespaceURI(declaredPrefix, namespaceUri);
				extractor.setNamespaceURI(declaredPrefix, namespaceUri);
				sink.addPrefix(declaredPrefix, namespaceUri);
			}
		}
	}

	/**
	 * Returns the part of a qualified name before its first colon, or the
	 * empty string when the name contains no colon.
	 */
	private String getPrefix(String qname) {
		int colonAt = qname.indexOf(':');
		return (colonAt < 0) ? "" : qname.substring(0, colonAt);
	}

	/**
	 * Returns the qualified name with the given prefix and the separator
	 * character following it removed; with an empty prefix the name is
	 * returned unchanged.
	 */
	private String getLocal(String prefix, String qname) {
		return (prefix.length() == 0) ? qname : qname.substring(prefix
				.length() + 1);
	}

	/**
	 * Converts SAX attributes into StAX {@link Attribute} events, leaving
	 * out namespace declarations ({@code xmlns} and {@code xmlns:*}).
	 *
	 * @param attributes the SAX attributes of an element
	 * @return an iterator over the converted attributes, in their original
	 *         order
	 */
	private Iterator<Attribute> fromAttributes(Attributes attributes) {
		List<Attribute> toReturn = new LinkedList<Attribute>();

		for (int i = 0; i < attributes.getLength(); i++) {
			String qname = attributes.getQName(i);
			// Filter namespace declarations first so no event is created
			// only to be thrown away.
			if (qname.equals("xmlns") || qname.startsWith("xmlns:")) {
				continue;
			}
			int colonAt = qname.indexOf(':');
			String prefix = (colonAt < 0) ? "" : qname.substring(0, colonAt);
			toReturn.add(eventFactory.createAttribute(prefix,
					attributes.getURI(i), attributes.getLocalName(i),
					attributes.getValue(i)));
		}

		return toReturn.iterator();
	}

	/**
	 * Returns the attribute of the element matching the first of the given
	 * names that is present, trying the names in the order supplied, or
	 * {@code null} when none of them is present.
	 */
	private Attribute findAttribute(StartElement element, QName... names) {
		for (int i = 0; i < names.length; i++) {
			Attribute match = getAttributeByName(element, names[i]);
			if (match != null) {
				return match;
			}
		}
		return null;
	}

	/**
	 * Registers the prefix mappings listed in a {@code prefix} attribute
	 * value with the given context and with the sink. The value is read as
	 * whitespace-separated pairs of {@code name:} followed by the mapped
	 * value; a pair whose first token does not end with a colon is ignored,
	 * as is a trailing token without a partner.
	 *
	 * @param value   the raw attribute value
	 * @param context the context that receives the mappings
	 */
	private void parsePrefixes(String value, EvalContext context) {
		// Trim before splitting: with leading whitespace, split() returns an
		// empty first token, which shifts every pair by one position and
		// causes all mappings to be dropped silently.
		String[] parts = value.trim().split("\\s+");
		for (int i = 0; i + 1 < parts.length; i += 2) {
			String prefix = parts[i];
			if (prefix.endsWith(":")) {
				String prefixFix = prefix.substring(0, prefix.length() - 1);
				context.setPrefix(prefixFix, parts[i + 1]);
				sink.addPrefix(prefixFix, parts[i + 1]);
			}
		}
	}

	/**
	 * Looks up an attribute of the element by name, comparing names with
	 * {@code Util.qNameEquals}.
	 *
	 * @return the first matching attribute, or {@code null} when either
	 *         argument is {@code null} or nothing matches
	 */
	private Attribute getAttributeByName(StartElement element, QName name) {
		if (name == null) {
			return null;
		}
		if (element == null) {
			return null;
		}
		for (Iterator<?> cursor = element.getAttributes(); cursor.hasNext();) {
			Attribute candidate = (Attribute) cursor.next();
			if (Util.qNameEquals(candidate.getName(), name)) {
				return candidate;
			}
		}
		return null;
	}

	/** Number used for the next blank node label; grows by one per label. */
	int bnodeId = 0;

	/**
	 * Returns a new blank node label of the form {@code _:nodeN}, where N is
	 * the current counter value, and advances the counter.
	 */
	// TODO probably broken? Can you write bnodes in rdfa directly?
	private String createBNode() {
		String label = "_:node" + bnodeId;
		bnodeId++;
		return label;
	}

	/**
	 * Returns the datatype declared on the element.
	 *
	 * @return {@code null} when the element has no datatype attribute, the
	 *         empty string when the attribute is empty, otherwise the
	 *         attribute value expanded as a CURIE against the parser's
	 *         current context
	 */
	private String getDatatype(StartElement element) {
		Attribute datatypeAttr = getAttributeByName(element,
				Constants.datatype);
		if (datatypeAttr == null) {
			return null;
		}
		String declared = datatypeAttr.getValue();
		return (declared.length() == 0) ? declared : extractor.expandCURIE(
				element, declared, context);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy