org.openrdf.rio.rdfxml.util.RDFXMLPrettyWriter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sesame-rio-rdfxml Show documentation
Rio parser and writer implementation for the RDF/XML file format.
There is a newer version: 4.1.2
/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.rio.rdfxml.util;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Stack;

import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.util.Literals;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.rdfxml.RDFXMLWriter;

/**
 * An extension of RDFXMLWriter that outputs a more concise form of RDF/XML. The
 * resulting output is semantically equivalent to the output of an RDFXMLWriter
 * (it produces the same set of statements), but it is usually easier to read
 * for humans.
 * <p>
 * This is a quasi-streaming RDFWriter. Statements are cached as long as the
 * striped syntax is followed (i.e. the subject of the next statement is the
 * object of the previous statement) and written to the output when the stripe
 * is broken.
 * <p>
 * The abbreviations used are typed node elements, empty property elements and
 * striped syntax. Note that these abbreviations require that statements are
 * written in the appropriate order.
 * <p>
 * Striped syntax means that when the object of a statement is the subject of
 * the next statement we can nest the descriptions in each other.
 * <p>
 * Example:
 * 
 * <pre>
 * &lt;rdf:Seq&gt;
 *    &lt;rdf:li&gt;
 *       &lt;foaf:Person&gt;
 *          &lt;foaf:knows&gt;
 *             &lt;foaf:Person&gt;
 *               &lt;foaf:mbox rdf:resource="..."/&gt;
 *             &lt;/foaf:Person&gt;
 *          &lt;/foaf:knows&gt;
 *       &lt;/foaf:Person&gt;
 *    &lt;/rdf:li&gt;
 * &lt;/rdf:Seq&gt;
 * </pre>
 * 
 * Typed node elements means that we write out type information in the short
 * form of
 * 
 * <pre>
 * &lt;foaf:Person rdf:about="..."&gt;
 *     ...
 * &lt;/foaf:Person&gt;
 * </pre>
 * 
 * instead of
 * 
 * <pre>
 * &lt;rdf:Description rdf:about="..."&gt;
 *    &lt;rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/&gt;
 *     ...
 * &lt;/rdf:Description&gt;
 * </pre>
 * 
 * Empty property elements are of the form
 * 
 * <pre>
 * &lt;foaf:Person&gt;
 *    &lt;foaf:homepage rdf:resource="http://www.cs.vu.nl/~marta"/&gt;
 * &lt;/foaf:Person&gt;
 * </pre>
 * 
 * instead of
 * 
 * <pre>
 * &lt;foaf:Person&gt;
 *    &lt;foaf:homepage&gt;
 *       &lt;rdf:Description rdf:about="http://www.cs.vu.nl/~marta"/&gt;
 *    &lt;/foaf:homepage&gt;
 * &lt;/foaf:Person&gt;
 * </pre>
 * 
 * @author Peter Mika ([email protected])
 */
public class RDFXMLPrettyWriter extends RDFXMLWriter implements Closeable, Flushable {

	/*-----------*
	 * Variables *
	 *-----------*/

	/*
	 * We implement striped syntax by using two stacks, one for predicates and
	 * one for subjects/objects. Predicates and objects are always pushed
	 * together, so predicateStack.get(i - 1) is the predicate that links
	 * nodeStack.get(i - 1) to nodeStack.get(i).
	 */

	/**
	 * Stack for remembering the nodes (subjects/objects) of statements at each
	 * level.
	 */
	private final Stack<Node> nodeStack = new Stack<Node>();

	/**
	 * Stack for remembering the predicate of statements at each level.
	 */
	private final Stack<URI> predicateStack = new Stack<URI>();

	/*--------------*
	 * Constructors *
	 *--------------*/

	/**
	 * Creates a new RDFXMLPrettyWriter that will write to the supplied
	 * OutputStream.
	 * 
	 * @param out
	 *        The OutputStream to write the RDF/XML document to.
	 */
	public RDFXMLPrettyWriter(OutputStream out) {
		super(out);
	}

	/**
	 * Creates a new RDFXMLPrettyWriter that will write to the supplied Writer.
	 * 
	 * @param out
	 *        The Writer to write the RDF/XML document to.
	 */
	public RDFXMLPrettyWriter(Writer out) {
		super(out);
	}

	/*---------*
	 * Methods *
	 *---------*/

	/**
	 * Registers the RDF Schema namespace and then delegates to the superclass
	 * to write the document header.
	 */
	@Override
	protected void writeHeader()
		throws IOException
	{
		// This export format needs the RDF Schema namespace to be defined:
		setNamespace(RDFS.PREFIX, RDFS.NAMESPACE);

		super.writeHeader();
	}

	/**
	 * Writes out all statements that are still cached on the stacks and
	 * flushes the underlying writer. Does nothing if writing has not been
	 * started yet.
	 * 
	 * @throws IOException
	 *         If writing fails, or if flushing the pending statements raised
	 *         an RDFHandlerException (unwrapped when its cause is an
	 *         IOException, wrapped otherwise).
	 */
	@Override
	public void flush()
		throws IOException
	{
		if (writingStarted) {
			if (!headerWritten) {
				writeHeader();
			}

			try {
				flushPendingStatements();
			}
			catch (RDFHandlerException e) {
				throw toIOException(e);
			}

			writer.flush();
		}
	}

	/**
	 * Ends the RDF document (if writing has been started), clears the cached
	 * stacks and closes the underlying writer. The stacks are cleared and the
	 * writer is closed even when ending the document fails.
	 * 
	 * @throws IOException
	 *         If ending the document or closing the writer fails.
	 */
	@Override
	public void close()
		throws IOException
	{
		try {
			if (writingStarted) {
				endRDF();
			}
		}
		catch (RDFHandlerException e) {
			throw toIOException(e);
		}
		finally {
			nodeStack.clear();
			predicateStack.clear();
			writer.close();
		}
	}

	/**
	 * Converts an RDFHandlerException into an IOException: the original cause
	 * is returned when it already is an IOException, otherwise the exception
	 * is wrapped in a new IOException.
	 */
	private static IOException toIOException(RDFHandlerException e) {
		Throwable cause = e.getCause();

		if (cause instanceof IOException) {
			return (IOException)cause;
		}

		return new IOException(e);
	}

	/**
	 * Writes out everything that is still cached on the stacks.
	 */
	@Override
	protected void flushPendingStatements()
		throws IOException, RDFHandlerException
	{
		if (!nodeStack.isEmpty()) {
			popStacks(null);
		}
	}

	/**
	 * Write out the stacks until we find subject. If subject == null, write out
	 * the entire stack. Must only be called when the node stack is not empty.
	 * 
	 * @param newSubject
	 *        The subject of the next statement; popping stops as soon as this
	 *        resource is found on top of the node stack. May be
	 *        <tt>null</tt>, in which case both stacks are emptied.
	 */
	private void popStacks(Resource newSubject)
		throws IOException, RDFHandlerException
	{
		// Write start tags for the part of the stacks that are not yet
		// written. The top node is excluded; it is handled separately below.
		for (int i = 0; i < nodeStack.size() - 1; i++) {
			Node node = nodeStack.get(i);

			if (!node.isWritten()) {
				if (i > 0) {
					// Property element that links the parent node to this node
					writeIndents(i * 2 - 1);

					URI predicate = predicateStack.get(i - 1);

					writeStartTag(predicate.getNamespace(), predicate.getLocalName());
					writeNewLine();
				}

				writeIndents(i * 2);
				writeNodeStartTag(node);
				node.setIsWritten(true);
			}
		}

		// Write tags for the top subject
		Node topNode = nodeStack.pop();

		if (predicateStack.isEmpty()) {
			// write out an empty subject
			writeIndents(nodeStack.size() * 2);
			writeNodeEmptyTag(topNode);
			writeNewLine();
		}
		else {
			URI topPredicate = predicateStack.pop();

			if (!topNode.hasType()) {
				// we can use an abbreviated predicate
				writeIndents(nodeStack.size() * 2 - 1);
				writeAbbreviatedPredicate(topPredicate, topNode.getValue());
			}
			else {
				// we cannot use an abbreviated predicate because the type needs
				// to be written out as well

				writeIndents(nodeStack.size() * 2 - 1);
				writeStartTag(topPredicate.getNamespace(), topPredicate.getLocalName());
				writeNewLine();

				// write out an empty subject
				writeIndents(nodeStack.size() * 2);
				writeNodeEmptyTag(topNode);
				writeNewLine();

				writeIndents(nodeStack.size() * 2 - 1);
				writeEndTag(topPredicate.getNamespace(), topPredicate.getLocalName());
				writeNewLine();
			}
		}

		// Write out the end tags until we find the subject
		while (!nodeStack.isEmpty()) {
			Node nextElement = nodeStack.peek();

			if (nextElement.getValue().equals(newSubject)) {
				break;
			}
			else {
				nodeStack.pop();

				// We have already written out the subject/object,
				// but we still need to close the tag
				writeIndents(predicateStack.size() + nodeStack.size());

				writeNodeEndTag(nextElement);

				if (predicateStack.size() > 0) {
					URI nextPredicate = predicateStack.pop();

					writeIndents(predicateStack.size() + nodeStack.size());

					writeEndTag(nextPredicate.getNamespace(), nextPredicate.getLocalName());

					writeNewLine();
				}
			}
		}
	}

	/**
	 * Caches the supplied statement on the stacks. If the statement's subject
	 * differs from the node that is currently on top of the node stack, the
	 * stacks are first written out until that subject is found (or until they
	 * are empty).
	 * 
	 * @param st
	 *        The statement to write.
	 * @throws RDFHandlerException
	 *         If document writing has not yet been started, or if an
	 *         IOException occurs while writing.
	 */
	@Override
	public void handleStatement(Statement st)
		throws RDFHandlerException
	{
		if (!writingStarted) {
			throw new RDFHandlerException("Document writing has not yet been started");
		}

		Resource subj = st.getSubject();
		URI pred = st.getPredicate();
		Value obj = st.getObject();

		try {
			if (!headerWritten) {
				writeHeader();
			}

			if (!nodeStack.isEmpty() && !subj.equals(nodeStack.peek().getValue())) {
				// Different subject than we had before, empty the stack
				// until we find it
				popStacks(subj);
			}

			// Stack is either empty or contains the same subject at top

			if (nodeStack.isEmpty()) {
				// Push subject
				nodeStack.push(new Node(subj));
			}

			// Stack now contains at least one element
			Node topSubject = nodeStack.peek();

			// Check if current statement is a type statement and use a typed node
			// element if possible
			// FIXME: verify that an XML namespace-qualified name can be created
			// for the type URI
			if (pred.equals(RDF.TYPE) && obj instanceof URI && !topSubject.hasType() && !topSubject.isWritten())
			{
				// Use typed node element
				topSubject.setType((URI)obj);
			}
			else {
				// A container membership property (rdf:_n) that matches the
				// next expected index of this subject is written as rdf:li
				if (pred.equals(topSubject.nextLi())) {
					pred = RDF.LI;
					topSubject.incrementNextLi();
				}

				// Push predicate and object
				predicateStack.push(pred);
				nodeStack.push(new Node(obj));
			}
		}
		catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	/**
	 * Write out the opening tag of the subject or object of a statement up to
	 * (but not including) the end of the tag. Used both in writeNodeStartTag
	 * and writeNodeEmptyTag.
	 * 
	 * @param node
	 *        The node to write; its value is expected to be a URI or a BNode
	 *        (any non-URI value is cast to BNode).
	 */
	private void writeNodeStartOfStartTag(Node node)
		throws IOException, RDFHandlerException
	{
		Value value = node.getValue();

		if (node.hasType()) {
			// We can use abbreviated syntax
			writeStartOfStartTag(node.getType().getNamespace(), node.getType().getLocalName());
		}
		else {
			// We cannot use abbreviated syntax
			writeStartOfStartTag(RDF.NAMESPACE, "Description");
		}

		if (value instanceof URI) {
			URI uri = (URI)value;
			writeAttribute(RDF.NAMESPACE, "about", uri.toString());
		}
		else {
			BNode bNode = (BNode)value;
			writeAttribute(RDF.NAMESPACE, "nodeID", getValidNodeId(bNode));
		}
	}

	/**
	 * Write out the opening tag of the subject or object of a statement,
	 * followed by a new line.
	 */
	private void writeNodeStartTag(Node node)
		throws IOException, RDFHandlerException
	{
		writeNodeStartOfStartTag(node);
		writeEndOfStartTag();
		writeNewLine();
	}

	/**
	 * Write out the closing tag for the subject or object of a statement,
	 * followed by a new line.
	 */
	private void writeNodeEndTag(Node node)
		throws IOException
	{
		if (node.hasType()) {
			writeEndTag(node.getType().getNamespace(), node.getType().getLocalName());
		}
		else {
			writeEndTag(RDF.NAMESPACE, "Description");
		}
		writeNewLine();
	}

	/**
	 * Write out an empty tag for the subject or object of a statement. No new
	 * line is written; that is left to the caller.
	 */
	private void writeNodeEmptyTag(Node node)
		throws IOException, RDFHandlerException
	{
		writeNodeStartOfStartTag(node);
		writeEndOfEmptyTag();
	}

	/**
	 * Write out a property element without a nested node element: an empty
	 * property element referring to the object for resources, or a property
	 * element containing the label for literals.
	 * 
	 * @param pred
	 *        The predicate to write as property element.
	 * @param obj
	 *        The object of the property.
	 */
	private void writeAbbreviatedPredicate(URI pred, Value obj)
		throws IOException, RDFHandlerException
	{
		writeStartOfStartTag(pred.getNamespace(), pred.getLocalName());

		if (obj instanceof Resource) {
			Resource objRes = (Resource)obj;

			if (objRes instanceof URI) {
				URI uri = (URI)objRes;
				writeAttribute(RDF.NAMESPACE, "resource", uri.toString());
			}
			else {
				BNode bNode = (BNode)objRes;
				writeAttribute(RDF.NAMESPACE, "nodeID", getValidNodeId(bNode));
			}

			writeEndOfEmptyTag();
		}
		else if (obj instanceof Literal) {
			Literal objLit = (Literal)obj;
			// datatype attribute
			// NOTE(review): getDatatype() may return null for plain literals
			// in some model implementations, so it is handled null-safely.
			URI datatype = objLit.getDatatype();
			// Check if datatype is rdf:XMLLiteral
			boolean isXmlLiteral = RDF.XMLLITERAL.equals(datatype);

			if (Literals.isLanguageLiteral(objLit)) {
				// language attribute
				writeAttribute("xml:lang", objLit.getLanguage());
			}
			else if (isXmlLiteral) {
				writeAttribute(RDF.NAMESPACE, "parseType", "Literal");
			}
			else if (datatype != null) {
				writeAttribute(RDF.NAMESPACE, "datatype", datatype.toString());
			}

			writeEndOfStartTag();

			// label
			if (isXmlLiteral) {
				// Write XML literal as plain XML
				writer.write(objLit.getLabel());
			}
			else {
				writeCharacterData(objLit.getLabel());
			}

			writeEndTag(pred.getNamespace(), pred.getLocalName());
		}

		writeNewLine();
	}

	/**
	 * Writes a complete start tag (without attributes) for the element with
	 * the supplied namespace and local name.
	 */
	protected void writeStartTag(String namespace, String localName)
		throws IOException
	{
		writeStartOfStartTag(namespace, localName);
		writeEndOfStartTag();
	}

	/**
	 * Writes n indents. Nothing is written when n is zero or negative.
	 */
	protected void writeIndents(int n)
		throws IOException
	{
		for (int i = 0; i < n; i++) {
			writeIndent();
		}
	}

	/*------------------*
	 * Inner class Node *
	 *------------------*/

	/**
	 * An entry of the node stack: a subject or object value together with its
	 * (optional) type, a flag that records whether its start tag has been
	 * written, and the index of the next rdf:_n property that may be
	 * abbreviated to rdf:li.
	 */
	private static class Node {

		// Index n of the next rdf:_n predicate that is written as rdf:li
		private int nextLiIndex = 1;

		// Lazily created URI for rdf:_<nextLiIndex>; reset on increment
		private Resource nextLi;

		private final Value value;

		// type == null means that we use rdf:Description for this node
		private URI type = null;

		private boolean isWritten = false;

		/**
		 * Creates a new Node for the supplied Value.
		 */
		public Node(Value value) {
			this.value = value;
		}

		/**
		 * Returns the rdf:_n resource for the current index, creating it on
		 * first use.
		 */
		Resource nextLi() {
			if (nextLi == null) {
				nextLi = new URIImpl(RDF.NAMESPACE + "_" + nextLiIndex);
			}

			return nextLi;
		}

		/**
		 * Advances to the next rdf:_n index and discards the cached resource.
		 */
		public void incrementNextLi() {
			nextLiIndex++;
			nextLi = null;
		}

		public Value getValue() {
			return value;
		}

		public void setType(URI type) {
			this.type = type;
		}

		public URI getType() {
			return type;
		}

		public boolean hasType() {
			return type != null;
		}

		public void setIsWritten(boolean isWritten) {
			this.isWritten = isWritten;
		}

		public boolean isWritten() {
			return isWritten;
		}
	}
}