com.twelvemonkeys.imageio.metadata.xmp.XMPReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Copyright (c) 2009, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.imageio.metadata.xmp;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.IIOException;
import javax.imageio.stream.ImageInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReader;
import com.twelvemonkeys.imageio.util.IIOUtil;
import com.twelvemonkeys.lang.Validate;
/**
* XMPReader
*
* @author Harald Kuhr
* @author last modified by $Author: haraldk$
* @version $Id: XMPReader.java,v 1.0 Nov 14, 2009 11:04:30 PM haraldk Exp$
*/
public final class XMPReader extends MetadataReader {
// See http://www.scribd.com/doc/56852716/XMPSpecificationPart1
// TODO: Types? Probably defined in XMP/RDF XML schema. Or are we happy that everything is a string?
@Override
public Directory read(final ImageInputStream input) throws IOException {
Validate.notNull(input, "input");
try {
DocumentBuilderFactory factory = createDocumentBuilderFactory();
// TODO: Consider parsing using SAX?
// TODO: Determine encoding and parse using a Reader...
// TODO: Refactor scanner to return inputstream?
// TODO: Be smarter about ASCII-NULL termination/padding (the SAXParser aka Xerces DOMParser doesn't like it)...
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(new DefaultHandler());
Document document = builder.parse(new InputSource(IIOUtil.createStreamAdapter(input)));
String toolkit = getToolkit(document);
Node rdfRoot = document.getElementsByTagNameNS(XMP.NS_RDF, "RDF").item(0);
NodeList descriptions = document.getElementsByTagNameNS(XMP.NS_RDF, "Description");
return parseDirectories(rdfRoot, descriptions, toolkit);
}
catch (SAXException e) {
throw new IIOException(e.getMessage(), e);
}
catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
private DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
// Security: Disable XInclude & expanding entity references ("bombs"), not needed for XMP
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
// Security: Enable "secure processing", to prevent DoS attacks
factory.setAttribute(XMLConstants.FEATURE_SECURE_PROCESSING, true);
// Security: Remove possibility to access external DTDs or Schema, not needed for XMP
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
// Security: Disable loading of external DTD and entities, not needed for XMP
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
return factory;
}
private String getToolkit(Document document) {
NodeList xmpmeta = document.getElementsByTagNameNS(XMP.NS_X, "xmpmeta");
if (xmpmeta == null || xmpmeta.getLength() <= 0) {
return null;
}
Node toolkit = xmpmeta.item(0).getAttributes().getNamedItemNS(XMP.NS_X, "xmptk");
return toolkit != null ? toolkit.getNodeValue() : null;
}
private XMPDirectory parseDirectories(final Node pParentNode, NodeList pNodes, String toolkit) {
Map> subdirs = new LinkedHashMap<>();
for (Node desc : asIterable(pNodes)) {
if (desc.getParentNode() != pParentNode) {
continue;
}
// Support attribute short-hand syntax
parseAttributesForKnownElements(subdirs, desc);
for (Node node : asIterable(desc.getChildNodes())) {
if (node.getNodeType() != Node.ELEMENT_NODE) {
continue;
}
// Lookup
List dir = subdirs.get(node.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<>();
subdirs.put(node.getNamespaceURI(), dir);
}
Object value;
if (isResourceType(node)) {
value = parseAsResource(node);
}
else {
// TODO: This method contains loads of duplication an should be cleaned up...
// Support attribute short-hand syntax
Map> subsubdirs = new LinkedHashMap<>();
parseAttributesForKnownElements(subsubdirs, node);
if (!subsubdirs.isEmpty()) {
List entries = new ArrayList<>(subsubdirs.size());
for (Map.Entry> entry : subsubdirs.entrySet()) {
entries.addAll(entry.getValue());
}
value = new RDFDescription(entries);
}
else {
value = getChildTextValue(node);
}
}
dir.add(new XMPEntry(node.getNamespaceURI() + node.getLocalName(), node.getLocalName(), value));
}
}
List entries = new ArrayList<>(subdirs.size());
// TODO: Should we still allow asking for a subdirectory by item id?
for (Map.Entry> entry : subdirs.entrySet()) {
entries.add(new RDFDescription(entry.getKey(), entry.getValue()));
}
return new XMPDirectory(entries, toolkit);
}
private boolean isResourceType(Node node) {
Node parseType = node.getAttributes().getNamedItemNS(XMP.NS_RDF, "parseType");
return parseType != null && "Resource".equals(parseType.getNodeValue());
}
private RDFDescription parseAsResource(Node node) {
// See: http://www.w3.org/TR/REC-rdf-syntax/#section-Syntax-parsetype-resource
List entries = new ArrayList<>();
for (Node child : asIterable(node.getChildNodes())) {
if (child.getNodeType() != Node.ELEMENT_NODE) {
continue;
}
entries.add(new XMPEntry(child.getNamespaceURI() + child.getLocalName(), child.getLocalName(), getChildTextValue(child)));
}
return new RDFDescription(entries);
}
private void parseAttributesForKnownElements(Map> subdirs, Node desc) {
// NOTE: NamedNodeMap does not have any particular order...
NamedNodeMap attributes = desc.getAttributes();
for (Node attr : asIterable(attributes)) {
if (!XMP.ELEMENTS.contains(attr.getNamespaceURI())) {
continue;
}
List dir = subdirs.get(attr.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<>();
subdirs.put(attr.getNamespaceURI(), dir);
}
dir.add(new XMPEntry(attr.getNamespaceURI() + attr.getLocalName(), attr.getLocalName(), attr.getNodeValue()));
}
}
private Object getChildTextValue(final Node node) {
for (Node child : asIterable(node.getChildNodes())) {
if (XMP.NS_RDF.equals(child.getNamespaceURI()) && "Alt".equals(child.getLocalName())) {
// Support for -> return a Map keyed on xml:lang
Map alternatives = new LinkedHashMap<>();
for (Node alternative : asIterable(child.getChildNodes())) {
if (XMP.NS_RDF.equals(alternative.getNamespaceURI()) && "li".equals(alternative.getLocalName())) {
NamedNodeMap attributes = alternative.getAttributes();
Node key = attributes.getNamedItem("xml:lang");
alternatives.put(key == null ? null : key.getTextContent(), getChildTextValue(alternative));
}
}
return alternatives;
}
else if (XMP.NS_RDF.equals(child.getNamespaceURI()) && ("Seq".equals(child.getLocalName()) || "Bag".equals(child.getLocalName()))) {
// Support for -> return array
// Support for -> return array/unordered collection (how can a serialized collection not have order?)
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy