// Source listing of org.apache.tika.xmp.XMPMetadata from the tika-xmp artifact (Maven / Gradle / Ivy).
// Web-page navigation residue: "Show all versions of tika-xmp", "Show documentation".
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.xmp;
import java.io.IOException;
import java.io.NotSerializableException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import com.adobe.internal.xmp.XMPDateTime;
import com.adobe.internal.xmp.XMPException;
import com.adobe.internal.xmp.XMPIterator;
import com.adobe.internal.xmp.XMPMeta;
import com.adobe.internal.xmp.XMPMetaFactory;
import com.adobe.internal.xmp.XMPSchemaRegistry;
import com.adobe.internal.xmp.XMPUtils;
import com.adobe.internal.xmp.options.IteratorOptions;
import com.adobe.internal.xmp.options.PropertyOptions;
import com.adobe.internal.xmp.options.SerializeOptions;
import com.adobe.internal.xmp.properties.XMPProperty;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.Property.PropertyType;
import org.apache.tika.metadata.PropertyTypeException;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.xmp.convert.TikaToXMP;
/**
 * Provides a conversion of the Metadata map from Tika to the XMP data model by also providing the
 * Metadata API for clients to ease transition. But clients can also work directly on the XMP data
 * model, by getting the XMPMeta reference from this class. Usually the instance would be
 * initialized by providing the Metadata object that had been returned from Tika-core which
 * populates the XMP data model with all properties that can be converted.
 *
 * <p>All keys passed to the Metadata API of this class must be QNames of the form
 * {@code prefix:localName} whose prefix is registered in the XMP namespace registry; any other
 * key is rejected with a {@link PropertyTypeException}.
 *
 * <p>This class is not serializable! Both serialization hooks throw
 * {@link NotSerializableException}.
 */
@SuppressWarnings("serial")
public class XMPMetadata extends Metadata {

    /**
     * Use the XMP namespace registry implementation
     */
    private static final XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry();

    /**
     * The XMP data
     */
    private XMPMeta xmpData;

    /**
     * Initializes with an empty XMP packet
     */
    public XMPMetadata() {
        xmpData = XMPMetaFactory.create();
    }

    /**
     * Initializes the data by converting the Metadata information to XMP.
     * Works like {@link #XMPMetadata(Metadata, String)}, but the mimetype is retrieved from the
     * metadata map.
     *
     * @param meta the Metadata information from Tika-core
     * @throws TikaException case an error occurred during conversion
     */
    public XMPMetadata(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert(meta);
    }

    /**
     * Initializes the data by converting the Metadata information to XMP. If a mimetype is
     * provided, a specific converter can be used, that converts all available metadata. If there is
     * no mimetype provided or no specific converter available a generic conversion is done which
     * will convert only those properties that are in known namespaces and are using the correct
     * prefixes
     *
     * @param meta     the Metadata information from Tika-core
     * @param mimetype mimetype information
     * @throws TikaException case an error occurred during conversion
     */
    public XMPMetadata(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert(meta, mimetype);
    }

    // === Namespace Registry API === //

    /**
     * Register a namespace URI with a suggested prefix. It is not an error if the URI is already
     * registered, no matter what the prefix is. If the URI is not registered but the suggested
     * prefix is in use, a unique prefix is created from the suggested one. The actual registered
     * prefix is always returned. The function result tells if the registered prefix is the
     * suggested one.
     * Note: No checking is presently done on either the URI or the prefix.
     *
     * @param namespaceURI    The URI for the namespace. Must be a valid XML URI.
     * @param suggestedPrefix The suggested prefix to be used if the URI is not yet registered.
     *                        Must be a valid XML name.
     * @return Returns the registered prefix for this URI, is equal to the suggestedPrefix if the
     * namespace hasn't been registered before, otherwise the existing prefix.
     * @throws XMPException If the parameters are not accordingly set
     */
    public static String registerNamespace(String namespaceURI, String suggestedPrefix)
            throws XMPException {
        return registry.registerNamespace(namespaceURI, suggestedPrefix);
    }

    /**
     * Obtain the prefix for a registered namespace URI.
     * It is not an error if the namespace URI is not registered.
     *
     * @param namespaceURI The URI for the namespace. Must not be null or the empty string.
     * @return Returns the prefix registered for this namespace URI or null.
     */
    public static String getNamespacePrefix(String namespaceURI) {
        return registry.getNamespacePrefix(namespaceURI);
    }

    /**
     * Obtain the URI for a registered namespace prefix.
     * It is not an error if the namespace prefix is not registered.
     *
     * @param namespacePrefix The prefix for the namespace. Must not be null or the empty string.
     * @return Returns the URI registered for this prefix or null.
     */
    public static String getNamespaceURI(String namespacePrefix) {
        return registry.getNamespaceURI(namespacePrefix);
    }

    /**
     * @return Returns the registered prefix/namespace-pairs as map, where the keys are the
     * namespaces and the values are the prefixes.
     */
    @SuppressWarnings("unchecked") // the registry API hands out an untyped Map
    public static Map<String, String> getNamespaces() {
        return registry.getNamespaces();
    }

    /**
     * @return Returns the registered namespace/prefix-pairs as map, where the keys are the prefixes
     * and the values are the namespaces.
     */
    @SuppressWarnings("unchecked") // the registry API hands out an untyped Map
    public static Map<String, String> getPrefixes() {
        return registry.getPrefixes();
    }

    /**
     * Deletes a namespace from the registry.
     *
     * Does nothing if the URI is not registered, or if the namespaceURI parameter is null or the
     * empty string.
     *
     * Note: Not yet implemented.
     *
     * @param namespaceURI The URI for the namespace.
     */
    public static void deleteNamespace(String namespaceURI) {
        registry.deleteNamespace(namespaceURI);
    }

    // === Conversion API === //

    /**
     * Replaces the current XMP data with the conversion result of the given Metadata.
     * Works like {@link #process(Metadata, String)}, but the mimetype is retrieved from the
     * metadata map.
     *
     * @param meta the Metadata information from Tika-core
     * @throws TikaException case an error occurred during conversion
     */
    public void process(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert(meta);
    }

    /**
     * Converts the Metadata information to XMP. If a mimetype is provided, a specific converter can
     * be used, that converts all available metadata. If there is no mimetype provided or no
     * specific converter available a generic conversion is done which will convert only those
     * properties that are in known namespaces and are using the correct prefixes.
     * The previously held XMP data is replaced by the conversion result.
     *
     * @param meta     the Metadata information from Tika-core
     * @param mimetype mimetype information
     * @throws TikaException case an error occurred during conversion
     */
    public void process(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert(meta, mimetype);
    }

    /**
     * Provides direct access to the XMP data model, in case a client prefers to work directly on it
     * instead of using the Metadata API
     *
     * @return the "internal" XMP data object
     */
    public XMPMeta getXMPData() {
        return xmpData;
    }

    // === Metadata API === //

    /**
     * @see org.apache.tika.xmp.XMPMetadata#isMultiValued(java.lang.String)
     */
    @Override
    public boolean isMultiValued(Property property) {
        return this.isMultiValued(property.getName());
    }

    /**
     * Checks if the named property is an array.
     *
     * @param name QName key ({@code prefix:localName}) with a registered prefix
     * @return true if the property exists and is an array; false if it does not exist, is not an
     * array, or could not be read
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix
     * @see org.apache.tika.metadata.Metadata#isMultiValued(java.lang.String)
     */
    @Override
    public boolean isMultiValued(String name) {
        checkKey(name);
        String[] keyParts = splitKey(name);
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns == null) {
            return false;
        }
        try {
            XMPProperty prop = xmpData.getProperty(ns, keyParts[1]);
            // a property that is not set is looked up as null and is therefore no array
            return prop != null && prop.getOptions().isArray();
        } catch (XMPException ignored) {
            // best effort: a property that cannot be read is reported as not multi-valued
            return false;
        }
    }

    /**
     * For XMP it is not clear what that API should return, therefore not implemented
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String[] names() {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Returns the value of a simple property or the first one of an array. The given name must
     * contain a namespace prefix of a registered namespace.
     *
     * @param name QName key ({@code prefix:localName}) with a registered prefix
     * @return the value, or null if the property does not exist, is an empty array, is neither
     * simple nor an array, or could not be read
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix
     * @see org.apache.tika.metadata.Metadata#get(java.lang.String)
     */
    @Override
    public String get(String name) {
        checkKey(name);
        String[] keyParts = splitKey(name);
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns == null) {
            return null;
        }
        try {
            XMPProperty prop = xmpData.getProperty(ns, keyParts[1]);
            if (prop == null) {
                return null;
            }
            if (prop.getOptions().isSimple()) {
                return prop.getValue();
            }
            if (prop.getOptions().isArray()) {
                // XMP array indices are 1-based; an empty array has no first item
                XMPProperty first = xmpData.getArrayItem(ns, keyParts[1], 1);
                return first != null ? first.getValue() : null;
            }
            // in all other cases, null is returned
        } catch (XMPException ignored) {
            // best effort: unreadable properties are treated as absent
        }
        return null;
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public String get(Property property) {
        return this.get(property.getName());
    }

    /**
     * Returns the property value converted to an Integer.
     *
     * @return the converted value, or null if the conversion failed
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public Integer getInt(Property property) {
        try {
            return XMPUtils.convertToInteger(this.get(property.getName()));
        } catch (XMPException ignored) {
            // value is missing or no integer: report "no value" instead of failing
            return null;
        }
    }

    /**
     * Returns the property value converted to a Date.
     *
     * @return the converted value, or null if the conversion failed
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public Date getDate(Property property) {
        try {
            XMPDateTime xmpDate = XMPUtils.convertToDate(this.get(property.getName()));
            if (xmpDate == null) {
                return null;
            }
            Calendar cal = xmpDate.getCalendar();
            // TODO Timezone is currently lost
            // need another solution that preserves the timezone
            return cal.getTime();
        } catch (XMPException ignored) {
            // value is missing or no date: report "no value" instead of failing
            return null;
        }
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#getValues(java.lang.String)
     */
    @Override
    public String[] getValues(Property property) {
        return this.getValues(property.getName());
    }

    /**
     * Returns the value of a simple property or all if the property is an array and the elements
     * are of simple type. The given name must contain a namespace prefix of a registered namespace.
     *
     * @param name QName key ({@code prefix:localName}) with a registered prefix
     * @return a one-element array for a simple property, all items for an array of simple items,
     * or null if the property does not exist, contains non-simple items, or could not be read
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix
     * @see org.apache.tika.metadata.Metadata#getValues(java.lang.String)
     */
    @Override
    public String[] getValues(String name) {
        checkKey(name);
        String[] keyParts = splitKey(name);
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns == null) {
            return null;
        }
        try {
            XMPProperty prop = xmpData.getProperty(ns, keyParts[1]);
            if (prop == null) {
                return null;
            }
            if (prop.getOptions().isSimple()) {
                return new String[]{prop.getValue()};
            }
            if (prop.getOptions().isArray()) {
                int size = xmpData.countArrayItems(ns, keyParts[1]);
                String[] values = new String[size];
                for (int i = 0; i < size; i++) {
                    // XMP array indices are 1-based
                    XMPProperty item = xmpData.getArrayItem(ns, keyParts[1], i + 1);
                    if (item == null || !item.getOptions().isSimple()) {
                        // only arrays that consist of simple items can be flattened
                        return null;
                    }
                    values[i] = item.getValue();
                }
                return values;
            }
            // in all other cases, null is returned
        } catch (XMPException ignored) {
            // best effort: never hand out a partially filled array
        }
        return null;
    }

    /**
     * As this API could only possibly work for simple properties in XMP, it just calls the set
     * method, which replaces any existing value
     *
     * @see org.apache.tika.metadata.Metadata#add(java.lang.String, java.lang.String)
     */
    @Override
    public void add(String name, String value) {
        set(name, value);
    }

    /**
     * Sets the given property. If the property already exists, it is overwritten. Only simple
     * properties that use a registered prefix are stored in the XMP.
     *
     * @param name  QName key ({@code prefix:localName}) with a registered prefix
     * @param value the value to store
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix
     * @see org.apache.tika.metadata.Metadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(String name, String value) {
        checkKey(name);
        String[] keyParts = splitKey(name);
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns == null) {
            return;
        }
        try {
            xmpData.setProperty(ns, keyParts[1], value);
        } catch (XMPException ignored) {
            // best effort: values the XMP model rejects are silently not stored
        }
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, String value) {
        this.set(property.getName(), value);
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, int value) {
        // Can reuse the checks from the base class implementation which will call
        // the set(String, String) method in the end
        super.set(property, value);
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, double value) {
        super.set(property, value);
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, Date date) {
        super.set(property, date);
    }

    /**
     * Sets array properties. If the property already exists, it is overwritten. Only array
     * properties that use a registered prefix are stored in the XMP.
     *
     * @param property the array property to set
     * @param values   the items of the array, appended in the given order
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix or
     *                               the property does not permit multiple values
     * @see org.apache.tika.metadata.Metadata#set(org.apache.tika.metadata.Property,
     * java.lang.String[])
     */
    @Override
    public void set(Property property, String[] values) {
        checkKey(property.getName());
        if (!property.isMultiValuePermitted()) {
            throw new PropertyTypeException("Property is not of an array type");
        }
        String[] keyParts = splitKey(property.getName());
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns == null) {
            return;
        }
        try {
            int arrayType = tikaToXMPArrayType(property.getPrimaryProperty().getPropertyType());
            // (re-)create the array node itself, then append the items one by one
            xmpData.setProperty(ns, keyParts[1], null, new PropertyOptions(arrayType));
            for (String value : values) {
                xmpData.appendArrayItem(ns, keyParts[1], value);
            }
        } catch (XMPException ignored) {
            // best effort: values the XMP model rejects are silently not stored
        }
    }

    /**
     * It will set all simple and array properties that have QName keys in registered namespaces.
     * A property that permits multiple values is stored as an array with a single item.
     *
     * @param properties the key/value pairs to store
     * @throws PropertyTypeException if a name is no known Tika property or its key is rejected
     * @see org.apache.tika.metadata.Metadata#setAll(java.util.Properties)
     */
    @Override
    public void setAll(Properties properties) {
        @SuppressWarnings("unchecked") // Properties hands out its names as Enumeration<?>
        Enumeration<String> names = (Enumeration<String>) properties.propertyNames();
        while (names.hasMoreElements()) {
            String name = names.nextElement();
            Property property = Property.get(name);
            if (property == null) {
                throw new PropertyTypeException("Unknown property: " + name);
            }
            String value = properties.getProperty(name);
            if (property.isMultiValuePermitted()) {
                this.set(property, new String[]{value});
            } else {
                this.set(property, value);
            }
        }
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#remove(java.lang.String)
     */
    public void remove(Property property) {
        this.remove(property.getName());
    }

    /**
     * Removes the given property from the XMP data. If it is a complex property the whole subtree
     * is removed
     *
     * @param name QName key ({@code prefix:localName}) with a registered prefix
     * @throws PropertyTypeException if the key is not a valid QName with a registered prefix
     * @see org.apache.tika.metadata.Metadata#remove(java.lang.String)
     */
    @Override
    public void remove(String name) {
        checkKey(name);
        String[] keyParts = splitKey(name);
        String ns = registry.getNamespaceURI(keyParts[0]);
        if (ns != null) {
            xmpData.deleteProperty(ns, keyParts[1]);
        }
    }

    /**
     * Returns the number of top-level namespaces
     *
     * @return the number of nodes the top-level iterator yields; nodes counted before an
     * iteration error are still reported
     */
    @Override
    public int size() {
        int size = 0;
        try {
            // Get an iterator for the XMP packet, starting at the top level schema nodes
            XMPIterator nsIter = xmpData.iterator(
                    new IteratorOptions().setJustChildren(true).setOmitQualifiers(true));
            // iterate all top level namespaces
            while (nsIter.hasNext()) {
                nsIter.next();
                size++;
            }
        } catch (XMPException ignored) {
            // best effort: report what has been counted so far
        }
        return size;
    }

    /**
     * This method is not implemented, yet. It is very tedious to check for semantic equality of XMP
     * packets
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean equals(Object o) {
        throw new UnsupportedOperationException("Not implemented");
    }

    /**
     * Combines the hash code of the base class with the one of the XMP data object.
     * Note that {@link #equals(Object)} is not supported by this class.
     */
    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), xmpData);
    }

    /**
     * Serializes the XMP data in compact form without packet wrapper
     *
     * @return the serialized XMP, or null if the serialization failed
     * @see org.apache.tika.metadata.Metadata#toString()
     */
    @Override
    public String toString() {
        try {
            return XMPMetaFactory.serializeToString(xmpData,
                    new SerializeOptions().setOmitPacketWrapper(true).setUseCompactFormat(true));
        } catch (XMPException ignored) {
            // callers have always received null for a packet that cannot be serialized
            return null;
        }
    }

    // The XMP object is not serializable!
    private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
        throw new NotSerializableException();
    }

    // The XMP object is not serializable!
    private void writeObject(ObjectOutputStream oos) throws IOException {
        throw new NotSerializableException();
    }

    /**
     * Checks if the given key is a valid QName with a known standard namespace prefix.
     * Returns normally for a valid key and throws otherwise.
     *
     * @param key the key to check
     * @throws PropertyTypeException if the key is null or empty, is not of the form
     *                               {@code prefix:localName}, or uses an unregistered prefix
     */
    private void checkKey(String key) throws PropertyTypeException {
        if (key == null || key.length() == 0) {
            throw new PropertyTypeException("Key must not be null");
        }
        String[] keyParts = splitKey(key);
        if (keyParts == null) {
            throw new PropertyTypeException("Key must be a QName in the form prefix:localName");
        }
        if (registry.getNamespaceURI(keyParts[0]) == null) {
            throw new PropertyTypeException("Key does not use a registered Namespace prefix");
        }
    }

    /**
     * Split the given key at the namespace prefix delimiter
     *
     * @param key the key to split, must not be null
     * @return prefix (index 0) and local name (index 1) of the property or null if the key did
     * not contain a delimiter, too much of them, or nothing after the delimiter
     */
    private String[] splitKey(String key) {
        String[] keyParts = key.split(TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER);
        // Exactly two parts are required: callers access keyParts[1] unconditionally, and
        // String.split drops trailing empty strings, so "prefix" and "prefix:" yield one part.
        return keyParts.length == 2 ? keyParts : null;
    }

    /**
     * Convert Tika array types to XMP array types
     *
     * @param type the Tika property type
     * @return the matching {@link PropertyOptions} array flag for BAG, SEQ and ALT, 0 for any
     * other type
     */
    private int tikaToXMPArrayType(PropertyType type) {
        switch (type) {
            case BAG:
                return PropertyOptions.ARRAY;
            case SEQ:
                return PropertyOptions.ARRAY_ORDERED;
            case ALT:
                return PropertyOptions.ARRAY_ALTERNATE;
            default:
                // not an array type
                return 0;
        }
    }
}