All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.xmp.XMPMetadata Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.xmp;

import java.io.IOException;
import java.io.NotSerializableException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;

import com.adobe.internal.xmp.XMPDateTime;
import com.adobe.internal.xmp.XMPException;
import com.adobe.internal.xmp.XMPIterator;
import com.adobe.internal.xmp.XMPMeta;
import com.adobe.internal.xmp.XMPMetaFactory;
import com.adobe.internal.xmp.XMPSchemaRegistry;
import com.adobe.internal.xmp.XMPUtils;
import com.adobe.internal.xmp.options.IteratorOptions;
import com.adobe.internal.xmp.options.PropertyOptions;
import com.adobe.internal.xmp.options.SerializeOptions;
import com.adobe.internal.xmp.properties.XMPProperty;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.Property.PropertyType;
import org.apache.tika.metadata.PropertyTypeException;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.xmp.convert.TikaToXMP;

/**
 * Provides a conversion of the Metadata map from Tika to the XMP data model by also providing the
 * Metadata API for clients to ease transition. But clients can also work directly on the XMP data
 * model, by getting the XMPMeta reference from this class. Usually the instance would be
 * initialized by providing the Metadata object that had been returned from Tika-core which
 * populates the XMP data model with all properties that can be converted.
 * 

* This class is not serializable! */ @SuppressWarnings("serial") public class XMPMetadata extends Metadata { /** * Use the XMP namespace registry implementation */ private static final XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry(); /** * The XMP data */ private XMPMeta xmpData; /** * Initializes with an empty XMP packet */ public XMPMetadata() { xmpData = XMPMetaFactory.create(); } /** * @see #XMPMetadata(Metadata, String) * But the mimetype is retrieved from the metadata map. */ public XMPMetadata(Metadata meta) throws TikaException { this.xmpData = TikaToXMP.convert(meta); } /** * Initializes the data by converting the Metadata information to XMP. If a mimetype is * provided, a specific converter can be used, that converts all available metadata. If there is * no mimetype provided or no specific converter available a generic conversion is done which * will convert only those properties that are in known namespaces and are using the correct * prefixes * * @param meta the Metadata information from Tika-core * @param mimetype mimetype information * @throws TikaException case an error occurred during conversion */ public XMPMetadata(Metadata meta, String mimetype) throws TikaException { this.xmpData = TikaToXMP.convert(meta, mimetype); } /** * Register a namespace URI with a suggested prefix. It is not an error if the URI is already * registered, no matter what the prefix is. If the URI is not registered but the suggested * prefix is in use, a unique prefix is created from the suggested one. The actual registered * prefix is always returned. The function result tells if the registered prefix is the * suggested one. * Note: No checking is presently done on either the URI or the prefix. * * @param namespaceURI The URI for the namespace. Must be a valid XML URI. * @param suggestedPrefix The suggested prefix to be used if the URI is not yet registered. Must be a valid * XML name. * @return Returns the registered prefix for this URI, is equal to the suggestedPrefix if the * namespace hasn't been registered before, otherwise the existing prefix. * @throws XMPException If the parameters are not accordingly set */ public static String registerNamespace(String namespaceURI, String suggestedPrefix) throws XMPException { return registry.registerNamespace(namespaceURI, suggestedPrefix); } /** * Obtain the prefix for a registered namespace URI. * It is not an error if the namespace URI is not registered. * * @param namespaceURI The URI for the namespace. Must not be null or the empty string. * @return Returns the prefix registered for this namespace URI or null. */ public static String getNamespacePrefix(String namespaceURI) { return registry.getNamespacePrefix(namespaceURI); } /** * Obtain the URI for a registered namespace prefix. * It is not an error if the namespace prefix is not registered. * * @param namespacePrefix The prefix for the namespace. Must not be null or the empty string. * @return Returns the URI registered for this prefix or null. */ public static String getNamespaceURI(String namespacePrefix) { return registry.getNamespaceURI(namespacePrefix); } // === Namespace Registry API === // /** * @return Returns the registered prefix/namespace-pairs as map, where the keys are the * namespaces and the values are the prefixes. */ @SuppressWarnings("unchecked") public static Map getNamespaces() { return registry.getNamespaces(); } /** * @return Returns the registered namespace/prefix-pairs as map, where the keys are the prefixes * and the values are the namespaces. */ @SuppressWarnings("unchecked") public static Map getPrefixes() { return registry.getPrefixes(); } /** * Deletes a namespace from the registry. *

* Does nothing if the URI is not registered, or if the namespaceURI parameter is null or the * empty string. *

* Note: Not yet implemented. * * @param namespaceURI The URI for the namespace. */ public static void deleteNamespace(String namespaceURI) { registry.deleteNamespace(namespaceURI); } /** * @see org.apache.tika.xmp.XMPMetadata#process(org.apache.tika.metadata.Metadata, * java.lang.String) * But the mimetype is retrieved from the metadata map. */ public void process(Metadata meta) throws TikaException { this.xmpData = TikaToXMP.convert(meta); } /** * Converts the Metadata information to XMP. If a mimetype is provided, a specific converter can * be used, that converts all available metadata. If there is no mimetype provided or no * specific converter available a generic conversion is done which will convert only those * properties that are in known namespaces and are using the correct prefixes * * @param meta the Metadata information from Tika-core * @param mimetype mimetype information * @throws TikaException case an error occurred during conversion */ public void process(Metadata meta, String mimetype) throws TikaException { this.xmpData = TikaToXMP.convert(meta, mimetype); } /** * Provides direct access to the XMP data model, in case a client prefers to work directly on it * instead of using the Metadata API * * @return the "internal" XMP data object */ public XMPMeta getXMPData() { return xmpData; } // === Metadata API === // /** * @see org.apache.tika.xmp.XMPMetadata#isMultiValued(java.lang.String) */ @Override public boolean isMultiValued(Property property) { return this.isMultiValued(property.getName()); } /** * Checks if the named property is an array. * * @see org.apache.tika.metadata.Metadata#isMultiValued(java.lang.String) */ @Override public boolean isMultiValued(String name) { checkKey(name); String[] keyParts = splitKey(name); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { try { XMPProperty prop = xmpData.getProperty(ns, keyParts[1]); return prop.getOptions().isArray(); } catch (XMPException e) { // Ignore } } return false; } /** * For XMP it is not clear what that API should return, therefor not implemented */ @Override public String[] names() { throw new UnsupportedOperationException("Not implemented"); } /** * Returns the value of a simple property or the first one of an array. The given name must * contain a namespace prefix of a registered namespace. * * @see org.apache.tika.metadata.Metadata#get(java.lang.String) */ @Override public String get(String name) { checkKey(name); String value = null; String[] keyParts = splitKey(name); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { try { XMPProperty prop = xmpData.getProperty(ns, keyParts[1]); if (prop != null && prop.getOptions().isSimple()) { value = prop.getValue(); } else if (prop != null && prop.getOptions().isArray()) { prop = xmpData.getArrayItem(ns, keyParts[1], 1); value = prop.getValue(); } // in all other cases, null is returned } catch (XMPException e) { // Ignore } } return value; } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public String get(Property property) { return this.get(property.getName()); } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public Integer getInt(Property property) { Integer result = null; try { result = XMPUtils.convertToInteger(this.get(property.getName())); } catch (XMPException e) { // Ignore } return result; } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public Date getDate(Property property) { Date result = null; try { XMPDateTime xmpDate = XMPUtils.convertToDate(this.get(property.getName())); if (xmpDate != null) { Calendar cal = xmpDate.getCalendar(); // TODO Timezone is currently lost // need another solution that preserves the timezone result = cal.getTime(); } } catch (XMPException e) { // Ignore } return result; } /** * @see org.apache.tika.xmp.XMPMetadata#getValues(java.lang.String) */ @Override public String[] getValues(Property property) { return this.getValues(property.getName()); } /** * Returns the value of a simple property or all if the property is an array and the elements * are of simple type. The given name must contain a namespace prefix of a registered namespace. * * @see org.apache.tika.metadata.Metadata#getValues(java.lang.String) */ @Override public String[] getValues(String name) { checkKey(name); String[] value = null; String[] keyParts = splitKey(name); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { try { XMPProperty prop = xmpData.getProperty(ns, keyParts[1]); if (prop != null && prop.getOptions().isSimple()) { value = new String[1]; value[0] = prop.getValue(); } else if (prop != null && prop.getOptions().isArray()) { int size = xmpData.countArrayItems(ns, keyParts[1]); value = new String[size]; boolean onlySimpleChildren = true; for (int i = 0; i < size && onlySimpleChildren; i++) { prop = xmpData.getArrayItem(ns, keyParts[1], i + 1); if (prop.getOptions().isSimple()) { value[i] = prop.getValue(); } else { onlySimpleChildren = false; } } if (!onlySimpleChildren) { value = null; } } // in all other cases, null is returned } catch (XMPException e) { // Ignore } } return value; } /** * As this API could only possibly work for simple properties in XMP, it just calls the set * method, which replaces any existing value * * @see org.apache.tika.metadata.Metadata#add(java.lang.String, java.lang.String) */ @Override public void add(String name, String value) { set(name, value); } /** * Sets the given property. If the property already exists, it is overwritten. Only simple * properties that use a registered prefix are stored in the XMP. * * @see org.apache.tika.metadata.Metadata#set(java.lang.String, java.lang.String) */ @Override public void set(String name, String value) { checkKey(name); String[] keyParts = splitKey(name); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { try { xmpData.setProperty(ns, keyParts[1], value); } catch (XMPException e) { // Ignore } } } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, String value) { this.set(property.getName(), value); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, int value) { // Can reuse the checks from the base class implementation which will call // the set(String, String) method in the end super.set(property, value); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, double value) { super.set(property, value); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, Date date) { super.set(property, date); } /** * Sets array properties. If the property already exists, it is overwritten. Only array * properties that use a registered prefix are stored in the XMP. * * @see org.apache.tika.metadata.Metadata#set(org.apache.tika.metadata.Property, * java.lang.String[]) */ @Override public void set(Property property, String[] values) { checkKey(property.getName()); if (!property.isMultiValuePermitted()) { throw new PropertyTypeException("Property is not of an array type"); } String[] keyParts = splitKey(property.getName()); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { try { int arrayType = tikaToXMPArrayType(property.getPrimaryProperty().getPropertyType()); xmpData.setProperty(ns, keyParts[1], null, new PropertyOptions(arrayType)); for (String value : values) { xmpData.appendArrayItem(ns, keyParts[1], value); } } catch (XMPException e) { // Ignore } } } /** * It will set all simple and array properties that have QName keys in registered namespaces. * * @see org.apache.tika.metadata.Metadata#setAll(java.util.Properties) */ @Override public void setAll(Properties properties) { @SuppressWarnings("unchecked") Enumeration names = (Enumeration) properties.propertyNames(); while (names.hasMoreElements()) { String name = names.nextElement(); Property property = Property.get(name); if (property == null) { throw new PropertyTypeException("Unknown property: " + name); } String value = properties.getProperty(name); if (property.isMultiValuePermitted()) { this.set(property, new String[]{value}); } else { this.set(property, value); } } } /** * @see org.apache.tika.xmp.XMPMetadata#remove(java.lang.String) */ public void remove(Property property) { this.remove(property.getName()); } /** * Removes the given property from the XMP data. If it is a complex property the whole subtree * is removed * * @see org.apache.tika.metadata.Metadata#remove(java.lang.String) */ @Override public void remove(String name) { checkKey(name); String[] keyParts = splitKey(name); String ns = registry.getNamespaceURI(keyParts[0]); if (ns != null) { xmpData.deleteProperty(ns, keyParts[1]); } } /** * Returns the number of top-level namespaces */ @Override public int size() { int size = 0; try { // Get an iterator for the XMP packet, starting at the top level schema nodes XMPIterator nsIter = xmpData.iterator( new IteratorOptions().setJustChildren(true).setOmitQualifiers(true)); // iterate all top level namespaces while (nsIter.hasNext()) { nsIter.next(); size++; } } catch (XMPException e) { // ignore } return size; } /** * This method is not implemented, yet. It is very tedious to check for semantic equality of XMP * packets */ @Override public boolean equals(Object o) { throw new UnsupportedOperationException("Not implemented"); } @Override public int hashCode() { return Objects.hash(super.hashCode(), xmpData); } /** * Serializes the XMP data in compact form without packet wrapper * * @see org.apache.tika.metadata.Metadata#toString() */ @Override public String toString() { String result = null; try { result = XMPMetaFactory.serializeToString(xmpData, new SerializeOptions().setOmitPacketWrapper(true).setUseCompactFormat(true)); } catch (XMPException e) { // ignore } return result; } // The XMP object is not serializable! private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { throw new NotSerializableException(); } // The XMP object is not serializable! private void writeObject(ObjectOutputStream ois) throws IOException { throw new NotSerializableException(); } /** * Checks if the given key is a valid QName with a known standard namespace prefix * * @param key the key to check * @return true if the key is valid otherwise false */ private void checkKey(String key) throws PropertyTypeException { if (key == null || key.length() == 0) { throw new PropertyTypeException("Key must not be null"); } String[] keyParts = splitKey(key); if (keyParts == null) { throw new PropertyTypeException("Key must be a QName in the form prefix:localName"); } if (registry.getNamespaceURI(keyParts[0]) == null) { throw new PropertyTypeException("Key does not use a registered Namespace prefix"); } } /** * Split the given key at the namespace prefix delimiter * * @param key the key to split * @return prefix and local name of the property or null if the key did not contain a delimiter * or too much of them */ private String[] splitKey(String key) { String[] keyParts = key.split(TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER); if (keyParts.length > 0 && keyParts.length <= 2) { return keyParts; } return null; } // checkKeyPrefix /** * Convert Tika array types to XMP array types * * @param type * @return */ private int tikaToXMPArrayType(PropertyType type) { int result = 0; switch (type) { case BAG: result = PropertyOptions.ARRAY; break; case SEQ: result = PropertyOptions.ARRAY_ORDERED; break; case ALT: result = PropertyOptions.ARRAY_ALTERNATE; break; } return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy