All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.xmp.XMPMetadata Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.xmp;

import java.io.IOException;
import java.io.NotSerializableException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.Map;
import java.util.Properties;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.Property.PropertyType;
import org.apache.tika.metadata.PropertyTypeException;
import org.apache.tika.xmp.convert.TikaToXMP;

import com.adobe.xmp.XMPDateTime;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPIterator;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.XMPSchemaRegistry;
import com.adobe.xmp.XMPUtils;
import com.adobe.xmp.options.IteratorOptions;
import com.adobe.xmp.options.PropertyOptions;
import com.adobe.xmp.options.SerializeOptions;
import com.adobe.xmp.properties.XMPProperty;

/**
 * Provides a conversion of the Metadata map from Tika to the XMP data model by also providing the
 * Metadata API for clients to ease transition. But clients can also work directly on the XMP data
 * model, by getting the XMPMeta reference from this class. Usually the instance would be
 * initialized by providing the Metadata object that had been returned from Tika-core which
 * populates the XMP data model with all properties that can be converted.
 *
 * This class is not serializable!
 */
@SuppressWarnings("serial")
public class XMPMetadata extends Metadata {
    /** The XMP data */
    private XMPMeta xmpData;
    /** Use the XMP namespace registry implementation */
    private static final XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry();

    /**
     * Initializes with an empty XMP packet
     */
    public XMPMetadata() {
        xmpData = XMPMetaFactory.create();
    }

    /**
     * @see #XMPMetadata(Metadata, String)
     * But the mimetype is retrieved from the metadata map.
     */
    public XMPMetadata(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta );
    }

    /**
     * Initializes the data by converting the Metadata information to XMP. If a mimetype is
     * provided, a specific converter can be used, that converts all available metadata. If there is
     * no mimetype provided or no specific converter available a generic conversion is done which
     * will convert only those properties that are in known namespaces and are using the correct
     * prefixes
     *
     * @param meta
     *            the Metadata information from Tika-core
     * @param mimetype
     *            mimetype information
     * @throws In
     *             case an error occured during conversion
     */
    public XMPMetadata(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta, mimetype );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#process(org.apache.tika.metadata.Metadata,
     *      java.lang.String)
     *  But the mimetype is retrieved from the metadata map.
     */
    public void process(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta );
    }

    /**
     * Converts the Metadata information to XMP. If a mimetype is provided, a specific converter can
     * be used, that converts all available metadata. If there is no mimetype provided or no
     * specific converter available a generic conversion is done which will convert only those
     * properties that are in known namespaces and are using the correct prefixes
     *
     * @param meta
     *            the Metadata information from Tika-core
     * @param mimetype
     *            mimetype information
     * @throws In
     *             case an error occured during conversion
     */
    public void process(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta, mimetype );
    }

    /**
     * Provides direct access to the XMP data model, in case a client prefers to work directly on it
     * instead of using the Metadata API
     *
     * @return the "internal" XMP data object
     */
    public XMPMeta getXMPData() {
        return xmpData;
    }

    // === Namespace Registry API === //
    /**
     * Register a namespace URI with a suggested prefix. It is not an error if the URI is already
     * registered, no matter what the prefix is. If the URI is not registered but the suggested
     * prefix is in use, a unique prefix is created from the suggested one. The actual registeed
     * prefix is always returned. The function result tells if the registered prefix is the
     * suggested one.
     * Note: No checking is presently done on either the URI or the prefix.
     *
     * @param namespaceURI
     *            The URI for the namespace. Must be a valid XML URI.
     * @param suggestedPrefix
     *            The suggested prefix to be used if the URI is not yet registered. Must be a valid
     *            XML name.
     * @return Returns the registered prefix for this URI, is equal to the suggestedPrefix if the
     *         namespace hasn't been registered before, otherwise the existing prefix.
     * @throws XMPException
     *             If the parameters are not accordingly set
     */
    public static String registerNamespace(String namespaceURI, String suggestedPrefix)
            throws XMPException {
        return registry.registerNamespace( namespaceURI, suggestedPrefix );
    }

    /**
     * Obtain the prefix for a registered namespace URI.
     * It is not an error if the namespace URI is not registered.
     *
     * @param namespaceURI
     *            The URI for the namespace. Must not be null or the empty string.
     * @return Returns the prefix registered for this namespace URI or null.
     */
    public static String getNamespacePrefix(String namespaceURI) {
        return registry.getNamespacePrefix( namespaceURI );
    }

    /**
     * Obtain the URI for a registered namespace prefix.
     * It is not an error if the namespace prefix is not registered.
     *
     * @param namespacePrefix
     *            The prefix for the namespace. Must not be null or the empty string.
     * @return Returns the URI registered for this prefix or null.
     */
    public static String getNamespaceURI(String namespacePrefix) {
        return registry.getNamespaceURI( namespacePrefix );
    }

    /**
     * @return Returns the registered prefix/namespace-pairs as map, where the keys are the
     *         namespaces and the values are the prefixes.
     */
    @SuppressWarnings("unchecked")
    public static Map getNamespaces() {
        return registry.getNamespaces();
    }

    /**
     * @return Returns the registered namespace/prefix-pairs as map, where the keys are the prefixes
     *         and the values are the namespaces.
     */
    @SuppressWarnings("unchecked")
    public static Map getPrefixes() {
        return registry.getPrefixes();
    }

    /**
     * Deletes a namespace from the registry.
     * 

* Does nothing if the URI is not registered, or if the namespaceURI parameter is null or the * empty string. *

* Note: Not yet implemented. * * @param namespaceURI * The URI for the namespace. */ public static void deleteNamespace(String namespaceURI) { registry.deleteNamespace( namespaceURI ); } // === Metadata API === // /** * @see org.apache.tika.xmp.XMPMetadata#isMultiValued(java.lang.String) */ @Override public boolean isMultiValued(Property property) { return this.isMultiValued( property.getName() ); } /** * Checks if the named property is an array. * * @see org.apache.tika.metadata.Metadata#isMultiValued(java.lang.String) */ @Override public boolean isMultiValued(String name) { checkKey( name ); String[] keyParts = splitKey( name ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { try { XMPProperty prop = xmpData.getProperty( ns, keyParts[1] ); return prop.getOptions().isArray(); } catch (XMPException e) { // Ignore } } return false; } /** * For XMP it is not clear what that API should return, therefor not implemented */ @Override public String[] names() { throw new UnsupportedOperationException( "Not implemented" ); } /** * Returns the value of a simple property or the first one of an array. The given name must * contain a namespace prefix of a registered namespace. * * @see org.apache.tika.metadata.Metadata#get(java.lang.String) */ @Override public String get(String name) { checkKey( name ); String value = null; String[] keyParts = splitKey( name ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { try { XMPProperty prop = xmpData.getProperty( ns, keyParts[1] ); if (prop != null && prop.getOptions().isSimple()) { value = prop.getValue(); } else if (prop != null && prop.getOptions().isArray()) { prop = xmpData.getArrayItem( ns, keyParts[1], 1 ); value = prop.getValue(); } // in all other cases, null is returned } catch (XMPException e) { // Ignore } } return value; } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public String get(Property property) { return this.get( property.getName() ); } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public Integer getInt(Property property) { Integer result = null; try { result = new Integer( XMPUtils.convertToInteger( this.get( property.getName() ) ) ); } catch (XMPException e) { // Ignore } return result; } /** * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String) */ @Override public Date getDate(Property property) { Date result = null; try { XMPDateTime xmpDate = XMPUtils.convertToDate( this.get( property.getName() ) ); if (xmpDate != null) { Calendar cal = xmpDate.getCalendar(); // TODO Timezone is currently lost // need another solution that preserves the timezone result = cal.getTime(); } } catch (XMPException e) { // Ignore } return result; } /** * @see org.apache.tika.xmp.XMPMetadata#getValues(java.lang.String) */ @Override public String[] getValues(Property property) { return this.getValues( property.getName() ); } /** * Returns the value of a simple property or all if the property is an array and the elements * are of simple type. The given name must contain a namespace prefix of a registered namespace. * * @see org.apache.tika.metadata.Metadata#getValues(java.lang.String) */ @Override public String[] getValues(String name) { checkKey( name ); String[] value = null; String[] keyParts = splitKey( name ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { try { XMPProperty prop = xmpData.getProperty( ns, keyParts[1] ); if (prop != null && prop.getOptions().isSimple()) { value = new String[1]; value[0] = prop.getValue(); } else if (prop != null && prop.getOptions().isArray()) { int size = xmpData.countArrayItems( ns, keyParts[1] ); value = new String[size]; boolean onlySimpleChildren = true; for (int i = 0; i < size && onlySimpleChildren; i++) { prop = xmpData.getArrayItem( ns, keyParts[1], i + 1 ); if (prop.getOptions().isSimple()) { value[i] = prop.getValue(); } else { onlySimpleChildren = false; } } if (!onlySimpleChildren) { value = null; } } // in all other cases, null is returned } catch (XMPException e) { // Ignore } } return value; } /** * As this API could only possibly work for simple properties in XMP, it just calls the set * method, which replaces any existing value * * @see org.apache.tika.metadata.Metadata#add(java.lang.String, java.lang.String) */ @Override public void add(String name, String value) { set( name, value ); } /** * Sets the given property. If the property already exists, it is overwritten. Only simple * properties that use a registered prefix are stored in the XMP. * * @see org.apache.tika.metadata.Metadata#set(java.lang.String, java.lang.String) */ @Override public void set(String name, String value) { checkKey( name ); String[] keyParts = splitKey( name ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { try { xmpData.setProperty( ns, keyParts[1], value ); } catch (XMPException e) { // Ignore } } } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, String value) { this.set( property.getName(), value ); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, int value) { // Can reuse the checks from the base class implementation which will call // the set(String, String) method in the end super.set( property, value ); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, double value) { super.set( property, value ); } /** * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String) */ @Override public void set(Property property, Date date) { super.set( property, date ); } /** * Sets array properties. If the property already exists, it is overwritten. Only array * properties that use a registered prefix are stored in the XMP. * * @see org.apache.tika.metadata.Metadata#set(org.apache.tika.metadata.Property, * java.lang.String[]) */ @Override public void set(Property property, String[] values) { checkKey( property.getName() ); if (!property.isMultiValuePermitted()) { throw new PropertyTypeException( "Property is not of an array type" ); } String[] keyParts = splitKey( property.getName() ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { try { int arrayType = tikaToXMPArrayType( property.getPrimaryProperty().getPropertyType() ); xmpData.setProperty( ns, keyParts[1], null, new PropertyOptions( arrayType ) ); for (String value : values) { xmpData.appendArrayItem( ns, keyParts[1], value ); } } catch (XMPException e) { // Ignore } } } /** * It will set all simple and array properties that have QName keys in registered namespaces. * * @see org.apache.tika.metadata.Metadata#setAll(java.util.Properties) */ @Override public void setAll(Properties properties) { @SuppressWarnings("unchecked") Enumeration names = (Enumeration) properties.propertyNames(); while (names.hasMoreElements()) { String name = names.nextElement(); Property property = Property.get( name ); if (property == null) { throw new PropertyTypeException( "Unknown property: " + name ); } String value = properties.getProperty( name ); if (property.isMultiValuePermitted()) { this.set( property, new String[] { value } ); } else { this.set( property, value ); } } } /** * @see org.apache.tika.xmp.XMPMetadata#remove(java.lang.String) */ public void remove(Property property) { this.remove( property.getName() ); } /** * Removes the given property from the XMP data. If it is a complex property the whole subtree * is removed * * @see org.apache.tika.metadata.Metadata#remove(java.lang.String) */ @Override public void remove(String name) { checkKey( name ); String[] keyParts = splitKey( name ); String ns = registry.getNamespaceURI( keyParts[0] ); if (ns != null) { xmpData.deleteProperty( ns, keyParts[1] ); } } /** * Returns the number of top-level namespaces */ @Override public int size() { int size = 0; try { // Get an iterator for the XMP packet, starting at the top level schema nodes XMPIterator nsIter = xmpData.iterator( new IteratorOptions().setJustChildren( true ) .setOmitQualifiers( true ) ); // iterate all top level namespaces while (nsIter.hasNext()) { nsIter.next(); size++; } } catch (XMPException e) { // ignore } return size; } /** * This method is not implemented, yet. It is very tedious to check for semantic equality of XMP * packets */ @Override public boolean equals(Object o) { throw new UnsupportedOperationException( "Not implemented" ); } /** * Serializes the XMP data in compact form without packet wrapper * * @see org.apache.tika.metadata.Metadata#toString() */ @Override public String toString() { String result = null; try { result = XMPMetaFactory.serializeToString( xmpData, new SerializeOptions() .setOmitPacketWrapper( true ).setUseCompactFormat( true ) ); } catch (XMPException e) { // ignore } return result; } // The XMP object is not serializable! private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { throw new NotSerializableException(); } // The XMP object is not serializable! private void writeObject(ObjectOutputStream ois) throws IOException { throw new NotSerializableException(); } /** * Checks if the given key is a valid QName with a known standard namespace prefix * * @param key * the key to check * @return true if the key is valid otherwise false */ private void checkKey(String key) throws PropertyTypeException { if (key == null || key.length() == 0) { throw new PropertyTypeException( "Key must not be null" ); } String[] keyParts = splitKey( key ); if (keyParts == null) { throw new PropertyTypeException( "Key must be a QName in the form prefix:localName" ); } if (registry.getNamespaceURI( keyParts[0] ) == null) { throw new PropertyTypeException( "Key does not use a registered Namespace prefix" ); } } /** * Split the given key at the namespace prefix delimiter * * @param key * the key to split * @return prefix and local name of the property or null if the key did not contain a delimiter * or too much of them */ private String[] splitKey(String key) { String[] keyParts = key.split( Metadata.NAMESPACE_PREFIX_DELIMITER ); if (keyParts.length > 0 && keyParts.length <= 2) { return keyParts; } return null; }// checkKeyPrefix /** * Convert Tika array types to XMP array types * * @param type * @return */ private int tikaToXMPArrayType(PropertyType type) { int result = 0; switch (type) { case BAG: result = PropertyOptions.ARRAY; break; case SEQ: result = PropertyOptions.ARRAY_ORDERED; break; case ALT: result = PropertyOptions.ARRAY_ALTERNATE; break; } return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy