// com.drew.metadata.xmp.XmpReader Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of metadata-extractor Show documentation
// Java library for extracting EXIF, IPTC, XMP, ICC and other metadata from image and video files.
// There is a newer version: 2.19.0
/*
 * Copyright 2002-2019 Drew Noakes and contributors
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *
 * More information about this project is available at:
 *
 *    https://drewnoakes.com/code/exif/
 *    https://github.com/drewnoakes/metadata-extractor
 */
package com.drew.metadata.xmp;

import com.adobe.internal.xmp.XMPException;
import com.adobe.internal.xmp.XMPIterator;
import com.adobe.internal.xmp.XMPMeta;
import com.adobe.internal.xmp.XMPMetaFactory;
import com.adobe.internal.xmp.impl.ByteBuffer;
import com.adobe.internal.xmp.options.ParseOptions;
import com.adobe.internal.xmp.properties.XMPPropertyInfo;
import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
import com.drew.imaging.jpeg.JpegSegmentType;
import com.drew.lang.SequentialByteArrayReader;
import com.drew.lang.SequentialReader;
import com.drew.metadata.Directory;
import com.drew.lang.annotations.NotNull;
import com.drew.lang.annotations.Nullable;
import com.drew.metadata.Metadata;
import com.drew.metadata.StringValue;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

/**
 * Extracts XMP data from JPEG APP1 segments.
 * <p>
 * Note that XMP uses a namespace and path format for identifying values, which does not map to metadata-extractor's
 * integer based tag identifiers. Therefore, XMP data is extracted and exposed via {@link XmpDirectory#getXMPMeta()}
 * which returns an instance of Adobe's {@link XMPMeta} which exposes the full XMP data set.
 * <p>
 * The extraction is done with Adobe's XmpCore-Library (XMP-Toolkit)
 * Copyright (c) 1999 - 2007, Adobe Systems Incorporated All rights reserved.
 *
 * @author Torsten Skadell
 * @author Drew Noakes https://drewnoakes.com
 * @author https://github.com/bezineb5
 */
public class XmpReader implements JpegSegmentMetadataReader
{
    /** Preamble that identifies a standard XMP packet inside a JPEG APP1 segment. */
    @NotNull
    private static final String XMP_JPEG_PREAMBLE = "http://ns.adobe.com/xap/1.0/\0";
    /** Preamble that identifies an Extended XMP chunk inside a JPEG APP1 segment. */
    @NotNull
    private static final String XMP_EXTENSION_JPEG_PREAMBLE = "http://ns.adobe.com/xmp/extension/\0";
    /** Schema namespace searched for the Extended XMP GUID property. */
    @NotNull
    private static final String SCHEMA_XMP_NOTES = "http://ns.adobe.com/xmp/note/";
    /** Path of the property whose value is the GUID of the Extended XMP chunks. */
    @NotNull
    private static final String ATTRIBUTE_EXTENDED_XMP = "xmpNote:HasExtendedXMP";
    // Limit photoshop:DocumentAncestors node as it can reach over 100000 items and make parsing extremely slow.
    // This is not a typical value but it may happen https://forums.adobe.com/thread/2081839
    @NotNull
    private static final ParseOptions PARSE_OPTIONS = new ParseOptions().setXMPNodesToLimit(Collections.singletonMap("photoshop:DocumentAncestors", 1000));

    /**
     * Extended XMP constants: byte lengths of the GUID field and of each of the two integer fields
     * (full length, chunk offset) that follow the preamble of an Extended XMP chunk.
     */
    private static final int EXTENDED_XMP_GUID_LENGTH = 32;
    private static final int EXTENDED_XMP_INT_LENGTH = 4;

    /**
     * Gets the JPEG segment types this reader is interested in.
     *
     * @return a single-element list containing {@link JpegSegmentType#APP1}.
     */
    @NotNull
    public Iterable<JpegSegmentType> getSegmentTypes()
    {
        return Collections.singletonList(JpegSegmentType.APP1);
    }

    /**
     * Version specifically for dealing with XMP found in JPEG segments. This form of XMP has a peculiar preamble, which
     * must be removed before parsing the XML.
     * <p>
     * Standard XMP segments are parsed as they are encountered. Extended XMP chunks are only collected once a
     * previously parsed standard XMP packet has announced their GUID; the reassembled buffer is parsed after all
     * segments have been visited.
     *
     * @param segments The byte arrays, one per segment, from which the metadata should be extracted.
     * @param metadata The {@link Metadata} object into which extracted values should be merged.
     * @param segmentType The {@link JpegSegmentType} being read.
     */
    public void readJpegSegments(@NotNull Iterable<byte[]> segments, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
    {
        final int preambleLength = XMP_JPEG_PREAMBLE.length();
        final int extensionPreambleLength = XMP_EXTENSION_JPEG_PREAMBLE.length();
        String extendedXMPGUID = null;
        byte[] extendedXMPBuffer = null;

        for (byte[] segmentBytes : segments) {
            // XMP in a JPEG file has an identifying preamble which is not valid XML
            if (segmentBytes.length >= preambleLength) {
                // NOTE we expect the full preamble here, but some images (such as that reported on GitHub #102)
                // start with "XMP\0://ns.adobe.com/xap/1.0/" which appears to be an error but is easily recovered
                // from. In such cases, the actual XMP data begins at the same offset.
                if (XMP_JPEG_PREAMBLE.equalsIgnoreCase(new String(segmentBytes, 0, preambleLength)) ||
                    "XMP".equalsIgnoreCase(new String(segmentBytes, 0, 3))) {

                    // Strip the preamble so that only the XML payload is handed to the parser.
                    byte[] xmlBytes = new byte[segmentBytes.length - preambleLength];
                    System.arraycopy(segmentBytes, preambleLength, xmlBytes, 0, xmlBytes.length);
                    extract(xmlBytes, metadata);
                    // Check in the Standard XMP if there should be a Extended XMP part in other chunks.
                    extendedXMPGUID = getExtendedXMPGUID(metadata);
                    continue;
                }
            }

            // If we know that there's Extended XMP chunks, look for them.
            if (extendedXMPGUID != null &&
                segmentBytes.length >= extensionPreambleLength &&
                XMP_EXTENSION_JPEG_PREAMBLE.equalsIgnoreCase(new String(segmentBytes, 0, extensionPreambleLength))) {

                extendedXMPBuffer = processExtendedXMPChunk(metadata, segmentBytes, extendedXMPGUID, extendedXMPBuffer);
            }
        }

        // Now that the Extended XMP chunks have been concatenated, let's parse and merge with the Standard XMP.
        if (extendedXMPBuffer != null) {
            extract(extendedXMPBuffer, metadata);
        }
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library.
     *
     * @param xmpBytes The bytes of the XMP packet, all of which are parsed.
     * @param metadata The {@link Metadata} object to which the resulting directory is added.
     */
    public void extract(@NotNull final byte[] xmpBytes, @NotNull Metadata metadata)
    {
        extract(xmpBytes, metadata, null);
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library.
     *
     * @param xmpBytes The bytes of the XMP packet, all of which are parsed.
     * @param metadata The {@link Metadata} object to which the resulting directory is added.
     * @param parentDirectory The directory to set as parent of the created {@link XmpDirectory}, or <code>null</code>.
     */
    public void extract(@NotNull final byte[] xmpBytes, @NotNull Metadata metadata, @Nullable Directory parentDirectory)
    {
        extract(xmpBytes, 0, xmpBytes.length, metadata, parentDirectory);
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library. A parse failure is recorded as an error on the created
     * {@link XmpDirectory} rather than being thrown.
     *
     * @param xmpBytes The array containing the XMP packet.
     * @param offset The index within <code>xmpBytes</code> at which the XMP packet starts.
     * @param length The number of bytes of <code>xmpBytes</code> to parse.
     * @param metadata The {@link Metadata} object to which the resulting directory is added, if it is not empty.
     * @param parentDirectory The directory to set as parent of the created {@link XmpDirectory}, or <code>null</code>.
     */
    public void extract(@NotNull final byte[] xmpBytes, int offset, int length, @NotNull Metadata metadata, @Nullable Directory parentDirectory)
    {
        XmpDirectory directory = new XmpDirectory();

        if (parentDirectory != null)
            directory.setParent(parentDirectory);

        try {
            XMPMeta xmpMeta;

            // If all xmpBytes are requested, no need to make a new ByteBuffer
            if (offset == 0 && length == xmpBytes.length) {
                xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes, PARSE_OPTIONS);
            } else {
                ByteBuffer buffer = new ByteBuffer(xmpBytes, offset, length);
                xmpMeta = XMPMetaFactory.parse(buffer.getByteStream(), PARSE_OPTIONS);
            }

            directory.setXMPMeta(xmpMeta);
        } catch (XMPException e) {
            directory.addError("Error processing XMP data: " + e.getMessage());
        }

        if (!directory.isEmpty())
            metadata.addDirectory(directory);
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library.
     *
     * @param xmpString The XMP packet as a string.
     * @param metadata The {@link Metadata} object to which the resulting directory is added.
     */
    public void extract(@NotNull final String xmpString, @NotNull Metadata metadata)
    {
        extract(xmpString, metadata, null);
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library.
     *
     * @param xmpString The XMP packet; its raw bytes are parsed.
     * @param metadata The {@link Metadata} object to which the resulting directory is added.
     */
    public void extract(@NotNull final StringValue xmpString, @NotNull Metadata metadata)
    {
        extract(xmpString.getBytes(), metadata, null);
    }

    /**
     * Performs the XMP data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * The extraction is done with Adobe's XMPCore library. A parse failure is recorded as an error on the created
     * {@link XmpDirectory} rather than being thrown.
     *
     * @param xmpString The XMP packet as a string.
     * @param metadata The {@link Metadata} object to which the resulting directory is added, if it is not empty.
     * @param parentDirectory The directory to set as parent of the created {@link XmpDirectory}, or <code>null</code>.
     */
    public void extract(@NotNull final String xmpString, @NotNull Metadata metadata, @Nullable Directory parentDirectory)
    {
        XmpDirectory directory = new XmpDirectory();

        if (parentDirectory != null)
            directory.setParent(parentDirectory);

        try {
            XMPMeta xmpMeta = XMPMetaFactory.parseFromString(xmpString, PARSE_OPTIONS);
            directory.setXMPMeta(xmpMeta);
        } catch (XMPException e) {
            directory.addError("Error processing XMP data: " + e.getMessage());
        }

        if (!directory.isEmpty())
            metadata.addDirectory(directory);
    }

    /**
     * Determine if there is an extended XMP section based on the standard XMP part.
     * The xmpNote:HasExtendedXMP attribute contains the GUID of the Extended XMP chunks.
     *
     * @param metadata The {@link Metadata} whose {@link XmpDirectory} instances are searched.
     * @return the value of the first <code>xmpNote:HasExtendedXMP</code> property found, or <code>null</code> if
     *         no directory contains one.
     */
    @Nullable
    private static String getExtendedXMPGUID(@NotNull Metadata metadata)
    {
        final Collection<XmpDirectory> xmpDirectories = metadata.getDirectoriesOfType(XmpDirectory.class);

        for (XmpDirectory directory : xmpDirectories) {
            final XMPMeta xmpMeta = directory.getXMPMeta();

            try {
                final XMPIterator itr = xmpMeta.iterator(SCHEMA_XMP_NOTES, null, null);
                if (itr == null)
                    continue;

                while (itr.hasNext()) {
                    final XMPPropertyInfo pi = (XMPPropertyInfo) itr.next();
                    if (ATTRIBUTE_EXTENDED_XMP.equals(pi.getPath())) {
                        return pi.getValue();
                    }
                }
            } catch (XMPException e) {
                // Fail silently here: we had a reading issue, not a decoding issue.
            }
        }

        return null;
    }

    /**
     * Process an Extended XMP chunk. It will read the bytes from segmentBytes and validates that the GUID is the
     * requested one. It will progressively fill the buffer with each chunk.
     * The format is specified in this document:
     * http://www.adobe.com/content/dam/Adobe/en/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
     * at page 19
     * <p>
     * The length and offset fields come from the file and are therefore validated before use. A chunk whose
     * declared values are out of range is skipped and an error is recorded in <code>metadata</code>.
     *
     * @param metadata The {@link Metadata} object to which error directories are added.
     * @param segmentBytes The complete segment, starting with the Extended XMP preamble.
     * @param extendedXMPGUID The GUID that the chunk must carry in order to be accepted.
     * @param extendedXMPBuffer The buffer filled so far, or <code>null</code> if no chunk has been accepted yet.
     * @return the buffer holding the chunks accepted so far, or <code>null</code> if there is still none.
     */
    @Nullable
    private static byte[] processExtendedXMPChunk(@NotNull Metadata metadata, @NotNull byte[] segmentBytes, @NotNull String extendedXMPGUID, @Nullable byte[] extendedXMPBuffer)
    {
        final int extensionPreambleLength = XMP_EXTENSION_JPEG_PREAMBLE.length();
        final int segmentLength = segmentBytes.length;
        final int totalOffset = extensionPreambleLength + EXTENDED_XMP_GUID_LENGTH + EXTENDED_XMP_INT_LENGTH + EXTENDED_XMP_INT_LENGTH;

        // Too short to hold the chunk header: nothing to do.
        if (segmentLength < totalOffset)
            return extendedXMPBuffer;

        try {
            /*
             * The chunk contains:
             * - A null-terminated signature string of "http://ns.adobe.com/xmp/extension/".
             * - A 128-bit GUID stored as a 32-byte ASCII hex string, capital A-F, no null termination.
             *   The GUID is a 128-bit MD5 digest of the full ExtendedXMP serialization.
             * - The full length of the ExtendedXMP serialization as a 32-bit unsigned integer
             * - The offset of this portion as a 32-bit unsigned integer
             * - The portion of the ExtendedXMP
             */
            final SequentialReader reader = new SequentialByteArrayReader(segmentBytes);
            reader.skip(extensionPreambleLength);
            final String segmentGUID = reader.getString(EXTENDED_XMP_GUID_LENGTH);

            if (!extendedXMPGUID.equals(segmentGUID))
                return extendedXMPBuffer;

            // Keep the unsigned values as long: narrowing to int first would turn large values negative.
            final long fullLength = reader.getUInt32();
            final long chunkOffset = reader.getUInt32();
            final int chunkLength = segmentLength - totalOffset;

            if (fullLength > Integer.MAX_VALUE) {
                addErrorDirectory(metadata, String.format("Extended XMP length %d exceeds the maximum supported size", fullLength));
                return extendedXMPBuffer;
            }

            if (extendedXMPBuffer == null)
                extendedXMPBuffer = new byte[(int)fullLength];

            if (extendedXMPBuffer.length != fullLength) {
                addErrorDirectory(metadata, String.format("Inconsistent length for the Extended XMP buffer: %d instead of %d", fullLength, extendedXMPBuffer.length));
            } else if (chunkOffset + chunkLength > fullLength) {
                // The chunk would be written past the end of the buffer.
                addErrorDirectory(metadata, String.format("Extended XMP chunk at offset %d with length %d exceeds the buffer length %d", chunkOffset, chunkLength, fullLength));
            } else {
                // chunkOffset is known to be at most fullLength here, so the narrowing cast is safe.
                System.arraycopy(segmentBytes, totalOffset, extendedXMPBuffer, (int)chunkOffset, chunkLength);
            }
        } catch (IOException ex) {
            addErrorDirectory(metadata, ex.getMessage());
        }

        return extendedXMPBuffer;
    }

    /**
     * Adds a new {@link XmpDirectory} carrying only the given error message to <code>metadata</code>.
     */
    private static void addErrorDirectory(@NotNull Metadata metadata, @Nullable String message)
    {
        XmpDirectory directory = new XmpDirectory();
        directory.addError(message);
        metadata.addDirectory(directory);
    }
}