All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twelvemonkeys.imageio.metadata.xmp.XMPScanner Maven / Gradle / Ivy

There is a newer version: 3.12.0
Show newest version
/*
 * Copyright (c) 2009, Harald Kuhr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name "TwelveMonkeys" nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.twelvemonkeys.imageio.metadata.xmp;

import com.twelvemonkeys.imageio.stream.BufferedImageInputStream;
import com.twelvemonkeys.imageio.util.IIOUtil;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import java.io.*;
import java.nio.charset.Charset;

/**
 * XMPScanner
 *
 * @author Harald Kuhr
 * @author last modified by $Author: haraldk$
 * @version $Id: XMPScanner.java,v 1.0 Nov 11, 2009 4:49:00 PM haraldk Exp$
 */
public final class XMPScanner {
    /**
     * {@code <?xpacket begin=}
     * 

*

    *
  • * 8-bit (UTF-8): * 0x3C 0x3F 0x78 0x70 0x61 0x63 0x6B 0x65 0x74 0x20 * 0x62 0x65 0x67 0x69 0x6E 0x3D *
  • *
  • 16-bit encoding (UCS-2, UTF-16): (either big- or little-endian order) * 0x3C 0x00 0x3F 0x00 0x78 0x00 0x70 0x00 0x61 0x00 * 0x63 0x00 0x6B 0x00 0x65 0x00 0x74 0x00 0x20 0x00 0x62 0x00 * 0x65 0x00 0x67 0x00 0x69 0x00 0x6E 0x00 0x3D [0x00] *
  • *
  • 32-bit encoding (UCS-4): * As 16 bit UCS2, with three 0x00 instead of one.
  • *
*/ private static final byte[] XMP_PACKET_BEGIN = { 0x3C, 0x3F, 0x78, 0x70, 0x61, 0x63, 0x6B, 0x65, 0x74, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6E, 0x3D }; /** * {@code <?xpacket end=} */ private static final byte[] XMP_PACKET_END = { 0x3C, 0x3F, 0x78, 0x70, 0x61, 0x63, 0x6B, 0x65, 0x74, 0x20, 0x65, 0x6E, 0x64, 0x3D }; /** * Scans the given input for an XML metadata packet. * The scanning process involves reading every byte in the file, while searching for an XMP packet. * This process is very inefficient, compared to reading a known file format. *

* NOTE: The XMP Specification says this method of reading an XMP packet * should be considered a last resort.
* This is because files may contain multiple XMP packets, some which may be related to embedded resources, * some which may be obsolete (or even incomplete). * * @param pInput the input to scan. The input may be an {@link javax.imageio.stream.ImageInputStream} or * any object that can be passed to {@link ImageIO#createImageInputStream(Object)}. * Typically this may be a {@link File}, {@link InputStream} or {@link java.io.RandomAccessFile}. * * @return a character Reader * * @throws java.nio.charset.UnsupportedCharsetException if the encoding specified within the BOM is not supported * by the JRE. * @throws IOException if an I/O exception occurs reading from {@code pInput}. * @see ImageIO#createImageInputStream(Object) */ static public Reader scanForXMPPacket(final Object pInput) throws IOException { ImageInputStream stream = pInput instanceof ImageInputStream ? (ImageInputStream) pInput : ImageIO.createImageInputStream(pInput); // TODO: Consider if BufferedIIS is a good idea if (!(stream instanceof BufferedImageInputStream)) { stream = new BufferedImageInputStream(stream); } // TODO: Might be more than one XMP block per file (it's possible to re-start for now).. long pos; pos = scanForSequence(stream, XMP_PACKET_BEGIN); if (pos >= 0) { // Skip ' OR " (plus possible nulls for 16/32 bit) byte quote = stream.readByte(); if (quote == '\'' || quote == '"') { Charset cs = null; // Read BOM byte[] bom = new byte[4]; stream.readFully(bom); // NOTE: Empty string should be treated as UTF-8 for backwards compatibility if (bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF && bom[3] == quote || bom[0] == quote) { // UTF-8 cs = Charset.forName("UTF-8"); } else if (bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF && bom[2] == 0x00 && bom[3] == quote) { // UTF-16 BIG endian cs = Charset.forName("UTF-16BE"); } else if (bom[0] == 0x00 && bom[1] == (byte) 0xFF && bom[2] == (byte) 0xFE && bom[3] == quote) { stream.skipBytes(1); // Alignment // UTF-16 little endian cs = Charset.forName("UTF-16LE"); } else if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) { // NOTE: 32-bit character set not supported by default // UTF 32 BIG endian cs = Charset.forName("UTF-32BE"); } else if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0x00 && bom[3] == (byte) 0xFF && stream.read() == 0xFE) { stream.skipBytes(2); // Alignment // NOTE: 32-bit character set not supported by default // UTF 32 little endian cs = Charset.forName("UTF-32LE"); } if (cs != null) { // Read all bytes until while (reader.read() != '>') { } // Return reader? // How to decide between w or r?! return reader; } } } return null; } /** * Scans for a given ASCII sequence. * * @param pStream the stream to scan * @param pSequence the byte sequence to search for * * @return the start position of the given sequence. * * @throws IOException if an I/O exception occurs during scanning */ private static long scanForSequence(final ImageInputStream pStream, final byte[] pSequence) throws IOException { long start = -1l; int index = 0; int nullBytes = 0; for (int read; (read = pStream.read()) >= 0;) { if (pSequence[index] == (byte) read) { // If this is the first byte in the sequence, store position if (start == -1) { start = pStream.getStreamPosition() - 1; } // Inside the sequence, there might be 1 or 3 null bytes, depending on 16/32 byte encoding if (nullBytes == 1 || nullBytes == 3) { pStream.skipBytes(nullBytes); } index++; // If we found the entire sequence, we're done, return start position if (index == pSequence.length) { return start; } } else if (index == 1 && read == 0 && nullBytes < 3) { // Skip 1 or 3 null bytes for 16/32 bit encoding nullBytes++; } else if (index != 0) { // Start over index = 0; start = -1; nullBytes = 0; } } return -1l; } public static void main(final String[] pArgs) throws IOException { ImageInputStream stream = ImageIO.createImageInputStream(new File(pArgs[0])); Reader xmp; while ((xmp = scanForXMPPacket(stream)) != null) { BufferedReader reader = new BufferedReader(xmp); String line; while ((line = reader.readLine()) != null) { System.out.println(line); } } stream.close(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy