com.twelvemonkeys.imageio.metadata.xmp.XMPScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Copyright (c) 2009, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.imageio.metadata.xmp;
import com.twelvemonkeys.imageio.util.IIOUtil;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
* XMPScanner
*
* @author Harald Kuhr
* @author last modified by $Author: haraldk$
* @version $Id: XMPScanner.java,v 1.0 Nov 11, 2009 4:49:00 PM haraldk Exp$
*/
public final class XMPScanner {
/**
* {@code <?xpacket begin=}
*
*
* -
* 8-bit (UTF-8):
* 0x3C 0x3F 0x78 0x70 0x61 0x63 0x6B 0x65 0x74 0x20
* 0x62 0x65 0x67 0x69 0x6E 0x3D
*
* - 16-bit encoding (UCS-2, UTF-16): (either big- or little-endian order)
* 0x3C 0x00 0x3F 0x00 0x78 0x00 0x70 0x00 0x61 0x00
* 0x63 0x00 0x6B 0x00 0x65 0x00 0x74 0x00 0x20 0x00 0x62 0x00
* 0x65 0x00 0x67 0x00 0x69 0x00 0x6E 0x00 0x3D [0x00]
*
* - 32-bit encoding (UCS-4):
* As 16 bit UCS2, with three 0x00 instead of one.
*
*
*/
private static final byte[] XMP_PACKET_BEGIN = {
0x3C, 0x3F, 0x78, 0x70, 0x61, 0x63, 0x6B, 0x65, 0x74, 0x20,
0x62, 0x65, 0x67, 0x69, 0x6E, 0x3D
};
/**
* {@code <?xpacket end=}
*/
private static final byte[] XMP_PACKET_END = {
0x3C, 0x3F, 0x78, 0x70, 0x61, 0x63, 0x6B, 0x65, 0x74, 0x20,
0x65, 0x6E, 0x64, 0x3D
};
/**
* Scans the given input for an XML metadata packet.
* The scanning process involves reading every byte in the file, while searching for an XMP packet.
* This process is very inefficient, compared to reading a known file format.
*
* NOTE: The XMP Specification says this method of reading an XMP packet
* should be considered a last resort.
*
* This is because files may contain multiple XMP packets, some which may be related to embedded resources,
* some which may be obsolete (or even incomplete).
*
*
* @param pInput the input to scan. The input may be an {@link javax.imageio.stream.ImageInputStream} or
* any object that can be passed to {@link ImageIO#createImageInputStream(Object)}.
* Typically this may be a {@link File}, {@link InputStream} or {@link java.io.RandomAccessFile}.
*
* @return a character Reader
*
* @throws java.nio.charset.UnsupportedCharsetException if the encoding specified within the BOM is not supported
* by the JRE.
* @throws IOException if an I/O exception occurs reading from {@code pInput}.
* @see ImageIO#createImageInputStream(Object)
*/
@SuppressWarnings("StatementWithEmptyBody")
static public Reader scanForXMPPacket(final Object pInput) throws IOException {
ImageInputStream stream = pInput instanceof ImageInputStream ? (ImageInputStream) pInput : ImageIO.createImageInputStream(pInput);
// TODO: Might be more than one XMP block per file (it's possible to re-start for now)..
long pos;
pos = scanForSequence(stream, XMP_PACKET_BEGIN);
if (pos >= 0) {
// Skip ' OR " (plus possible nulls for 16/32 bit)
byte quote = stream.readByte();
if (quote == '\'' || quote == '"') {
Charset cs = null;
// Read BOM
byte[] bom = new byte[4];
stream.readFully(bom);
// NOTE: Empty string should be treated as UTF-8 for backwards compatibility
if (bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF && bom[3] == quote ||
bom[0] == quote) {
// UTF-8
cs = StandardCharsets.UTF_8;
}
else if (bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF && bom[2] == 0x00 && bom[3] == quote) {
// UTF-16 BIG endian
cs = StandardCharsets.UTF_16BE;
}
else if (bom[0] == 0x00 && bom[1] == (byte) 0xFF && bom[2] == (byte) 0xFE && bom[3] == quote) {
stream.skipBytes(1); // Alignment
// UTF-16 little endian
cs = StandardCharsets.UTF_16LE;
}
else if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
// NOTE: 32-bit character set not supported by default
// UTF 32 BIG endian
cs = Charset.forName("UTF-32BE");
}
else if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0x00 && bom[3] == (byte) 0xFF && stream.read() == 0xFE) {
stream.skipBytes(2); // Alignment
// NOTE: 32-bit character set not supported by default
// UTF 32 little endian
cs = Charset.forName("UTF-32LE");
}
if (cs != null) {
// Read all bytes until
while (reader.read() != '>') {
}
// Return reader?
// How to decide between w or r?!
return reader;
}
}
}
return null;
}
/**
* Scans for a given ASCII sequence.
*
* @param pStream the stream to scan
* @param pSequence the byte sequence to search for
*
* @return the start position of the given sequence.
*
* @throws IOException if an I/O exception occurs during scanning
*/
private static long scanForSequence(final ImageInputStream pStream, final byte[] pSequence) throws IOException {
long start = -1L;
int index = 0;
int nullBytes = 0;
for (int read; (read = pStream.read()) >= 0;) {
if (pSequence[index] == (byte) read) {
// If this is the first byte in the sequence, store position
if (start == -1) {
start = pStream.getStreamPosition() - 1;
}
// Inside the sequence, there might be 1 or 3 null bytes, depending on 16/32 byte encoding
if (nullBytes == 1 || nullBytes == 3) {
pStream.skipBytes(nullBytes);
}
index++;
// If we found the entire sequence, we're done, return start position
if (index == pSequence.length) {
return start;
}
}
else if (index == 1 && read == 0 && nullBytes < 3) {
// Skip 1 or 3 null bytes for 16/32 bit encoding
nullBytes++;
}
else if (index != 0) {
// Start over
index = 0;
start = -1;
nullBytes = 0;
}
}
return -1L;
}
public static void main(final String[] pArgs) throws IOException {
ImageInputStream stream = ImageIO.createImageInputStream(new File(pArgs[0]));
Reader xmp;
while ((xmp = scanForXMPPacket(stream)) != null) {
BufferedReader reader = new BufferedReader(xmp);
String line;
while ((line = reader.readLine()) != null) {
System.out.println(line);
}
}
stream.close();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy