All downloads are free. The search and download functionality uses the official Maven repository.

com.twelvemonkeys.imageio.metadata.tiff.TIFFReader Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2009, Harald Kuhr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * * Neither the name of the copyright holder nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.twelvemonkeys.imageio.metadata.tiff;

import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReader;
import com.twelvemonkeys.lang.StringUtil;
import com.twelvemonkeys.lang.Validate;

import javax.imageio.IIOException;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.*;

import static com.twelvemonkeys.imageio.metadata.tiff.TIFFEntry.getValueLength;

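/**
 * TIFFReader.
 * <p>
 * Reads the metadata structure of a TIFF or BigTIFF stream: the header, the chain of linked IFDs
 * and the sub-IFDs referenced from selected pointer tags.
 * </p>
 *
 * @author Harald Kuhr
 * @author last modified by $Author: haraldk$
 * @version $Id: TIFFReader.java,v 1.0 Nov 13, 2009 5:42:51 PM haraldk Exp$
 */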
public final class TIFFReader extends MetadataReader {

    final static boolean DEBUG = "true".equalsIgnoreCase(System.getProperty("com.twelvemonkeys.imageio.metadata.tiff.debug"));

    // TODO: Consider leaving to client code what sub-IFDs to parse (but always parse TAG_SUB_IFD).
    private static final Collection VALID_TOP_LEVEL_IFDS = Collections.unmodifiableCollection(Arrays.asList(TIFF.TAG_SUB_IFD, TIFF.TAG_EXIF_IFD, TIFF.TAG_GPS_IFD));
    private static final Map> VALID_SUB_IFDS = createSubIFDMap();

    private static Map> createSubIFDMap() {
        HashMap> map = new HashMap>() {
            @Override
            public Collection get(Object key) {
                Collection collection = super.get(key);
                return collection != null ? collection : Collections.emptySet();
            }
        };

        map.put(TIFF.TAG_SUB_IFD, Collections.singleton(TIFF.TAG_SUB_IFD));
        map.put(TIFF.TAG_EXIF_IFD, Collections.singleton(TIFF.TAG_INTEROP_IFD));

        return Collections.unmodifiableMap(map);
    }

    /** Offsets of the IFDs visited so far; an offset that is already present is treated as a bad (repeated) IFD reference. */
    private final Set<Long> parsedIFDs = new TreeSet<>();

    /** Length of the input as reported by the stream; bounds checks are only applied when this is positive. */
    private long length;
    /** {@code true} when the header identified the stream as BigTIFF (64 bit offsets and counts). */
    private boolean longOffsets;
    /** Size in bytes of the offset/value field of an entry: 4 for TIFF, 8 for BigTIFF. */
    private int offsetSize;

    /**
     * Reads the TIFF or BigTIFF header at the current stream position, configures the byte order of
     * {@code input} accordingly and parses the chain of linked IFDs.
     *
     * @param input the stream to read from, positioned at the byte order mark
     * @return a directory containing the linked IFDs that could be read
     * @throws IllegalArgumentException if {@code input} is {@code null}
     * @throws IIOException if the byte order mark, magic number or BigTIFF header fields are invalid
     * @throws IOException if an I/O error occurs while reading
     */
    @Override
    public Directory read(final ImageInputStream input) throws IOException {
        Validate.notNull(input, "input");

        // Forget the offsets visited by a previous invocation, otherwise every IFD of the
        // next stream read with this instance would be rejected as "already parsed"
        parsedIFDs.clear();

        byte[] bom = new byte[2];
        input.readFully(bom);

        if (bom[0] == 'I' && bom[1] == 'I') {
            input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
        }
        else if (bom[0] == 'M' && bom[1] == 'M') {
            input.setByteOrder(ByteOrder.BIG_ENDIAN);
        }
        else {
            throw new IIOException(String.format("Invalid TIFF byte order mark '%s', expected: 'II' or 'MM'", StringUtil.decode(bom, 0, bom.length, "ASCII")));
        }

        // BigTiff uses version 43 instead of TIFF's 42, and header is slightly different, see
        // http://www.awaresystems.be/imaging/tiff/bigtiff.html
        int magic = input.readUnsignedShort();
        if (magic == TIFF.TIFF_MAGIC) {
            longOffsets = false;
            offsetSize = 4;
        }
        else if (magic == TIFF.BIGTIFF_MAGIC) {
            longOffsets = true;
            offsetSize = 8;

            // Just validate we're ok
            int offSize = input.readUnsignedShort();
            if (offSize != 8) {
                throw new IIOException(String.format("Unexpected BigTIFF offset size: %04x, expected: %04x", offSize, 8));
            }

            int padding = input.readUnsignedShort();
            if (padding != 0) {
                throw new IIOException(String.format("Unexpected BigTIFF padding: %04x, expected: %04x", padding, 0));
            }
        }
        else {
            throw new IIOException(String.format("Wrong TIFF magic in input data: %04x, expected: %04x", magic, TIFF.TIFF_MAGIC));
        }

        // Used by the bounds checks on IFD and value offsets
        length = input.length();

        return readLinkedIFDs(input);
    }

    /**
     * Reads the offset of the first IFD at the current stream position, then follows the
     * "next IFD" links until a zero offset, a bad offset or the end of the stream is reached.
     *
     * @param input the stream, positioned at the first IFD offset of the header
     * @return a directory wrapping all IFDs that were read
     * @throws IOException if an I/O error other than a premature end of stream occurs
     */
    private TIFFDirectory readLinkedIFDs(final ImageInputStream input) throws IOException {
        long ifdOffset = readOffset(input);

        List<IFD> ifds = new ArrayList<>();

        // Read linked IFDs
        while (ifdOffset != 0) {
            try {
                // A negative offset can only come from a 64 bit BigTIFF offset above Long.MAX_VALUE;
                // reject it here, as seeking to it would fail with an unchecked exception.
                // Offsets beyond the known length and offsets seen before (loops) are rejected too.
                if (ifdOffset < 0 || (length > 0 && ifdOffset >= length) || !parsedIFDs.add(ifdOffset)) {
                    // TODO: Issue warning
                    if (DEBUG) {
                        System.err.println("Bad IFD offset: " + ifdOffset);
                    }
                    break;
                }

                ifds.add(readIFD(input, ifdOffset, VALID_TOP_LEVEL_IFDS));

                // After the entries follows the offset of the next IFD
                ifdOffset = readOffset(input);
            }
            catch (EOFException eof) {
                // catch EOF here as missing EOF marker
                ifdOffset = 0;
            }
        }

        return new TIFFDirectory(ifds);
    }

    /**
     * Reads a single offset: a signed 64 bit value when long offsets are in use,
     * otherwise an unsigned 32 bit value.
     */
    private long readOffset(final ImageInputStream input) throws IOException {
        if (longOffsets) {
            return input.readLong();
        }

        return input.readUnsignedInt();
    }

    /**
     * Reads one IFD: seeks to {@code pOffset}, reads the entry count and the entries, then resolves
     * the sub-IFDs referenced by the tags in {@code subIFDIds}.
     * Reading of entries stops at the first entry that fails with an {@code IIOException};
     * entries read up to that point are kept.
     *
     * @param pInput the stream to read from
     * @param pOffset the stream offset of the IFD
     * @param subIFDIds the tags whose values should be followed as IFD pointers, may be {@code null} or empty
     * @return the IFD with the entries that could be read
     * @throws IOException if an I/O error occurs
     */
    private IFD readIFD(final ImageInputStream pInput, final long pOffset, Collection<Integer> subIFDIds) throws IOException {
        // TODO: Issue warning if IFD offset is not on word boundary (pOffset % 2 != 0)
        pInput.seek(pOffset);

        long entryCount = readEntryCount(pInput);

        List<TIFFEntry> entries = new ArrayList<>();

        // The counter is a long, like the count itself, so that a (corrupt) BigTIFF count
        // above Integer.MAX_VALUE can't make the counter wrap around
        for (long i = 0; i < entryCount; i++) {
            try {
                TIFFEntry entry = readEntry(pInput);

                // null means the entry had an invalid type and was skipped
                if (entry != null) {
                    entries.add(entry);
                }
            }
            catch (IIOException e) {
                if (DEBUG) {
                    e.printStackTrace();
                }
                // TODO: Warning listener!

                break;
            }
        }

        readSubIFDs(pInput, entries, subIFDIds);

        return new IFD(entries);
    }

    /**
     * Reads the number of entries of an IFD: an unsigned 16 bit value,
     * or a signed 64 bit value when long offsets are in use.
     */
    private long readEntryCount(final ImageInputStream pInput) throws IOException {
        if (!longOffsets) {
            return pInput.readUnsignedShort();
        }

        return pInput.readLong();
    }

    /**
     * Replaces, in place, each entry whose tag is in {@code subIFDIds} with an entry holding the
     * parsed sub-IFD (or an array of sub-IFDs when the entry points to more than one).
     * Entries whose sub-IFDs can't be read are left untouched. The stream position is restored
     * before returning normally.
     *
     * @param input the stream to read from
     * @param entries the entries of the parent IFD, modified in place
     * @param subIFDIds the tags to follow as IFD pointers; nothing is done if {@code null} or empty
     * @throws IOException if an I/O error other than a premature end of stream occurs
     */
    private void readSubIFDs(ImageInputStream input, List<TIFFEntry> entries, Collection<Integer> subIFDIds) throws IOException {
        if (subIFDIds == null || subIFDIds.isEmpty()) {
            return;
        }

        long initialPosition = input.getStreamPosition();

        for (int i = 0, entriesSize = entries.size(); i < entriesSize; i++) {
            TIFFEntry entry = entries.get(i);
            int tagId = (Integer) entry.getIdentifier();

            if (subIFDIds.contains(tagId)) {
                try {
                    long[] ifdOffsets = getPointerOffsets(entry);
                    List<IFD> subIFDs = new ArrayList<>(ifdOffsets.length);

                    for (long ifdOffset : ifdOffsets) {
                        try {
                            // Reject negative offsets (seeking to them would fail with an unchecked exception),
                            // offsets beyond the known length and offsets already visited (loops)
                            if (ifdOffset < 0 || (length > 0 && ifdOffset >= length) || !parsedIFDs.add(ifdOffset)) {
                                // TODO: Issue warning
                                if (DEBUG) {
                                    System.err.println("Bad IFD offset: " + ifdOffset);
                                }
                                break;
                            }

                            subIFDs.add(readIFD(input, ifdOffset, VALID_SUB_IFDS.get(tagId)));
                        }
                        catch (EOFException eof) {
                            // TODO: Issue warning
                            if (DEBUG) {
                                eof.printStackTrace();
                            }
                        }
                    }

                    if (subIFDs.size() == 1) {
                        // Replace the entry with parsed data
                        entries.set(i, new TIFFEntry(tagId, entry.getType(), subIFDs.get(0)));
                    }
                    else if (!subIFDs.isEmpty()) {
                        // Replace the entry with parsed data
                        entries.set(i, new TIFFEntry(tagId, entry.getType(), subIFDs.toArray(new IFD[0])));
                    }
                }
                catch (IIOException e) {
                    if (DEBUG) {
                        // TODO: Issue warning without crashing...?
                        System.err.println("Error parsing sub-IFD: " + tagId);
                        e.printStackTrace();
                    }
                }
            }
        }

        // Restore initial position
        input.seek(initialPosition);
    }

    /**
     * Converts the value of a pointer entry to an array of offsets.
     * {@code Byte}, {@code Short} and {@code Integer} values are interpreted as unsigned,
     * a {@code Long} is used as is and a {@code long[]} is returned directly (not copied).
     *
     * @param entry the entry holding the pointer value
     * @return the offsets the entry points to
     * @throws IIOException if the value is of any other type, or {@code null}
     */
    private long[] getPointerOffsets(final Entry entry) throws IIOException {
        final Object pointer = entry.getValue();

        if (pointer instanceof long[]) {
            return (long[]) pointer;
        }
        if (pointer instanceof Long) {
            long asLong = (Long) pointer;
            return new long[] {asLong};
        }
        if (pointer instanceof Integer) {
            int asInt = (Integer) pointer;
            return new long[] {asInt & 0xffffffffL};
        }
        if (pointer instanceof Short) {
            short asShort = (Short) pointer;
            return new long[] {asShort & 0xffff};
        }
        if (pointer instanceof Byte) {
            byte asByte = (Byte) pointer;
            return new long[] {asByte & 0xff};
        }

        Class<?> pointerType = pointer == null ? null : pointer.getClass();
        throw new IIOException(String.format("Unknown pointer type: %s", pointerType));
    }

    /**
     * Reads a single IFD entry (tag, type, count and value or value offset) at the current stream position.
     * Values that fit in the value field are read in place, other values are read from the offset
     * stored in the field. If the value lies outside the known stream length, or can't be read completely,
     * the entry's value is the {@code EOFException} describing the problem.
     *
     * @param pInput the stream, positioned at the start of the entry
     * @return the entry, or {@code null} if the entry has an invalid type and was skipped
     * @throws IIOException if the value count is negative or not supported
     * @throws IOException if an I/O error occurs
     */
    private TIFFEntry readEntry(final ImageInputStream pInput) throws IOException {
        int tagId = pInput.readUnsignedShort();
        short type = pInput.readShort();
        int count = readValueCount(pInput); // Number of values

        // TODO: Move this check into readValueCount?
        // It's probably a spec violation to have count 0, but we'll be lenient about it
        if (count < 0) {
            throw new IIOException(String.format("Illegal count %d for tag %s type %s @%08x", count, tagId, type, pInput.getStreamPosition()));
        }

        if (!isValidType(type)) {
            // The count and value fields are 4 bytes each in TIFF, 8 bytes each in BigTIFF.
            // Skipping the wrong amount here would misalign all following entries.
            int valueFieldSize = longOffsets ? 8 : 4;
            pInput.skipBytes(valueFieldSize); // read Value

            if (DEBUG) {
                // Invalid tag, this is just for debugging
                // Start of this entry: tag (2) + type (2) + count + value field
                long offset = pInput.getStreamPosition() - (4L + 2L * valueFieldSize);

                System.err.printf("Bad TIFF data @%08x\n", pInput.getStreamPosition());
                System.err.println("tagId: " + tagId + (tagId <= 0 ? " (INVALID)" : ""));
                System.err.println("type: " + type + " (INVALID)");
                System.err.println("count: " + count);

                pInput.mark();

                try {
                    pInput.seek(offset);

                    byte[] bytes = new byte[8 + Math.min(120, Math.max(24, count))];
                    int len = pInput.read(bytes);

                    System.err.print(HexDump.dump(offset, bytes, 0, len));
                    System.err.println(len < count ? "[...]" : "");
                }
                finally {
                    pInput.reset();
                }
            }

            return null;
        }

        long valueLength = getValueLength(type, count);

        Object value;
        if (valueLength > 0 && valueLength <= offsetSize) {
            value = readValueInLine(pInput, type, count);
            // Skip the unused remainder of the value field
            pInput.skipBytes(offsetSize - valueLength);
        }
        else {
            long valueOffset = readOffset(pInput); // This is the *value* iff the value size is <= offsetSize

            // Note: This a precaution
            // A negative offset (BigTIFF offset above Long.MAX_VALUE) can never be valid, and seeking to it
            // would fail with an unchecked exception. The length check is written as a subtraction,
            // so that a huge offset can't overflow the comparison.
            if (valueOffset < 0 || count >= Integer.MAX_VALUE || length > 0 && valueOffset > length - valueLength) {
                value = new EOFException(String.format("TIFF value offset or size too large: %d/%d bytes (length: %d bytes)", valueOffset, valueLength, length));
            }
            else {
                value = readValueAt(pInput, valueOffset, type, count);
            }
        }

        return new TIFFEntry(tagId, type, value);
    }

    /**
     * Tests whether {@code type} is a positive index into {@code TIFF.TYPE_LENGTHS}
     * that has a positive length registered.
     */
    private boolean isValidType(final short type) {
        if (type <= 0 || type >= TIFF.TYPE_LENGTHS.length) {
            return false;
        }

        return TIFF.TYPE_LENGTHS[type] > 0;
    }

    /**
     * Reads the value count of an entry (64 bit when long offsets are in use, otherwise unsigned 32 bit)
     * and passes it through {@code assertIntCount}.
     */
    private int readValueCount(final ImageInputStream pInput) throws IOException {
        final long rawCount;

        if (longOffsets) {
            rawCount = pInput.readLong();
        }
        else {
            rawCount = pInput.readUnsignedInt();
        }

        return assertIntCount(rawCount);
    }

    /**
     * Verifies that a value count fits in an {@code int}.
     *
     * @param count the count as read from the stream
     * @return {@code count} as an {@code int}
     * @throws IIOException if {@code count} is negative or greater than {@code Integer.MAX_VALUE}
     */
    private int assertIntCount(final long count) throws IOException {
        if (count < 0) {
            // Only possible for a 64 bit count with the high bit set. Without this check, the narrowing
            // cast below could silently turn such a value into a small, plausible-looking count.
            throw new IIOException(String.format("Illegal TIFF value count value: %s", count));
        }

        if (count > Integer.MAX_VALUE) {
            throw new IIOException(String.format("Unsupported TIFF value count value: %s > Integer.MAX_VALUE", count));
        }

        return (int) count;
    }

    /**
     * Reads a value stored at {@code pOffset} and seeks back to the position the stream had on entry.
     *
     * @return the value, or the {@code EOFException} that was caught if the stream ended while reading it
     */
    private Object readValueAt(final ImageInputStream pInput, final long pOffset, final short pType, final int pCount) throws IOException {
        final long returnPosition = pInput.getStreamPosition();
        Object result;

        try {
            pInput.seek(pOffset);
            result = readValue(pInput, pType, pCount, longOffsets);
        }
        catch (EOFException truncated) {
            // TODO: Add warning listener API and report problem to client code
            result = truncated;
        }
        finally {
            pInput.seek(returnPosition);
        }

        return result;
    }

    /**
     * Reads a value stored directly in the value field of the entry, at the current stream position.
     */
    private Object readValueInLine(final ImageInputStream pInput, final short pType, final int pCount) throws IOException {
        final Object inlineValue = readValue(pInput, pType, pCount, longOffsets);

        return inlineValue;
    }

    /**
     * Reads {@code pCount} values of type {@code pType} from the current stream position.
     * <p>
     * The Java type of the result depends on the TIFF type and on whether a single value or
     * several values are read:
     * </p>
     * <ul>
     * <li>ASCII: a {@code String}, or a {@code String[]} if splitting on NULL does not yield exactly one string</li>
     * <li>BYTE: single value widened to {@code Integer}; SBYTE: single value as {@code Byte};
     *     otherwise, and always for UNDEFINED, a {@code byte[]}</li>
     * <li>SHORT: {@code Integer} or {@code int[]} (unsigned); SSHORT: {@code Short} or {@code short[]}</li>
     * <li>LONG and IFD: {@code Long} or {@code long[]} (unsigned); SLONG: {@code Integer} or {@code int[]}</li>
     * <li>FLOAT and DOUBLE: boxed value or primitive array</li>
     * <li>RATIONAL and SRATIONAL: {@code Rational} or {@code Rational[]}</li>
     * <li>LONG8, SLONG8 and IFD8: {@code Long} or {@code long[]}, only if {@code bigTIFF} is {@code true}</li>
     * <li>anything else: an {@code Unknown} holding type, count and the stream position on entry</li>
     * </ul>
     *
     * @param pInput the stream, positioned at the first value
     * @param pType the TIFF type of the value(s)
     * @param pCount the number of values
     * @param bigTIFF whether the 64 bit BigTIFF types should be read
     * @return the value, see above
     * @throws IIOException if a single LONG8 or IFD8 value is read as negative
     * @throws IOException if an I/O error occurs
     */
    private static Object readValue(final ImageInputStream pInput, final short pType, final int pCount, boolean bigTIFF) throws IOException {
        // TODO: Review value "widening" for the unsigned types. Right now it's inconsistent. Should we leave it to client code?
        // TODO: New strategy: Leave data as is, instead perform the widening in TIFFEntry.getValue.
        // TODO: Add getValueByte/getValueUnsignedByte/getValueShort/getValueUnsignedShort/getValueInt/etc... in API.

        // Remembered only for the Unknown value created in the default branch
        long pos = pInput.getStreamPosition();

        switch (pType) {
            case TIFF.TYPE_ASCII:
                // TODO: This might be UTF-8 or ISO-8859-x, even though spec says NULL-terminated 7 bit ASCII
                // TODO: Fail if unknown chars, try parsing with ISO-8859-1 or file.encoding
                if (pCount == 0) {
                    return "";
                }

                // NOTE: This can actually be more than one string, each string ends with a NULL-terminator
                byte[] ascii = new byte[pCount];
                pInput.readFully(ascii);
                // Drop a single trailing NULL-terminator, if present
                int len = ascii[ascii.length - 1] == 0 ? ascii.length - 1 : ascii.length;

                String[] strings = new String(ascii, 0, len, StandardCharsets.UTF_8) // UTF-8 is ASCII compatible
                        .split("\0"); // Split on NULL

                return strings.length == 1 ? strings[0] : strings;
            case TIFF.TYPE_BYTE:
                if (pCount == 1) {
                    return pInput.readUnsignedByte();
                }
                // else fall through
            case TIFF.TYPE_SBYTE:
                // Only reached with pCount == 1 for SBYTE itself, as single BYTE values have returned above
                if (pCount == 1) {
                    return pInput.readByte();
                }
                // else fall through
            case TIFF.TYPE_UNDEFINED:
                byte[] bytes = new byte[pCount];
                pInput.readFully(bytes);

                // NOTE: We don't change (unsigned) BYTE array wider Java type, as most often BYTE array means
                // binary data and we want to keep that as a byte array for clients to parse further

                return bytes;
            case TIFF.TYPE_SHORT:
                if (pCount == 1) {
                    return pInput.readUnsignedShort();
                }
                // else fall through, the array is read below and widened for SHORT
            case TIFF.TYPE_SSHORT:
                if (pCount == 1) {
                    return pInput.readShort();
                }

                short[] shorts = new short[pCount];
                pInput.readFully(shorts, 0, shorts.length);

                if (pType == TIFF.TYPE_SHORT) {
                    // Unsigned: widen to int, masking away the sign extension
                    int[] ints = new int[pCount];
                    for (int i = 0; i < pCount; i++) {
                        ints[i] = shorts[i] & 0xffff;
                    }

                    return ints;
                }

                return shorts;
            case TIFF.TYPE_IFD:
            case TIFF.TYPE_LONG:
                if (pCount == 1) {
                    return pInput.readUnsignedInt();
                }
                // else fall through, the array is read below and widened for LONG and IFD
            case TIFF.TYPE_SLONG:
                if (pCount == 1) {
                    return pInput.readInt();
                }

                int[] ints = new int[pCount];
                pInput.readFully(ints, 0, ints.length);

                if (pType == TIFF.TYPE_LONG || pType == TIFF.TYPE_IFD) {
                    // Unsigned: widen to long, masking away the sign extension
                    long[] longs = new long[pCount];
                    for (int i = 0; i < pCount; i++) {
                        longs[i] = ints[i] & 0xffffffffL;
                    }

                    return longs;
                }

                return ints;
            case TIFF.TYPE_FLOAT:
                if (pCount == 1) {
                    return pInput.readFloat();
                }

                float[] floats = new float[pCount];
                pInput.readFully(floats, 0, floats.length);
                return floats;
            case TIFF.TYPE_DOUBLE:
                if (pCount == 1) {
                    return pInput.readDouble();
                }

                double[] doubles = new double[pCount];
                pInput.readFully(doubles, 0, doubles.length);
                return doubles;

            case TIFF.TYPE_RATIONAL:
                // Numerator and denominator are read as unsigned 32 bit values
                if (pCount == 1) {
                    return createSafeRational(pInput.readUnsignedInt(), pInput.readUnsignedInt());
                }

                Rational[] rationals = new Rational[pCount];
                for (int i = 0; i < rationals.length; i++) {
                    rationals[i] = createSafeRational(pInput.readUnsignedInt(), pInput.readUnsignedInt());
                }

                return rationals;
            case TIFF.TYPE_SRATIONAL:
                // Numerator and denominator are read as signed 32 bit values
                if (pCount == 1) {
                    return createSafeRational(pInput.readInt(), pInput.readInt());
                }

                Rational[] srationals = new Rational[pCount];
                for (int i = 0; i < srationals.length; i++) {
                    srationals[i] = createSafeRational(pInput.readInt(), pInput.readInt());
                }

                return srationals;

            // BigTiff:
            case TIFF.TYPE_LONG8:
            case TIFF.TYPE_SLONG8:
            case TIFF.TYPE_IFD8:
                if (bigTIFF) {
                    if (pCount == 1) {
                        long val = pInput.readLong();
                        // The unsigned types can't be represented if the high bit is set
                        if (pType != TIFF.TYPE_SLONG8 && val < 0) {
                            throw new IIOException(String.format("Value > %s", Long.MAX_VALUE));
                        }

                        return val;
                    }

                    // NOTE(review): unlike the single value case, array elements are not checked for the high bit
                    long[] longs = new long[pCount];
                    for (int i = 0; i < pCount; i++) {
                        longs[i] = pInput.readLong();
                    }

                    return longs;
                }
                // else fall through, the 64 bit types are treated as unknown when not reading BigTIFF

            default:
                // Spec says skip unknown values
                return new Unknown(pType, pCount, pos);
        }
    }

    /**
     * Creates a rational from the given parts, substituting {@code Rational.NaN}
     * when the denominator is zero (bad data).
     */
    private static Rational createSafeRational(final long numerator, final long denominator) {
        return denominator != 0
                ? new Rational(numerator, denominator)
                : Rational.NaN; // Bad data.
    }

    /**
     * Debugging entry point: dumps the entries of a TIFF structure to {@code System.err}.
     * <p>
     * With one argument, the file is read from its header. With a second argument, a single IFD
     * is read directly from that offset (decimal, or hexadecimal with a {@code 0x} prefix), without reading the
     * header; a negative offset selects little endian byte order, otherwise big endian is used.
     * </p>
     *
     * @param args the file name, optionally followed by an IFD offset
     * @throws IOException if the file can't be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.err.println("Usage: TIFFReader <file> [<IFD offset>]");
            return;
        }

        TIFFReader reader = new TIFFReader();

        try (ImageInputStream stream = ImageIO.createImageInputStream(new File(args[0]))) {
            long pos = 0;

            if (args.length > 1) {
                if (args[1].startsWith("0x")) {
                    // Parse as long, like the decimal case, to allow offsets above 0x7fffffff
                    pos = Long.parseLong(args[1].substring(2), 16);
                }
                else {
                    pos = Long.parseLong(args[1]);
                }

                stream.setByteOrder(pos < 0 ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
                pos = Math.abs(pos);

                stream.seek(pos);

                // The header is not read in this mode, so the entry layout must be set up here.
                // Assume plain TIFF: with the default offset size of 0, no value would ever be
                // considered stored in line, and in-line values would be misread as offsets.
                reader.longOffsets = false;
                reader.offsetSize = 4;
            }

            Directory directory = args.length > 1
                    ? reader.readIFD(stream, pos, VALID_TOP_LEVEL_IFDS)
                    : reader.read(stream);

            for (Entry entry : directory) {
                System.err.println(entry);

                Object value = entry.getValue();
                if (value instanceof byte[]) {
                    // Dump at most the first 128 bytes of binary values
                    byte[] bytes = (byte[]) value;
                    System.err.println(HexDump.dump(0, bytes, 0, Math.min(bytes.length, 128)));
                }
            }
        }
    }

    //////////////////////
    // TODO: Stream based hex dump util?
    /**
     * Formats bytes as a hex dump: rows of up to 32 bytes, each row consisting of the offset of its
     * first byte, the bytes as hex digits in groups of two bytes, and the same bytes as text, with
     * everything outside the printable ASCII range shown as {@code '.'}.
     */
    public static class HexDump {
        private static final int WIDTH = 32;

        private HexDump() {
        }

        /**
         * Dumps the entire array, with offsets counted from 0.
         */
        public static String dump(byte[] bytes) {
            return dump(0, bytes, 0, bytes.length);
        }

        /**
         * Dumps {@code len} bytes of {@code bytes}, starting at index {@code off}.
         *
         * @param offset value added to the array index to form the offset printed at the start of each row
         * @param bytes the data
         * @param off index of the first byte to dump
         * @param len number of bytes to dump
         * @return the dump, rows separated by {@code '\n'}, without trailing line break
         */
        public static String dump(long offset, byte[] bytes, int off, int len) {
            StringBuilder out = new StringBuilder();

            for (int rowStart = 0; rowStart < len; rowStart += WIDTH) {
                int rowEnd = Math.min(rowStart + WIDTH, len);

                if (rowStart > 0) {
                    out.append("\n");
                }
                out.append(String.format("%08x: ", rowStart + off + offset));

                // Hex digits, a space between every group of two bytes
                for (int i = rowStart; i < rowEnd; i++) {
                    if (i > rowStart && i % 2 == 0) {
                        out.append(" ");
                    }

                    out.append(String.format("%02x", bytes[i + off]));
                }

                // Only the last row can be short; pad it so that the text column lines up
                int missing = WIDTH - (rowEnd - rowStart);
                if (missing != 0) {
                    // Pad: 5 spaces for every 2 bytes... Special care if padding is non-even.
                    if (len % 2 != 0) {
                        out.append("  ");
                    }

                    int missingGroups = missing / 2;
                    while (missingGroups-- > 0) {
                        out.append("     ");
                    }
                }

                out.append("  ");
                out.append(toAsciiString(bytes, rowStart + off, rowEnd + off));
            }

            return out.toString();
        }

        /**
         * Converts the bytes in the range {@code [from, to)} to text,
         * replacing every byte below 32 or above 126 with {@code '.'}.
         */
        private static String toAsciiString(final byte[] bytes, final int from, final int to) {
            byte[] printable = Arrays.copyOfRange(bytes, from, to);

            for (int i = printable.length - 1; i >= 0; i--) {
                byte b = printable[i];
                boolean readable = b >= 32 && b <= 126;

                if (!readable) {
                    printable[i] = '.'; // Unreadable char
                }
            }

            return new String(printable, StandardCharsets.US_ASCII);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy