All downloads are free. The search and download functionality uses the official Maven repository.

com.twelvemonkeys.imageio.plugins.jpeg.JPEGImage10MetadataCleaner Maven / Gradle / Ivy

package com.twelvemonkeys.imageio.plugins.jpeg;

import com.twelvemonkeys.imageio.metadata.jpeg.JPEG;
import com.twelvemonkeys.imageio.metadata.jpeg.JPEGSegment;
import com.twelvemonkeys.xml.XMLSerializer;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.imageio.metadata.IIOInvalidTreeException;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.metadata.IIOMetadataNode;
import java.awt.color.ICC_Profile;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;

/**
 * JPEGImage10MetadataCleaner
 * <p>
 * Rewrites the {@code javax_imageio_jpeg_image_1.0} metadata tree obtained from an {@link IIOMetadata}
 * object, using the segments reported by the owning {@link JPEGImageReader}, and sets the
 * rewritten tree back on the same metadata object.
 * </p>
 *
 * @author Harald Kuhr
 * @author last modified by $Author: haraldk$
 * @version $Id: JPEGImage10MetadataCleaner.java,v 1.0 22.10.13 14:41 haraldk Exp$
 */
final class JPEGImage10MetadataCleaner {

    /**
     * Native metadata format name
     */
    static final String JAVAX_IMAGEIO_JPEG_IMAGE_1_0 = "javax_imageio_jpeg_image_1.0";

    /** The reader that supplies the segments and receives any warnings. */
    private final JPEGImageReader reader;

    /**
     * Creates a cleaner bound to the given reader.
     *
     * @param reader the reader used to look up segments and to report warnings to
     */
    JPEGImage10MetadataCleaner(final JPEGImageReader reader) {
        this.reader = reader;
    }

    /**
     * Rebuilds the native metadata tree of {@code imageMetadata} and sets it back on the same object.
     * <p>
     * The steps are, in order:
     * </p>
     * <ul>
     *     <li>add an {@code app0JFIF} node (with optional {@code app2ICC} and {@code JFXX} children) if the reader
     *     has a JFIF segment and the SOF has 1 or 3 components, otherwise issue a warning;</li>
     *     <li>remove all {@code app14Adobe} nodes if the Adobe transform requires more components
     *     than the SOF declares;</li>
     *     <li>insert the remaining APP segments as {@code unknown} nodes in the marker sequence;</li>
     *     <li>map negative SOF/SOS component ids to the range 0...255;</li>
     *     <li>remove empty {@code dht} nodes and split {@code dht} nodes with more than 4 tables.</li>
     * </ul>
     *
     * @param imageMetadata the metadata to clean, modified in place
     * @return the same {@code imageMetadata} instance
     * @throws IOException if reading segment data fails, or if the rewritten tree is rejected
     *                     by {@code setFromTree} (as an {@link IIOInvalidTreeException})
     */
    IIOMetadata cleanMetadata(final IIOMetadata imageMetadata) throws IOException {
        // We filter out pretty much everything from the stream..
        // Meaning we have to get *all APP segments* and re-insert them into the metadata.
        List<JPEGSegment> appSegments = reader.getAppSegments(JPEGImageReader.ALL_APP_MARKERS, null);

        // NOTE: There's a bug in the merging code in JPEGMetadata mergeUnknownNode that makes sure all "unknown" nodes are added twice in certain conditions.... ARGHBL...
        // DONE: 1: Work around
        // TODO: 2: REPORT BUG!
        // TODO: Report dht inconsistency bug (reads any amount of tables but only allows setting 4 tables)

        // TODO: Allow EXIF (as app1EXIF) in the JPEGvariety (sic) node. Need new format, might as well create a completely new format...
        // As EXIF is (a subset of) TIFF, (and the EXIF data is a valid TIFF stream) probably use something like:
        // http://download.java.net/media/jai-imageio/javadoc/1.1/com/sun/media/imageio/plugins/tiff/package-summary.html#ImageMetadata
        /*
        from: http://docs.oracle.com/javase/6/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html

        In future versions of the JPEG metadata format, other varieties of JPEG metadata may be supported (e.g. Exif)
        by defining other types of nodes which may appear as a child of the JPEGvariety node.

        (Note that an application wishing to interpret Exif metadata given a metadata tree structure in the
        javax_imageio_jpeg_image_1.0 format must check for an unknown marker segment with a tag indicating an
        APP1 marker and containing data identifying it as an Exif marker segment. Then it may use application-specific
        code to interpret the data in the marker segment. If such an application were to encounter a metadata tree
        formatted according to a future version of the JPEG metadata format, the Exif marker segment might not be
        unknown in that format - it might be structured as a child node of the JPEGvariety node.

        Thus, it is important for an application to specify which version to use by passing the string identifying
        the version to the method/constructor used to obtain an IIOMetadata object.)
         */

        IIOMetadataNode tree = (IIOMetadataNode) imageMetadata.getAsTree(JAVAX_IMAGEIO_JPEG_IMAGE_1_0);
        IIOMetadataNode jpegVariety = (IIOMetadataNode) tree.getElementsByTagName("JPEGvariety").item(0);
        IIOMetadataNode markerSequence = (IIOMetadataNode) tree.getElementsByTagName("markerSequence").item(0);

        JFIFSegment jfifSegment = reader.getJFIF();
        JFXXSegment jfxxSegment = reader.getJFXX();
        AdobeDCTSegment adobeDCT = reader.getAdobeDCT();
        ICC_Profile embeddedICCProfile = reader.getEmbeddedICCProfile(true);
        SOFSegment sof = reader.getSOF();

        // Track which segments end up as proper nodes, so they are not also added as "unknown" below
        boolean hasRealJFIF = false;
        boolean hasRealJFXX = false;
        boolean hasRealICC = false;

        if (jfifSegment != null) {
            // Normal case, conformant JFIF with 1 or 3 components
            // TODO: Test if we have CMY or other non-JFIF color space?
            if (sof.componentsInFrame() == 1 || sof.componentsInFrame() == 3) {
                IIOMetadataNode jfif = new IIOMetadataNode("app0JFIF");
                jfif.setAttribute("majorVersion", String.valueOf(jfifSegment.majorVersion));
                jfif.setAttribute("minorVersion", String.valueOf(jfifSegment.minorVersion));
                jfif.setAttribute("resUnits", String.valueOf(jfifSegment.units));
                jfif.setAttribute("Xdensity", String.valueOf(Math.max(1, jfifSegment.xDensity))); // Avoid 0 density
                jfif.setAttribute("Ydensity", String.valueOf(Math.max(1, jfifSegment.yDensity))); // Avoid 0 density
                jfif.setAttribute("thumbWidth", String.valueOf(jfifSegment.xThumbnail));
                jfif.setAttribute("thumbHeight", String.valueOf(jfifSegment.yThumbnail));

                jpegVariety.appendChild(jfif);
                hasRealJFIF = true;

                // Add app2ICC and JFXX as proper nodes
                if (embeddedICCProfile != null) {
                    IIOMetadataNode app2ICC = new IIOMetadataNode("app2ICC");
                    app2ICC.setUserObject(embeddedICCProfile);
                    jfif.appendChild(app2ICC);
                    hasRealICC = true;
                }

                if (jfxxSegment != null) {
                    IIOMetadataNode JFXX = new IIOMetadataNode("JFXX");
                    jfif.appendChild(JFXX);
                    IIOMetadataNode app0JFXX = new IIOMetadataNode("app0JFXX");
                    app0JFXX.setAttribute("extensionCode", String.valueOf(jfxxSegment.extensionCode));

                    JFXXThumbnailReader thumbnailReader = new JFXXThumbnailReader(null, reader.getThumbnailReader(), 0, 0, jfxxSegment);
                    IIOMetadataNode jfifThumb;

                    switch (jfxxSegment.extensionCode) {
                        case JFXXSegment.JPEG:
                            jfifThumb = new IIOMetadataNode("JFIFthumbJPEG");
                            // Contains its own "markerSequence" with full DHT, DQT, SOF etc...
                            IIOMetadata thumbMeta = thumbnailReader.readMetadata();
                            Node thumbTree = thumbMeta.getAsTree(JAVAX_IMAGEIO_JPEG_IMAGE_1_0);
                            jfifThumb.appendChild(thumbTree.getLastChild());
                            app0JFXX.appendChild(jfifThumb);
                            break;

                        case JFXXSegment.INDEXED:
                            jfifThumb = new IIOMetadataNode("JFIFthumbPalette");
                            jfifThumb.setAttribute("thumbWidth", String.valueOf(thumbnailReader.getWidth()));
                            jfifThumb.setAttribute("thumbHeight", String.valueOf(thumbnailReader.getHeight()));
                            app0JFXX.appendChild(jfifThumb);
                            break;

                        case JFXXSegment.RGB:
                            jfifThumb = new IIOMetadataNode("JFIFthumbRGB");
                            jfifThumb.setAttribute("thumbWidth", String.valueOf(thumbnailReader.getWidth()));
                            jfifThumb.setAttribute("thumbHeight", String.valueOf(thumbnailReader.getHeight()));
                            app0JFXX.appendChild(jfifThumb);
                            break;

                        default:
                            // The app0JFXX node is still added below, just without a thumbnail child
                            reader.processWarningOccurred(String.format("Unknown JFXX extension code: %d", jfxxSegment.extensionCode));
                    }

                    JFXX.appendChild(app0JFXX);
                    hasRealJFXX = true;
                }
            }
            else {
                // Typically CMYK JPEG with JFIF segment (Adobe or similar).
                reader.processWarningOccurred(String.format(
                        "Incompatible JFIF marker segment in stream. " +
                                "SOF%d has %d color components, JFIF allows only 1 or 3 components. Ignoring JFIF marker.",
                        sof.marker & 0xf, sof.componentsInFrame()
                ));
            }
        }

        // Special case: Broken AdobeDCT segment, inconsistent with SOF, use values from SOF
        if (adobeDCT != null && (adobeDCT.getTransform() == AdobeDCTSegment.YCCK && sof.componentsInFrame() < 4 ||
                adobeDCT.getTransform() == AdobeDCTSegment.YCC && sof.componentsInFrame() < 3)) {
            reader.processWarningOccurred(String.format(
                    "Invalid Adobe App14 marker. Indicates %s data, but SOF%d has %d color component(s). " +
                            "Ignoring Adobe App14 marker.",
                    adobeDCT.getTransform() == AdobeDCTSegment.YCCK ? "YCCK/CMYK" : "YCC/RGB",
                    sof.marker & 0xf, sof.componentsInFrame()
            ));

            // Remove bad AdobeDCT
            NodeList app14Adobe = tree.getElementsByTagName("app14Adobe");
            for (int i = app14Adobe.getLength() - 1; i >= 0; i--) {
                Node item = app14Adobe.item(i);
                item.getParentNode().removeChild(item);
            }

            // We don't add this as unknown marker, as we are certain it's bogus by now
        }

        // Insertion point for "unknown" nodes, resolved lazily when the first one is added
        Node next = null;
        for (JPEGSegment segment : appSegments) {
            // Except real app0JFIF, app0JFXX, app2ICC and app14Adobe, add all the app segments that we filtered away as "unknown" markers
            if (segment.marker() == JPEG.APP0 && "JFIF".equals(segment.identifier()) && hasRealJFIF) {
                continue;
            }
            else if (segment.marker() == JPEG.APP0 && "JFXX".equals(segment.identifier()) && hasRealJFXX) {
                continue;
            }
            else if (segment.marker() == JPEG.APP1 && "Exif".equals(segment.identifier()) /* always inserted */) {
                continue;
            }
            else if (segment.marker() == JPEG.APP2 && "ICC_PROFILE".equals(segment.identifier()) && hasRealICC) {
                continue;
            }
            else if (segment.marker() == JPEG.APP14 && "Adobe".equals(segment.identifier()) /* always inserted */) {
                continue;
            }

            IIOMetadataNode unknown = createUnknownNode(segment);

            if (next == null) {
                // To be semi-compatible with the functionality in mergeTree,
                // let's insert after the last unknown tag, or before any other tag if no unknown tag exists
                NodeList unknowns = markerSequence.getElementsByTagName("unknown");

                if (unknowns.getLength() > 0) {
                    next = unknowns.item(unknowns.getLength() - 1).getNextSibling();
                }
                else {
                    next = markerSequence.getFirstChild();
                }
            }

            markerSequence.insertBefore(unknown, next);
        }

        fixNegativeComponentIds(markerSequence);
        removeEmptyAndSplitLargeDhtNodes(markerSequence);

        try {
            imageMetadata.setFromTree(JAVAX_IMAGEIO_JPEG_IMAGE_1_0, tree);
        }
        catch (IIOInvalidTreeException e) {
            if (JPEGImageReader.DEBUG) {
                // Dump both the current tree and the rejected tree, to ease diagnosing the failure
                new XMLSerializer(System.out, System.getProperty("file.encoding")).serialize(imageMetadata.getAsTree(JAVAX_IMAGEIO_JPEG_IMAGE_1_0), false);
                System.out.println("-- 8< --");
                new XMLSerializer(System.out, System.getProperty("file.encoding")).serialize(tree, false);
            }

            throw e;
        }

        return imageMetadata;
    }

    /**
     * Creates an {@code unknown} node for the given segment.
     * The node's user object is a byte array holding the segment identifier (if any) followed by
     * a zero byte and then the segment data.
     *
     * @param segment the segment to wrap
     * @return the new {@code unknown} node
     * @throws IOException if the segment data cannot be read completely
     */
    private IIOMetadataNode createUnknownNode(final JPEGSegment segment) throws IOException {
        IIOMetadataNode unknown = new IIOMetadataNode("unknown");
        unknown.setAttribute("MarkerTag", Integer.toString(segment.marker() & 0xff));

        DataInputStream stream = new DataInputStream(segment.data());

        try {
            String identifier = segment.identifier();
            // Leave room for the identifier plus one byte, which stays 0 from array initialization
            int off = identifier != null ? identifier.length() + 1 : 0;

            byte[] data = new byte[off + segment.length()];

            if (identifier != null) {
                System.arraycopy(identifier.getBytes(Charset.forName("ASCII")), 0, data, 0, identifier.length());
            }

            stream.readFully(data, off, segment.length());

            unknown.setUserObject(data);
        }
        finally {
            stream.close();
        }

        return unknown;
    }

    /**
     * Forces the component ids of the first {@code sof} node and the component selectors of all
     * {@code sos} nodes into the range 0...255.
     * <p>
     * Known issue in the com.sun classes: if a sof/sos component id or selector is negative,
     * setFromTree will fail.
     * </p>
     *
     * @param markerSequence the {@code markerSequence} node to fix
     */
    private void fixNegativeComponentIds(final IIOMetadataNode markerSequence) {
        NodeList sofs = markerSequence.getElementsByTagName("sof");

        if (sofs.getLength() > 0) {
            NodeList components = sofs.item(0).getChildNodes();
            for (int i = 0; i < components.getLength(); i++) {
                forceComponentIdInRange((IIOMetadataNode) components.item(i), "componentId");
            }
        }

        NodeList sos = markerSequence.getElementsByTagName("sos");

        for (int i = 0; i < sos.getLength(); i++) {
            NodeList specs = sos.item(i).getChildNodes();
            for (int j = 0; j < specs.getLength(); j++) {
                forceComponentIdInRange((IIOMetadataNode) specs.item(j), "componentSelector");
            }
        }
    }

    /**
     * Removes empty {@code dht} nodes (with a warning), and splits {@code dht} nodes with more than
     * 4 tables in two, by moving the tables with {@code class="1"} (AC) to a new {@code dht} node
     * inserted directly after the original.
     * <p>
     * Inconsistency issue in the com.sun classes: it can read metadata with dht containing
     * more than 4 children, but will not allow setting such a tree.
     * </p>
     *
     * @param markerSequence the {@code markerSequence} node to fix
     */
    private void removeEmptyAndSplitLargeDhtNodes(final IIOMetadataNode markerSequence) {
        NodeList dhts = markerSequence.getElementsByTagName("dht");

        for (int j = 0; j < dhts.getLength(); j++) {
            Node dht = dhts.item(j);
            NodeList dhtables = dht.getChildNodes();

            if (dhtables.getLength() < 1) {
                // Why is there an empty DHT node?
                dht.getParentNode().removeChild(dht);
                reader.processWarningOccurred("Metadata contains empty dht node. Ignoring.");
            }
            else if (dhtables.getLength() > 4) {
                IIOMetadataNode acTables = new IIOMetadataNode("dht");
                dht.getParentNode().insertBefore(acTables, dht.getNextSibling());

                // Split into 2 dht nodes, one for AC and one for DC.
                // Walk the sibling chain and remember the next sibling before moving a table:
                // indexing into the child list while removing from it would skip the table
                // directly following each moved one.
                Node dhtable = dht.getFirstChild();
                while (dhtable != null) {
                    Node following = dhtable.getNextSibling();
                    String tableClass = ((Element) dhtable).getAttribute("class");

                    if ("1".equals(tableClass)) {
                        dht.removeChild(dhtable);
                        acTables.appendChild(dhtable);
                    }

                    dhtable = following;
                }
            }
        }
    }

    /**
     * Replaces a negative value of the named attribute with the corresponding unsigned byte value
     * (range 0...255). Issues a warning through the reader if the attribute value is not an integer.
     *
     * @param component the node carrying the attribute
     * @param attributeName the name of the attribute to fix
     */
    private void forceComponentIdInRange(final IIOMetadataNode component, final String attributeName) {
        String attribute = component.getAttribute(attributeName);

        if (attribute != null) {
            try {
                int componentId = Integer.parseInt(attribute);

                if (componentId < 0) {
                    // Metadata doesn't like negative component ids/specs
                    // We'll convert to the positive value it probably should have been
                    componentId = ((byte) componentId) & 0xff;
                    component.setAttribute(attributeName, String.valueOf(componentId));
                }
            }
            catch (NumberFormatException e) {
                // Not fatal here, leave the attribute as is and report it
                if ("scanComponentSpec".equals(component.getNodeName())) {
                    reader.processWarningOccurred("Bad SOS component selector: " + attribute);
                }
                else {
                    reader.processWarningOccurred("Bad SOF component id: " + attribute);
                }
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy