All downloads are free. The search and download features use the official Maven repository.

org.sejda.sambox.pdmodel.graphics.image.SampledImageReader Maven / Gradle / Ivy

Go to download

An Apache PDFBox fork intended to be used as PDF processor for Sejda and PDFsam related projects

There is a newer version: 3.0.21
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sejda.sambox.pdmodel.graphics.image;

import org.sejda.sambox.cos.COSArray;
import org.sejda.sambox.cos.COSNumber;
import org.sejda.sambox.pdmodel.graphics.color.PDColorSpace;
import org.sejda.sambox.pdmodel.graphics.color.PDDeviceGray;
import org.sejda.sambox.pdmodel.graphics.color.PDIndexed;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.MemoryCacheImageInputStream;
import java.awt.Graphics2D;
import java.awt.Paint;
import java.awt.Point;
import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.awt.image.WritableRaster;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;

/**
 * Reads a sampled image from a PDF file.
 * <p>
 * This is a static utility class and cannot be instantiated. All entry points read the decoded
 * sample data through {@link PDImage#createInputStream()} (or {@link PDImage#asByteBuffer()} for
 * the 8-bit fast path) and close every stream they open.
 *
 * @author John Hewson
 */
final class SampledImageReader
{
    private static final Logger LOG = LoggerFactory.getLogger(SampledImageReader.class);

    private SampledImageReader()
    {
    }

    /**
     * Returns an ARGB image filled with the given paint and using the given image as a mask.
     * <p>
     * The image data is read as one bit per pixel, each row padded to a whole number of bytes.
     * Pixels whose bit equals 1 (when {@code decode[0] < decode[1]}) or 0 (otherwise) are made
     * fully transparent; all other pixels keep the paint. No check is made here that the image
     * actually is a stencil mask.
     *
     * @param pdImage the stencil image providing the mask bits
     * @param paint   the paint to fill the visible portions of the image with
     * @return a masked image filled with the given paint
     * @throws IOException if the image cannot be read
     */
    public static BufferedImage getStencilImage(PDImage pdImage, Paint paint) throws IOException
    {
        int width = pdImage.getWidth();
        int height = pdImage.getHeight();

        // compose to ARGB: start with an image completely covered by the paint
        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = masked.createGraphics();
        g.setPaint(paint);
        g.fillRect(0, 0, width, height);
        g.dispose();

        // set the alpha
        WritableRaster raster = masked.getRaster();

        // all-zero ARGB sample, i.e. a fully transparent pixel
        final int[] transparent = new int[4];
        // avoid getting a BufferedImage for the mask to lessen memory footprint.
        // Such masks are always bpc=1 and have no colorspace, but have a decode.
        // (see 8.9.6.2 Stencil Masking)
        try (InputStream in = pdImage.createInputStream())
        {
            final float[] decode = getDecodeArray(pdImage);
            int value = decode[0] < decode[1] ? 1 : 0;
            byte[] buff = new byte[bytesPerOneBitRow(width)];
            for (int y = 0; y < height; y++)
            {
                int x = 0;
                int readLen = readRow(in, buff);
                for (int r = 0; r < readLen; r++)
                {
                    int byteValue = buff[r];
                    // most significant bit first; stop at the row's last real pixel
                    for (int shift = 7; shift >= 0 && x < width; shift--, x++)
                    {
                        if (((byteValue >> shift) & 1) == value)
                        {
                            raster.setPixel(x, y, transparent);
                        }
                    }
                }
                if (readLen != buff.length)
                {
                    LOG.warn("premature EOF, image will be incomplete");
                    break;
                }
            }
        }
        return masked;
    }

    /**
     * Returns the content of the given image as an AWT buffered image with an RGB color space. If a
     * color key mask is provided then an ARGB image is returned instead. This method never returns
     * null.
     *
     * @param pdImage  the image to read
     * @param colorKey an optional color key mask
     * @return content of this image as an RGB buffered image
     * @throws IOException if the image stream is empty, if the width or height is not positive,
     *                     or if the image cannot be read
     */
    public static BufferedImage getRGBImage(PDImage pdImage, COSArray colorKey) throws IOException
    {

        if (pdImage.isEmpty())
        {
            throw new IOException("Image stream is empty");
        }

        // get parameters, they must be valid or have been repaired
        final PDColorSpace colorSpace = pdImage.getColorSpace();
        final int numComponents = colorSpace.getNumberOfComponents();
        final int width = pdImage.getWidth();
        final int height = pdImage.getHeight();
        final int bitsPerComponent = pdImage.getBitsPerComponent();

        if (width <= 0 || height <= 0)
        {
            throw new IOException("image width and height must be positive");
        }
        try
        {
            if (bitsPerComponent == 1 && colorKey == null && numComponents == 1)
            {
                return from1Bit(pdImage);
            }

            //
            // An AWT raster must use 8/16/32 bits per component. Images with < 8bpc
            // will be unpacked into a byte-backed raster. Images with 16bpc will be reduced
            // in depth to 8bpc as they will be drawn to TYPE_INT_RGB images anyway. All code
            // in PDColorSpace#toRGBImage expects an 8-bit range, i.e. 0-255.
            // Interleaved raster allows chunk-copying for 8-bit images.
            // Here it's different from PDFBox, we should investigate
            WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height,
                    numComponents, new Point(0, 0));
            final float[] defaultDecode = pdImage.getColorSpace().getDefaultDecode(8);
            final float[] decode = getDecodeArray(pdImage);
            if (bitsPerComponent == 8 && colorKey == null && Arrays.equals(decode, defaultDecode))
            {
                // convert image, faster path for non-decoded, non-colormasked 8-bit images
                return from8bit(pdImage, raster);
            }
            return fromAny(pdImage, raster, colorKey);
        }
        catch (NegativeArraySizeException ex)
        {
            // surfaced as a checked exception so callers only have to deal with IOException
            throw new IOException(ex);
        }
    }

    /**
     * Extract the raw unconverted raster of the given image
     *
     * @param pdImage The image to get the raw raster data from
     * @return the raw raster of this image; it is backed by unsigned shorts when the image has
     * more than 8 bits per component and by bytes otherwise
     * @throws IOException if the image stream is empty, if the width or height is not positive,
     *                     or if the image cannot be read
     */
    public static WritableRaster getRawRaster(PDImage pdImage) throws IOException
    {
        if (pdImage.isEmpty())
        {
            throw new IOException("Image stream is empty");
        }

        // get parameters, they must be valid or have been repaired
        final PDColorSpace colorSpace = pdImage.getColorSpace();
        final int numComponents = colorSpace.getNumberOfComponents();
        final int width = pdImage.getWidth();
        final int height = pdImage.getHeight();
        final int bitsPerComponent = pdImage.getBitsPerComponent();

        if (width <= 0 || height <= 0)
        {
            throw new IOException("image width and height must be positive");
        }

        try
        {
            int dataBufferType = DataBuffer.TYPE_BYTE;
            if (bitsPerComponent > 8)
            {
                dataBufferType = DataBuffer.TYPE_USHORT;
            }
            WritableRaster raster = Raster.createInterleavedRaster(dataBufferType, width, height,
                    numComponents, new Point(0, 0));
            readRasterFromAny(pdImage, raster);
            return raster;
        }
        catch (NegativeArraySizeException ex)
        {
            throw new IOException(ex);
        }
    }

    /**
     * Fills the given raster with the samples of the image, read bit-wise from its stream.
     * Samples of indexed color spaces are stored after applying the decode array only; all other
     * samples are additionally rescaled to the full range of the raster's data type (0-255 for
     * bytes, 0-65535 for unsigned shorts).
     *
     * @param pdImage the image to read
     * @param raster  the destination raster, byte or unsigned short backed
     * @throws IOException if the image cannot be read
     */
    private static void readRasterFromAny(PDImage pdImage, WritableRaster raster) throws IOException
    {
        final PDColorSpace colorSpace = pdImage.getColorSpace();
        final int numComponents = colorSpace.getNumberOfComponents();
        final int bitsPerComponent = pdImage.getBitsPerComponent();
        final float[] decode = getDecodeArray(pdImage);

        // read bit stream; the source stream is a resource of its own because closing a
        // MemoryCacheImageInputStream does not close the stream it wraps
        try (InputStream in = pdImage.createInputStream();
                ImageInputStream iis = new MemoryCacheImageInputStream(in))
        {
            final int scanWidth = pdImage.getWidth();
            final int scanHeight = pdImage.getHeight();

            final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f;
            final boolean isIndexed = colorSpace instanceof PDIndexed;

            // calculate row padding
            final int padding = rowPaddingBits(scanWidth, numComponents, bitsPerComponent);

            // read stream
            final boolean isShort = raster.getDataBuffer().getDataType() == DataBuffer.TYPE_USHORT;
            assert !isIndexed || !isShort;
            final byte[] srcColorValuesBytes = isShort ? null : new byte[numComponents];
            final short[] srcColorValuesShort = isShort ? new short[numComponents] : null;
            for (int y = 0; y < scanHeight; y++)
            {
                for (int x = 0; x < scanWidth; x++)
                {
                    for (int c = 0; c < numComponents; c++)
                    {
                        int value = (int) iis.readBits(bitsPerComponent);

                        // decode array
                        final float dMin = decode[c * 2];
                        final float dMax = decode[(c * 2) + 1];

                        // interpolate to domain
                        float output = dMin + (value * ((dMax - dMin) / sampleMax));

                        if (isIndexed)
                        {
                            // indexed color spaces get the raw value, because the TYPE_BYTE
                            // below cannot be reversed by the color space without it having
                            // knowledge of the number of bits per component
                            srcColorValuesBytes[c] = (byte) Math.round(output);
                        }
                        else
                        {
                            // position of the decoded value within the decode range, 0..1
                            float normalized = (output - Math.min(dMin, dMax)) / Math.abs(
                                    dMax - dMin);
                            if (isShort)
                            {
                                // interpolate to TYPE_SHORT
                                srcColorValuesShort[c] = (short) Math.round(normalized * 65535f);
                            }
                            else
                            {
                                // interpolate to TYPE_BYTE
                                srcColorValuesBytes[c] = (byte) Math.round(normalized * 255f);
                            }
                        }
                    }

                    if (isShort)
                    {
                        raster.setDataElements(x, y, srcColorValuesShort);
                    }
                    else
                    {
                        raster.setDataElements(x, y, srcColorValuesBytes);
                    }
                }

                // rows are padded to the nearest byte
                iis.readBits(padding);
            }
        }
    }

    /**
     * Converts a one-component, one-bit-per-component image. Every bit is expanded to a byte
     * holding 0 or 255. DeviceGray images are returned directly as {@code TYPE_BYTE_GRAY}; for
     * any other color space the expanded raster is converted by the color space.
     *
     * @param pdImage the image to read
     * @return the converted image
     * @throws IOException if the image cannot be read
     */
    private static BufferedImage from1Bit(PDImage pdImage) throws IOException
    {
        final PDColorSpace colorSpace = pdImage.getColorSpace();
        final int width = pdImage.getWidth();
        final int height = pdImage.getHeight();
        final float[] decode = getDecodeArray(pdImage);
        BufferedImage bim = null;
        WritableRaster raster;
        if (colorSpace instanceof PDDeviceGray)
        {
            // TYPE_BYTE_GRAY and not TYPE_BYTE_BINARY because this one is handled
            // without conversion to RGB by Graphics.drawImage
            // this reduces the memory footprint, only one byte per pixel instead of three.
            bim = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
            raster = bim.getRaster();
        }
        else
        {
            raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height, 1,
                    new Point(0, 0));
        }
        final byte[] output = ((DataBufferByte) raster.getDataBuffer()).getData();

        // read bit stream
        try (InputStream iis = pdImage.createInputStream())
        {
            final boolean isIndexed = colorSpace instanceof PDIndexed;

            // output bytes for a 0 bit and a 1 bit; swapped when the decode array is inverted
            // (indexed images are never inverted here)
            final byte value0;
            final byte value1;
            if (isIndexed || decode[0] < decode[1])
            {
                value0 = 0;
                value1 = (byte) 255;
            }
            else
            {
                value0 = (byte) 255;
                value1 = 0;
            }
            byte[] buff = new byte[bytesPerOneBitRow(width)];
            int idx = 0;
            for (int y = 0; y < height; y++)
            {
                int x = 0;
                // a single read() may legally return fewer bytes than requested without being
                // at the end of the stream, so keep reading until the row is complete
                int readLen = readRow(iis, buff);
                for (int r = 0; r < readLen; r++)
                {
                    int value = buff[r];
                    // most significant bit first; stop at the row's last real pixel
                    for (int mask = 128; mask != 0 && x < width; mask >>= 1, x++)
                    {
                        output[idx++] = (value & mask) == 0 ? value0 : value1;
                    }
                }
                if (readLen != buff.length)
                {
                    LOG.warn("premature EOF, image will be incomplete");
                    break;
                }
            }

            if (bim != null)
            {
                return bim;
            }

            // use the color space to convert the image to RGB
            return colorSpace.toRGBImage(raster);
        }
    }

    /**
     * Faster conversion for 8-bit, non-decoded, non-colormasked images: the interleaved source
     * bytes are copied component by component into the banks of the given raster. Samples
     * missing from a too short source are set to -1 (0xFF) and a single warning is logged.
     *
     * @param pdImage the image to read
     * @param raster  a byte-backed banded raster with one bank per color component
     * @return the image converted to RGB by its color space
     * @throws IOException if the image cannot be read
     */
    private static BufferedImage from8bit(PDImage pdImage, WritableRaster raster) throws IOException
    {
        // get the raster's underlying byte buffer
        byte[][] banks = ((DataBufferByte) raster.getDataBuffer()).getBankData();
        ByteBuffer source = pdImage.asByteBuffer();

        final int width = pdImage.getWidth();
        final int height = pdImage.getHeight();
        final int numComponents = pdImage.getColorSpace().getNumberOfComponents();
        int max = width * height;

        boolean warnedAboutIndexOutOfBounds = false;

        for (int c = 0; c < numComponents; c++)
        {
            // component c of pixel i lives at offset i * numComponents + c
            int sourceOffset = c;
            for (int i = 0; i < max; i++)
            {
                if (sourceOffset < source.limit())
                {
                    banks[c][i] = source.get(sourceOffset);
                    sourceOffset += numComponents;
                }
                else
                {
                    if (!warnedAboutIndexOutOfBounds)
                    {
                        LOG.warn("Tried reading: {} but only: {} available (component: {})",
                                sourceOffset, source.limit(), c);
                        warnedAboutIndexOutOfBounds = true;
                    }
                    banks[c][i] = -1;
                }
            }
        }

        // use the color space to convert the image to RGB
        return pdImage.getColorSpace().toRGBImage(raster);
    }

    /**
     * Slower, general-purpose image conversion from any image format. Samples are read bit-wise,
     * run through the decode array and, unless the color space is indexed, rescaled to 0-255.
     * When a usable color key is given, pixels whose raw samples all lie within the key ranges
     * become transparent in the returned ARGB image.
     *
     * @param pdImage  the image to read
     * @param raster   a byte-backed raster receiving the converted samples
     * @param colorKey an optional color key mask; ignored with a warning when it has fewer than
     *                 two entries per color component
     * @return the converted image
     * @throws IOException if the image cannot be read
     */
    private static BufferedImage fromAny(PDImage pdImage, WritableRaster raster, COSArray colorKey)
            throws IOException
    {
        final PDColorSpace colorSpace = pdImage.getColorSpace();
        final int numComponents = colorSpace.getNumberOfComponents();
        final int width = pdImage.getWidth();
        final int height = pdImage.getHeight();
        final int bitsPerComponent = pdImage.getBitsPerComponent();
        final float[] decode = getDecodeArray(pdImage);

        // the source stream is a resource of its own because closing a
        // MemoryCacheImageInputStream does not close the stream it wraps
        try (InputStream in = pdImage.createInputStream();
                ImageInputStream iis = new MemoryCacheImageInputStream(in))
        {
            final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f;
            final boolean isIndexed = colorSpace instanceof PDIndexed;

            // init color key mask
            float[] colorKeyRanges = null;
            BufferedImage colorKeyMask = null;
            WritableRaster colorKeyRaster = null;
            if (colorKey != null)
            {
                if (colorKey.size() >= numComponents * 2)
                {
                    colorKeyRanges = colorKey.toFloatArray();
                    colorKeyMask = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
                    colorKeyRaster = colorKeyMask.getRaster();
                }
                else
                {
                    LOG.warn("colorKey mask size is {}, should be {}, ignored", colorKey.size(),
                            numComponents * 2);
                }
            }

            // calculate row padding
            final int padding = rowPaddingBits(width, numComponents, bitsPerComponent);

            // read stream
            byte[] srcColorValues = new byte[numComponents];
            byte[] alpha = new byte[1];
            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    boolean isMasked = true;
                    for (int c = 0; c < numComponents; c++)
                    {
                        int value = (int) iis.readBits(bitsPerComponent);

                        // color key mask requires values before they are decoded
                        if (colorKeyRanges != null)
                        {
                            isMasked &= value >= colorKeyRanges[c * 2]
                                    && value <= colorKeyRanges[c * 2 + 1];
                        }

                        // decode array
                        final float dMin = decode[c * 2];
                        final float dMax = decode[(c * 2) + 1];

                        // interpolate to domain
                        float output = dMin + (value * ((dMax - dMin) / sampleMax));

                        if (isIndexed)
                        {
                            // indexed color spaces get the raw value, because the TYPE_BYTE
                            // below cannot be reversed by the color space without it having
                            // knowledge of the number of bits per component
                            srcColorValues[c] = (byte) Math.round(output);
                        }
                        else
                        {
                            // interpolate to TYPE_BYTE
                            int outputByte = Math.round(
                                    ((output - Math.min(dMin, dMax)) / Math.abs(dMax - dMin))
                                            * 255f);

                            srcColorValues[c] = (byte) outputByte;
                        }
                    }
                    raster.setDataElements(x, y, srcColorValues);

                    // set alpha channel in color key mask, if any
                    if (colorKeyRaster != null)
                    {
                        alpha[0] = (byte) (isMasked ? 255 : 0);
                        colorKeyRaster.setDataElements(x, y, alpha);
                    }
                }

                // rows are padded to the nearest byte
                iis.readBits(padding);
            }

            // use the color space to convert the image to RGB
            BufferedImage rgbImage = colorSpace.toRGBImage(raster);

            // apply color mask, if any
            if (colorKeyMask != null)
            {
                return applyColorKeyMask(rgbImage, colorKeyMask);
            }
            return rgbImage;
        }
    }

    /**
     * Color key mask: RGB + Binary -> ARGB. The first three samples of every source pixel are
     * copied and the alpha is set to 255 minus the mask value, so pixels marked 255 in the mask
     * become fully transparent.
     *
     * @param image the color image
     * @param mask  the mask, same size as the image
     * @return a new {@code TYPE_INT_ARGB} image
     */
    private static BufferedImage applyColorKeyMask(BufferedImage image, BufferedImage mask)
    {
        int width = image.getWidth();
        int height = image.getHeight();

        // compose to ARGB
        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

        WritableRaster src = image.getRaster();
        WritableRaster dest = masked.getRaster();
        WritableRaster alpha = mask.getRaster();

        float[] rgb = new float[3];
        float[] rgba = new float[4];
        float[] alphaPixel = null;
        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x++)
            {
                src.getPixel(x, y, rgb);

                rgba[0] = rgb[0];
                rgba[1] = rgb[1];
                rgba[2] = rgb[2];
                // the array is allocated by the first call and reused afterwards
                alphaPixel = alpha.getPixel(x, y, alphaPixel);
                rgba[3] = 255 - alphaPixel[0];

                dest.setPixel(x, y, rgba);
            }
        }

        return masked;
    }

    /**
     * Gets the decode array from the image dictionary or returns the default.
     * <p>
     * The image's own array is used when it has exactly two entries per color component. If the
     * size does not match but the image is a stencil whose first two entries are numbers within
     * 0..1, those two entries are used. In every other case the color space default for the
     * image's bits per component is returned.
     *
     * @param pdImage the image to get the decode array for
     * @return the decode array to apply to the image samples
     * @throws IOException if the color space of the image cannot be read
     */
    private static float[] getDecodeArray(PDImage pdImage) throws IOException
    {
        final COSArray cosDecode = pdImage.getDecode();
        float[] decode = null;

        if (cosDecode != null)
        {
            int numberOfComponents = pdImage.getColorSpace().getNumberOfComponents();
            if (cosDecode.size() != numberOfComponents * 2)
            {
                if (pdImage.isStencil() && cosDecode.size() >= 2 && cosDecode.get(
                        0) instanceof COSNumber && cosDecode.get(1) instanceof COSNumber)
                {
                    float decode0 = ((COSNumber) cosDecode.get(0)).floatValue();
                    float decode1 = ((COSNumber) cosDecode.get(1)).floatValue();
                    if (decode0 >= 0 && decode0 <= 1 && decode1 >= 0 && decode1 <= 1)
                    {
                        LOG.warn(
                                "decode array {} not compatible with color space, using the first two entries",
                                cosDecode);
                        return new float[] { decode0, decode1 };
                    }
                }
                LOG.error("decode array {} not compatible with color space, using default",
                        cosDecode);
            }
            else
            {
                decode = cosDecode.toFloatArray();
            }
        }

        // use color space default
        if (decode == null)
        {
            return pdImage.getColorSpace().getDefaultDecode(pdImage.getBitsPerComponent());
        }

        return decode;
    }

    /**
     * Returns the number of bytes taken by one row of a one-bit-per-pixel image, i.e. the width
     * divided by eight, rounded up.
     */
    private static int bytesPerOneBitRow(int width)
    {
        int rowLen = width / 8;
        if (width % 8 > 0)
        {
            rowLen++;
        }
        return rowLen;
    }

    /**
     * Returns the number of padding bits that follow the samples of one row so that the next row
     * starts on a byte boundary. The row size is computed as a long so that very wide rows
     * cannot overflow into a negative remainder.
     */
    private static int rowPaddingBits(int width, int numComponents, int bitsPerComponent)
    {
        long rowBits = (long) width * numComponents * bitsPerComponent;
        int remainder = (int) (rowBits % 8);
        return remainder > 0 ? 8 - remainder : 0;
    }

    /**
     * Reads from the stream until the buffer is full or the end of the stream is reached.
     *
     * @param in     the stream to read from
     * @param buffer the buffer to fill from index 0
     * @return the number of bytes stored in the buffer; less than its length only when the
     * stream ended first
     * @throws IOException if reading fails
     */
    private static int readRow(InputStream in, byte[] buffer) throws IOException
    {
        int total = 0;
        while (total < buffer.length)
        {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0)
            {
                break;
            }
            total += count;
        }
        return total;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy