org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.graphics.image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import javax.imageio.stream.MemoryCacheImageOutputStream;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.filter.Filter;
import org.apache.pdfbox.filter.FilterFactory;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
/**
* Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image.
*
* @author Ben Litchfield
* @author Paul King
*/
public final class CCITTFactory
{
private CCITTFactory()
{
}
/**
* Creates a new CCITT group 4 (T6) compressed image XObject from a b/w BufferedImage. This
* compression technique usually results in smaller images than those produced by {@link LosslessFactory#createFromImage(PDDocument, BufferedImage)
* }.
*
* @param document the document to create the image as part of.
* @param image the image.
* @return a new image XObject.
* @throws IOException if there is an error creating the image.
* @throws IllegalArgumentException if the BufferedImage is not a b/w image.
*/
public static PDImageXObject createFromImage(PDDocument document, BufferedImage image)
throws IOException
{
if (image.getType() != BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() != 1)
{
throw new IllegalArgumentException("Only 1-bit b/w images supported");
}
int height = image.getHeight();
int width = image.getWidth();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
// flip bit to avoid having to set /BlackIs1
mcios.writeBits(~(image.getRGB(x, y) & 1), 1);
}
if (mcios.getBitOffset() != 0)
{
mcios.writeBits(0, 8 - mcios.getBitOffset());
}
}
mcios.flush();
mcios.close();
return prepareImageXObject(document, bos.toByteArray(), width, height, PDDeviceGray.INSTANCE);
}
/**
* Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file stored
* in a byte array. Only single-strip CCITT T4 or T6 compressed TIFF files are supported. If
* you're not sure what TIFF files you have, use
* {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) }
* or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
* instead.
*
* @param document the document to create the image as part of.
* @param byteArray the TIFF file in a byte array which contains a suitable CCITT compressed
* image
* @return a new Image XObject
* @throws IOException if there is an error reading the TIFF data.
*/
public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray)
throws IOException
{
return createFromByteArray(document, byteArray, 0);
}
/**
* Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file stored
* in a byte array. Only single-strip CCITT T4 or T6 compressed TIFF files are supported. If
* you're not sure what TIFF files you have, use
* {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) }
* or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
* instead.
*
* @param document the document to create the image as part of.
* @param byteArray the TIFF file in a byte array which contains a suitable CCITT compressed
* image
* @param number TIFF image number, starting from 0
* @return a new Image XObject
* @throws IOException if there is an error reading the TIFF data.
*/
public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray, int number)
throws IOException
{
RandomAccess raf = new RandomAccessBuffer(byteArray);
try
{
return createFromRandomAccessImpl(document, raf, number);
}
finally
{
raf.close();
}
}
private static PDImageXObject prepareImageXObject(PDDocument document,
byte[] byteArray, int width, int height,
PDColorSpace initColorSpace) throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Filter filter = FilterFactory.INSTANCE.getFilter(COSName.CCITTFAX_DECODE);
COSDictionary dict = new COSDictionary();
dict.setInt(COSName.COLUMNS, width);
dict.setInt(COSName.ROWS, height);
filter.encode(new ByteArrayInputStream(byteArray), baos, dict, 0);
ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(baos.toByteArray());
PDImageXObject image = new PDImageXObject(document, encodedByteStream, COSName.CCITTFAX_DECODE,
width, height, 1, initColorSpace);
dict.setInt(COSName.K, -1);
image.getCOSObject().setItem(COSName.DECODE_PARMS, dict);
return image;
}
/**
* Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file.
*
* @param document the document to create the image as part of.
* @param reader the random access TIFF file which contains a suitable CCITT
* compressed image
* @return a new image XObject
* @throws IOException if there is an error reading the TIFF data.
*
* @deprecated Use {@link #createFromFile(PDDocument, File)} instead.
*/
@Deprecated
public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader)
throws IOException
{
return createFromRandomAccessImpl(document, reader, 0);
}
/**
* Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file.
*
* @param document the document to create the image as part of.
* @param reader the random access TIFF file which contains a suitable CCITT
* compressed image
* @param number TIFF image number, starting from 0
* @return a new image XObject, or null if no such page
* @throws IOException if there is an error reading the TIFF data.
*
* @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead.
*/
@Deprecated
public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader,
int number) throws IOException
{
return createFromRandomAccessImpl(document, reader, number);
}
/**
* Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. Only
* single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
* files you have, use
* {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)}
* or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
* instead.
*
* @param document the document to create the image as part of.
* @param file the TIFF file which contains a suitable CCITT compressed image
* @return a new Image XObject
* @throws IOException if there is an error reading the TIFF data.
*/
public static PDImageXObject createFromFile(PDDocument document, File file)
throws IOException
{
return createFromFile(document, file, 0);
}
/**
* Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. Only
* single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
* files you have, use
* {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) }
* or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
* instead.
*
* @param document the document to create the image as part of.
* @param file the TIFF file which contains a suitable CCITT compressed image
* @param number TIFF image number, starting from 0
* @return a new Image XObject
* @throws IOException if there is an error reading the TIFF data.
*/
public static PDImageXObject createFromFile(PDDocument document, File file, int number)
throws IOException
{
RandomAccessFile raf = new RandomAccessFile(file, "r");
try
{
return createFromRandomAccessImpl(document, raf, number);
}
finally
{
raf.close();
}
}
/**
* Creates a new CCITT Fax compressed image XObject from a TIFF file.
*
* @param document the document to create the image as part of.
* @param reader the random access TIFF file which contains a suitable CCITT
* compressed image
* @param number TIFF image number, starting from 0
* @return a new Image XObject, or null if no such page
* @throws IOException if there is an error reading the TIFF data.
*/
private static PDImageXObject createFromRandomAccessImpl(PDDocument document,
RandomAccess reader,
int number) throws IOException
{
COSDictionary decodeParms = new COSDictionary();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
extractFromTiff(reader, bos, decodeParms, number);
if (bos.size() == 0)
{
return null;
}
ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray());
PDImageXObject pdImage = new PDImageXObject(document,
encodedByteStream,
COSName.CCITTFAX_DECODE,
decodeParms.getInt(COSName.COLUMNS),
decodeParms.getInt(COSName.ROWS),
1,
PDDeviceGray.INSTANCE);
COSDictionary dict = pdImage.getCOSObject();
dict.setItem(COSName.DECODE_PARMS, decodeParms);
return pdImage;
}
// extracts the CCITT stream from the TIFF file
private static void extractFromTiff(RandomAccess reader, OutputStream os,
COSDictionary params, int number) throws IOException
{
try
{
// First check the basic tiff header
reader.seek(0);
char endianess = (char) reader.read();
if ((char) reader.read() != endianess)
{
throw new IOException("Not a valid tiff file");
}
// ensure that endianess is either M or I
if (endianess != 'M' && endianess != 'I')
{
throw new IOException("Not a valid tiff file");
}
int magicNumber = readshort(endianess, reader);
if (magicNumber != 42)
{
throw new IOException("Not a valid tiff file");
}
// Relocate to the first set of tags
int address = readlong(endianess, reader);
reader.seek(address);
// If some higher page number is required, skip this page's tags,
// then read the next page's address
for (int i = 0; i < number; i++)
{
int numtags = readshort(endianess, reader);
if (numtags > 50)
{
throw new IOException("Not a valid tiff file");
}
reader.seek(address + 2 + numtags * 12);
address = readlong(endianess, reader);
if (address == 0)
{
return;
}
reader.seek(address);
}
int numtags = readshort(endianess, reader);
// The number 50 is somewhat arbitary, it just stops us load up junk from somewhere
// and tramping on
if (numtags > 50)
{
throw new IOException("Not a valid tiff file");
}
// Loop through the tags, some will convert to items in the params dictionary
// Other point us to where to find the data stream.
// The only param which might change as a result of other TIFF tags is K, so
// we'll deal with that differently.
// Default value to detect error
int k = -1000;
int dataoffset = 0;
int datalength = 0;
for (int i = 0; i < numtags; i++)
{
int tag = readshort(endianess, reader);
int type = readshort(endianess, reader);
int count = readlong(endianess, reader);
int val;
// Note that when the type is shorter than 4 bytes, the rest can be garbage
// and must be ignored. E.g. short (2 bytes) from "01 00 38 32" (little endian)
// is 1, not 842530817 (seen in a real-life TIFF image).
switch (type)
{
case 1: // byte value
val = reader.read();
reader.read();
reader.read();
reader.read();
break;
case 3: // short value
val = readshort(endianess, reader);
reader.read();
reader.read();
break;
default: // long and other types
val = readlong(endianess, reader);
break;
}
switch (tag)
{
case 256:
{
params.setInt(COSName.COLUMNS, val);
break;
}
case 257:
{
params.setInt(COSName.ROWS, val);
break;
}
case 259:
{
if (val == 4)
{
k = -1;
}
if (val == 3)
{
k = 0;
}
break; // T6/T4 Compression
}
case 262:
{
if (val == 1)
{
params.setBoolean(COSName.BLACK_IS_1, true);
}
break;
}
case 266:
{
if (val != 1)
{
throw new IOException("FillOrder " + val + " is not supported");
}
break;
}
case 273:
{
if (count == 1)
{
dataoffset = val;
}
break;
}
case 274:
{
// http://www.awaresystems.be/imaging/tiff/tifftags/orientation.html
if (val != 1)
{
throw new IOException("Orientation " + val + " is not supported");
}
break;
}
case 279:
{
if (count == 1)
{
datalength = val;
}
break;
}
case 292:
{
if ((val & 1) != 0)
{
// T4 2D - arbitary positive K value
k = 50;
}
// http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html
if ((val & 4) != 0)
{
throw new IOException("CCITT Group 3 'uncompressed mode' is not supported");
}
if ((val & 2) != 0)
{
throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported");
}
break;
}
case 324:
{
if (count == 1)
{
dataoffset = val;
}
break;
}
case 325:
{
if (count == 1)
{
datalength = val;
}
break;
}
default:
{
// do nothing
}
}
}
if (k == -1000)
{
throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
}
if (dataoffset == 0)
{
throw new IOException("First image in tiff is not a single tile/strip");
}
params.setInt(COSName.K, k);
reader.seek(dataoffset);
byte[] buf = new byte[8192];
int amountRead;
while ((amountRead = reader.read(buf, 0, Math.min(8192, datalength))) > 0)
{
datalength -= amountRead;
os.write(buf, 0, amountRead);
}
}
finally
{
os.close();
}
}
private static int readshort(char endianess, RandomAccess raf) throws IOException
{
if (endianess == 'I')
{
return raf.read() | (raf.read() << 8);
}
return (raf.read() << 8) | raf.read();
}
private static int readlong(char endianess, RandomAccess raf) throws IOException
{
if (endianess == 'I')
{
return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24);
}
return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read();
}
}