All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.jpedal.jbig2.decoders.JBIG2StreamDecoder Maven / Gradle / Ivy

The newest version!
/**
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/java-pdf-library-support/
 *
 * (C) Copyright 1997-2008, IDRsolutions and Contributors.
 * Main Developer: Simon Barnett
 *
 * 	This file is part of JPedal
 *
 * Copyright (c) 2008, IDRsolutions
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the IDRsolutions nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IDRsolutions ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL IDRsolutions BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Other JBIG2 image decoding implementations include
 * jbig2dec (http://jbig2dec.sourceforge.net/)
 * xpdf (http://www.foolabs.com/xpdf/)
 * 
 * The final draft JBIG2 specification can be found at http://www.jpeg.org/public/fcd14492.pdf
 * 
 * All three of the above resources were used in the writing of this software, with methodologies,
 * processes and inspiration taken from all three.
 *
 * ---------------
 * JBIG2StreamDecoder.java
 * ---------------
 */
package org.jpedal.jbig2.decoders;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.jpedal.jbig2.JBIG2Exception;
import org.jpedal.jbig2.image.JBIG2Bitmap;
import org.jpedal.jbig2.io.StreamReader;
import org.jpedal.jbig2.segment.Segment;
import org.jpedal.jbig2.segment.SegmentHeader;
import org.jpedal.jbig2.segment.extensions.ExtensionSegment;
import org.jpedal.jbig2.segment.pageinformation.PageInformationSegment;
import org.jpedal.jbig2.segment.pattern.PatternDictionarySegment;
import org.jpedal.jbig2.segment.region.generic.GenericRegionSegment;
import org.jpedal.jbig2.segment.region.halftone.HalftoneRegionSegment;
import org.jpedal.jbig2.segment.region.refinement.RefinementRegionSegment;
import org.jpedal.jbig2.segment.region.text.TextRegionSegment;
import org.jpedal.jbig2.segment.stripes.EndOfStripeSegment;
import org.jpedal.jbig2.segment.symboldictionary.SymbolDictionarySegment;
import org.jpedal.jbig2.util.BinaryOperation;

/**
 * Reads a JBIG2 stream segment by segment and keeps the decoded segments and
 * bitmaps so that later segments (and callers) can look them up.
 *
 * <p>
 * Two kinds of input are handled by {@link #decodeJBIG2(byte[])}: a
 * stand-alone file that starts with the 8 byte JBIG2 file header, and a
 * header-less stream (as embedded in a PDF), optionally preceded by global
 * data supplied through {@link #setGlobalData(byte[])}.
 *
 * <p>
 * The class also acts as a facade over the underlying {@link StreamReader}
 * (see {@link #readByte()}, {@link #readBits(int)} etc.) for the segment
 * classes that are constructed with a reference to this decoder.
 */
public class JBIG2StreamDecoder {

	/** When true, header values and progress markers are printed to System.out. */
	public static boolean debug = false;

	/** The 8 byte id string that opens a stand-alone JBIG2 file. */
	private static final short[] FILE_HEADER_ID = { 151, 74, 66, 50, 13, 10, 26, 10 };

	private StreamReader reader;

	private boolean noOfPagesKnown;
	private boolean randomAccessOrganisation;

	private int noOfPages = -1;

	/** All segments read so far, in stream order (never contains null). */
	private final List segments = new ArrayList();
	private final List bitmaps = new ArrayList();

	private byte[] globalData;

	private ArithmeticDecoder arithmeticDecoder;

	private HuffmanDecoder huffmanDecoder;

	private MMRDecoder mmrDecoder;

	/**
	 * Pairs a segment with the length of its data part so that, for random
	 * access files, the data parts can be consumed in header order once all
	 * headers have been read. <code>segment</code> is null for segment types
	 * this decoder does not implement; their data is skipped.
	 */
	private static final class DeferredData {
		private final Segment segment;
		private final int dataLength;

		private DeferredData(Segment segment, int dataLength) {
			this.segment = segment;
			this.dataLength = dataLength;
		}
	}

	/**
	 * Moves the read pointer of the underlying reader.
	 *
	 * @param i amount passed straight through to {@link StreamReader#movePointer(int)}
	 */
	public void movePointer(int i) {
		reader.movePointer(i);
	}

	/**
	 * Sets the global data that is decoded before the main data when the main
	 * data turns out to have no file header.
	 *
	 * @param data the global segment bytes, or null for none
	 */
	public void setGlobalData(final byte[] data) {
		globalData = data;
	}

	/**
	 * Decodes a complete JBIG2 stream. Any state left over from a previous
	 * call (segments, bitmaps, page information) is discarded first.
	 *
	 * @param data the stream to decode, with or without a JBIG2 file header
	 * @throws IOException if reading from the stream fails
	 * @throws JBIG2Exception if the stream is malformed
	 */
	public void decodeJBIG2(final byte[] data) throws IOException, JBIG2Exception {
		reader = new StreamReader(data);

		resetDecoder();

		boolean validFile = checkHeader();
		if (debug) {
			System.out.println("validFile = " + validFile);
		}

		if (validFile) {
			/** We have the file header, so assume it is a valid stand-alone file. */
			readFileHeader();
		} else {
			prepareEmbeddedStream(data);
		}

		createDecoders();

		/** read in the main segment data */
		readSegments();
	}

	public HuffmanDecoder getHuffmanDecoder() {
		return huffmanDecoder;
	}

	public MMRDecoder getMMRDecoder() {
		return mmrDecoder;
	}

	public ArithmeticDecoder getArithmeticDecoder() {
		return arithmeticDecoder;
	}

	/**
	 * Reads the part of the file header that follows the id string: the flags
	 * byte and, when the flags say it is present, the page count.
	 */
	private void readFileHeader() throws IOException {
		debugLog("==== File Header ====");

		setFileHeaderFlags();

		if (debug) {
			System.out.println("randomAccessOrganisation = " + randomAccessOrganisation);
			System.out.println("noOfPagesKnown = " + noOfPagesKnown);
		}

		if (noOfPagesKnown) {
			noOfPages = getNoOfPages();

			if (debug) {
				System.out.println("noOfPages = " + noOfPages);
			}
		}
	}

	/**
	 * Sets the decoder up for a stream without a file header. Such a stream
	 * is assumed to come from a PDF: there are no end of page or end of file
	 * segments, organisation is sequential and there is exactly one page.
	 * Global data, when present, is decoded here; afterwards the reader is
	 * positioned at the start of <code>data</code>.
	 */
	private void prepareEmbeddedStream(final byte[] data) throws IOException, JBIG2Exception {
		noOfPagesKnown = true;
		randomAccessOrganisation = false;
		noOfPages = 1;

		if (globalData == null) {
			/** no global data: un-read the bytes consumed by the header check */
			reader.movePointer(-FILE_HEADER_ID.length);
			return;
		}

		/** read the global data segments with a reader of their own */
		reader = new StreamReader(globalData);
		createDecoders();
		readSegments();

		/** set the reader back to the start of the main data */
		reader = new StreamReader(data);
	}

	/** Creates the three entropy decoders on top of the current reader. */
	private void createDecoders() {
		huffmanDecoder = new HuffmanDecoder(reader);
		mmrDecoder = new MMRDecoder(reader);
		arithmeticDecoder = new ArithmeticDecoder(reader);
	}

	private void resetDecoder() {
		noOfPagesKnown = false;
		randomAccessOrganisation = false;

		noOfPages = -1;

		segments.clear();
		bitmaps.clear();
	}

	/** Prints <code>message</code> only when {@link #debug} is switched on. */
	private static void debugLog(String message) {
		if (debug) {
			System.out.println(message);
		}
	}

	/**
	 * Reads segments from the current reader until it is exhausted or an end
	 * of file segment is met.
	 *
	 * <p>
	 * With sequential organisation each segment's data is read directly after
	 * its header, and the segment is added to the list only after it has been
	 * read. With random access organisation all headers are read (and the
	 * segments listed) first and the data parts are consumed afterwards, in
	 * header order.
	 *
	 * <p>
	 * Segment types without an implementation (profiles, tables, unknown
	 * types) are not stored; their data is skipped so that the following
	 * segments stay aligned.
	 */
	private void readSegments() throws IOException, JBIG2Exception {

		debugLog("==== Segments ====");

		/** data parts still to be consumed; only filled for random access files */
		List deferred = new ArrayList();

		boolean finished = false;
		while (!reader.isFinished() && !finished) {

			SegmentHeader segmentHeader = new SegmentHeader();

			debugLog("==== Segment Header ====");

			int dataLength = readSegmentHeader(segmentHeader);

			int segmentType = segmentHeader.getSegmentType();

			if (segmentType == Segment.END_OF_PAGE) {
				continue;
			}

			if (segmentType == Segment.END_OF_FILE) {
				debugLog("==== End of File ====");
				finished = true;
				continue;
			}

			Segment segment = createSegment(segmentHeader);
			if (segment != null) {
				segment.setSegmentHeader(segmentHeader);
			}

			if (randomAccessOrganisation) {
				deferred.add(new DeferredData(segment, dataLength));
				if (segment != null) {
					segments.add(segment);
				}
			} else if (segment != null) {
				segment.readSegment();
				segments.add(segment);
			} else {
				skipSegmentData(dataLength);
			}
		}

		for (Iterator it = deferred.iterator(); it.hasNext();) {
			DeferredData entry = (DeferredData) it.next();
			if (entry.segment != null) {
				entry.segment.readSegment();
			} else {
				skipSegmentData(entry.dataLength);
			}
		}
	}

	/**
	 * Creates the (still unread) segment object matching the type recorded in
	 * the header.
	 *
	 * @return the new segment, or null when the type is not implemented or
	 *         not recognised
	 */
	private Segment createSegment(SegmentHeader segmentHeader) throws IOException, JBIG2Exception {
		int[] referredToSegments = segmentHeader.getReferredToSegments();
		int noOfReferredToSegments = segmentHeader.getReferredToSegmentCount();

		switch (segmentHeader.getSegmentType()) {
		case Segment.SYMBOL_DICTIONARY:
			debugLog("==== Segment Symbol Dictionary ====");
			return new SymbolDictionarySegment(this);

		case Segment.INTERMEDIATE_TEXT_REGION:
			debugLog("==== Intermediate Text Region ====");
			return new TextRegionSegment(this, false);

		case Segment.IMMEDIATE_TEXT_REGION:
			debugLog("==== Immediate Text Region ====");
			return new TextRegionSegment(this, true);

		case Segment.IMMEDIATE_LOSSLESS_TEXT_REGION:
			debugLog("==== Immediate Lossless Text Region ====");
			return new TextRegionSegment(this, true);

		case Segment.PATTERN_DICTIONARY:
			debugLog("==== Pattern Dictionary ====");
			return new PatternDictionarySegment(this);

		case Segment.INTERMEDIATE_HALFTONE_REGION:
			debugLog("==== Intermediate Halftone Region ====");
			return new HalftoneRegionSegment(this, false);

		case Segment.IMMEDIATE_HALFTONE_REGION:
			debugLog("==== Immediate Halftone Region ====");
			return new HalftoneRegionSegment(this, true);

		case Segment.IMMEDIATE_LOSSLESS_HALFTONE_REGION:
			debugLog("==== Immediate Lossless Halftone Region ====");
			return new HalftoneRegionSegment(this, true);

		case Segment.INTERMEDIATE_GENERIC_REGION:
			debugLog("==== Intermediate Generic Region ====");
			return new GenericRegionSegment(this, false);

		case Segment.IMMEDIATE_GENERIC_REGION:
			debugLog("==== Immediate Generic Region ====");
			return new GenericRegionSegment(this, true);

		case Segment.IMMEDIATE_LOSSLESS_GENERIC_REGION:
			debugLog("==== Immediate Lossless Generic Region ====");
			return new GenericRegionSegment(this, true);

		case Segment.INTERMEDIATE_GENERIC_REFINEMENT_REGION:
			debugLog("==== Intermediate Generic Refinement Region ====");
			return new RefinementRegionSegment(this, false, referredToSegments, noOfReferredToSegments);

		case Segment.IMMEDIATE_GENERIC_REFINEMENT_REGION:
			debugLog("==== Immediate Generic Refinement Region ====");
			return new RefinementRegionSegment(this, true, referredToSegments, noOfReferredToSegments);

		case Segment.IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION:
			debugLog("==== Immediate lossless Generic Refinement Region ====");
			return new RefinementRegionSegment(this, true, referredToSegments, noOfReferredToSegments);

		case Segment.PAGE_INFORMATION:
			debugLog("==== Page Information Dictionary ====");
			return new PageInformationSegment(this);

		case Segment.END_OF_STRIPE:
			debugLog("==== End of Stripes ====");
			return new EndOfStripeSegment(this);

		case Segment.EXTENSION:
			debugLog("==== Extensions ====");
			return new ExtensionSegment(this);

		case Segment.PROFILES:
			debugLog("PROFILES UNIMPLEMENTED");
			return null;

		case Segment.TABLES:
			debugLog("TABLES UNIMPLEMENTED");
			return null;

		default:
			System.out.println("Unknown Segment type in JBIG2 stream");
			return null;
		}
	}

	/**
	 * Consumes the data part of a segment that is not going to be decoded.
	 *
	 * @param dataLength number of bytes in the data part
	 * @throws JBIG2Exception if the length is negative (e.g. the "unknown
	 *             length" marker), because the start of the next segment
	 *             cannot then be located
	 */
	private void skipSegmentData(int dataLength) throws IOException, JBIG2Exception {
		if (dataLength < 0) {
			throw new JBIG2Exception("Error, cannot skip unsupported Segment with data length = " + dataLength);
		}

		for (int i = 0; i < dataLength && !reader.isFinished(); i++) {
			reader.readByte();
		}
	}

	/**
	 * Finds the page information segment associated with a page.
	 *
	 * @param page the page association value to look for
	 * @return the matching segment, or null if there is none
	 */
	public PageInformationSegment findPageSegement(int page) {
		for (Iterator it = segments.iterator(); it.hasNext();) {
			Segment segment = (Segment) it.next();
			SegmentHeader segmentHeader = segment.getSegmentHeader();

			boolean isPageInformation = segmentHeader.getSegmentType() == Segment.PAGE_INFORMATION;
			if (isPageInformation && segmentHeader.getPageAssociation() == page) {
				return (PageInformationSegment) segment;
			}
		}

		return null;
	}

	/**
	 * Finds a segment by its segment number.
	 *
	 * @param segmentNumber the number recorded in the segment's header
	 * @return the matching segment, or null if there is none
	 */
	public Segment findSegment(int segmentNumber) {
		for (Iterator it = segments.iterator(); it.hasNext();) {
			Segment candidate = (Segment) it.next();
			if (candidate.getSegmentHeader().getSegmentNumber() == segmentNumber) {
				return candidate;
			}
		}

		return null;
	}

	/**
	 * Reads one segment header, field by field, into
	 * <code>segmentHeader</code>.
	 *
	 * <p>
	 * For an end of file segment the data length field is only read when the
	 * file uses random access organisation: there the segment data parts
	 * start directly after the last header, so the field has to be consumed
	 * to stay aligned. In a sequential stream nothing is read after the end
	 * of file segment, so the field is left alone.
	 *
	 * @return the segment data length, or 0 when the field was not read
	 */
	private int readSegmentHeader(SegmentHeader segmentHeader) throws IOException, JBIG2Exception {
		handleSegmentNumber(segmentHeader);

		handleSegmentHeaderFlags(segmentHeader);

		handleSegmentReferredToCountAndRententionFlags(segmentHeader);

		handleReferedToSegmentNumbers(segmentHeader);

		handlePageAssociation(segmentHeader);

		boolean isEndOfFile = segmentHeader.getSegmentType() == Segment.END_OF_FILE;
		if (isEndOfFile && !randomAccessOrganisation) {
			return 0;
		}

		return handleSegmentDataLength(segmentHeader);
	}

	/**
	 * Reads the page association field, which is 4 bytes or 1 byte wide
	 * depending on the size flag already stored in the header.
	 */
	private void handlePageAssociation(SegmentHeader segmentHeader) throws IOException {
		int pageAssociation;

		if (segmentHeader.isPageAssociationSizeSet()) { // field is 4 bytes long
			short[] buf = new short[4];
			reader.readByte(buf);
			pageAssociation = BinaryOperation.getInt32(buf);
		} else { // field is 1 byte long
			pageAssociation = reader.readByte();
		}

		segmentHeader.setPageAssociation(pageAssociation);

		if (debug) {
			System.out.println("pageAssociation = " + pageAssociation);
		}
	}

	/** Reads the 4 byte segment number. */
	private void handleSegmentNumber(SegmentHeader segmentHeader) throws IOException {
		short[] segmentBytes = new short[4];
		reader.readByte(segmentBytes);

		int segmentNumber = BinaryOperation.getInt32(segmentBytes);

		if (debug) {
			System.out.println("SegmentNumber = " + segmentNumber);
		}
		segmentHeader.setSegmentNumber(segmentNumber);
	}

	/** Reads the 1 byte segment header flags and hands them to the header. */
	private void handleSegmentHeaderFlags(SegmentHeader segmentHeader) throws IOException {
		short segmentHeaderFlags = reader.readByte();
		segmentHeader.setSegmentHeaderFlags(segmentHeaderFlags);
	}

	/**
	 * Reads the referred-to segment count together with the retention flags.
	 *
	 * <p>
	 * The top three bits of the first byte hold the count. Values 0 to 4 are
	 * the short form, where the low five bits of the same byte are the
	 * retention flags. The value 7 selects the long form, where the low 29
	 * bits of the first four bytes hold the real count and the retention
	 * flags follow in as many bytes as are needed for count + 1 bits.
	 *
	 * @throws JBIG2Exception if the three bit count is 5 or 6
	 */
	private void handleSegmentReferredToCountAndRententionFlags(SegmentHeader segmentHeader) throws IOException, JBIG2Exception {
		short countAndFlags = reader.readByte();

		int referredToSegmentCount = (countAndFlags & 224) >> 5; // 224 = 11100000

		/** take off the first three bits of the first byte */
		short firstByte = (short) (countAndFlags & 31); // 31 = 00011111

		short[] retentionFlags;

		if (referredToSegmentCount <= 4) { // short form

			retentionFlags = new short[] { firstByte };

		} else if (referredToSegmentCount == 7) { // long form

			short[] longFormCountAndFlags = new short[4];
			/** the first byte of the four, minus its top three bits */
			longFormCountAndFlags[0] = firstByte;

			/** add the next 3 bytes to the array */
			for (int i = 1; i < 4; i++) {
				longFormCountAndFlags[i] = reader.readByte();
			}

			/** get the count of the referred to Segments */
			referredToSegmentCount = BinaryOperation.getInt32(longFormCountAndFlags);

			/**
			 * one flag bit per referred-to segment plus one more, rounded up
			 * to whole bytes: ceil((count + 1) / 8)
			 */
			int noOfRententionFlagBytes = (referredToSegmentCount + 8) / 8;
			retentionFlags = new short[noOfRententionFlagBytes];
			reader.readByte(retentionFlags);

		} else { // error
			throw new JBIG2Exception("Error, 3 bit Segment count field = " + referredToSegmentCount);
		}

		segmentHeader.setReferredToSegmentCount(referredToSegmentCount);

		if (debug) {
			System.out.println("referredToSegmentCount = " + referredToSegmentCount);
		}

		segmentHeader.setRententionFlags(retentionFlags);

		if (debug) {
			System.out.print("retentionFlags = ");
			for (int i = 0; i < retentionFlags.length; i++) {
				System.out.print(retentionFlags[i] + " ");
			}
			System.out.println("");
		}
	}

	/**
	 * Reads the numbers of the segments this segment refers to. The width of
	 * each number depends on this segment's own number: 1 byte up to 256,
	 * 2 bytes up to 65536, otherwise 4 bytes.
	 */
	private void handleReferedToSegmentNumbers(SegmentHeader segmentHeader) throws IOException {
		int referredToSegmentCount = segmentHeader.getReferredToSegmentCount();
		int[] referredToSegments = new int[referredToSegmentCount];

		int segmentNumber = segmentHeader.getSegmentNumber();

		if (segmentNumber <= 256) {
			for (int i = 0; i < referredToSegmentCount; i++) {
				referredToSegments[i] = reader.readByte();
			}
		} else if (segmentNumber <= 65536) {
			short[] buf = new short[2];
			for (int i = 0; i < referredToSegmentCount; i++) {
				reader.readByte(buf);
				referredToSegments[i] = BinaryOperation.getInt16(buf);
			}
		} else {
			short[] buf = new short[4];
			for (int i = 0; i < referredToSegmentCount; i++) {
				reader.readByte(buf);
				referredToSegments[i] = BinaryOperation.getInt32(buf);
			}
		}

		segmentHeader.setReferredToSegments(referredToSegments);

		if (debug) {
			System.out.print("referredToSegments = ");
			for (int i = 0; i < referredToSegments.length; i++) {
				System.out.print(referredToSegments[i] + " ");
			}
			System.out.println("");
		}
	}

	/** Reads the 4 byte page count from the file header. */
	private int getNoOfPages() throws IOException {
		short[] pageCountBytes = new short[4];
		reader.readByte(pageCountBytes);

		return BinaryOperation.getInt32(pageCountBytes);
	}

	/**
	 * Reads the 4 byte segment data length and stores it in the header.
	 *
	 * @return the length that was read
	 */
	private int handleSegmentDataLength(SegmentHeader segmentHeader) throws IOException {
		short[] buf = new short[4];
		reader.readByte(buf);

		int dataLength = BinaryOperation.getInt32(buf);
		segmentHeader.setDataLength(dataLength);

		if (debug) {
			System.out.println("dateLength = " + dataLength);
		}

		return dataLength;
	}

	/**
	 * Reads the file header flags byte. Bit 0 clear means random access
	 * organisation; bit 1 clear means the number of pages is known. A warning
	 * is printed when any of the remaining bits is set.
	 */
	private void setFileHeaderFlags() throws IOException {
		short headerFlags = reader.readByte();

		if ((headerFlags & 0xfc) != 0) {
			System.out.println("Warning, reserved bits (2-7) of file header flags are not zero " + headerFlags);
		}

		randomAccessOrganisation = (headerFlags & 1) == 0;
		noOfPagesKnown = (headerFlags & 2) == 0;
	}

	/**
	 * Reads the next 8 bytes and compares them with the JBIG2 file id string.
	 * The bytes are consumed whether or not they match.
	 *
	 * @return true if the stream starts with the JBIG2 file header
	 */
	private boolean checkHeader() throws IOException {
		short[] actualHeader = new short[FILE_HEADER_ID.length];
		reader.readByte(actualHeader);

		return Arrays.equals(FILE_HEADER_ID, actualHeader);
	}

	public int readBits(int num) throws IOException {
		return reader.readBits(num);
	}

	public int readBit() throws IOException {
		return reader.readBit();
	}

	public void readByte(short[] buff) throws IOException {
		reader.readByte(buff);
	}

	public void consumeRemainingBits() throws IOException {
		reader.consumeRemainingBits();
	}

	public short readByte() throws java.io.IOException {
		return reader.readByte();
	}

	/** Stores a decoded bitmap so that it can be found by {@link #findBitmap(int)}. */
	public void appendBitmap(JBIG2Bitmap bitmap) {
		bitmaps.add(bitmap);
	}

	/**
	 * Finds a previously appended bitmap by its bitmap number.
	 *
	 * @return the matching bitmap, or null if there is none
	 */
	public JBIG2Bitmap findBitmap(int bitmapNumber) {
		for (Iterator it = bitmaps.iterator(); it.hasNext();) {
			JBIG2Bitmap candidate = (JBIG2Bitmap) it.next();
			if (candidate.getBitmapNumber() == bitmapNumber) {
				return candidate;
			}
		}

		return null;
	}

	/**
	 * Returns the bitmap of page 1.
	 *
	 * <p>
	 * NOTE(review): the parameter is ignored and page 1 is always returned;
	 * this is kept as is because callers may rely on it - confirm before
	 * changing. A NullPointerException results if no page information
	 * segment for page 1 has been decoded.
	 *
	 * @param i unused
	 */
	public JBIG2Bitmap getPageAsJBIG2Bitmap(int i) {
		return findPageSegement(1).getPageBitmap();
	}

	public boolean isNumberOfPagesKnown() {
		return noOfPagesKnown;
	}

	public int getNumberOfPages() {
		return noOfPages;
	}

	public boolean isRandomAccessOrganisationUsed() {
		return randomAccessOrganisation;
	}

	/** Returns the live list of decoded segments (not a copy). */
	public List getAllSegments() {
		return segments;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy