All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.lowagie.text.pdf.codec.JBIG2SegmentReader Maven / Gradle / Ivy

Go to download

iText is a Java library to create and manipulate PDFs. This is a fork of version 2.1.7, the last MPL/LGPL version. It focuses mainly on maintaining compatibility with newer Bouncy Castle releases and on small bug fixes.

There is a newer version: 2.2.2
Show newest version
/*
 * $Id: JBIG2SegmentReader.java 3714 2009-02-20 21:04:16Z xlv $
 *
 * Copyright 2009 by Nigel Kerr.
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */

package com.lowagie.text.pdf.codec;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import com.lowagie.text.pdf.RandomAccessFileOrArray;

/**
 * Class to read a JBIG2 file at a basic level: understand all the segments,
 * understand what segments belong to which pages, how many pages there are,
 * what the width and height of each page is, and global segments if there
 * are any.  Or: the minimum required to be able to take a normal sequential
 * or random-access organized file, and be able to embed JBIG2 pages as images
 * in a PDF.
 * <p>
 * Typical use: construct the reader, call {@link #read()} exactly once, then
 * query pages with {@link #getPage(int)} and the global segments with
 * {@link #getGlobal(boolean)}.
 * <p>
 * Limitation: segments whose data length field is 0xffffffff (unknown length,
 * see 7.2.7) are not decoded; their <CODE>data</CODE> stays <CODE>null</CODE>
 * and the file pointer is not advanced past them.
 *
 * TODO: the indeterminate-segment-size value of dataLength, else?
 *
 * @since 2.1.5
 */

public class JBIG2SegmentReader {

	public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.

	public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
	public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
	public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
	public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
	public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
	public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
	public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
	public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
	public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
	public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
	public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.

	public static final int PAGE_INFORMATION = 48; //see 7.4.8.
	public static final int END_OF_PAGE = 49; //see 7.4.9.
	public static final int END_OF_STRIPE = 50; //see 7.4.10.
	public static final int END_OF_FILE = 51; //see 7.4.11.
	public static final int PROFILES = 52; //see 7.4.12.
	public static final int TABLES = 53; //see 7.4.13.
	public static final int EXTENSION = 62; //see 7.4.14.

	/** Value of the segment data length field that {@link #readSegment} treats as "length unknown" (7.2.7). */
	private static final long UNKNOWN_DATA_LENGTH = 0xffffffffL;

	/** Every segment header read so far, keyed by segment number. */
	private final SortedMap<Integer, JBIG2Segment> segments = new TreeMap<Integer, JBIG2Segment>();
	/** Pages seen so far, keyed by page number (always &gt; 0). */
	private final SortedMap<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>();
	/** Segments whose page association is 0, ordered by segment number. */
	private final SortedSet<JBIG2Segment> globals = new TreeSet<JBIG2Segment>();
	private RandomAccessFileOrArray ra;
	private boolean sequential;
	private boolean number_of_pages_known;
	@SuppressWarnings("unused")
	private int number_of_pages = -1;
	private boolean read = false;

	/**
	 * Inner class that holds information about a JBIG2 segment.
	 * Segments are ordered by their segment number.
	 * @since	2.1.5
	 */
	public static class JBIG2Segment implements Comparable {

		public final int segmentNumber;
		public long dataLength = -1;
		public int page = -1;
		public int[] referredToSegmentNumbers = null;
		public boolean[] segmentRetentionFlags = null;
		public int type = -1;
		public boolean deferredNonRetain = false;
		public int countOfReferredToSegments = -1;
		public byte[] data = null;
		public byte[] headerData = null;
		public boolean page_association_size = false;
		public int page_association_offset = -1;

		public JBIG2Segment(int segment_number) {
			this.segmentNumber = segment_number;
		}

		/**
		 * Compares by segment number; used by the globals tree set.
		 * @param o	another <CODE>JBIG2Segment</CODE>
		 * @return	a negative, zero or positive value
		 * @throws ClassCastException if <CODE>o</CODE> is not a <CODE>JBIG2Segment</CODE>
		 */
		public int compareTo(Object o) {
			return this.compareTo((JBIG2Segment)o);
		}

		/**
		 * Compares by segment number.
		 * @param s	the segment to compare with
		 * @return	-1, 0 or 1 when this segment number is smaller than, equal to
		 * 			or greater than the other one
		 */
		public int compareTo(JBIG2Segment s) {
			// Explicit comparison: subtracting the two numbers can overflow
			// (segment numbers come from readInt() and may be negative or huge),
			// which would yield a result with the wrong sign.
			if ( this.segmentNumber < s.segmentNumber ) {
				return -1;
			}
			return ( this.segmentNumber > s.segmentNumber ) ? 1 : 0;
		}

	}

	/**
	 * Inner class that holds information about a JBIG2 page.
	 * @since	2.1.5
	 */
	public static class JBIG2Page {
		public final int page;
		/** Segments associated with this page, keyed by segment number. */
		private final SortedMap<Integer, JBIG2Segment> segs = new TreeMap<Integer, JBIG2Segment>();
		public int pageBitmapWidth = -1;
		public int pageBitmapHeight = -1;

		/**
		 * @param page	the page number
		 * @param sr	the reader that found the page; currently not used
		 */
		public JBIG2Page(int page, JBIG2SegmentReader sr) {
			this.page = page;
		}

		/**
		 * return as a single byte array the header-data and data of each segment of
		 * this page, in segment number order (each header immediately followed by
		 * its data).
		 * if for_embedding, END_OF_FILE and END_OF_PAGE segments are skipped and the
		 * page association field in each written header is changed to page 1
		 * (the stored header bytes themselves are left untouched).
		 * @param for_embedding	whether the result is meant to be embedded in a PDF
		 * @return	a byte array
		 * @throws IOException
		 */
		public byte[] getData(boolean for_embedding) throws IOException {
			ByteArrayOutputStream os = new ByteArrayOutputStream();
			// values() of the sorted map iterates in segment number order
			for ( JBIG2Segment s : segs.values() ) {

				// pdf reference 1.4, section 3.3.6 JBIG2Decode Filter
				// D.3 Embedded organisation
				if ( for_embedding &&
						( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
					continue;
				}

				byte[] header = s.headerData;
				if ( for_embedding ) {
					// change the page association to page 1, on a copy
					header = copyByteArray(s.headerData);
					int offset = s.page_association_offset;
					if ( s.page_association_size ) {
						// four-byte page association field
						header[offset] = 0x0;
						header[offset+1] = 0x0;
						header[offset+2] = 0x0;
						header[offset+3] = 0x1;
					} else {
						// one-byte page association field
						header[offset] = 0x1;
					}
				}
				os.write(header);
				os.write(s.data);
			}
			os.close();
			return os.toByteArray();
		}

		/**
		 * Registers a segment as belonging to this page; a segment with the same
		 * segment number that was added before is replaced.
		 * @param s	the segment to add
		 */
		public void addSegment(JBIG2Segment s) {
			segs.put(Integer.valueOf(s.segmentNumber), s);
		}

	}

	/**
	 * Creates a reader on the given source; nothing is read until {@link #read()} is called.
	 * @param ra	the JBIG2 file
	 * @throws IOException
	 */
	public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException {
		this.ra = ra;
	}

	/**
	 * Makes a copy of a byte array.
	 * @param b	the array to copy; must not be <CODE>null</CODE>
	 * @return	a new array with the same length and contents
	 */
	public static byte[] copyByteArray(byte[] b) {
		byte[] bc = new byte[b.length];
		System.arraycopy(b, 0, bc, 0, b.length);
		return bc;
	}

	/**
	 * Reads the file header and then all the segments. May be called only once
	 * per reader, even if the first attempt failed.
	 * @throws IOException on a read error
	 * @throws IllegalStateException if read() was called before, or if the file
	 * 			content is not acceptable
	 */
	public void read() throws IOException {
		if ( this.read ) {
			throw new IllegalStateException("already attempted a read() on this Jbig2 File");
		}
		this.read = true;

		readFileHeader();
		// Annex D
		if ( this.sequential ) {
			// D.1: each header is immediately followed by its data
			do {
				JBIG2Segment tmp = readHeader();
				readSegment(tmp);
				segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
			} while ( this.ra.getFilePointer() < this.ra.length() );
		} else {
			// D.2: all the headers first, up to the end-of-file segment, then all the data
			JBIG2Segment tmp;
			do {
				tmp = readHeader();
				segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
			} while ( tmp.type != END_OF_FILE );
			// data is read in segment number order
			for ( JBIG2Segment s : segments.values() ) {
				readSegment(s);
			}
		}
	}

	/**
	 * Reads the data of a segment from the current file position into
	 * <CODE>s.data</CODE>. For a page information segment the width and height
	 * of the page are taken from the first eight bytes of the data.
	 * A segment of unknown length (0xffffffff) is left untouched.
	 * @param s	the segment, with its header already read
	 * @throws IOException on a read error, or when the file ends before the
	 * 			announced amount of data was read
	 * @throws IllegalStateException if the data length cannot be handled, or if a
	 * 			page information segment refers to a page that was not seen yet
	 */
	void readSegment(JBIG2Segment s) throws IOException {
		int ptr = ra.getFilePointer();

		if ( s.dataLength == UNKNOWN_DATA_LENGTH ) {
			// TODO figure this bit out, 7.2.7
			return;
		}
		// dataLength is an unsigned 32 bit value; a plain cast of anything above
		// Integer.MAX_VALUE would give a negative array size.
		if ( s.dataLength < 0 || s.dataLength > Integer.MAX_VALUE ) {
			throw new IllegalStateException("data length " + s.dataLength + " not supported for segment " + s.segmentNumber + " with data starting at " + ptr);
		}

		byte[] data = new byte[(int)s.dataLength];
		// readFully, because a plain read() may return fewer bytes than asked for
		// and would silently leave the rest of the array zeroed
		ra.readFully(data);
		s.data = data;

		if ( s.type == PAGE_INFORMATION ) {
			// go back to the start of the data to decode width and height
			int last = ra.getFilePointer();
			ra.seek(ptr);
			int page_bitmap_width = ra.readInt();
			int page_bitmap_height = ra.readInt();
			ra.seek(last);
			JBIG2Page p = pages.get(Integer.valueOf(s.page));
			if ( p == null ) {
				throw new IllegalStateException("referring to widht/height of page we havent seen yet? " + s.page);
			}

			p.pageBitmapWidth = page_bitmap_width;
			p.pageBitmapHeight = page_bitmap_height;
		}
	}

	/**
	 * Reads a segment header (7.2) at the current file position, registers the
	 * segment with its page (or with the globals when the page association is 0),
	 * and keeps a copy of the raw header bytes in <CODE>headerData</CODE>.
	 * On return the file pointer is just after the header.
	 * @return	the segment described by the header; its data is not read yet
	 * @throws IOException on a read error
	 * @throws IllegalStateException if the referred-to segment count or the page
	 * 			association is invalid
	 */
	JBIG2Segment readHeader() throws IOException {
		int ptr = ra.getFilePointer();
		// 7.2.2
		int segment_number = ra.readInt();
		JBIG2Segment s = new JBIG2Segment(segment_number);

		// 7.2.3
		int segment_header_flags = ra.read();
		s.deferredNonRetain = (( segment_header_flags & 0x80 ) == 0x80);
		boolean page_association_size = (( segment_header_flags & 0x40 ) == 0x40);
		s.type = ( segment_header_flags & 0x3f );

		//7.2.4
		int referred_to_byte0 = ra.read();
		int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
		boolean[] segment_retention_flags;

		if ( count_of_referred_to_segments == 7 ) {
			// long form, at least five bytes: re-read the first byte as part of
			// a four byte field that holds the count in its low 29 bits
			ra.seek(ra.getFilePointer() - 1);
			count_of_referred_to_segments = ( ra.readInt() & 0x1fffffff );
			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
			int referred_to_current_byte = 0;
			for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
				int bit = i % 8;
				if ( bit == 0 ) {
					// eight flags per byte, least significant bit first
					referred_to_current_byte = ra.read();
				}
				segment_retention_flags[i] = ((( referred_to_current_byte >> bit ) & 0x1) == 0x1);
			}

		} else if ( count_of_referred_to_segments <= 4 ) {
			// short form, only one byte: the flags are in the low five bits
			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
			referred_to_byte0 &= 0x1f;
			for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
				segment_retention_flags[i] = ((( referred_to_byte0 >> i ) & 0x1) == 0x1);
			}

		} else {
			// 5 and 6 are the only values left
			throw new IllegalStateException("count of referred-to segments had bad value in header for segment " + segment_number + " starting at " + ptr);
		}
		s.segmentRetentionFlags = segment_retention_flags;
		s.countOfReferredToSegments = count_of_referred_to_segments;

		// 7.2.5
		// index 0 is not used; the referred-to segment numbers are at 1..count.
		// The size of each number depends on the number of this segment.
		int[] referred_to_segment_numbers = new int[count_of_referred_to_segments+1];
		for ( int i = 1; i <= count_of_referred_to_segments; i++ ) {
			if ( segment_number <= 256 ) {
				referred_to_segment_numbers[i] = ra.read();
			} else if ( segment_number <= 65536 ) {
				referred_to_segment_numbers[i] = ra.readUnsignedShort();
			} else {
				referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack
			}
		}
		s.referredToSegmentNumbers = referred_to_segment_numbers;

		// 7.2.6
		int page_association_offset = ra.getFilePointer() - ptr;
		int segment_page_association = page_association_size ? ra.readInt() : ra.read();
		if ( segment_page_association < 0 ) {
			throw new IllegalStateException("page " + segment_page_association + " invalid for segment " + segment_number + " starting at " + ptr);
		}
		s.page = segment_page_association;
		// so we can change the page association at embedding time.
		s.page_association_size = page_association_size;
		s.page_association_offset = page_association_offset;

		if ( segment_page_association > 0 ) {
			// create the page the first time it is referred to
			Integer page_key = Integer.valueOf(segment_page_association);
			JBIG2Page p = pages.get(page_key);
			if ( p == null ) {
				p = new JBIG2Page(segment_page_association, this);
				pages.put(page_key, p);
			}
			p.addSegment(s);
		} else {
			globals.add(s);
		}

		// 7.2.7
		// TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
		s.dataLength = ra.readUnsignedInt();

		// go back and keep the raw bytes of the whole header
		int end_ptr = ra.getFilePointer();
		ra.seek(ptr);
		byte[] header_data = new byte[end_ptr - ptr];
		ra.readFully(header_data);
		s.headerData = header_data;

		return s;
	}

	/**
	 * Reads the file header at the start of the file: checks the eight byte id
	 * string, reads the flags (sequential or random-access organisation, number
	 * of pages known or not) and, when it is present, the number of pages.
	 * @throws IOException on a read error
	 * @throws IllegalStateException if the id string is wrong or if one of the
	 * 			bits 2-7 of the flags is set
	 */
	void readFileHeader() throws IOException {
		ra.seek(0);
		byte[] idstring = new byte[8];
		// a file shorter than eight bytes fails the comparison below
		ra.read(idstring);

		byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A};

		for ( int i = 0; i < idstring.length; i++ ) {
			if ( idstring[i] != refidstring[i] ) {
				throw new IllegalStateException("file header idstring not good at byte " + i);
			}
		}

		int fileheaderflags = ra.read();

		this.sequential = (( fileheaderflags & 0x1 ) == 0x1);
		// bit 1 set means the number of pages is NOT known
		this.number_of_pages_known = (( fileheaderflags & 0x2) == 0x0);

		if ( (fileheaderflags & 0xfc) != 0x0 ) {
			throw new IllegalStateException("file header flags bits 2-7 not 0");
		}

		if ( this.number_of_pages_known ) {
			this.number_of_pages = ra.readInt();
		}
	}

	/**
	 * @return	the number of pages that were actually found in the segments
	 */
	public int numberOfPages() {
		return pages.size();
	}

	/**
	 * @param i	a page number
	 * @return	the height of the page bitmap, -1 if no page information segment was read for it
	 * @throws NullPointerException if there is no such page
	 */
	public int getPageHeight(int i) {
		return pages.get(Integer.valueOf(i)).pageBitmapHeight;
	}

	/**
	 * @param i	a page number
	 * @return	the width of the page bitmap, -1 if no page information segment was read for it
	 * @throws NullPointerException if there is no such page
	 */
	public int getPageWidth(int i) {
		return pages.get(Integer.valueOf(i)).pageBitmapWidth;
	}

	/**
	 * @param page	a page number
	 * @return	the page, or <CODE>null</CODE> if there is no such page
	 */
	public JBIG2Page getPage(int page) {
		return pages.get(Integer.valueOf(page));
	}

	/**
	 * Returns as a single byte array the header-data and data of every global
	 * segment (page association 0), in segment number order.
	 * @param for_embedding	if true, END_OF_FILE and END_OF_PAGE segments are skipped
	 * @return	a byte array, or <CODE>null</CODE> if there is nothing to return
	 */
	public byte[] getGlobal(boolean for_embedding) {
		ByteArrayOutputStream os = new ByteArrayOutputStream();
		for ( JBIG2Segment s : globals ) {
			if ( for_embedding &&
					( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
				continue;
			}
			// write(byte[], int, int) of ByteArrayOutputStream does not throw
			// IOException, so there is no exception to catch (and swallow) here
			os.write(s.headerData, 0, s.headerData.length);
			os.write(s.data, 0, s.data.length);
		}
		if ( os.size() <= 0 ) {
			return null;
		}
		return os.toByteArray();
	}

	/**
	 * @return	a short description: the number of pages once read() was called
	 */
	@Override
	public String toString() {
		if ( this.read ) {
			return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
		} else {
			return "Jbig2SegmentReader in indeterminate state.";
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy