All downloads are free. Search and download functionality uses the official Maven repository.

com.lowagie.text.pdf.codec.JBIG2SegmentReader Maven / Gradle / Ivy

/*
 * $Id: JBIG2SegmentReader.java 3714 2009-02-20 21:04:16Z xlv $
 *
 * Copyright 2009 by Nigel Kerr.
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */

package com.lowagie.text.pdf.codec;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import com.lowagie.text.pdf.RandomAccessFileOrArray;

/**
 * Class to read a JBIG2 file at a basic level: understand all the segments,
 * understand what segments belong to which pages, how many pages there are,
 * what the width and height of each page is, and global segments if there
 * are any. Or: the minimum required to be able to take a normal sequential
 * or random-access organized file, and be able to embed JBIG2 pages as images
 * in a PDF.
 *
 * TODO: handle the indeterminate segment size value (0xffffffff) of dataLength; anything else?
 *
 * @since 2.1.5
 */

public class JBIG2SegmentReader {

    // Segment type codes; the section numbers refer to the JBIG2 specification.
    public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.

    public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
    public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
    public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
    public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
    public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
    public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
    public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
    public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
    public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
    public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
    public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
    public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
    public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.

    public static final int PAGE_INFORMATION = 48; //see 7.4.8.
    public static final int END_OF_PAGE = 49; //see 7.4.9.
    public static final int END_OF_STRIPE = 50; //see 7.4.10.
    public static final int END_OF_FILE = 51; //see 7.4.11.
    public static final int PROFILES = 52; //see 7.4.12.
    public static final int TABLES = 53; //see 7.4.13.
    public static final int EXTENSION = 62; //see 7.4.14.

    /** Every segment whose header has been read, keyed by segment number. */
    private final SortedMap<Integer, JBIG2Segment> segments = new TreeMap<Integer, JBIG2Segment>();
    /** Pages seen so far, keyed by page number (always &gt; 0). */
    private final SortedMap<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>();
    /** Segments whose page association is 0, ordered by segment number. */
    private final SortedSet<JBIG2Segment> globals = new TreeSet<JBIG2Segment>();
    private final RandomAccessFileOrArray ra;
    private boolean sequential;
    private boolean number_of_pages_known;
    @SuppressWarnings("unused")
    private int number_of_pages = -1;
    private boolean read = false;

    /**
     * Inner class that holds information about a JBIG2 segment.
     *
     * @since 2.1.5
     */
    public static class JBIG2Segment implements Comparable {

        public final int segmentNumber;
        /** Segment data length from the header; -1 until the header has been read. */
        public long dataLength = -1;
        /** Page association from the header; 0 means the segment is stored with the globals. */
        public int page = -1;
        /** Referred-to segment numbers; index 0 is never filled in by the reader. */
        public int[] referredToSegmentNumbers = null;
        public boolean[] segmentRetentionFlags = null;
        public int type = -1;
        public boolean deferredNonRetain = false;
        public int countOfReferredToSegments = -1;
        /** Raw segment data; stays null when the data has not been read. */
        public byte[] data = null;
        /** Raw bytes of the segment header exactly as they appear in the file. */
        public byte[] headerData = null;
        /** true when the page association field is four bytes wide, false when it is one byte. */
        public boolean page_association_size = false;
        /** Offset of the page association field within {@link #headerData}. */
        public int page_association_offset = -1;

        public JBIG2Segment(int segment_number) {
            this.segmentNumber = segment_number;
        }

        /**
         * Untyped comparison used by the globals tree set.
         *
         * @param o the other segment; must be a {@link JBIG2Segment}
         * @return negative, zero or positive, ordered by segment number
         */
        public int compareTo(Object o) {
            return this.compareTo((JBIG2Segment) o);
        }

        /**
         * Orders segments by segment number.
         *
         * @param s the other segment
         * @return -1, 0 or 1 when this segment number is less than, equal to or
         *         greater than the other one
         */
        public int compareTo(JBIG2Segment s) {
            // Explicit comparison instead of subtraction: the difference of two
            // ints overflows when the operands have opposite signs and are far apart.
            if (this.segmentNumber < s.segmentNumber) {
                return -1;
            }
            return (this.segmentNumber == s.segmentNumber) ? 0 : 1;
        }

    }

    /**
     * Inner class that holds information about a JBIG2 page.
     *
     * @since 2.1.5
     */
    public static class JBIG2Page {
        public final int page;
        /** Segments associated with this page, keyed by segment number. */
        private final SortedMap<Integer, JBIG2Segment> segs = new TreeMap<Integer, JBIG2Segment>();
        public int pageBitmapWidth = -1;
        public int pageBitmapHeight = -1;

        /**
         * @param page the page number
         * @param sr   the owning reader; currently not used
         */
        public JBIG2Page(int page, JBIG2SegmentReader sr) {
            this.page = page;
        }

        /**
         * Concatenates, in segment-number order, the header bytes and data bytes
         * of every segment of this page.
         * <p>
         * When <code>for_embedding</code> is set, END_OF_FILE and END_OF_PAGE
         * segments are skipped and the page association written into each copied
         * header is replaced with page 1 (see PDF reference 1.4, section 3.3.6
         * JBIG2Decode Filter, and D.3 Embedded organisation). The stored
         * segments themselves are not modified.
         *
         * @param for_embedding whether the result is meant to be embedded in a PDF
         * @return a byte array
         * @throws IOException declared for API compatibility
         */
        public byte[] getData(boolean for_embedding) throws IOException {
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            for (JBIG2Segment s : segs.values()) {
                if (for_embedding && (s.type == END_OF_FILE || s.type == END_OF_PAGE)) {
                    continue;
                }

                byte[] header = s.headerData;
                if (for_embedding) {
                    // work on a copy so the segment keeps its original header
                    header = copyByteArray(s.headerData);
                    int offset = s.page_association_offset;
                    if (s.page_association_size) {
                        // four-byte page association: big-endian 1
                        header[offset] = 0x0;
                        header[offset + 1] = 0x0;
                        header[offset + 2] = 0x0;
                        header[offset + 3] = 0x1;
                    } else {
                        header[offset] = 0x1;
                    }
                }
                os.write(header);
                os.write(s.data);
            }
            return os.toByteArray();
        }

        /**
         * Registers a segment with this page, replacing any previously added
         * segment that has the same segment number.
         *
         * @param s the segment to add
         */
        public void addSegment(JBIG2Segment s) {
            segs.put(Integer.valueOf(s.segmentNumber), s);
        }

    }

    /**
     * Creates a reader over the given source. Nothing is read until
     * {@link #read()} is called.
     *
     * @param ra the source holding the JBIG2 file
     * @throws IOException declared for API compatibility
     */
    public JBIG2SegmentReader(RandomAccessFileOrArray ra) throws IOException {
        this.ra = ra;
    }

    /**
     * Returns a new array with the same contents as <code>b</code>.
     *
     * @param b the array to copy; must not be null
     * @return an independent copy of <code>b</code>
     */
    public static byte[] copyByteArray(byte[] b) {
        byte[] bc = new byte[b.length];
        System.arraycopy(b, 0, bc, 0, b.length);
        return bc;
    }

    /**
     * Reads the file header and then all segments. May be called only once
     * per reader, even if the first attempt failed.
     *
     * @throws IOException           on a read error or truncated input
     * @throws IllegalStateException if called a second time, or if the file
     *                               content is not acceptable
     */
    public void read() throws IOException {
        if (this.read) {
            throw new IllegalStateException("already attempted a read() on this Jbig2 File");
        }
        this.read = true;

        readFileHeader();
        // Annex D
        if (this.sequential) {
            // D.1: each header is immediately followed by its data
            do {
                JBIG2Segment tmp = readHeader();
                readSegment(tmp);
                segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
            } while (this.ra.getFilePointer() < this.ra.length());
        } else {
            // D.2: all headers first (up to END_OF_FILE), then all the data
            JBIG2Segment tmp;
            do {
                tmp = readHeader();
                segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
            } while (tmp.type != END_OF_FILE);
            for (JBIG2Segment s : segments.values()) {
                readSegment(s);
            }
        }
    }

    /**
     * Reads the data of a segment from the current file position into
     * {@link JBIG2Segment#data}. For PAGE_INFORMATION segments the first two
     * ints of the data are stored as width and height of the page.
     * Segments with the data length 0xffffffff are left without data and the
     * file position is not moved.
     *
     * @param s the segment whose header has already been read
     * @throws IOException           on a read error, or when the declared data
     *                               length runs past the end of the input
     * @throws IllegalStateException if the data length cannot be handled, or the
     *                               page of a PAGE_INFORMATION segment is unknown
     */
    void readSegment(JBIG2Segment s) throws IOException {
        int ptr = ra.getFilePointer();

        if (s.dataLength == 0xffffffffL) {
            // TODO figure this bit out, 7.2.7
            return;
        }

        // The length is an unsigned 32 bit value; casting a value above
        // Integer.MAX_VALUE would give a negative array size.
        if (s.dataLength < 0 || s.dataLength > Integer.MAX_VALUE) {
            throw new IllegalStateException("data length " + s.dataLength
                    + " not supported for segment " + s.segmentNumber + " starting at " + ptr);
        }
        // Check before allocating, so a bogus length cannot trigger a huge allocation.
        long remaining = (long) ra.length() - ptr;
        if (s.dataLength > remaining) {
            throw new IOException("data length " + s.dataLength + " of segment " + s.segmentNumber
                    + " starting at " + ptr + " exceeds the " + remaining + " bytes left");
        }

        byte[] data = new byte[(int) s.dataLength];
        // readFully: a plain read() may return fewer bytes than requested
        ra.readFully(data);
        s.data = data;

        if (s.type == PAGE_INFORMATION) {
            int last = ra.getFilePointer();
            // width and height are the first two ints of the segment data
            ra.seek(ptr);
            int page_bitmap_width = ra.readInt();
            int page_bitmap_height = ra.readInt();
            ra.seek(last);
            JBIG2Page p = pages.get(Integer.valueOf(s.page));
            if (p == null) {
                throw new IllegalStateException("referring to widht/height of page we havent seen yet? " + s.page);
            }

            p.pageBitmapWidth = page_bitmap_width;
            p.pageBitmapHeight = page_bitmap_height;
        }
    }

    /**
     * Reads one segment header from the current file position, registers the
     * segment with its page (or with the globals when the page association is
     * 0) and leaves the file position right after the header.
     *
     * @return the segment described by the header; its data is not read yet
     * @throws IOException           on a read error or truncated input
     * @throws IllegalStateException if the header holds an invalid
     *                               referred-to count or page association
     */
    JBIG2Segment readHeader() throws IOException {
        int ptr = ra.getFilePointer();
        // 7.2.1
        int segment_number = ra.readInt();
        JBIG2Segment s = new JBIG2Segment(segment_number);

        // 7.2.3
        int segment_header_flags = ra.read();
        s.deferredNonRetain = ((segment_header_flags & 0x80) == 0x80);
        boolean page_association_size = ((segment_header_flags & 0x40) == 0x40);
        s.type = (segment_header_flags & 0x3f);

        //7.2.4
        int referred_to_byte0 = ra.read();
        int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
        boolean[] segment_retention_flags;

        if (count_of_referred_to_segments == 7) {
            // long form, at least five bytes: re-read the first byte as part
            // of an int whose low 29 bits hold the count
            ra.seek(ra.getFilePointer() - 1);
            count_of_referred_to_segments = (ra.readInt() & 0x1fffffff);
            segment_retention_flags = new boolean[count_of_referred_to_segments + 1];
            int referred_to_current_byte = 0;
            for (int i = 0; i <= count_of_referred_to_segments; i++) {
                int bit = i % 8;
                if (bit == 0) {
                    // a new byte of flags starts every eight entries
                    referred_to_current_byte = ra.read();
                }
                segment_retention_flags[i] = (((referred_to_current_byte >> bit) & 0x1) == 0x1);
            }

        } else if (count_of_referred_to_segments <= 4) {
            // short form, only one byte: the flags are its low five bits
            segment_retention_flags = new boolean[count_of_referred_to_segments + 1];
            referred_to_byte0 &= 0x1f;
            for (int i = 0; i <= count_of_referred_to_segments; i++) {
                segment_retention_flags[i] = (((referred_to_byte0 >> i) & 0x1) == 0x1);
            }

        } else {
            // 5 and 6 are the only values left
            throw new IllegalStateException("count of referred-to segments had bad value in header for segment "
                    + segment_number + " starting at " + ptr);
        }
        s.segmentRetentionFlags = segment_retention_flags;
        s.countOfReferredToSegments = count_of_referred_to_segments;

        // 7.2.5: the width of each referred-to number depends on this segment's number
        int[] referred_to_segment_numbers = new int[count_of_referred_to_segments + 1];
        for (int i = 1; i <= count_of_referred_to_segments; i++) {
            if (segment_number <= 256) {
                referred_to_segment_numbers[i] = ra.read();
            } else if (segment_number <= 65536) {
                referred_to_segment_numbers[i] = ra.readUnsignedShort();
            } else {
                referred_to_segment_numbers[i] = (int) ra.readUnsignedInt(); // TODO wtf ack
            }
        }
        s.referredToSegmentNumbers = referred_to_segment_numbers;

        // 7.2.6
        int page_association_offset = ra.getFilePointer() - ptr;
        int segment_page_association = page_association_size ? ra.readInt() : ra.read();
        if (segment_page_association < 0) {
            throw new IllegalStateException(
                    "page " + segment_page_association + " invalid for segment " + segment_number + " starting at " + ptr);
        }
        s.page = segment_page_association;
        // so we can change the page association at embedding time.
        s.page_association_size = page_association_size;
        s.page_association_offset = page_association_offset;

        if (segment_page_association > 0) {
            Integer page_key = Integer.valueOf(segment_page_association);
            JBIG2Page p = pages.get(page_key);
            if (p == null) {
                p = new JBIG2Page(segment_page_association, this);
                pages.put(page_key, p);
            }
            p.addSegment(s);
        } else {
            globals.add(s);
        }

        // 7.2.7
        // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
        s.dataLength = ra.readUnsignedInt();

        // Go back and keep the raw header bytes; afterwards the file position
        // is at the end of the header again.
        int end_ptr = ra.getFilePointer();
        ra.seek(ptr);
        byte[] header_data = new byte[end_ptr - ptr];
        ra.readFully(header_data);
        s.headerData = header_data;

        return s;
    }

    /**
     * Reads and checks the file header at the start of the input: the eight
     * byte id string, the flags byte and, when the flags say it is known, the
     * number of pages.
     *
     * @throws IOException           on a read error
     * @throws IllegalStateException if the id string does not match or one of
     *                               the flag bits 2-7 is set
     */
    void readFileHeader() throws IOException {
        ra.seek(0);
        byte[] idstring = new byte[8];
        ra.read(idstring);

        byte[] refidstring = { (byte) 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };

        for (int i = 0; i < idstring.length; i++) {
            if (idstring[i] != refidstring[i]) {
                throw new IllegalStateException("file header idstring not good at byte " + i);
            }
        }

        int fileheaderflags = ra.read();

        // bit 0 set: sequential organisation; bit 1 clear: number of pages is known
        this.sequential = ((fileheaderflags & 0x1) == 0x1);
        this.number_of_pages_known = ((fileheaderflags & 0x2) == 0x0);

        if ((fileheaderflags & 0xfc) != 0x0) {
            throw new IllegalStateException("file header flags bits 2-7 not 0");
        }

        if (this.number_of_pages_known) {
            this.number_of_pages = ra.readInt();
        }
    }

    /**
     * @return the number of distinct pages referenced by the segment headers
     *         read so far
     */
    public int numberOfPages() {
        return pages.size();
    }

    /**
     * @param i the page number
     * @return the bitmap height of the page, -1 if no page information has been read for it
     * @throws NullPointerException if there is no such page
     */
    public int getPageHeight(int i) {
        return pages.get(Integer.valueOf(i)).pageBitmapHeight;
    }

    /**
     * @param i the page number
     * @return the bitmap width of the page, -1 if no page information has been read for it
     * @throws NullPointerException if there is no such page
     */
    public int getPageWidth(int i) {
        return pages.get(Integer.valueOf(i)).pageBitmapWidth;
    }

    /**
     * @param page the page number
     * @return the page, or null if there is no such page
     */
    public JBIG2Page getPage(int page) {
        return pages.get(Integer.valueOf(page));
    }

    /**
     * Concatenates, in segment-number order, the header bytes and data bytes
     * of all global segments (the ones with page association 0).
     *
     * @param for_embedding when set, END_OF_FILE and END_OF_PAGE segments are skipped
     * @return the bytes of the global segments, or null when nothing was written
     */
    public byte[] getGlobal(boolean for_embedding) {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        for (JBIG2Segment s : globals) {
            if (for_embedding && (s.type == END_OF_FILE || s.type == END_OF_PAGE)) {
                continue;
            }
            // The three-argument write of ByteArrayOutputStream declares no
            // IOException, so there is nothing to catch here.
            os.write(s.headerData, 0, s.headerData.length);
            os.write(s.data, 0, s.data.length);
        }
        if (os.size() <= 0) {
            return null;
        }
        return os.toByteArray();
    }

    /**
     * @return a short description including the number of pages once
     *         {@link #read()} has been attempted
     */
    @Override
    public String toString() {
        if (this.read) {
            return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
        } else {
            return "Jbig2SegmentReader in indeterminate state.";
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy