// org.apache.poi.hwpf.model.PAPFormattedDiskPage
// Artifact: poi-scratchpad (Maven / Gradle / Ivy)
// Apache POI - Java API To Access Microsoft Format Files (Scratchpad)
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.hwpf.model;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;

/**
 * Represents a PAP FKP. The style properties for paragraphs and character runs
 * are stored in FKPs. There are PAP FKPs for paragraph properties and CHP FKPs
 * for character run properties. The first part of both kinds of FKP consists
 * of an array of 4-byte int offsets into the main stream for that paragraph's
 * or character run's text. The ending offset is the next value in the array.
 * For example, if an FKP has X paragraphs stored in it, then there are (X + 1)
 * 4-byte ints in the beginning array. The number X is determined by the last
 * byte in a 512-byte FKP.
 *
 * <p>CHP and PAP FKPs also store the compressed styles (grpprl) that
 * correspond to the offsets at the front of the FKP. The offset of the grpprls
 * is determined differently for CHP FKPs and PAP FKPs.
 */
@Internal
public final class PAPFormattedDiskPage extends FormattedDiskPage {
    /** Size in bytes of one BX entry: a 1-byte grpprl word offset followed by the paragraph height. */
    private static final int BX_SIZE = 13;
    /** Size in bytes of one FC (file character position) entry. */
    private static final int FC_SIZE = 4;
    /** Size in bytes of a serialized FKP; the last byte holds the entry count. */
    private static final int FKP_SIZE = 512;
    /** Grpprls longer than this are written to the data stream instead of into the FKP. */
    private static final int MAX_INLINE_GRPPRL_SIZE = 488;
    /** Size of the stand-in grpprl (istd + sprm opcode + 4-byte data stream offset) used for a huge PAPX. */
    private static final int HUGE_PAPX_GRPPRL_SIZE = 8;
    /** Opcode of sprmPHugePapx2, written in front of the data stream offset of a huge PAPX. */
    private static final int SPRM_P_HUGE_PAPX2 = 0x6646;

    /** PAPXs held by this page, either parsed from an FKP or queued through {@link #fill(List)}. */
    private final ArrayList<PAPX> _papxList = new ArrayList<>();
    /** PAPXs that did not fit during the last {@code toByteArray} call; stays {@code null} until an overflow occurs. */
    private ArrayList<PAPX> _overFlow;

    /**
     * Creates an empty page, to be populated with {@link #fill(List)}.
     */
    public PAPFormattedDiskPage() { }

    /**
     * Creates a PAPFormattedDiskPage from a 512 byte array.
     *
     * @param documentStream the stream containing the FKP
     * @param dataStream     passed on to every created {@link PAPX}
     * @param offset         the offset of the FKP inside {@code documentStream}
     * @param translator     maps the byte range of each entry to one or more
     *                       character index ranges; one PAPX is created per
     *                       returned range
     */
    public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
            int offset, CharIndexTranslator translator ) {
        super( documentStream, offset );
        for ( int x = 0; x < _crun; x++ ) {
            int bytesStartAt = getStart( x );
            int bytesEndAt = getEnd( x );

            // grpprl and paragraph height are re-read for every range so that
            // each PAPX receives its own copies.
            for ( int[] range : translator.getCharIndexRanges( bytesStartAt,
                    bytesEndAt ) ) {
                PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ),
                        getParagraphHeight( x ), dataStream );
                _papxList.add( papx );
            }
        }
        // The raw page is not needed once the PAPXs are built. As a result
        // getGrpprl(int) cannot be used after construction.
        _fkp = null;
    }

    /**
     * Fills the queue for writing.
     *
     * @param filler a List of PAPXs; they are appended to the PAPXs already
     *               held by this page
     */
    public void fill(List<PAPX> filler) {
        _papxList.addAll(filler);
    }

    /**
     * Used when writing out a Word document. This method is part of a sequence
     * that is necessary because there is no easy and efficient way to
     * determine the number of PAPXs that will fit into one FKP. The sequence
     * is as follows:
     *
     * <pre>
     * fill()
     * toByteArray()
     * getOverflow()
     * </pre>
     *
     * @return The remaining PAPXs that didn't fit into this FKP, or
     *         {@code null} if no overflow has been recorded.
     */
    ArrayList<PAPX> getOverflow() {
        return _overFlow;
    }

    /**
     * Gets the PAPX at index.
     *
     * @param index The index to get the PAPX for.
     * @return The PAPX at index.
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public PAPX getPAPX(int index) {
        return _papxList.get(index);
    }

    /**
     * Gets all PAPXs held by this page.
     *
     * @return an unmodifiable view of the PAPX list
     */
    public List<PAPX> getPAPXs() {
        return Collections.unmodifiableList( _papxList );
    }

    /**
     * Gets the papx grpprl for the paragraph at index in this fkp.
     *
     * @param index The index of the papx to get.
     * @return a papx grpprl.
     */
    protected byte[] getGrpprl(int index) {
        // The first byte of the BX entry holds the PAPX position in 2-byte words.
        int bxStart = ( ( _crun + 1 ) * FC_SIZE ) + ( index * BX_SIZE );
        int papxOffset = 2 * LittleEndian.getUByte( _fkp, _offset + bxStart );

        int size = 2 * LittleEndian.getUByte( _fkp, _offset + papxOffset );
        if ( size == 0 ) {
            // A zero first byte means the word count is stored in the next byte.
            size = 2 * LittleEndian.getUByte( _fkp, _offset + ++papxOffset );
        } else {
            // The word count includes the count byte itself.
            size--;
        }

        return IOUtils.safelyClone( _fkp, _offset + papxOffset + 1, size, FKP_SIZE );
    }

    /**
     * Creates a byte array representation of this data structure. Suitable for
     * writing to a Word document.
     *
     * <p>Works in two passes: the first determines how many of the queued
     * PAPXs fit into one page (the rest become the overflow), the second
     * writes the FC array from the front of the page, the BX array after it,
     * and the grpprls from the back of the page towards the front.
     *
     * @param dataStream required if PAPX is too big to fit in FKP; huge
     *                   grpprls are appended to it
     * @param translator converts the character positions of each PAPX into
     *                   the byte positions stored in the FC array
     *
     * @return A byte array representing this data structure.
     * @throws IOException
     *             if an I/O error occurs while writing to {@code dataStream}.
     */
    protected byte[] toByteArray( ByteArrayOutputStream dataStream,
            CharIndexTranslator translator ) throws IOException {
        byte[] buf = new byte[FKP_SIZE];
        int size = _papxList.size();
        byte[] lastGrpprl = new byte[0];

        // ---- pass 1: work out how many PAPXs fit ----
        // NOTE(review): this pass compares the original grpprls, while pass 2
        // compares them after a huge grpprl has been replaced by its 8-byte
        // stand-in. Two consecutive identical huge grpprls are therefore
        // counted as shared here but written separately below - confirm
        // whether that can overflow the page.

        // total size is currently the size of one FC
        int totalSize = FC_SIZE;

        int index = 0;
        for ( ; index < size; index++ ) {
            byte[] grpprl = _papxList.get( index ).getGrpprl();
            int grpprlLength = grpprl.length;

            // is grpprl huge?
            if ( grpprlLength > MAX_INLINE_GRPPRL_SIZE ) {
                // set equal to size of sprmPHugePapx grpprl
                grpprlLength = HUGE_PAPX_GRPPRL_SIZE;
            }

            // check to see if we have enough room for an FC, a BX, and the
            // grpprl and the 1 byte size of the grpprl. A grpprl equal to the
            // previous one is shared and needs no extra space.
            int addition;
            if ( !Arrays.equals( grpprl, lastGrpprl ) ) {
                addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
            } else {
                addition = ( FC_SIZE + BX_SIZE );
            }

            totalSize += addition;

            // if size is uneven we will have to add one so the first grpprl
            // falls on a word boundary
            if ( totalSize > ( FKP_SIZE - 1 ) + ( index % 2 ) ) {
                break;
            }

            // grpprls must fall on word boundaries
            if ( grpprlLength % 2 > 0 ) {
                totalSize += 1;
            } else {
                totalSize += 2;
            }
            lastGrpprl = grpprl;
        }

        // see if we couldn't fit some
        if ( index != size ) {
            _overFlow = new ArrayList<>();
            _overFlow.addAll( _papxList.subList( index, size ) );
        }

        // ---- pass 2: lay out the page ----

        // index should equal number of papxs that will be in this fkp now.
        buf[FKP_SIZE - 1] = (byte) index;

        int fcOffset = 0;
        // the BX array starts right behind the (index + 1) FCs
        int bxOffset = ( FC_SIZE * index ) + FC_SIZE;
        // grpprls grow downwards from the end of the page
        int grpprlOffset = FKP_SIZE - 1;

        PAPX papx = null;
        lastGrpprl = new byte[0];
        for ( int x = 0; x < index; x++ ) {
            papx = _papxList.get( x );
            byte[] phe = papx.getParagraphHeight().toByteArray();
            byte[] grpprl = papx.getGrpprl();

            // is grpprl huge?
            if ( grpprl.length > MAX_INLINE_GRPPRL_SIZE ) {
                // Append everything after the 2-byte istd to the data stream
                // and remember where it starts.
                byte[] hugePapx = Arrays.copyOfRange( grpprl, 2, grpprl.length );
                int dataStreamOffset = dataStream.size();
                dataStream.write( hugePapx );

                // grpprl = grpprl containing only a sprmPHugePapx2
                int istd = LittleEndian.getUShort( grpprl, 0 );

                grpprl = new byte[HUGE_PAPX_GRPPRL_SIZE];
                LittleEndian.putUShort( grpprl, 0, istd );
                LittleEndian.putUShort( grpprl, 2, SPRM_P_HUGE_PAPX2 );
                LittleEndian.putInt( grpprl, 4, dataStreamOffset );
            }

            boolean same = Arrays.equals( lastGrpprl, grpprl );
            if ( !same ) {
                // reserve the grpprl plus its size prefix (1 byte for odd
                // lengths, 2 bytes for even ones), then align to a word boundary
                grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
                grpprlOffset -= ( grpprlOffset % 2 );
            }
            LittleEndian.putInt( buf, fcOffset,
                    translator.getByteIndex( papx.getStart() ) );
            // the BX stores the grpprl position in 2-byte words
            buf[bxOffset] = (byte) ( grpprlOffset / 2 );
            System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );

            /*
             * refer to the section on PAPX in the spec. Places a size on the
             * front of the PAPX. Has to do with how the grpprl stays on word
             * boundaries.
             */
            if ( !same ) {
                int copyOffset = grpprlOffset;
                if ( ( grpprl.length % 2 ) > 0 ) {
                    // odd length: a single size byte precedes the grpprl
                    buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
                } else {
                    // even length: the first byte stays 0 and the size goes
                    // into the second byte
                    buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
                    copyOffset++;
                }
                System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
                lastGrpprl = grpprl;
            }

            bxOffset += BX_SIZE;
            fcOffset += FC_SIZE;
        }

        // the final FC is the end position of the last PAPX written
        if ( papx != null ) {
            LittleEndian.putInt( buf, fcOffset, translator.getByteIndex( papx.getEnd() ) );
        }
        return buf;
    }

    /**
     * Used to get the ParagraphHeight of a PAPX at a particular index.
     *
     * @param index the index of the entry in this fkp
     * @return The ParagraphHeight
     */
    private ParagraphHeight getParagraphHeight(int index) {
        // skip the FC array and the preceding BX entries, then the first byte
        // of this BX entry (the grpprl offset)
        int pheOffset = _offset + 1 + ( ( ( _crun + 1 ) * FC_SIZE ) + ( index * BX_SIZE ) );

        return new ParagraphHeight( _fkp, pheOffset );
    }
}