All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.poi.hwpf.HWPFDocument Maven / Gradle / Ivy

There is a newer version: 5.3.0
Show newest version
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.hwpf;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;

import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.model.BookmarksTables;
import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.ComplexFileTable;
import org.apache.poi.hwpf.model.DocumentProperties;
import org.apache.poi.hwpf.model.EscherRecordHolder;
import org.apache.poi.hwpf.model.FSPADocumentPart;
import org.apache.poi.hwpf.model.FSPATable;
import org.apache.poi.hwpf.model.FieldsTables;
import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.model.NoteType;
import org.apache.poi.hwpf.model.NotesTables;
import org.apache.poi.hwpf.model.PAPBinTable;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
import org.apache.poi.hwpf.model.SavedByTable;
import org.apache.poi.hwpf.model.SectionTable;
import org.apache.poi.hwpf.model.ShapesTable;
import org.apache.poi.hwpf.model.SinglentonTextPiece;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.model.SubdocumentType;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.BookmarksImpl;
import org.apache.poi.hwpf.usermodel.Field;
import org.apache.poi.hwpf.usermodel.Fields;
import org.apache.poi.hwpf.usermodel.FieldsImpl;
import org.apache.poi.hwpf.usermodel.HWPFList;
import org.apache.poi.hwpf.usermodel.Notes;
import org.apache.poi.hwpf.usermodel.NotesImpl;
import org.apache.poi.hwpf.usermodel.OfficeDrawings;
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;

/**
 *
 * This class acts as the bucket that we throw all of the Word data structures
 * into.
 *
 * @author Ryan Ackley
 */
public final class HWPFDocument extends HWPFDocumentCore
{
    static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
    private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";

    private static final String STREAM_DATA = "Data";
    private static final String STREAM_TABLE_0 = "0Table";
    private static final String STREAM_TABLE_1 = "1Table";

  /** table stream buffer*/
  protected byte[] _tableStream;

  /** data stream buffer*/
  protected byte[] _dataStream;

  /** Document wide Properties*/
  protected DocumentProperties _dop;

  /** Contains text of the document wrapped in a obfuscated Word data
  * structure*/
  protected ComplexFileTable _cft;

  /** Contains text buffer linked directly to single-piece document text piece */
  protected StringBuilder _text;

  /** Holds the save history for this document. */
  protected SavedByTable _sbt;
  
  /** Holds the revision mark authors for this document. */
  protected RevisionMarkAuthorTable _rmat;

  /** Holds FSBA (shape) information */
  private FSPATable _fspaHeaders;

  /** Holds FSBA (shape) information */
  private FSPATable _fspaMain;

  /** Escher Drawing Group information */
  protected EscherRecordHolder _escherRecordHolder;

  /** Holds pictures table */
  protected PicturesTable _pictures;

  /** Holds Office Art objects */
  @Deprecated
  protected ShapesTable _officeArts;
  
  /** Holds Office Art objects */
  protected OfficeDrawingsImpl _officeDrawingsHeaders;

  /** Holds Office Art objects */
  protected OfficeDrawingsImpl _officeDrawingsMain;

  /** Holds the bookmarks tables */
  protected BookmarksTables _bookmarksTables;

  /** Holds the bookmarks */
  protected Bookmarks _bookmarks;

  /** Holds the ending notes tables */
  protected NotesTables _endnotesTables = new NotesTables( NoteType.ENDNOTE );

  /** Holds the footnotes */
  protected Notes _endnotes = new NotesImpl( _endnotesTables );

  /** Holds the footnotes tables */
  protected NotesTables _footnotesTables = new NotesTables( NoteType.FOOTNOTE );

  /** Holds the footnotes */
  protected Notes _footnotes = new NotesImpl( _footnotesTables );

  /** Holds the fields PLCFs */
  protected FieldsTables _fieldsTables;

  /** Holds the fields */
  protected Fields _fields;

  protected HWPFDocument()
  {
     super();
     this._text = new StringBuilder("\r");
  }

  /**
   * This constructor loads a Word document from an InputStream.
   *
   * @param istream The InputStream that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed
   *         in InputStream.
   */
  public HWPFDocument(InputStream istream) throws IOException
  {
    //do Ole stuff
    this( verifyAndBuildPOIFS(istream) );
  }

  /**
   * This constructor loads a Word document from a POIFSFileSystem
   *
   * @param pfilesystem The POIFSFileSystem that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed
   *         in POIFSFileSystem.
   */
  public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
  {
    this(pfilesystem.getRoot());
  }

  /**
   * This constructor loads a Word document from a specific point
   *  in a POIFSFileSystem, probably not the default.
   * Used typically to open embedded documents.
   *
   * @param pfilesystem The POIFSFileSystem that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed
   *         in POIFSFileSystem.
   * @deprecated Use {@link #HWPFDocument(DirectoryNode)} instead
   */
  @Deprecated
  public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
  {
     this(directory);
  }
  
  /**
   * This constructor loads a Word document from a specific point
   *  in a POIFSFileSystem, probably not the default.
   * Used typically to open embeded documents.
   *
   * @param directory The DirectoryNode that contains the Word document.
   * @throws IOException If there is an unexpected IOException from the passed
   *         in POIFSFileSystem.
   */
  public HWPFDocument(DirectoryNode directory) throws IOException
  {
    // Load the main stream and FIB
    // Also handles HPSF bits
    super(directory);

    // Is this document too old for us?
    if(_fib.getFibBase().getNFib() < 106) {
        throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
    }

    // use the fib to determine the name of the table stream.
    String name = STREAM_TABLE_0;
    if (_fib.getFibBase().isFWhichTblStm())
    {
      name = STREAM_TABLE_1;
    }

    // Grab the table stream.
    if (!directory.hasEntry(name)) {
        throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
    }

    // read in the table stream.
    InputStream is = directory.createDocumentInputStream(name);
    _tableStream = IOUtils.toByteArray(is);
    is.close();

    _fib.fillVariableFields(_mainStream, _tableStream);

    // read in the data stream.
    try
    {
      DocumentEntry dataProps =
          (DocumentEntry)directory.getEntry(STREAM_DATA);
      _dataStream = new byte[dataProps.getSize()];
      directory.createDocumentInputStream(STREAM_DATA).read(_dataStream);
    }
    catch(java.io.FileNotFoundException e)
    {
        _dataStream = new byte[0];
    }

    // Get the cp of the start of text in the main stream
    // The latest spec doc says this is always zero!
    int fcMin = 0;
    //fcMin = _fib.getFcMin()

    // Start to load up our standard structures.
    _dop = new DocumentProperties(_tableStream, _fib.getFcDop(), _fib.getLcbDop() );
    _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
    TextPieceTable _tpt = _cft.getTextPieceTable();

    // Now load the rest of the properties, which need to be adjusted
    //  for where text really begin
    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt);
    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt);

        _text = _tpt.getText();

        /*
         * in this mode we preserving PAPX/CHPX structure from file, so text may
         * miss from output, and text order may be corrupted
         */
        boolean preserveBinTables = false;
        try
        {
            preserveBinTables = Boolean.parseBoolean( System
                    .getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
        }
        catch ( Exception exc )
        {
            // ignore;
        }

        if ( !preserveBinTables )
        {
            _cbt.rebuild( _cft );
            _pbt.rebuild( _text, _cft );
        }

        /*
         * Property to disable text rebuilding. In this mode changing the text
         * will lead to unpredictable behavior
         */
        boolean preserveTextTable = false;
        try
        {
            preserveTextTable = Boolean.parseBoolean( System
                    .getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
        }
        catch ( Exception exc )
        {
            // ignore;
        }
        if ( !preserveTextTable )
        {
            _cft = new ComplexFileTable();
            _tpt = _cft.getTextPieceTable();
            final TextPiece textPiece = new SinglentonTextPiece( _text );
            _tpt.add( textPiece );
            _text = textPiece.getStringBuilder();
        }

        // Read FSPA and Escher information
        // _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(),
        // _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
        _fspaHeaders = new FSPATable( _tableStream, _fib,
                FSPADocumentPart.HEADER );
        _fspaMain = new FSPATable( _tableStream, _fib, FSPADocumentPart.MAIN );

    if (_fib.getFcDggInfo() != 0)
    {
        _escherRecordHolder = new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
    } else
    {
        _escherRecordHolder = new EscherRecordHolder();
    }

    // read in the pictures stream
    _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder);
    // And the art shapes stream
    _officeArts = new ShapesTable(_tableStream, _fib);

    // And escher pictures
    _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder, _mainStream );
    _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder, _mainStream);

    _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _fib.getSubdocumentTextStreamLength( SubdocumentType.MAIN));
    _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
    _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());

        int listOffset = _fib.getFcPlfLst();
        // int lfoOffset = _fib.getFcPlfLfo();
        if ( listOffset != 0 && _fib.getLcbPlfLst() != 0 )
        {
            _lt = new ListTables( _tableStream, listOffset, _fib.getFcPlfLfo(),
                    _fib.getLcbPlfLfo() );
        }

    int sbtOffset = _fib.getFcSttbSavedBy();
    int sbtLength = _fib.getLcbSttbSavedBy();
    if (sbtOffset != 0 && sbtLength != 0)
    {
      _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
    }

    int rmarkOffset = _fib.getFcSttbfRMark();
    int rmarkLength = _fib.getLcbSttbfRMark();
    if (rmarkOffset != 0 && rmarkLength != 0)
    {
      _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
    }

    _bookmarksTables = new BookmarksTables( _tableStream, _fib );
    _bookmarks = new BookmarksImpl( _bookmarksTables );

    _endnotesTables = new NotesTables( NoteType.ENDNOTE, _tableStream, _fib );
    _endnotes = new NotesImpl( _endnotesTables );
    _footnotesTables = new NotesTables( NoteType.FOOTNOTE, _tableStream, _fib );
    _footnotes = new NotesImpl( _footnotesTables );

    _fieldsTables = new FieldsTables(_tableStream, _fib);
    _fields = new FieldsImpl(_fieldsTables);
  }

  @Internal
  public TextPieceTable getTextTable()
  {
    return _cft.getTextPieceTable();
  }

    @Internal
    @Override
    public StringBuilder getText()
    {
        return _text;
    }

  public DocumentProperties getDocProperties()
  {
    return _dop;
  }

  public Range getOverallRange() {
      return new Range(0, _text.length(), this);
  }

    /**
     * Returns the range which covers the whole of the document, but excludes
     * any headers and footers.
     */
    public Range getRange()
    {
        // // First up, trigger a full-recalculate
        // // Needed in case of deletes etc
        // getOverallRange();
        //
        // if ( getFileInformationBlock().isFComplex() )
        // {
        // /*
        // * Page 31:
        // *
        // * main document must be found by examining the piece table entries
        // * from the 0th piece table entry from the piece table entry that
        // * describes cp=fib.ccpText.
        // */
        // // TODO: review
        // return new Range( _cpSplit.getMainDocumentStart(),
        // _cpSplit.getMainDocumentEnd(), this );
        // }
        //
        // /*
        // * Page 31:
        // *
        // * "In a non-complex file, this means text of the: main document
        // begins
        // * at fib.fcMin in the file and continues through
        // * fib.fcMin+fib.ccpText."
        // */
        // int bytesStart = getFileInformationBlock().getFcMin();
        //
        // int charsStart = getTextTable().getCharIndex( bytesStart );
        // int charsEnd = charsStart
        // + getFileInformationBlock().getSubdocumentTextStreamLength(
        // SubdocumentType.MAIN );

        // it seems much simpler -- sergey
        return getRange(SubdocumentType.MAIN);
    }

    private Range getRange( SubdocumentType subdocument )
    {
        int startCp = 0;
        for ( SubdocumentType previos : SubdocumentType.ORDERED )
        {
            int length = getFileInformationBlock()
                    .getSubdocumentTextStreamLength( previos );
            if ( subdocument == previos )
                return new Range( startCp, startCp + length, this );
            startCp += length;
        }
        throw new UnsupportedOperationException(
                "Subdocument type not supported: " + subdocument );
    }

    /**
     * Returns the {@link Range} which covers all the Footnotes.
     * 
     * @return the {@link Range} which covers all the Footnotes.
     */
    public Range getFootnoteRange()
    {
        return getRange( SubdocumentType.FOOTNOTE );
    }

    /**
     * Returns the {@link Range} which covers all endnotes.
     * 
     * @return the {@link Range} which covers all endnotes.
     */
    public Range getEndnoteRange()
    {
        return getRange( SubdocumentType.ENDNOTE );
    }

    /**
     * Returns the {@link Range} which covers all annotations.
     * 
     * @return the {@link Range} which covers all annotations.
     */
    public Range getCommentsRange()
    {
        return getRange( SubdocumentType.ANNOTATION );
    }

    /**
     * Returns the {@link Range} which covers all textboxes.
     * 
     * @return the {@link Range} which covers all textboxes.
     */
    public Range getMainTextboxRange()
    {
        return getRange( SubdocumentType.TEXTBOX );
    }

  /**
   * Returns the range which covers all "Header Stories".
   * A header story contains a header, footer, end note
   *  separators and footnote separators.
   */
  public Range getHeaderStoryRange() {
      return getRange( SubdocumentType.HEADER );
  }

  /**
   * Returns the character length of a document.
   * @return the character length of a document
   */
  public int characterLength()
  {
      return _text.length();
  }

  /**
   * Gets a reference to the saved -by table, which holds the save history for the document.
   *
   * @return the saved-by table.
   */
  @Internal
  public SavedByTable getSavedByTable()
  {
    return _sbt;
  }

  /**
   * Gets a reference to the revision mark author table, which holds the revision mark authors for the document.
   *
   * @return the saved-by table.
   */
  @Internal
  public RevisionMarkAuthorTable getRevisionMarkAuthorTable()
  {
    return _rmat;
  }
  
  /**
   * @return PicturesTable object, that is able to extract images from this document
   */
  public PicturesTable getPicturesTable() {
      return _pictures;
  }

  @Internal
  public EscherRecordHolder getEscherRecordHolder() {
      return _escherRecordHolder;
  }

    /**
     * @return ShapesTable object, that is able to extract office are shapes
     *         from this document
     * @deprecated use {@link #getOfficeDrawingsMain()} instead
     */
    @Deprecated
    @Internal
    public ShapesTable getShapesTable()
    {
        return _officeArts;
    }

    public OfficeDrawings getOfficeDrawingsHeaders()
    {
        return _officeDrawingsHeaders;
    }

    public OfficeDrawings getOfficeDrawingsMain()
    {
        return _officeDrawingsMain;
    }

    /**
     * @return user-friendly interface to access document bookmarks
     */
    public Bookmarks getBookmarks()
    {
        return _bookmarks;
    }

    /**
     * @return user-friendly interface to access document endnotes
     */
    public Notes getEndnotes()
    {
        return _endnotes;
    }

    /**
     * @return user-friendly interface to access document footnotes
     */
    public Notes getFootnotes()
    {
        return _footnotes;
    }

  /**
   * @return FieldsTables object, that is able to extract fields descriptors from this document
   * @deprecated
   */
    @Deprecated
    @Internal
  public FieldsTables getFieldsTables() {
      return _fieldsTables;
  }

    /**
     * Returns user-friendly interface to access document {@link Field}s
     * 
     * @return user-friendly interface to access document {@link Field}s
     */
    public Fields getFields()
    {
        return _fields;
    }

  /**
   * Writes out the word file that is represented by an instance of this class.
   *
   * @param out The OutputStream to write to.
   * @throws IOException If there is an unexpected IOException from the passed
   *         in OutputStream.
   */
  public void write(OutputStream out)
    throws IOException
  {
    // initialize our streams for writing.
    HWPFFileSystem docSys = new HWPFFileSystem();
    HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
    HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
    //HWPFOutputStream dataStream = docSys.getStream("Data");
    int tableOffset = 0;

    // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
    // clear the offsets and sizes in our FileInformationBlock.
    _fib.clearOffsetsSizes();

    // determine the FileInformationBLock size
    int fibSize = _fib.getSize();
    fibSize  += POIFSConstants.SMALLER_BIG_BLOCK_SIZE -
        (fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);

    // preserve space for the FileInformationBlock because we will be writing
    // it after we write everything else.
    byte[] placeHolder = new byte[fibSize];
    wordDocumentStream.write(placeHolder);
    int mainOffset = wordDocumentStream.getOffset();

    // write out the StyleSheet.
    _fib.setFcStshf(tableOffset);
    _ss.writeTo(tableStream);
    _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // get fcMin and fcMac because we will be writing the actual text with the
    // complex table.
    int fcMin = mainOffset;

        /*
         * clx (encoding of the sprm lists for a complex file and piece table
         * for a any file) Written immediately after the end of the previously
         * recorded structure. This is recorded in all Word documents
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 23 of 210
         */
    
    // write out the Complex table, includes text.
    _fib.setFcClx(tableOffset);
    _cft.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbClx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();
    int fcMac = wordDocumentStream.getOffset();

        /*
         * dop (document properties record) Written immediately after the end of
         * the previously recorded structure. This is recorded in all Word
         * documents
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 23 of 210
         */

    // write out the DocumentProperties.
    _fib.setFcDop(tableOffset);
    _dop.writeTo(tableStream);
    _fib.setLcbDop(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

        /*
         * plcfBkmkf (table recording beginning CPs of bookmarks) Written
         * immediately after the sttbfBkmk, if the document contains bookmarks.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */
        if ( _bookmarksTables != null )
        {
            _bookmarksTables.writePlcfBkmkf( _fib, tableStream );
            tableOffset = tableStream.getOffset();
        }

        /*
         * plcfBkmkl (table recording limit CPs of bookmarks) Written
         * immediately after the plcfBkmkf, if the document contains bookmarks.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */
        if ( _bookmarksTables != null )
        {
            _bookmarksTables.writePlcfBkmkl( _fib, tableStream );
            tableOffset = tableStream.getOffset();
        }

        /*
         * plcfbteChpx (bin table for CHP FKPs) Written immediately after the
         * previously recorded table. This is recorded in all Word documents.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */

    // write out the CHPBinTable.
    _fib.setFcPlcfbteChpx(tableOffset);
    _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
    _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

        /*
         * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
         * plcfbteChpx. This is recorded in all Word documents.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */

    // write out the PAPBinTable.
    _fib.setFcPlcfbtePapx(tableOffset);
    _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
    _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

        /*
         * plcfendRef (endnote reference position table) Written immediately
         * after the previously recorded table if the document contains endnotes
         * 
         * plcfendTxt (endnote text position table) Written immediately after
         * the plcfendRef if the document contains endnotes
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */
        _endnotesTables.writeRef( _fib, tableStream );
        _endnotesTables.writeTxt( _fib, tableStream );
        tableOffset = tableStream.getOffset();

    /*
     * plcffld*** (table of field positions and statuses for annotation
     * subdocument) Written immediately after the previously recorded table,
     * if the ******* subdocument contains fields.
     * 
     * Microsoft Office Word 97-2007 Binary File Format (.doc)
     * Specification; Page 24 of 210
     */

    if ( _fieldsTables != null )
    {
        _fieldsTables.write( _fib, tableStream );
        tableOffset = tableStream.getOffset();
    }

        /*
         * plcffndRef (footnote reference position table) Written immediately
         * after the stsh if the document contains footnotes
         * 
         * plcffndTxt (footnote text position table) Written immediately after
         * the plcffndRef if the document contains footnotes
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 24 of 210
         */
        _footnotesTables.writeRef( _fib, tableStream );
        _footnotesTables.writeTxt( _fib, tableStream );
        tableOffset = tableStream.getOffset();

        /*
         * plcfsed (section table) Written immediately after the previously
         * recorded table. Recorded in all Word documents
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 25 of 210
         */

    // write out the SectionTable.
    _fib.setFcPlcfsed(tableOffset);
    _st.writeTo(wordDocumentStream, tableStream);
    _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

        // write out the list tables
        if ( _lt != null )
        {
            /*
             * plcflst (list formats) Written immediately after the end of the
             * previously recorded, if there are any lists defined in the
             * document. This begins with a short count of LSTF structures
             * followed by those LSTF structures. This is immediately followed
             * by the allocated data hanging off the LSTFs. This data consists
             * of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
             * followed by two grpprls and an XST.)
             * 
             * Microsoft Office Word 97-2007 Binary File Format (.doc)
             * Specification; Page 25 of 210
             */
            _lt.writeListDataTo( _fib, tableStream );
            tableOffset = tableStream.getOffset();

            /*
             * plflfo (more list formats) Written immediately after the end of
             * the plcflst and its accompanying data, if there are any lists
             * defined in the document. This consists first of a PL of LFO
             * records, followed by the allocated data (if any) hanging off the
             * LFOs. The allocated data consists of the array of LFOLVLFs for
             * each LFO (and each LFOLVLF is immediately followed by some LVLs).
             * 
             * Microsoft Office Word 97-2007 Binary File Format (.doc)
             * Specification; Page 26 of 210
             */
            _lt.writeListOverridesTo( _fib, tableStream );
            tableOffset = tableStream.getOffset();
        }

        /*
         * sttbfBkmk (table of bookmark name strings) Written immediately after
         * the previously recorded table, if the document contains bookmarks.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 27 of 210
         */
        if ( _bookmarksTables != null )
        {
            _bookmarksTables.writeSttbfBkmk( _fib, tableStream );
            tableOffset = tableStream.getOffset();
        }

        /*
         * sttbSavedBy (last saved by string table) Written immediately after
         * the previously recorded table.
         * 
         * Microsoft Office Word 97-2007 Binary File Format (.doc)
         * Specification; Page 27 of 210
         */

    // write out the saved-by table.
    if (_sbt != null)
    {
      _fib.setFcSttbSavedBy(tableOffset);
      _sbt.writeTo(tableStream);
      _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }
    
    // write out the revision mark authors table.
    if (_rmat != null)
    {
      _fib.setFcSttbfRMark(tableOffset);
      _rmat.writeTo(tableStream);
      _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);

      tableOffset = tableStream.getOffset();
    }

    // write out the FontTable.
    _fib.setFcSttbfffn(tableOffset);
    _ft.writeTo(tableStream);
    _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
    tableOffset = tableStream.getOffset();

    // set some variables in the FileInformationBlock.
    _fib.getFibBase().setFcMin(fcMin);
    _fib.getFibBase().setFcMac(fcMac);
    _fib.setCbMac(wordDocumentStream.getOffset());

    // make sure that the table, doc and data streams use big blocks.
    byte[] mainBuf = wordDocumentStream.toByteArray();
    if (mainBuf.length < 4096)
    {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
      mainBuf = tempBuf;
    }

        // Table1 stream will be used
        _fib.getFibBase().setFWhichTblStm( true );

    // write out the FileInformationBlock.
    //_fib.serialize(mainBuf, 0);
    _fib.writeTo(mainBuf, tableStream);

    byte[] tableBuf = tableStream.toByteArray();
    if (tableBuf.length < 4096)
    {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
      tableBuf = tempBuf;
    }

    byte[] dataBuf = _dataStream;
    if (dataBuf == null)
    {
      dataBuf = new byte[4096];
    }
    if (dataBuf.length < 4096)
    {
      byte[] tempBuf = new byte[4096];
      System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
      dataBuf = tempBuf;
    }

        // create new document preserving order of entries
        NPOIFSFileSystem pfs = new NPOIFSFileSystem();
        boolean docWritten = false;
        boolean dataWritten = false;
        boolean objectPoolWritten = false;
        boolean tableWritten = false;
        boolean propertiesWritten = false;
        for ( Iterator iter = directory.getEntries(); iter.hasNext(); )
        {
            Entry entry = iter.next();
            if ( entry.getName().equals( STREAM_WORD_DOCUMENT ) )
            {
                if ( !docWritten )
                {
                    pfs.createDocument( new ByteArrayInputStream( mainBuf ),
                            STREAM_WORD_DOCUMENT );
                    docWritten = true;
                }
            }
            else if ( entry.getName().equals( STREAM_OBJECT_POOL ) )
            {
                if ( !objectPoolWritten )
                {
                    _objectPool.writeTo( pfs.getRoot() );
                    objectPoolWritten = true;
                }
            }
            else if ( entry.getName().equals( STREAM_TABLE_0 )
                    || entry.getName().equals( STREAM_TABLE_1 ) )
            {
                if ( !tableWritten )
                {
                    pfs.createDocument( new ByteArrayInputStream( tableBuf ),
                            STREAM_TABLE_1 );
                    tableWritten = true;
                }
            }
            else if ( entry.getName().equals(
                    SummaryInformation.DEFAULT_STREAM_NAME )
                    || entry.getName().equals(
                            DocumentSummaryInformation.DEFAULT_STREAM_NAME ) )
            {
                if ( !propertiesWritten )
                {
                    writeProperties( pfs );
                    propertiesWritten = true;
                }
            }
            else if ( entry.getName().equals( STREAM_DATA ) )
            {
                if ( !dataWritten )
                {
                    pfs.createDocument( new ByteArrayInputStream( dataBuf ),
                            STREAM_DATA );
                    dataWritten = true;
                }
            }
            else
            {
                EntryUtils.copyNodeRecursively( entry, pfs.getRoot() );
            }
        }

        if ( !docWritten )
            pfs.createDocument( new ByteArrayInputStream( mainBuf ),
                    STREAM_WORD_DOCUMENT );
        if ( !tableWritten )
            pfs.createDocument( new ByteArrayInputStream( tableBuf ),
                    STREAM_TABLE_1 );
        if ( !propertiesWritten )
            writeProperties( pfs );
        if ( !dataWritten )
            pfs.createDocument( new ByteArrayInputStream( dataBuf ),
                    STREAM_DATA );
        if ( !objectPoolWritten )
            _objectPool.writeTo( pfs.getRoot() );

        pfs.writeFilesystem( out );
        this.directory = pfs.getRoot();

        /*
         * since we updated all references in FIB and etc, using new arrays to
         * access data
         */
        this.directory = pfs.getRoot();
        this._tableStream = tableStream.toByteArray();
        this._dataStream = dataBuf;
    }

  @Internal
  public byte[] getDataStream()
  {
    return _dataStream;
  }
  @Internal
  public byte[] getTableStream()
  {
    return _tableStream;
  }

    public int registerList( HWPFList list )
    {
        if ( _lt == null )
        {
            _lt = new ListTables();
        }
        return _lt.addList( list.getListData(), list.getLFO(),
                list.getLFOData() );
    }

  public void delete(int start, int length)
  {
    Range r = new Range(start, start + length, this);
    r.delete();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy