/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.poifs.storage;

import java.io.IOException;

import java.util.*;

import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.*;

/**
 * This class manages and creates the Block Allocation Table, which is
 * basically a set of linked lists of block indices.
 * <p>
 * Each block of the filesystem has an index. The first block, the
 * header, is skipped; the first block after the header is index 0,
 * the next is index 1, and so on.
 * <p>
 * A block's index is also its index into the Block Allocation
 * Table. The entry that it finds in the Block Allocation Table is the
 * index of the next block in the linked list of blocks making up a
 * file, or it is set to -2: end of list.
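 * For example, if entry 0 holds 1, entry 1 holds 2 and entry 2 holds -2, then
 * blocks 0, 1 and 2 form a single three-block chain, while an entry of -1
 * marks a block that no chain uses.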
 *
 * @author Marc Johnson (mjohnson at apache dot org)
 */
public final class BlockAllocationTableReader {
    private static final POILogger _logger = POILogFactory.getLogger(BlockAllocationTableReader.class);

    /**
     * Maximum size (in blocks) of the allocation table as supported by POI.
     *
     * This constant has been chosen to help POI identify corrupted data in the
     * header block (rather than crash immediately with {@link OutOfMemoryError}).
     * It's not clear if the compound document format actually specifies any
     * upper limits. For files with 512 byte blocks, having an allocation table
     * of 65,535 blocks would correspond to a total file size of 4GB. Needless
     * to say, POI probably cannot handle files anywhere near that size.
     */
    private static final int MAX_BLOCK_COUNT = 65535;
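    // The 4GB figure works out as follows: each 512 byte BAT block holds 128
    // four-byte entries, so 65535 BAT blocks can map 65535 * 128 data blocks
    // of 512 bytes each, i.e. 65535 * 128 * 512 bytes, just under 4GB.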

    private final IntList _entries;
    private POIFSBigBlockSize bigBlockSize;

    /**
     * create a BlockAllocationTableReader for an existing filesystem. Side
     * effect: when this method finishes, the BAT blocks will have
     * been removed from the raw block list, and any blocks labeled as
     * 'unused' in the block allocation table will also have been
     * removed from the raw block list.
     *
     * @param block_count the number of BAT blocks making up the block
     *                    allocation table
     * @param block_array the array of BAT block indices from the
     *                    filesystem's header
     * @param xbat_count the number of XBAT blocks
     * @param xbat_index the index of the first XBAT block
     * @param raw_block_list the list of RawDataBlocks
     *
     * @exception IOException if, in trying to create the table, we
     *            encounter logic errors
     */
    public BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, int block_count, int [] block_array,
            int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
        this(bigBlockSize);
        sanityCheckBlockCount(block_count);

        // We want to get the whole of the FAT table
        // To do this:
        //  * Work through raw_block_list, which points to the
        //    first (up to) 109 BAT blocks
        //  * Jump to the XBAT offset, and read in XBATs which
        //    point to more BAT blocks
        int limit = Math.min(block_count, block_array.length);
        int block_index;

        // This will hold all of the BAT blocks in order
        RawDataBlock blocks[] = new RawDataBlock[ block_count ];

        // Process the first (up to) 109 BAT blocks
        for (block_index = 0; block_index < limit; block_index++) {
            // Check that the sector number of the BAT block is a valid one
            int nextOffset = block_array[ block_index ];
            if(nextOffset > raw_block_list.blockCount()) {
                throw new IOException("Your file contains " + raw_block_list.blockCount() +
                        " sectors, but the initial DIFAT array at index " + block_index +
                        " referenced block # " + nextOffset + ". This isn't allowed and " +
                        " your file is corrupt");
            }
            // Record the sector number of this BAT block
            blocks[ block_index ] = ( RawDataBlock ) raw_block_list.remove(nextOffset);
        }

        // Process additional BAT blocks via the XBATs
        if (block_index < block_count) {
            // must have extended blocks
            if (xbat_index < 0) {
                throw new IOException(
                        "BAT count exceeds limit, yet XBAT index indicates no valid entries");
            }

            int chain_index = xbat_index;
            int max_entries_per_block = bigBlockSize.getXBATEntriesPerBlock();
            int chain_index_offset = bigBlockSize.getNextXBATChainOffset();

            // Each XBAT block contains either:
            //  (maximum number of sector indexes) + index of next XBAT
            //  some sector indexes + FREE sectors to max # + EndOfChain
            for (int j = 0; j < xbat_count; j++) {
                limit = Math.min(block_count - block_index, max_entries_per_block);
                byte[] data = raw_block_list.remove(chain_index).getData();
                int offset = 0;

                for (int k = 0; k < limit; k++) {
                    blocks[ block_index++ ] = ( RawDataBlock ) raw_block_list
                            .remove(LittleEndian.getInt(data, offset));
                    offset += LittleEndianConsts.INT_SIZE;
                }

                chain_index = LittleEndian.getInt(data, chain_index_offset);
                if (chain_index == POIFSConstants.END_OF_CHAIN) {
                    break;
                }
            }
        }
        if (block_index != block_count) {
            throw new IOException("Could not find all blocks");
        }

        // Now that we have all of the raw data blocks which make
        // up the FAT, go through and create the indices
        setEntries(blocks, raw_block_list);
    }
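
    // Worked example of the DIFAT/XBAT split handled above, assuming 512 byte
    // blocks: a file whose BAT spans 200 blocks lists the first 109 BAT sector
    // numbers in the header's DIFAT array and the remaining 91 in XBAT blocks,
    // each of which holds up to 127 sector numbers plus the index of the next
    // XBAT block in the chain.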

    /**
     * create a BlockAllocationTableReader from an array of raw data blocks
     *
     * @param blocks the raw data
     * @param raw_block_list the list holding the managed blocks
     *
     * @exception IOException
     */
    BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, ListManagedBlock[] blocks,
            BlockList raw_block_list) throws IOException {
        this(bigBlockSize);
        setEntries(blocks, raw_block_list);
    }

    BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize) {
        this.bigBlockSize = bigBlockSize;
        _entries = new IntList();
    }

    public static void sanityCheckBlockCount(int block_count) throws IOException {
        if (block_count <= 0) {
            throw new IOException(
                    "Illegal block count; minimum count is 1, got " + block_count + " instead");
        }
        if (block_count > MAX_BLOCK_COUNT) {
            throw new IOException(
                    "Block count " + block_count + " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
        }
    }

    /**
     * walk the entries from a specified point and return the
     * associated blocks. The associated blocks are removed from the
     * block list
     *
     * @param startBlock the first block in the chain
     * @param blockList the raw data block list
     *
     * @return array of ListManagedBlocks, in their correct order
     *
     * @exception IOException if there is a problem acquiring the blocks
     */
    ListManagedBlock[] fetchBlocks(int startBlock, int headerPropertiesStartBlock,
            BlockList blockList) throws IOException {
        List<ListManagedBlock> blocks = new ArrayList<ListManagedBlock>();
        int currentBlock = startBlock;
        boolean firstPass = true;
        ListManagedBlock dataBlock = null;

        // Process the chain from the start to the end
        // Normally we have header, data, end
        // Sometimes we have data, header, end
        // For those cases, stop at the header, not the end
        while (currentBlock != POIFSConstants.END_OF_CHAIN) {
            try {
                // Grab the data at the current block offset
                dataBlock = blockList.remove(currentBlock);
                blocks.add(dataBlock);
                // Now figure out which block we go to next
                currentBlock = _entries.get(currentBlock);
                firstPass = false;
            } catch(IOException e) {
                if(currentBlock == headerPropertiesStartBlock) {
                    // Special case where things are in the wrong order
                    _logger.log(POILogger.WARN,
                            "Warning, header block comes after data blocks in POIFS block listing");
                    currentBlock = POIFSConstants.END_OF_CHAIN;
                } else if(currentBlock == 0 && firstPass) {
                    // Special case where the termination isn't done right
                    //  on an empty set
                    _logger.log(POILogger.WARN,
                            "Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)");
                    currentBlock = POIFSConstants.END_OF_CHAIN;
                } else {
                    // Ripple up
                    throw e;
                }
            }
        }

        return blocks.toArray(new ListManagedBlock[blocks.size()]);
    }

    // methods for debugging reader

    /**
     * determine whether the block specified by index is used or not
     *
     * @param index index of block in question
     *
     * @return true if the specific block is used, else false
     */
    boolean isUsed(int index) {
        try {
            return _entries.get(index) != -1;
        } catch (IndexOutOfBoundsException e) {
            // ignored
            return false;
        }
    }

    /**
     * return the next block index
     *
     * @param index of the current block
     *
     * @return index of the next block (may be
     *         POIFSConstants.END_OF_CHAIN, indicating end of chain
     *         (duh))
     *
     * @exception IOException if the current block is unused
     */
    int getNextBlockIndex(int index) throws IOException {
        if (isUsed(index)) {
            return _entries.get(index);
        }
        throw new IOException("index " + index + " is unused");
    }

    /**
     * Convert an array of blocks into a set of integer indices
     *
     * @param blocks the array of blocks containing the indices
     * @param raw_blocks the list of blocks being managed. Unused
     *                   blocks will be eliminated from the list
     */
    private void setEntries(ListManagedBlock[] blocks, BlockList raw_blocks) throws IOException {
        int limit = bigBlockSize.getBATEntriesPerBlock();

        for (int block_index = 0; block_index < blocks.length; block_index++) {
            byte[] data = blocks[ block_index ].getData();
            int offset = 0;

            for (int k = 0; k < limit; k++) {
                int entry = LittleEndian.getInt(data, offset);

                if (entry == POIFSConstants.UNUSED_BLOCK) {
                    raw_blocks.zap(_entries.size());
                }
                _entries.add(entry);
                offset += LittleEndianConsts.INT_SIZE;
            }

            // discard block
            blocks[ block_index ] = null;
        }
        raw_blocks.setBAT(this);
    }
}
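
// Illustrative usage sketch: the reader is normally constructed from values
// parsed out of the filesystem's header block, then asked for block chains.
// The identifiers below (bigBlockSize, batCount, batArray, xbatCount,
// xbatIndex, rawBlocks, startBlock, propertyTableStart) are hypothetical
// placeholders, and fetchBlocks is only visible within this package:
//
//     BlockAllocationTableReader bat = new BlockAllocationTableReader(
//             bigBlockSize, batCount, batArray, xbatCount, xbatIndex, rawBlocks);
//     ListManagedBlock[] chain = bat.fetchBlocks(startBlock, propertyTableStart, rawBlocks);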




