All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twelvemonkeys.io.ole2.CompoundDocument Maven / Gradle / Ivy

There is a newer version: 2.3
Show newest version
/*
 * Copyright (c) 2008, Harald Kuhr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name "TwelveMonkeys" nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.twelvemonkeys.io.ole2;

import com.twelvemonkeys.io.*;
import com.twelvemonkeys.lang.StringUtil;

import javax.imageio.stream.ImageInputStream;
import java.io.*;
import java.util.Arrays;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.UUID;

/**
 * Represents a read-only OLE2 compound document.
 * 

* * NOTE: This class is not synchronized. Accessing the document or its * entries from different threads, will need synchronization on the document * instance. * * @author Harald Kuhr * @author last modified by $Author: haku $ * @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/io/ole2/CompoundDocument.java#4 $ */ public final class CompoundDocument { // TODO: Write support... // TODO: Properties: http://support.microsoft.com/kb/186898 private static final byte[] MAGIC = new byte[]{ (byte) 0xD0, (byte) 0xCF, (byte) 0x11, (byte) 0xE0, (byte) 0xA1, (byte) 0xB1, (byte) 0x1A, (byte) 0xE1, }; public static final int HEADER_SIZE = 512; private final DataInput mInput; private UUID mUID; private int mSectorSize; private int mShortSectorSize; private int mDirectorySId; private int mMinStreamSize; private int mShortSATSID; private int mShortSATSize; // Master Sector Allocation Table private int[] mMasterSAT; private int[] mSAT; private int[] mShortSAT; private Entry mRootEntry; private SIdChain mShortStreamSIdChain; private SIdChain mDirectorySIdChain; private static final int END_OF_CHAIN_SID = -2; private static final int FREE_SID = -1; /** The epoch offset of CompoundDocument time stamps */ public final static long EPOCH_OFFSET = -11644477200000L; /** * Creates a (for now) read only {@code CompoundDocument}. * * @param pFile the file to read from * * @throws IOException if an I/O exception occurs while reading the header */ public CompoundDocument(final File pFile) throws IOException { mInput = new LittleEndianRandomAccessFile(FileUtil.resolve(pFile), "r"); // TODO: Might be better to read header on first read operation?! // OTOH: It's also good to be fail-fast, so at least we should make // sure we're reading a valid document readHeader(); } /** * Creates a read only {@code CompoundDocument}. * * @param pInput the input to read from * * @throws IOException if an I/O exception occurs while reading the header */ public CompoundDocument(final InputStream pInput) throws IOException { this(new FileCacheSeekableStream(pInput)); } // For testing only, consider exposing later CompoundDocument(final SeekableInputStream pInput) throws IOException { mInput = new SeekableLittleEndianDataInputStream(pInput); // TODO: Might be better to read header on first read operation?! // OTOH: It's also good to be fail-fast, so at least we should make // sure we're reading a valid document readHeader(); } /** * Creates a read only {@code CompoundDocument}. * * @param pInput the input to read from * * @throws IOException if an I/O exception occurs while reading the header */ public CompoundDocument(final ImageInputStream pInput) throws IOException { mInput = pInput; // TODO: Might be better to read header on first read operation?! // OTOH: It's also good to be fail-fast, so at least we should make // sure we're reading a valid document readHeader(); } public static boolean canRead(final DataInput pInput) { return canRead(pInput, true); } // TODO: Refactor.. Figure out what we really need to expose to ImageIO for // easy reading of the Thumbs.db file // It's probably safer to create one version for InputStream and one for File private static boolean canRead(final DataInput pInput, final boolean pReset) { long pos = FREE_SID; if (pReset) { try { if (pInput instanceof InputStream && ((InputStream) pInput).markSupported()) { ((InputStream) pInput).mark(8); } else if (pInput instanceof ImageInputStream) { ((ImageInputStream) pInput).mark(); } else if (pInput instanceof RandomAccessFile) { pos = ((RandomAccessFile) pInput).getFilePointer(); } else if (pInput instanceof LittleEndianRandomAccessFile) { pos = ((LittleEndianRandomAccessFile) pInput).getFilePointer(); } else { return false; } } catch (IOException ignore) { return false; } } try { byte[] magic = new byte[8]; pInput.readFully(magic); return Arrays.equals(magic, MAGIC); } catch (IOException ignore) { // Ignore } finally { if (pReset) { try { if (pInput instanceof InputStream && ((InputStream) pInput).markSupported()) { ((InputStream) pInput).reset(); } else if (pInput instanceof ImageInputStream) { ((ImageInputStream) pInput).reset(); } else if (pInput instanceof RandomAccessFile) { ((RandomAccessFile) pInput).seek(pos); } else if (pInput instanceof LittleEndianRandomAccessFile) { ((LittleEndianRandomAccessFile) pInput).seek(pos); } } catch (IOException e) { // TODO: This isn't actually good enough... // Means something fucked up, and will fail... e.printStackTrace(); } } } return false; } private void readHeader() throws IOException { if (mMasterSAT != null) { return; } if (!canRead(mInput, false)) { throw new CorruptDocumentException("Not an OLE 2 Compound Document"); } // UID (seems to be all 0s) mUID = new UUID(mInput.readLong(), mInput.readLong()); /*int version = */mInput.readUnsignedShort(); //System.out.println("version: " + version); /*int revision = */mInput.readUnsignedShort(); //System.out.println("revision: " + revision); int byteOrder = mInput.readUnsignedShort(); if (byteOrder != 0xfffe) { // Reversed, as I'm allready reading little-endian throw new CorruptDocumentException("Cannot read big endian OLE 2 Compound Documents"); } mSectorSize = 1 << mInput.readUnsignedShort(); //System.out.println("sectorSize: " + mSectorSize + " bytes"); mShortSectorSize = 1 << mInput.readUnsignedShort(); //System.out.println("shortSectorSize: " + mShortSectorSize + " bytes"); // Reserved if (mInput.skipBytes(10) != 10) { throw new CorruptDocumentException(); } int SATSize = mInput.readInt(); //System.out.println("normalSATSize: " + mSATSize); mDirectorySId = mInput.readInt(); //System.out.println("directorySId: " + mDirectorySId); // Reserved if (mInput.skipBytes(4) != 4) { throw new CorruptDocumentException(); } mMinStreamSize = mInput.readInt(); //System.out.println("minStreamSize: " + mMinStreamSize + " bytes"); mShortSATSID = mInput.readInt(); //System.out.println("shortSATSID: " + mShortSATSID); mShortSATSize = mInput.readInt(); //System.out.println("shortSATSize: " + mShortSATSize); int masterSATSId = mInput.readInt(); //System.out.println("masterSATSId: " + mMasterSATSID); int masterSATSize = mInput.readInt(); //System.out.println("masterSATSize: " + mMasterSATSize); // Read masterSAT: 436 bytes, containing up to 109 SIDs //System.out.println("MSAT:"); mMasterSAT = new int[SATSize]; final int headerSIds = Math.min(SATSize, 109); for (int i = 0; i < headerSIds; i++) { mMasterSAT[i] = mInput.readInt(); //System.out.println("\tSID(" + i + "): " + mMasterSAT[i]); } if (masterSATSId == END_OF_CHAIN_SID) { // End of chain int freeSIdLength = 436 - (SATSize * 4); if (mInput.skipBytes(freeSIdLength) != freeSIdLength) { throw new CorruptDocumentException(); } } else { // Parse the SIDs in the extended MasterSAT sectors... seekToSId(masterSATSId, FREE_SID); int index = headerSIds; for (int i = 0; i < masterSATSize; i++) { for (int j = 0; j < 127; j++) { int sid = mInput.readInt(); switch (sid) { case FREE_SID:// Free break; default: mMasterSAT[index++] = sid; break; } } int next = mInput.readInt(); if (next == END_OF_CHAIN_SID) {// End of chain break; } seekToSId(next, FREE_SID); } } } private void readSAT() throws IOException { if (mSAT != null) { return; } final int intsPerSector = mSectorSize / 4; // Read the Sector Allocation Table mSAT = new int[mMasterSAT.length * intsPerSector]; for (int i = 0; i < mMasterSAT.length; i++) { seekToSId(mMasterSAT[i], FREE_SID); for (int j = 0; j < intsPerSector; j++) { int nextSID = mInput.readInt(); int index = (j + (i * intsPerSector)); mSAT[index] = nextSID; } } // Read the short-stream Sector Allocation Table SIdChain chain = getSIdChain(mShortSATSID, FREE_SID); mShortSAT = new int[mShortSATSize * intsPerSector]; for (int i = 0; i < mShortSATSize; i++) { seekToSId(chain.get(i), FREE_SID); for (int j = 0; j < intsPerSector; j++) { int nextSID = mInput.readInt(); int index = (j + (i * intsPerSector)); mShortSAT[index] = nextSID; } } } /** * Gets the SIdChain for the given stream Id * * @param pSId the stream Id * @param pStreamSize the size of the stream, or -1 for system control streams * @return the SIdChain for the given stream Id * @throws IOException if an I/O exception occurs */ private SIdChain getSIdChain(final int pSId, final long pStreamSize) throws IOException { SIdChain chain = new SIdChain(); int[] sat = isShortStream(pStreamSize) ? mShortSAT : mSAT; int sid = pSId; while (sid != END_OF_CHAIN_SID && sid != FREE_SID) { chain.addSID(sid); sid = sat[sid]; } return chain; } private boolean isShortStream(final long pStreamSize) { return pStreamSize != FREE_SID && pStreamSize < mMinStreamSize; } /** * Seeks to the start pos for the given stream Id * * @param pSId the stream Id * @param pStreamSize the size of the stream, or -1 for system control streams * @throws IOException if an I/O exception occurs */ private void seekToSId(final int pSId, final long pStreamSize) throws IOException { long pos; if (isShortStream(pStreamSize)) { // The short-stream is not continouos... Entry root = getRootEntry(); if (mShortStreamSIdChain == null) { mShortStreamSIdChain = getSIdChain(root.startSId, root.streamSize); } int shortPerStd = mSectorSize / mShortSectorSize; int offset = pSId / shortPerStd; int shortOffset = pSId - (offset * shortPerStd); pos = HEADER_SIZE + (mShortStreamSIdChain.get(offset) * (long) mSectorSize) + (shortOffset * (long) mShortSectorSize); } else { pos = HEADER_SIZE + pSId * (long) mSectorSize; } if (mInput instanceof LittleEndianRandomAccessFile) { ((LittleEndianRandomAccessFile) mInput).seek(pos); } else if (mInput instanceof ImageInputStream) { ((ImageInputStream) mInput).seek(pos); } else { ((SeekableLittleEndianDataInputStream) mInput).seek(pos); } } private void seekToDId(final int pDId) throws IOException { if (mDirectorySIdChain == null) { mDirectorySIdChain = getSIdChain(mDirectorySId, FREE_SID); } int dIdsPerSId = mSectorSize / Entry.LENGTH; int sIdOffset = pDId / dIdsPerSId; int dIdOffset = pDId - (sIdOffset * dIdsPerSId); int sId = mDirectorySIdChain.get(sIdOffset); seekToSId(sId, FREE_SID); if (mInput instanceof LittleEndianRandomAccessFile) { LittleEndianRandomAccessFile input = (LittleEndianRandomAccessFile) mInput; input.seek(input.getFilePointer() + dIdOffset * Entry.LENGTH); } else if (mInput instanceof ImageInputStream) { ImageInputStream input = (ImageInputStream) mInput; input.seek(input.getStreamPosition() + dIdOffset * Entry.LENGTH); } else { SeekableLittleEndianDataInputStream input = (SeekableLittleEndianDataInputStream) mInput; input.seek(input.getStreamPosition() + dIdOffset * Entry.LENGTH); } } SeekableInputStream getInputStreamForSId(final int pStreamId, final int pStreamSize) throws IOException { SIdChain chain = getSIdChain(pStreamId, pStreamSize); // TODO: Detach? Means, we have to copy to a byte buffer, or keep track of // positions, and seek back and forth (would be cool, but difficult).. int sectorSize = pStreamSize < mMinStreamSize ? mShortSectorSize : mSectorSize; return new Stream(chain, pStreamSize, sectorSize, this); } private InputStream getDirectoryStreamForDId(final int pDirectoryId) throws IOException { // This is always exactly 128 bytes, so we'll just read it all, // and buffer (we might want to optimize this later). byte[] bytes = new byte[Entry.LENGTH]; seekToDId(pDirectoryId); mInput.readFully(bytes); return new ByteArrayInputStream(bytes); } Entry getEntry(final int pDirectoryId, Entry pParent) throws IOException { Entry entry = Entry.readEntry(new LittleEndianDataInputStream( getDirectoryStreamForDId(pDirectoryId) )); entry.mParent = pParent; entry.mDocument = this; return entry; } SortedSet getEntries(final int pDirectoryId, final Entry pParent) throws IOException { return getEntriesRecursive(pDirectoryId, pParent, new TreeSet()); } private SortedSet getEntriesRecursive(final int pDirectoryId, final Entry pParent, final SortedSet pEntries) throws IOException { //System.out.println("pDirectoryId: " + pDirectoryId); Entry entry = getEntry(pDirectoryId, pParent); //System.out.println("entry: " + entry); if (!pEntries.add(entry)) { // TODO: This occurs in some Thumbs.db files, and Windows will // still parse the file gracefully somehow... // Deleting and regenerating the file will remove the cyclic // references, but... How can Windows parse this file? throw new CorruptDocumentException("Cyclic chain reference for entry: " + pDirectoryId); } if (entry.prevDId != FREE_SID) { //System.out.println("prevDId: " + entry.prevDId); getEntriesRecursive(entry.prevDId, pParent, pEntries); } if (entry.nextDId != FREE_SID) { //System.out.println("nextDId: " + entry.nextDId); getEntriesRecursive(entry.nextDId, pParent, pEntries); } return pEntries; } /*public*/ Entry getEntry(String pPath) throws IOException { if (StringUtil.isEmpty(pPath) || !pPath.startsWith("/")) { throw new IllegalArgumentException("Path must be absolute, and contain a valid path: " + pPath); } Entry entry = getRootEntry(); if (pPath.equals("/")) { // '/' means root entry return entry; } else { // Otherwise get children recursively: String[] pathElements = StringUtil.toStringArray(pPath, "/"); for (String pathElement : pathElements) { entry = entry.getChildEntry(pathElement); // No such child... if (entry == null) { break;// TODO: FileNotFoundException? Should behave like Entry.getChildEntry!! } } return entry; } } public Entry getRootEntry() throws IOException { if (mRootEntry == null) { readSAT(); mRootEntry = getEntry(0, null); if (mRootEntry.type != Entry.ROOT_STORAGE) { throw new CorruptDocumentException("Invalid root storage type: " + mRootEntry.type); } } return mRootEntry; } // @Override // public int hashCode() { // return mUID.hashCode(); // } // // @Override // public boolean equals(final Object pOther) { // if (pOther == this) { // return true; // } // // if (pOther == null) { // return true; // } // // if (pOther.getClass() == getClass()) { // return mUID.equals(((CompoundDocument) pOther).mUID); // } // // return false; // } @Override public String toString() { return String.format( "%s[uuid: %s, sector size: %d/%d bytes, directory SID: %d, master SAT: %s entries]", getClass().getSimpleName(), mUID, mSectorSize, mShortSectorSize, mDirectorySId, mMasterSAT.length ); } /** * Converts the given time stamp to standard Java time representation, * milliseconds since January 1, 1970. * The time stamp parameter is assumed to be in units of * 100 nano seconds since January 1, 1601. *

* If the timestamp is {@code 0L} (meaning not specified), no conversion * is done, to behave like {@code java.io.File}. * * @param pMSTime an unsigned long value representing the time stamp (in * units of 100 nano seconds since January 1, 1601). * * @return the time stamp converted to Java time stamp in milliseconds, * or {@code 0L} if {@code pMSTime == 0L} */ public static long toJavaTimeInMillis(final long pMSTime) { // NOTE: The time stamp field is an unsigned 64-bit integer value that // contains the time elapsed since 1601-Jan-01 00:00:00 (Gregorian // calendar). // One unit of this value is equal to 100 nanoseconds). // That means, each second the time stamp value will be increased by // 10 million units. if (pMSTime == 0L) { return 0L; // This is just less confusing... } // Convert to milliseconds (signed), // then convert to Java std epoch (1970-Jan-01 00:00:00) return ((pMSTime >> 1) / 5000) + EPOCH_OFFSET; } // TODO: Enforce stream length! static class Stream extends SeekableInputStream { private SIdChain mChain; int mNextSectorPos; byte[] mBuffer; int mBufferPos; private final CompoundDocument mDocument; private final long mLength; public Stream(final SIdChain pChain, final long pLength, final int pSectorSize, final CompoundDocument pDocument) { mChain = pChain; mLength = pLength; mBuffer = new byte[pSectorSize]; mBufferPos = mBuffer.length; mDocument = pDocument; } @Override public int available() throws IOException { return (int) Math.min(mBuffer.length - mBufferPos, mLength - getStreamPosition()); } public int read() throws IOException { if (available() <= 0) { if (!fillBuffer()) { return -1; } } return mBuffer[mBufferPos++] & 0xff; } private boolean fillBuffer() throws IOException { if (mNextSectorPos < mChain.length()) { // TODO: Sync on mDocument.mInput here, and we are completely detached... :-) // TODO: We also need to sync other places... synchronized (mDocument) { mDocument.seekToSId(mChain.get(mNextSectorPos), mLength); mDocument.mInput.readFully(mBuffer); } mNextSectorPos++; mBufferPos = 0; return true; } return false; } @Override public int read(byte b[], int off, int len) throws IOException { if (available() <= 0) { if (!fillBuffer()) { return -1; } } int toRead = Math.min(len, available()); System.arraycopy(mBuffer, mBufferPos, b, off, toRead); mBufferPos += toRead; return toRead; } public boolean isCached() { return true; } public boolean isCachedMemory() { return false; } public boolean isCachedFile() { return true; } protected void closeImpl() throws IOException { mBuffer = null; mChain = null; } protected void seekImpl(final long pPosition) throws IOException { long pos = getStreamPosition(); if (pos - mBufferPos >= pPosition && pPosition <= pos + available()) { // Skip inside buffer only mBufferPos += (pPosition - pos); } else { // Skip outside buffer mNextSectorPos = (int) (pPosition / mBuffer.length); if (!fillBuffer()) { throw new EOFException(); } mBufferPos = (int) (pPosition % mBuffer.length); } } protected void flushBeforeImpl(long pPosition) throws IOException { // No need to do anything here } } // TODO: Add test case for this class!!! static class SeekableLittleEndianDataInputStream extends LittleEndianDataInputStream implements Seekable { private final SeekableInputStream mSeekable; public SeekableLittleEndianDataInputStream(final SeekableInputStream pInput) { super(pInput); mSeekable = pInput; } public void seek(final long pPosition) throws IOException { mSeekable.seek(pPosition); } public boolean isCachedFile() { return mSeekable.isCachedFile(); } public boolean isCachedMemory() { return mSeekable.isCachedMemory(); } public boolean isCached() { return mSeekable.isCached(); } public long getStreamPosition() throws IOException { return mSeekable.getStreamPosition(); } public long getFlushedPosition() throws IOException { return mSeekable.getFlushedPosition(); } public void flushBefore(final long pPosition) throws IOException { mSeekable.flushBefore(pPosition); } public void flush() throws IOException { mSeekable.flush(); } @Override public void reset() throws IOException { mSeekable.reset(); } public void mark() { mSeekable.mark(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy