All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.microsoft.JackcessCompoundOleUtil Maven / Gradle / Ivy

There is a newer version: 3.0.0-BETA2
Show newest version
/*
Copyright (c) 2013 James Ahlborn

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package org.apache.tika.parser.microsoft;

import com.healthmarketscience.jackcess.RuntimeIOException;
import com.healthmarketscience.jackcess.impl.ByteUtil;
import com.healthmarketscience.jackcess.impl.CustomToStringStyle;
import com.healthmarketscience.jackcess.util.MemFileChannel;
import com.healthmarketscience.jackcess.util.OleBlob;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Temporary copy/paste from Jackcess to allow upgrade to POI 4.0.0.
 * This class will be removed once POI 4.0.0 is released and jackcess
 * updates to the most recent version of POI.
 * @deprecated -- this class will be removed in Tika >= 1.20
 */
@Deprecated
class JackcessCompoundOleUtil implements JackcessOleUtil.CompoundPackageFactory {
    private static final String ENTRY_NAME_CHARSET = "UTF-8";
    private static final String ENTRY_SEPARATOR = "/";
    private static final String CONTENTS_ENTRY = "CONTENTS";

    static {
        // force a poi class to be loaded to ensure that when this class is
        // loaded, we know that the poi classes are available
        POIFSFileSystem.class.getName();
    }

    public JackcessCompoundOleUtil() {
    }

    /**
     * Creates a nes CompoundContent for the given blob information.
     */
    public JackcessOleUtil.ContentImpl createCompoundPackageContent(
            JackcessOleUtil.OleBlobImpl blob, String prettyName, String className, String typeName,
            ByteBuffer blobBb, int dataBlockLen) {
        return new CompoundContentImpl(blob, prettyName, className, typeName,
                blobBb.position(), dataBlockLen);
    }

    /**
     * Gets a DocumentEntry from compound storage based on a fully qualified,
     * encoded entry name.
     *
     * @param entryName fully qualified, encoded entry name
     * @param dir       root directory of the compound storage
     * @return the relevant DocumentEntry
     * @throws FileNotFoundException if the entry does not exist
     * @throws IOException           if some other io error occurs
     */
    public static DocumentEntry getDocumentEntry(String entryName,
                                                 DirectoryEntry dir)
            throws IOException {
        // split entry name into individual components and decode them
        List entryNames = new ArrayList();
        for (String str : entryName.split(ENTRY_SEPARATOR)) {
            if (str.length() == 0) {
                continue;
            }
            entryNames.add(decodeEntryName(str));
        }

        DocumentEntry entry = null;
        Iterator iter = entryNames.iterator();
        while (iter.hasNext()) {
            org.apache.poi.poifs.filesystem.Entry tmpEntry = dir.getEntry(iter.next());
            if (tmpEntry instanceof DirectoryEntry) {
                dir = (DirectoryEntry) tmpEntry;
            } else if (!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) {
                entry = (DocumentEntry) tmpEntry;
            } else {
                break;
            }
        }

        if (entry == null) {
            throw new FileNotFoundException("Could not find document " + entryName);
        }

        return entry;
    }

    private static String encodeEntryName(String name) {
        try {
            return URLEncoder.encode(name, ENTRY_NAME_CHARSET);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    private static String decodeEntryName(String name) {
        try {
            return URLDecoder.decode(name, ENTRY_NAME_CHARSET);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    private static final class CompoundContentImpl
            extends JackcessOleUtil.EmbeddedPackageContentImpl
            implements OleBlob.CompoundContent {
        private POIFSFileSystem _fs;

        private CompoundContentImpl(
                JackcessOleUtil.OleBlobImpl blob, String prettyName, String className,
                String typeName, int position, int length) {
            super(blob, prettyName, className, typeName, position, length);
        }

        public OleBlob.ContentType getType() {
            return OleBlob.ContentType.COMPOUND_STORAGE;
        }

        private POIFSFileSystem getFileSystem() throws IOException {
            if (_fs == null) {
                _fs = new POIFSFileSystem(MemFileChannel.newChannel(getStream(), "r"));
            }
            return _fs;
        }

        public Iterator iterator() {
            try {
                return getEntries(new ArrayList(), getFileSystem().getRoot(),
                        ENTRY_SEPARATOR).iterator();
            } catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        }

        public EntryImpl getEntry(String entryName) throws IOException {
            return new EntryImpl(entryName,
                    getDocumentEntry(entryName, getFileSystem().getRoot()));
        }

        public boolean hasContentsEntry() throws IOException {
            return getFileSystem().getRoot().hasEntry(CONTENTS_ENTRY);
        }

        public EntryImpl getContentsEntry() throws IOException {
            return getEntry(CONTENTS_ENTRY);
        }

        private List getEntries(List entries, DirectoryEntry dir,
                                       String prefix) {
            for (org.apache.poi.poifs.filesystem.Entry entry : dir) {
                if (entry instanceof DirectoryEntry) {
                    // .. recurse into this directory
                    getEntries(entries, (DirectoryEntry) entry, prefix + ENTRY_SEPARATOR);
                } else if (entry instanceof DocumentEntry) {
                    // grab the entry name/detils
                    DocumentEntry de = (DocumentEntry) entry;
                    String entryName = prefix + encodeEntryName(entry.getName());
                    entries.add(new EntryImpl(entryName, de));
                }
            }
            return entries;
        }

        @Override
        public void close() {
            ByteUtil.closeQuietly(_fs);
            _fs = null;
            super.close();
        }

        @Override
        public String toString() {
            ToStringBuilder sb = toString(CustomToStringStyle.builder(this));

            try {
                sb.append("hasContentsEntry", hasContentsEntry());
                sb.append("entries", getEntries(new ArrayList(),
                        getFileSystem().getRoot(),
                        ENTRY_SEPARATOR));
            } catch (IOException e) {
                sb.append("entries", "<" + e + ">");
            }

            return sb.toString();
        }

        private final class EntryImpl implements OleBlob.CompoundContent.Entry {
            private final String _name;
            private final DocumentEntry _docEntry;

            private EntryImpl(String name, DocumentEntry docEntry) {
                _name = name;
                _docEntry = docEntry;
            }

            public OleBlob.ContentType getType() {
                return OleBlob.ContentType.UNKNOWN;
            }

            public String getName() {
                return _name;
            }

            public CompoundContentImpl getParent() {
                return CompoundContentImpl.this;
            }

            public JackcessOleUtil.OleBlobImpl getBlob() {
                return getParent().getBlob();
            }

            public long length() {
                return _docEntry.getSize();
            }

            public InputStream getStream() throws IOException {
                return new DocumentInputStream(_docEntry);
            }

            public void writeTo(OutputStream out) throws IOException {
                InputStream in = null;
                try {
                    ByteUtil.copy(in = getStream(), out);
                } finally {
                    ByteUtil.closeQuietly(in);
                }
            }

            @Override
            public String toString() {
                return CustomToStringStyle.valueBuilder(this)
                        .append("name", _name)
                        .append("length", length())
                        .toString();
            }
        }
    }
}







© 2015 - 2024 Weber Informatics LLC | Privacy Policy