All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.healthmarketscience.jackcess.impl.OleUtil Maven / Gradle / Ivy

/*
Copyright (c) 2013 James Ahlborn

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA
*/

package com.healthmarketscience.jackcess.impl;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.sql.Blob;
import java.sql.SQLException;
import java.sql.SQLFeatureNotSupportedException;
import java.text.Normalizer;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;

import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.util.OleBlob;
import static com.healthmarketscience.jackcess.util.OleBlob.*;
import org.apache.commons.lang.builder.ToStringBuilder;

/**
 * Utility code for working with OLE data.
 *
 * @author James Ahlborn
 * @usage _advanced_class_
 */
public class OleUtil 
{
  /**
   * Hook which allows the poi library to remain an optional dependency:
   * support for compound ole data is only reached through this interface.
   */
  interface CompoundPackageFactory
  {
    /**
     * Creates the content wrapper for a blob whose data block holds compound
     * ole data.
     *
     * @param blob the blob which owns the content
     * @param prettyName pretty name read from the package header
     * @param className class name read from the package header
     * @param typeName type name read from the ole header
     * @param blobBb buffer over the blob bytes (parseContent passes it
     *               positioned at the start of the data block)
     * @param dataBlockLen length of the data block as read from the ole header
     */
    ContentImpl createCompoundPackageContent(
        OleBlobImpl blob, String prettyName, String className, String typeName,
        ByteBuffer blobBb, int dataBlockLen);
  }

  // value expected in the first two bytes of a "package" style ole value
  private static final int PACKAGE_SIGNATURE = 0x1C15;
  // charset used for the single byte strings within the ole structures
  private static final Charset OLE_CHARSET = Charset.forName("US-ASCII");
  // charset used for the trailing strings of an embedded package stream
  private static final Charset OLE_UTF_CHARSET = Charset.forName("UTF-16LE");
  // leading bytes of a data block which holds compound storage data
  private static final byte[] COMPOUND_STORAGE_SIGNATURE = 
    {(byte)0xd0,(byte)0xcf,(byte)0x11,(byte)0xe0,
     (byte)0xa1,(byte)0xb1,(byte)0x1a,(byte)0xe1};
  // type name (compared ignoring case) which identifies a simple package
  private static final String SIMPLE_PACKAGE_TYPE = "Package";
  // object type written into the outer package header
  private static final int PACKAGE_OBJECT_TYPE = 0x02;
  // version written into (and required when parsing) the ole header
  private static final int OLE_VERSION = 0x0501;
  // format written into the ole header
  private static final int OLE_FORMAT = 0x02;
  // value expected in the first two bytes of a package stream
  private static final int PACKAGE_STREAM_SIGNATURE = 0x02;
  // package stream type values for embedded and linked files
  private static final int PS_EMBEDDED_FILE = 0x030000;
  private static final int PS_LINKED_FILE = 0x010000;
  // the content types which createBlob is currently able to write
  private static final Set<ContentType> WRITEABLE_TYPES = EnumSet.of(
      ContentType.LINK, ContentType.SIMPLE_PACKAGE, ContentType.OTHER);
  // placeholder for "no header/footer bytes"
  private static final byte[] NO_DATA = new byte[0];
  // value written after the type of a linked file package stream
  private static final int LINK_HEADER = 0x01;
  // fixed bytes written at the very end of every created ole value
  private static final byte[] PACKAGE_FOOTER = {
    0x01, 0x05, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x01, (byte)0xAD, 0x05, (byte)0xFE
  };

  // regex pattern which matches all the crazy extra stuff in unicode
  // (combining marks and modifier letters/symbols left behind after
  // decomposing accented chars)
  private static final Pattern UNICODE_ACCENT_PATTERN = 
    Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");

  private static final CompoundPackageFactory COMPOUND_FACTORY;

  static {
    CompoundPackageFactory compoundFactory = null;
    try {
      compoundFactory = (CompoundPackageFactory)
        Class.forName("com.healthmarketscience.jackcess.impl.CompoundOleUtil")
        .newInstance();
    } catch(Throwable t) {
      // must not have poi, will load compound ole data as "other"
    }
    COMPOUND_FACTORY = compoundFactory;
  }

  /**
   * Wraps the raw bytes of an access database ole value in an OleBlob
   * instance.  The given array is used directly (it is not copied).
   *
   * @param bytes the complete bytes of the ole value
   * @return a blob backed by the given bytes
   */
  public static OleBlob parseBlob(byte[] bytes) {
    OleBlobImpl blob = new OleBlobImpl(bytes);
    return blob;
  }

  /**
   * Creates a new OleBlob instance using the given information.
   * <p>
   * The content is taken from the builder's bytes if present, otherwise
   * exactly {@code getContentLength()} bytes are read from the builder's
   * stream (any additional bytes in the stream are left unread).  The
   * builder's stream is always closed before this method returns.
   *
   * @param oleBuilder builder describing the type, names and content of the
   *                   value to create
   * @return a blob wrapping the newly written ole bytes
   * @throws IllegalArgumentException if the builder's type is not one of the
   *         writeable types, if content is declared but neither bytes nor a
   *         stream were given, or if the resulting value is too large for an
   *         ole column
   * @throws IOException if reading the content stream fails or the stream
   *         ends before the declared content length has been read
   */
  public static OleBlob createBlob(OleBlob.Builder oleBuilder)
    throws IOException
  {
    try {
      
      if(!WRITEABLE_TYPES.contains(oleBuilder.getType())) {
        throw new IllegalArgumentException(
            "Cannot currently create ole values of type " +
            oleBuilder.getType());
      }
      
      long contentLen = oleBuilder.getContentLength();
      byte[] contentBytes = oleBuilder.getBytes();
      InputStream contentStream = oleBuilder.getStream();
      byte[] packageStreamHeader = NO_DATA;
      byte[] packageStreamFooter = NO_DATA;

      switch(oleBuilder.getType()) {
      case LINK:
        packageStreamHeader = writePackageStreamHeader(oleBuilder);

        // link "content" is file path
        contentBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
        contentLen = contentBytes.length;
        break;
        
      case SIMPLE_PACKAGE:
        packageStreamHeader = writePackageStreamHeader(oleBuilder);
        packageStreamFooter = writePackageStreamFooter(oleBuilder);
        break;
        
      case OTHER:
        // nothing more to do
        break;
      default:
        throw new RuntimeException("unexpected type " + oleBuilder.getType());
      }

      long payloadLen = packageStreamHeader.length + packageStreamFooter.length +
        contentLen;
      byte[] packageHeader = writePackageHeader(oleBuilder, payloadLen);
            
      long totalOleLen = packageHeader.length + PACKAGE_FOOTER.length +
        payloadLen;
      if(totalOleLen > DataType.OLE.getMaxSize()) {
        throw new IllegalArgumentException("Content size of " + totalOleLen +
                                           " is too large for ole column");
      }
      
      byte[] oleBytes = new byte[(int)totalOleLen];
      ByteBuffer bb = PageChannel.wrap(oleBytes);
      bb.put(packageHeader);
      bb.put(packageStreamHeader);
      
      if(contentLen > 0L) {
        if(contentBytes != null) {
          bb.put(contentBytes);
        } else if(contentStream != null) {
          copyContent(contentStream, bb, contentLen);
        } else {
          throw new IllegalArgumentException(
              "No content bytes or stream provided for ole value");
        }
      }

      bb.put(packageStreamFooter);
      bb.put(PACKAGE_FOOTER);
    
      return parseBlob(oleBytes);
      
    } finally {
      ByteUtil.closeQuietly(oleBuilder.getStream());
    }
  }

  /**
   * Copies exactly {@code len} bytes from the given stream into the given
   * buffer, so that the space reserved for the content is always completely
   * (and never over) filled.
   *
   * @throws IOException if the stream ends before {@code len} bytes were read
   */
  private static void copyContent(InputStream in, ByteBuffer bb, long len)
    throws IOException
  {
    byte[] buf = new byte[8192];
    long remaining = len;
    while(remaining > 0L) {
      // never request more than what is still expected, otherwise a longer
      // than declared stream would overflow the buffer
      int numBytes = in.read(buf, 0, (int)Math.min(buf.length, remaining));
      if(numBytes < 0) {
        throw new IOException(
            "Content stream ended " + remaining +
            " bytes before the declared content length of " + len);
      }
      bb.put(buf, 0, numBytes);
      remaining -= numBytes;
    }
  }

  /**
   * Builds the leading bytes of an ole value: the outer package header
   * (signature, sizes, offsets, pretty name and class name) immediately
   * followed by the ole header (version, format, type name and content
   * length).
   *
   * @param oleBuilder source of the pretty, class and type names
   * @param contentLen length written as the last field of the ole header
   * @return the combined header bytes
   */
  private static byte[] writePackageHeader(OleBlob.Builder oleBuilder,
                                           long contentLen) {

    byte[] prettyBytes = getZeroTermStrBytes(oleBuilder.getPrettyName());

    // class name and type name stand in for each other when one is missing
    String clsName = oleBuilder.getClassName();
    String tName = oleBuilder.getTypeName();
    if(clsName == null) {
      clsName = tName;
    } else if(tName == null) {
      tName = clsName;
    }
    byte[] clsBytes = getZeroTermStrBytes(clsName);
    byte[] typeBytes = getZeroTermStrBytes(tName);

    // 20 fixed bytes in the package header, 24 fixed bytes in the ole header
    int pkgHeaderLen = 20 + prettyBytes.length + clsBytes.length;
    int oleHeaderLen = 24 + typeBytes.length;

    byte[] result = new byte[pkgHeaderLen + oleHeaderLen];
    ByteBuffer out = PageChannel.wrap(result);

    // outer package header: fixed fields
    out.putShort((short)PACKAGE_SIGNATURE);
    out.putShort((short)pkgHeaderLen);
    out.putInt(PACKAGE_OBJECT_TYPE);
    out.putShort((short)prettyBytes.length);
    out.putShort((short)clsBytes.length);

    // the names start after the 8 remaining fixed bytes (two offsets and
    // the trailing int)
    int prettyOff = out.position() + 8;
    int clsOff = prettyOff + prettyBytes.length;
    out.putShort((short)prettyOff);
    out.putShort((short)clsOff);
    out.putInt(-1);

    // outer package header: names
    out.put(prettyBytes);
    out.put(clsBytes);

    // ole header
    out.putInt(OLE_VERSION);
    out.putInt(OLE_FORMAT);
    out.putInt(typeBytes.length);
    out.put(typeBytes);
    out.putLong(0L);
    out.putInt((int)contentLen);

    return result;
  }

  /**
   * Builds the bytes which precede the content of a package stream: the
   * stream signature, the zero terminated file name and file path, the
   * stream type and then the type specific fields.  For a simple package
   * those are the file path (again, length prefixed) and the content length;
   * for anything else the link header value.
   */
  private static byte[] writePackageStreamHeader(OleBlob.Builder oleBuilder) {

    byte[] nameBytes = getZeroTermStrBytes(oleBuilder.getFileName());
    byte[] pathBytes = getZeroTermStrBytes(oleBuilder.getFilePath());

    boolean embedded = (oleBuilder.getType() == ContentType.SIMPLE_PACKAGE);

    // 6 fixed bytes (signature + type), plus the type specific portion
    int typeSpecificLen = (embedded ? (8 + pathBytes.length) : 2);
    int totalLen = 6 + nameBytes.length + pathBytes.length + typeSpecificLen;

    byte[] result = new byte[totalLen];
    ByteBuffer out = PageChannel.wrap(result);
    out.putShort((short)PACKAGE_STREAM_SIGNATURE);
    out.put(nameBytes);
    out.put(pathBytes);

    if(!embedded) {
      out.putInt(PS_LINKED_FILE);
      out.putShort((short)LINK_HEADER);
      return result;
    }

    out.putInt(PS_EMBEDDED_FILE);
    out.putInt(pathBytes.length);
    out.put(pathBytes);
    out.putInt((int) oleBuilder.getContentLength());
    return result;
  }

  /**
   * Builds the bytes which follow the content of a simple package stream:
   * the file path, the file name and the file path again, each encoded with
   * the utf charset and prefixed by its length in chars.
   */
  private static byte[] writePackageStreamFooter(OleBlob.Builder oleBuilder) {

    // note, these are _not_ zero terminated
    byte[] nameBytes = oleBuilder.getFileName().getBytes(OLE_UTF_CHARSET);
    byte[] pathBytes = oleBuilder.getFilePath().getBytes(OLE_UTF_CHARSET);

    // the strings in the order in which they are written
    byte[][] parts = {pathBytes, nameBytes, pathBytes};

    // each string costs a 4 byte length prefix plus its bytes
    int totalLen = 0;
    for(byte[] part : parts) {
      totalLen += 4 + part.length;
    }

    byte[] result = new byte[totalLen];
    ByteBuffer out = PageChannel.wrap(result);
    for(byte[] part : parts) {
      // the prefix counts 2 byte chars, not bytes
      out.putInt(part.length/2);
      out.put(part);
    }

    return result;
  }
  
  /**
   * Examines the bytes of the given blob and builds the matching
   * ContentImpl: unknown content when the package signature or ole version
   * is not the expected one, otherwise simple package, compound (only when
   * the compound factory is available) or "other" content.
   *
   * @throws IOException if the bytes of the blob cannot be retrieved
   */
  private static ContentImpl parseContent(OleBlobImpl blob) 
    throws IOException 
  {
    ByteBuffer bb = PageChannel.wrap(blob.getBytes());

    boolean hasPackageSig = 
      (bb.remaining() >= 2) && (bb.getShort() == PACKAGE_SIGNATURE);
    if(!hasPackageSig) {
      return new UnknownContentImpl(blob);
    }

    // outer package header (object type and object size are read only to
    // step over them)
    int headerSize = bb.getShort();
    bb.getInt(); // object type
    int prettyNameLen = bb.getShort();
    int classNameLen = bb.getShort();
    int prettyNameOff = bb.getShort();
    int classNameOff = bb.getShort();
    bb.getInt(); // object size
    String prettyName = readStr(bb, prettyNameOff, prettyNameLen);
    String className = readStr(bb, classNameOff, classNameLen);

    // the ole header starts right after the package header
    bb.position(headerSize);
    int oleVer = bb.getInt();
    bb.getInt(); // format
    if(oleVer != OLE_VERSION) {
      return new UnknownContentImpl(blob);
    }

    int typeNameLen = bb.getInt();
    String typeName = readStr(bb, bb.position(), typeNameLen);
    bb.getLong(); // unused
    int dataBlockLen = bb.getInt();
    int dataBlockPos = bb.position();

    if(SIMPLE_PACKAGE_TYPE.equalsIgnoreCase(typeName)) {
      return createSimplePackageContent(
          blob, prettyName, className, typeName, bb, dataBlockLen);
    }

    // if COMPOUND_FACTORY is null, the poi library isn't available, so just
    // load compound data as "other"
    boolean isCompound = 
      (COMPOUND_FACTORY != null) &&
      (bb.remaining() >= COMPOUND_STORAGE_SIGNATURE.length) &&
      ByteUtil.matchesRange(bb, bb.position(), COMPOUND_STORAGE_SIGNATURE);
    if(isCompound) {
      return COMPOUND_FACTORY.createCompoundPackageContent(
          blob, prettyName, className, typeName, bb, dataBlockLen);
    }

    // this is either some other "special" (as yet unhandled) format, or it is
    // simply an embedded file (or it is compound data and poi isn't available)
    return new OtherContentImpl(blob, prettyName, className,
                                typeName, dataBlockPos, dataBlockLen);
  }

  /**
   * Parses the data block of a "Package" typed ole value.  The block is
   * expected to start at the current position of the given buffer.  Yields
   * link content or simple package content depending on the package stream
   * type, and falls back to "other" content when the stream signature or
   * type is not recognized.
   */
  private static ContentImpl createSimplePackageContent(
      OleBlobImpl blob, String prettyName, String className, String typeName,
      ByteBuffer blobBb, int dataBlockLen) {

    final int blockStart = blobBb.position();
    ByteBuffer pkg = PageChannel.narrowBuffer(
        blobBb, blockStart, blockStart + dataBlockLen);

    if(pkg.getShort() != PACKAGE_STREAM_SIGNATURE) {
      return new OtherContentImpl(blob, prettyName, className,
                                  typeName, blockStart, dataBlockLen);
    }

    String fileName = readZeroTermStr(pkg);
    String filePath = readZeroTermStr(pkg);
    int streamType = pkg.getInt();

    if(streamType == PS_LINKED_FILE) {

      pkg.getShort(); //unknown
      String linkStr = readZeroTermStr(pkg);

      return new LinkContentImpl(blob, prettyName, className, typeName, 
                                 fileName, linkStr, filePath);
    }

    if(streamType != PS_EMBEDDED_FILE) {
      return new OtherContentImpl(blob, prettyName, className,
                                  typeName, blockStart, dataBlockLen);
    }

    // embedded file: local file path, then the length prefixed file data
    int localPathLen = pkg.getInt();
    String localFilePath = readStr(pkg, pkg.position(), localPathLen);
    int dataLen = pkg.getInt();
    int dataPos = pkg.position();
    pkg.position(dataLen + dataPos);

    // remaining strings are in "reverse" order (local file path, file name,
    // file path).  these strings use a real utf charset, and therefore can
    // "fix" problems with ascii based names (so we prefer these strings to
    // the original strings we found)
    for(int strNum = 0; pkg.remaining() >= 4; ++strNum) {

      int strLen = pkg.getInt();
      String utfStr = readStr(pkg, pkg.position(), strLen * 2, 
                              OLE_UTF_CHARSET);

      if(strNum == 0) {
        localFilePath = utfStr;
      } else if(strNum == 1) {
        fileName = utfStr;
      } else if(strNum == 2) {
        filePath = utfStr;
      }
      // any further strings are ignored
    }

    return new SimplePackageContentImpl(
        blob, prettyName, className, typeName, dataPos, dataLen,
        fileName, filePath, localFilePath);
  }

  /**
   * Reads a string using the default (ascii) ole charset; see the charset
   * taking overload for details.
   */
  private static String readStr(ByteBuffer bb, int off, int len) {
    final Charset defaultCharset = OLE_CHARSET;
    return readStr(bb, off, len, defaultCharset);
  }

  /**
   * Reads the string which starts at the current position of the buffer and
   * runs up to (and including) the next zero byte, or to the end of the
   * buffer if there is no zero byte.
   */
  private static String readZeroTermStr(ByteBuffer bb) {
    final int start = bb.position();
    // step over every byte up to and including the terminator
    while(bb.hasRemaining() && (bb.get() != 0)) {
      // nothing to do, just scanning
    }
    int scannedLen = bb.position() - start;
    return readStr(bb, start, scannedLen);
  }

  /**
   * Decodes {@code len} bytes of the buffer's backing array, starting at
   * index {@code off}, using the given charset and leaves the buffer
   * positioned just past those bytes.  A single trailing NUL char, if
   * present, is removed from the result.
   *
   * @param bb array backed buffer to read from
   * @param off index into the backing array of the first byte to decode
   * @param len number of bytes to decode (zero yields an empty string)
   * @param charset charset used to decode the bytes
   * @return the decoded string without its trailing NUL char
   */
  private static String readStr(ByteBuffer bb, int off, int len, 
                                Charset charset) {
    String str = new String(bb.array(), off, len, charset);
    bb.position(off + len);
    // an empty string has no last char to inspect (charAt would throw)
    if(!str.isEmpty() && (str.charAt(str.length() - 1) == '\0')) {
      str = str.substring(0, str.length() - 1);
    }
    return str;
  }

  /**
   * Converts the given string to zero terminated bytes in the (ascii) ole
   * charset.
   */
  private static byte[] getZeroTermStrBytes(String str) {
    // since we are converting to ascii, try to make "nicer" versions of crazy
    // chars (e.g. convert "u with an umlaut" to just "u").  this may not
    // ultimately help anything but it is what ms access does.

    // step 1: split complex chars into base char + accent
    String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    // step 2: throw away the accents
    String stripped = 
      UNICODE_ACCENT_PATTERN.matcher(decomposed).replaceAll("");
    // step 3: (re)compose whatever is left
    String recomposed = Normalizer.normalize(stripped, Normalizer.Form.NFC);

    return recomposed.concat("\0").getBytes(OLE_CHARSET);
  }


  /**
   * OleBlob implementation backed by an in-memory byte array.  The content
   * is parsed lazily on the first call to {@link #getContent}.  Once the
   * blob has been closed/freed the bytes are released: the io style methods
   * then fail with an IOException and the jdbc Blob methods with an
   * SQLException.  All modifying jdbc Blob methods are unsupported.
   */
  static final class OleBlobImpl implements OleBlob
  {
    // the complete ole value, null once the blob has been closed
    private byte[] _bytes;
    // the lazily parsed content, null until first requested (and after close)
    private ContentImpl _content;

    private OleBlobImpl(byte[] bytes) {
      _bytes = bytes;
    }

    /**
     * Writes the complete ole value to the given stream.
     * @throws IOException if the blob is closed or the write fails
     */
    public void writeTo(OutputStream out) throws IOException {
      out.write(getBytes());
    }

    /**
     * Returns the parsed content of this blob, parsing it on first use.
     */
    public Content getContent() throws IOException {
      if(_content == null) {
        _content = parseContent(this);
      }
      return _content;
    }

    public InputStream getBinaryStream() throws SQLException {
      return new ByteArrayInputStream(getOpenBytes());
    }

    /**
     * @param pos 1-based (jdbc style) offset of the first byte to stream
     * @param len number of bytes to stream
     */
    public InputStream getBinaryStream(long pos, long len) 
      throws SQLException 
    {
      return new ByteArrayInputStream(
          getOpenBytes(), fromJdbcOffset(pos), (int)len);
    }

    public long length() throws SQLException {
      return getOpenBytes().length;
    }

    /**
     * Returns the backing array itself (not a copy).
     * @throws IOException if the blob is closed
     */
    public byte[] getBytes() throws IOException {
      if(_bytes == null) {
        throw new IOException("blob is closed");
      }
      return _bytes;
    }

    /**
     * @param pos 1-based (jdbc style) offset of the first byte to copy
     * @param len number of bytes to copy
     */
    public byte[] getBytes(long pos, int len) throws SQLException {
      return ByteUtil.copyOf(getOpenBytes(), fromJdbcOffset(pos), len);
    }

    /**
     * @param start 1-based (jdbc style) offset at which to start searching
     * @return the 1-based offset of the match, or the (negative) result of
     *         the search if there is no match
     */
    public long position(byte[] pattern, long start) throws SQLException {
      int pos = ByteUtil.findRange(PageChannel.wrap(getOpenBytes()), 
                                   fromJdbcOffset(start), pattern);
      return((pos >= 0) ? toJdbcOffset(pos) : pos);
    }
    
    public long position(Blob pattern, long start) throws SQLException {
      return position(pattern.getBytes(1L, (int)pattern.length()), start);
    }

    public OutputStream setBinaryStream(long position) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }
    
    public void truncate(long len) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }
    
    public int setBytes(long pos, byte[] bytes) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }
    
    public int setBytes(long pos, byte[] bytes, int offset, int len)
      throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }
    
    public void free() {
      close();
    }

    /**
     * Releases the bytes and closes any parsed content.  May be called more
     * than once.
     */
    public void close() {
      _bytes = null;
      ByteUtil.closeQuietly(_content);
      _content = null;
    }

    /**
     * Returns the backing bytes for use by the jdbc Blob methods, which are
     * required to fail with an SQLException once the blob has been freed.
     */
    private byte[] getOpenBytes() throws SQLException {
      if(_bytes == null) {
        throw new SQLException("blob is closed");
      }
      return _bytes;
    }

    // converts a 0-based array offset to a 1-based jdbc offset
    private static int toJdbcOffset(int off) {
      return off + 1;
    } 

    // converts a 1-based jdbc offset to a 0-based array offset
    private static int fromJdbcOffset(long off) {
      return (int)off - 1;
    } 

    @Override
    public String toString() {
      ToStringBuilder sb = CustomToStringStyle.builder(this);
      if(_content != null) {
        sb.append("content", _content);
      } else {
        sb.append("bytes", _bytes);
        sb.append("content", "(uninitialized)");
      }
      return sb.toString();
    }
  }

  /**
   * Base class of all content implementations; keeps a reference to the
   * blob which owns the content.
   */
  static abstract class ContentImpl implements Content, Closeable
  {
    protected final OleBlobImpl _blob;

    protected ContentImpl(OleBlobImpl blob) {
      this._blob = blob;
    }

    public OleBlobImpl getBlob() {
      return this._blob;
    }

    /**
     * Convenience accessor for the bytes of the owning blob.
     */
    protected byte[] getBytes() throws IOException {
      OleBlobImpl owner = getBlob();
      return owner.getBytes();
    }
    
    public void close() {
      // nothing to release at this level
    }

    /**
     * Adds the fields of this class to the given builder; subclasses extend
     * this to add their own fields.
     */
    protected ToStringBuilder toString(ToStringBuilder sb) {
      return sb.append("type", getType());
    } 
  }

  /**
   * Base class for content whose data is a sub-range (position and length)
   * of the bytes of the owning blob.
   */
  static abstract class EmbeddedContentImpl extends ContentImpl
    implements EmbeddedContent
  {
    // start of the embedded data within the blob bytes (subclasses without
    // embedded data pass a negative value)
    private final int _position;
    // number of bytes of embedded data
    private final int _length;

    protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length) 
    {
      super(blob);
      _position = position;
      _length = length;
    }

    public long length() {
      return _length;
    }

    /**
     * Returns a stream over the embedded data.
     * @throws IOException if the bytes of the owning blob are unavailable
     */
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(getBytes(), _position, _length);
    }

    /**
     * Writes the embedded data to the given stream.
     * @throws IOException if the bytes of the owning blob are unavailable or
     *         the write fails
     */
    public void writeTo(OutputStream out) throws IOException {
      out.write(getBytes(), _position, _length);
    }

    @Override
    protected ToStringBuilder toString(ToStringBuilder sb) {
      super.toString(sb);
      // the blob releases (nulls) its bytes when closed; toString must not
      // blow up in that case, so simply leave the content out
      byte[] bytes = _blob._bytes;
      if((_position >= 0) && (bytes != null)) {
        sb.append("content", ByteBuffer.wrap(bytes, _position, _length));
      }
      return sb;
    } 
  }

  /**
   * Base class for embedded content which additionally carries the pretty
   * name, class name and type name of a package.
   */
  static abstract class EmbeddedPackageContentImpl 
    extends EmbeddedContentImpl
    implements PackageContent
  {
    private final String _prettyName;
    private final String _className;
    private final String _typeName;

    protected EmbeddedPackageContentImpl(
        OleBlobImpl blob, String prettyName, String className,
        String typeName, int position, int length)
    {
      super(blob, position, length);
      this._prettyName = prettyName;
      this._className = className;
      this._typeName = typeName;
    }

    public String getPrettyName() {
      return this._prettyName;
    }

    public String getClassName() {
      return this._className;
    }

    public String getTypeName() {
      return this._typeName;
    }

    @Override
    protected ToStringBuilder toString(ToStringBuilder sb) {
      // the names go first, followed by whatever the parent classes add
      sb.append("prettyName", _prettyName);
      sb.append("className", _className);
      sb.append("typeName", _typeName);
      super.toString(sb);
      return sb;
    } 
  }

  /**
   * Content which refers to a file by path.  It has no embedded data, so it
   * hands -1 to the parent class for both position and length.
   */
  private static final class LinkContentImpl 
    extends EmbeddedPackageContentImpl
    implements LinkContent
  {
    private final String _fileName;
    private final String _linkPath;
    private final String _filePath;

    private LinkContentImpl(OleBlobImpl blob, String prettyName,
                            String className, String typeName,
                            String fileName, String linkPath, 
                            String filePath) 
    {
      super(blob, prettyName, className, typeName, -1, -1);
      this._fileName = fileName;
      this._linkPath = linkPath;
      this._filePath = filePath;
    }

    public ContentType getType() {
      return ContentType.LINK;
    }

    public String getFileName() {
      return this._fileName;
    }

    public String getLinkPath() {
      return this._linkPath;
    }

    public String getFilePath() {
      return this._filePath;
    }

    /**
     * Opens the file found at the link path.
     * @throws IOException if the file cannot be opened
     */
    public InputStream getLinkStream() throws IOException {
      String path = getLinkPath();
      return new FileInputStream(path);
    }

    @Override
    public String toString() {
      ToStringBuilder sb = toString(CustomToStringStyle.builder(this));
      sb.append("fileName", _fileName);
      sb.append("linkPath", _linkPath);
      sb.append("filePath", _filePath);
      return sb.toString();
    }
  }

  /**
   * Content for a file embedded in a simple package: the embedded data
   * range plus the file name, file path and local file path stored with it.
   */
  private static final class SimplePackageContentImpl 
    extends EmbeddedPackageContentImpl
    implements SimplePackageContent
  {
    private final String _fileName;
    private final String _filePath;
    private final String _localFilePath;

    private SimplePackageContentImpl(OleBlobImpl blob, String prettyName,
                                     String className, String typeName,
                                     int position, int length,
                                     String fileName, String filePath,
                                     String localFilePath) 
    {
      super(blob, prettyName, className, typeName, position, length);
      this._fileName = fileName;
      this._filePath = filePath;
      this._localFilePath = localFilePath;
    }

    public ContentType getType() {
      return ContentType.SIMPLE_PACKAGE;
    }

    public String getFileName() {
      return this._fileName;
    }

    public String getFilePath() {
      return this._filePath;
    }

    public String getLocalFilePath() {
      return this._localFilePath;
    }

    @Override
    public String toString() {
      ToStringBuilder sb = toString(CustomToStringStyle.builder(this));
      sb.append("fileName", _fileName);
      sb.append("filePath", _filePath);
      sb.append("localFilePath", _localFilePath);
      return sb.toString();
    }
  }

  /**
   * Content for package data which is not handled more specifically; only
   * exposes the package names and the raw embedded data range.
   */
  private static final class OtherContentImpl 
    extends EmbeddedPackageContentImpl
    implements OtherContent
  {
    private OtherContentImpl(
        OleBlobImpl blob, String prettyName, String className,
        String typeName, int position, int length) 
    {
      super(blob, prettyName, className, typeName, position, length);
    }

    public ContentType getType() {
      return ContentType.OTHER;
    }

    @Override
    public String toString() {
      ToStringBuilder sb = toString(CustomToStringStyle.builder(this));
      return sb.toString();
    }
  }

  /**
   * Content for blobs whose structure was not recognized; carries nothing
   * beyond the reference to the owning blob.
   */
  private static final class UnknownContentImpl extends ContentImpl
  {
    private UnknownContentImpl(OleBlobImpl blob) {
      super(blob);
    }

    public ContentType getType() {
      return ContentType.UNKNOWN;
    }

    @Override
    public String toString() {
      ToStringBuilder sb = toString(CustomToStringStyle.builder(this));
      // no parsed structure to show, so dump the raw bytes of the blob
      sb.append("content", _blob._bytes);
      return sb.toString();
    }
  }
  
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy