com.healthmarketscience.jackcess.impl.OleUtil Maven / Gradle / Ivy
/*
Copyright (c) 2013 James Ahlborn
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
*/
package com.healthmarketscience.jackcess.impl;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.sql.Blob;
import java.sql.SQLException;
import java.sql.SQLFeatureNotSupportedException;
import java.text.Normalizer;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;
import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.util.OleBlob;
import static com.healthmarketscience.jackcess.util.OleBlob.*;
import org.apache.commons.lang.builder.ToStringBuilder;
/**
* Utility code for working with OLE data.
*
* @author James Ahlborn
* @usage _advanced_class_
*/
public class OleUtil
{
/**
* Interface used to allow optional inclusion of the poi library for working
* with compound ole data.
*/
interface CompoundPackageFactory
{
public ContentImpl createCompoundPackageContent(
OleBlobImpl blob, String prettyName, String className, String typeName,
ByteBuffer blobBb, int dataBlockLen);
}
private static final int PACKAGE_SIGNATURE = 0x1C15;
private static final Charset OLE_CHARSET = Charset.forName("US-ASCII");
private static final Charset OLE_UTF_CHARSET = Charset.forName("UTF-16LE");
private static final byte[] COMPOUND_STORAGE_SIGNATURE =
{(byte)0xd0,(byte)0xcf,(byte)0x11,(byte)0xe0,
(byte)0xa1,(byte)0xb1,(byte)0x1a,(byte)0xe1};
private static final String SIMPLE_PACKAGE_TYPE = "Package";
private static final int PACKAGE_OBJECT_TYPE = 0x02;
private static final int OLE_VERSION = 0x0501;
private static final int OLE_FORMAT = 0x02;
private static final int PACKAGE_STREAM_SIGNATURE = 0x02;
private static final int PS_EMBEDDED_FILE = 0x030000;
private static final int PS_LINKED_FILE = 0x010000;
private static final Set WRITEABLE_TYPES = EnumSet.of(
ContentType.LINK, ContentType.SIMPLE_PACKAGE, ContentType.OTHER);
private static final byte[] NO_DATA = new byte[0];
private static final int LINK_HEADER = 0x01;
private static final byte[] PACKAGE_FOOTER = {
0x01, 0x05, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, (byte)0xAD, 0x05, (byte)0xFE
};
// regex pattern which matches all the crazy extra stuff in unicode
private static final Pattern UNICODE_ACCENT_PATTERN =
Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
private static final CompoundPackageFactory COMPOUND_FACTORY;
static {
CompoundPackageFactory compoundFactory = null;
try {
compoundFactory = (CompoundPackageFactory)
Class.forName("com.healthmarketscience.jackcess.impl.CompoundOleUtil")
.newInstance();
} catch(Throwable t) {
// must not have poi, will load compound ole data as "other"
}
COMPOUND_FACTORY = compoundFactory;
}
/**
* Parses an access database blob structure and returns an appropriate
* OleBlob instance.
*/
public static OleBlob parseBlob(byte[] bytes) {
return new OleBlobImpl(bytes);
}
/**
* Creates a new OlBlob instance using the given information.
*/
public static OleBlob createBlob(OleBlob.Builder oleBuilder)
throws IOException
{
try {
if(!WRITEABLE_TYPES.contains(oleBuilder.getType())) {
throw new IllegalArgumentException(
"Cannot currently create ole values of type " +
oleBuilder.getType());
}
long contentLen = oleBuilder.getContentLength();
byte[] contentBytes = oleBuilder.getBytes();
InputStream contentStream = oleBuilder.getStream();
byte[] packageStreamHeader = NO_DATA;
byte[] packageStreamFooter = NO_DATA;
switch(oleBuilder.getType()) {
case LINK:
packageStreamHeader = writePackageStreamHeader(oleBuilder);
// link "content" is file path
contentBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
contentLen = contentBytes.length;
break;
case SIMPLE_PACKAGE:
packageStreamHeader = writePackageStreamHeader(oleBuilder);
packageStreamFooter = writePackageStreamFooter(oleBuilder);
break;
case OTHER:
// nothing more to do
break;
default:
throw new RuntimeException("unexpected type " + oleBuilder.getType());
}
long payloadLen = packageStreamHeader.length + packageStreamFooter.length +
contentLen;
byte[] packageHeader = writePackageHeader(oleBuilder, payloadLen);
long totalOleLen = packageHeader.length + PACKAGE_FOOTER.length +
payloadLen;
if(totalOleLen > DataType.OLE.getMaxSize()) {
throw new IllegalArgumentException("Content size of " + totalOleLen +
" is too large for ole column");
}
byte[] oleBytes = new byte[(int)totalOleLen];
ByteBuffer bb = PageChannel.wrap(oleBytes);
bb.put(packageHeader);
bb.put(packageStreamHeader);
if(contentLen > 0L) {
if(contentBytes != null) {
bb.put(contentBytes);
} else {
byte[] buf = new byte[8192];
int numBytes = 0;
while((numBytes = contentStream.read(buf)) >= 0) {
bb.put(buf, 0, numBytes);
}
}
}
bb.put(packageStreamFooter);
bb.put(PACKAGE_FOOTER);
return parseBlob(oleBytes);
} finally {
ByteUtil.closeQuietly(oleBuilder.getStream());
}
}
private static byte[] writePackageHeader(OleBlob.Builder oleBuilder,
long contentLen) {
byte[] prettyNameBytes = getZeroTermStrBytes(oleBuilder.getPrettyName());
String className = oleBuilder.getClassName();
String typeName = oleBuilder.getTypeName();
if(className == null) {
className = typeName;
} else if(typeName == null) {
typeName = className;
}
byte[] classNameBytes = getZeroTermStrBytes(className);
byte[] typeNameBytes = getZeroTermStrBytes(typeName);
int packageHeaderLen = 20 + prettyNameBytes.length + classNameBytes.length;
int oleHeaderLen = 24 + typeNameBytes.length;
byte[] headerBytes = new byte[packageHeaderLen + oleHeaderLen];
ByteBuffer bb = PageChannel.wrap(headerBytes);
// write outer package header
bb.putShort((short)PACKAGE_SIGNATURE);
bb.putShort((short)packageHeaderLen);
bb.putInt(PACKAGE_OBJECT_TYPE);
bb.putShort((short)prettyNameBytes.length);
bb.putShort((short)classNameBytes.length);
int prettyNameOff = bb.position() + 8;
bb.putShort((short)prettyNameOff);
bb.putShort((short)(prettyNameOff + prettyNameBytes.length));
bb.putInt(-1);
bb.put(prettyNameBytes);
bb.put(classNameBytes);
// put ole header
bb.putInt(OLE_VERSION);
bb.putInt(OLE_FORMAT);
bb.putInt(typeNameBytes.length);
bb.put(typeNameBytes);
bb.putLong(0L);
bb.putInt((int)contentLen);
return headerBytes;
}
private static byte[] writePackageStreamHeader(OleBlob.Builder oleBuilder) {
byte[] fileNameBytes = getZeroTermStrBytes(oleBuilder.getFileName());
byte[] filePathBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
int headerLen = 6 + fileNameBytes.length + filePathBytes.length;
if(oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
headerLen += 8 + filePathBytes.length;
} else {
headerLen += 2;
}
byte[] headerBytes = new byte[headerLen];
ByteBuffer bb = PageChannel.wrap(headerBytes);
bb.putShort((short)PACKAGE_STREAM_SIGNATURE);
bb.put(fileNameBytes);
bb.put(filePathBytes);
if(oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
bb.putInt(PS_EMBEDDED_FILE);
bb.putInt(filePathBytes.length);
bb.put(filePathBytes, 0, filePathBytes.length);
bb.putInt((int) oleBuilder.getContentLength());
} else {
bb.putInt(PS_LINKED_FILE);
bb.putShort((short)LINK_HEADER);
}
return headerBytes;
}
private static byte[] writePackageStreamFooter(OleBlob.Builder oleBuilder) {
// note, these are _not_ zero terminated
byte[] fileNameBytes = oleBuilder.getFileName().getBytes(OLE_UTF_CHARSET);
byte[] filePathBytes = oleBuilder.getFilePath().getBytes(OLE_UTF_CHARSET);
int footerLen = 12 + (filePathBytes.length * 2) + fileNameBytes.length;
byte[] footerBytes = new byte[footerLen];
ByteBuffer bb = PageChannel.wrap(footerBytes);
bb.putInt(filePathBytes.length/2);
bb.put(filePathBytes);
bb.putInt(fileNameBytes.length/2);
bb.put(fileNameBytes);
bb.putInt(filePathBytes.length/2);
bb.put(filePathBytes);
return footerBytes;
}
/**
* creates the appropriate ContentImpl for the given blob.
*/
private static ContentImpl parseContent(OleBlobImpl blob)
throws IOException
{
ByteBuffer bb = PageChannel.wrap(blob.getBytes());
if((bb.remaining() < 2) || (bb.getShort() != PACKAGE_SIGNATURE)) {
return new UnknownContentImpl(blob);
}
// read outer package header
int headerSize = bb.getShort();
int objType = bb.getInt();
int prettyNameLen = bb.getShort();
int classNameLen = bb.getShort();
int prettyNameOff = bb.getShort();
int classNameOff = bb.getShort();
int objSize = bb.getInt();
String prettyName = readStr(bb, prettyNameOff, prettyNameLen);
String className = readStr(bb, classNameOff, classNameLen);
bb.position(headerSize);
// read ole header
int oleVer = bb.getInt();
int format = bb.getInt();
if(oleVer != OLE_VERSION) {
return new UnknownContentImpl(blob);
}
int typeNameLen = bb.getInt();
String typeName = readStr(bb, bb.position(), typeNameLen);
bb.getLong(); // unused
int dataBlockLen = bb.getInt();
int dataBlockPos = bb.position();
if(SIMPLE_PACKAGE_TYPE.equalsIgnoreCase(typeName)) {
return createSimplePackageContent(
blob, prettyName, className, typeName, bb, dataBlockLen);
}
// if COMPOUND_FACTORY is null, the poi library isn't available, so just
// load compound data as "other"
if((COMPOUND_FACTORY != null) &&
(bb.remaining() >= COMPOUND_STORAGE_SIGNATURE.length) &&
ByteUtil.matchesRange(bb, bb.position(), COMPOUND_STORAGE_SIGNATURE)) {
return COMPOUND_FACTORY.createCompoundPackageContent(
blob, prettyName, className, typeName, bb, dataBlockLen);
}
// this is either some other "special" (as yet unhandled) format, or it is
// simply an embedded file (or it is compound data and poi isn't available)
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
private static ContentImpl createSimplePackageContent(
OleBlobImpl blob, String prettyName, String className, String typeName,
ByteBuffer blobBb, int dataBlockLen) {
int dataBlockPos = blobBb.position();
ByteBuffer bb = PageChannel.narrowBuffer(blobBb, dataBlockPos,
dataBlockPos + dataBlockLen);
int packageSig = bb.getShort();
if(packageSig != PACKAGE_STREAM_SIGNATURE) {
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
String fileName = readZeroTermStr(bb);
String filePath = readZeroTermStr(bb);
int packageType = bb.getInt();
if(packageType == PS_EMBEDDED_FILE) {
int localFilePathLen = bb.getInt();
String localFilePath = readStr(bb, bb.position(), localFilePathLen);
int dataLen = bb.getInt();
int dataPos = bb.position();
bb.position(dataLen + dataPos);
// remaining strings are in "reverse" order (local file path, file name,
// file path). these string usee a real utf charset, and therefore can
// "fix" problems with ascii based names (so we prefer these strings to
// the original strings we found)
int strNum = 0;
while(true) {
int rem = bb.remaining();
if(rem < 4) {
break;
}
int strLen = bb.getInt();
String remStr = readStr(bb, bb.position(), strLen * 2, OLE_UTF_CHARSET);
switch(strNum) {
case 0:
localFilePath = remStr;
break;
case 1:
fileName = remStr;
break;
case 2:
filePath = remStr;
break;
default:
// ignore
}
++strNum;
}
return new SimplePackageContentImpl(
blob, prettyName, className, typeName, dataPos, dataLen,
fileName, filePath, localFilePath);
}
if(packageType == PS_LINKED_FILE) {
bb.getShort(); //unknown
String linkStr = readZeroTermStr(bb);
return new LinkContentImpl(blob, prettyName, className, typeName,
fileName, linkStr, filePath);
}
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
private static String readStr(ByteBuffer bb, int off, int len) {
return readStr(bb, off, len, OLE_CHARSET);
}
private static String readZeroTermStr(ByteBuffer bb) {
int off = bb.position();
while(bb.hasRemaining()) {
byte b = bb.get();
if(b == 0) {
break;
}
}
int len = bb.position() - off;
return readStr(bb, off, len);
}
private static String readStr(ByteBuffer bb, int off, int len,
Charset charset) {
String str = new String(bb.array(), off, len, charset);
bb.position(off + len);
if(str.charAt(str.length() - 1) == '\0') {
str = str.substring(0, str.length() - 1);
}
return str;
}
private static byte[] getZeroTermStrBytes(String str) {
// since we are converting to ascii, try to make "nicer" versions of crazy
// chars (e.g. convert "u with an umlaut" to just "u"). this may not
// ultimately help anything but it is what ms access does.
// decompose complex chars into combos of char and accent
str = Normalizer.normalize(str, Normalizer.Form.NFD);
// strip the accents
str = UNICODE_ACCENT_PATTERN.matcher(str).replaceAll("");
// (re)normalize what is left
str = Normalizer.normalize(str, Normalizer.Form.NFC);
return (str + '\0').getBytes(OLE_CHARSET);
}
static final class OleBlobImpl implements OleBlob
{
private byte[] _bytes;
private ContentImpl _content;
private OleBlobImpl(byte[] bytes) {
_bytes = bytes;
}
public void writeTo(OutputStream out) throws IOException {
out.write(_bytes);
}
public Content getContent() throws IOException {
if(_content == null) {
_content = parseContent(this);
}
return _content;
}
public InputStream getBinaryStream() throws SQLException {
return new ByteArrayInputStream(_bytes);
}
public InputStream getBinaryStream(long pos, long len)
throws SQLException
{
return new ByteArrayInputStream(_bytes, fromJdbcOffset(pos), (int)len);
}
public long length() throws SQLException {
return _bytes.length;
}
public byte[] getBytes() throws IOException {
if(_bytes == null) {
throw new IOException("blob is closed");
}
return _bytes;
}
public byte[] getBytes(long pos, int len) throws SQLException {
return ByteUtil.copyOf(_bytes, fromJdbcOffset(pos), len);
}
public long position(byte[] pattern, long start) throws SQLException {
int pos = ByteUtil.findRange(PageChannel.wrap(_bytes),
fromJdbcOffset(start), pattern);
return((pos >= 0) ? toJdbcOffset(pos) : pos);
}
public long position(Blob pattern, long start) throws SQLException {
return position(pattern.getBytes(1L, (int)pattern.length()), start);
}
public OutputStream setBinaryStream(long position) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
public void truncate(long len) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
public int setBytes(long pos, byte[] bytes) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
public int setBytes(long pos, byte[] bytes, int offset, int lesn)
throws SQLException {
throw new SQLFeatureNotSupportedException();
}
public void free() {
close();
}
public void close() {
_bytes = null;
ByteUtil.closeQuietly(_content);
_content = null;
}
private static int toJdbcOffset(int off) {
return off + 1;
}
private static int fromJdbcOffset(long off) {
return (int)off - 1;
}
@Override
public String toString() {
ToStringBuilder sb = CustomToStringStyle.builder(this);
if(_content != null) {
sb.append("content", _content);
} else {
sb.append("bytes", _bytes);
sb.append("content", "(uninitialized)");
}
return sb.toString();
}
}
static abstract class ContentImpl implements Content, Closeable
{
protected final OleBlobImpl _blob;
protected ContentImpl(OleBlobImpl blob) {
_blob = blob;
}
public OleBlobImpl getBlob() {
return _blob;
}
protected byte[] getBytes() throws IOException {
return getBlob().getBytes();
}
public void close() {
// base does nothing
}
protected ToStringBuilder toString(ToStringBuilder sb) {
sb.append("type", getType());
return sb;
}
}
static abstract class EmbeddedContentImpl extends ContentImpl
implements EmbeddedContent
{
private final int _position;
private final int _length;
protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length)
{
super(blob);
_position = position;
_length = length;
}
public long length() {
return _length;
}
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(getBytes(), _position, _length);
}
public void writeTo(OutputStream out) throws IOException {
out.write(getBytes(), _position, _length);
}
@Override
protected ToStringBuilder toString(ToStringBuilder sb) {
super.toString(sb);
if(_position >= 0) {
sb.append("content", ByteBuffer.wrap(_blob._bytes, _position, _length));
}
return sb;
}
}
static abstract class EmbeddedPackageContentImpl
extends EmbeddedContentImpl
implements PackageContent
{
private final String _prettyName;
private final String _className;
private final String _typeName;
protected EmbeddedPackageContentImpl(
OleBlobImpl blob, String prettyName, String className,
String typeName, int position, int length)
{
super(blob, position, length);
_prettyName = prettyName;
_className = className;
_typeName = typeName;
}
public String getPrettyName() {
return _prettyName;
}
public String getClassName() {
return _className;
}
public String getTypeName() {
return _typeName;
}
@Override
protected ToStringBuilder toString(ToStringBuilder sb) {
sb.append("prettyName", _prettyName)
.append("className", _className)
.append("typeName", _typeName);
super.toString(sb);
return sb;
}
}
private static final class LinkContentImpl
extends EmbeddedPackageContentImpl
implements LinkContent
{
private final String _fileName;
private final String _linkPath;
private final String _filePath;
private LinkContentImpl(OleBlobImpl blob, String prettyName,
String className, String typeName,
String fileName, String linkPath,
String filePath)
{
super(blob, prettyName, className, typeName, -1, -1);
_fileName = fileName;
_linkPath = linkPath;
_filePath = filePath;
}
public ContentType getType() {
return ContentType.LINK;
}
public String getFileName() {
return _fileName;
}
public String getLinkPath() {
return _linkPath;
}
public String getFilePath() {
return _filePath;
}
public InputStream getLinkStream() throws IOException {
return new FileInputStream(getLinkPath());
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("fileName", _fileName)
.append("linkPath", _linkPath)
.append("filePath", _filePath)
.toString();
}
}
private static final class SimplePackageContentImpl
extends EmbeddedPackageContentImpl
implements SimplePackageContent
{
private final String _fileName;
private final String _filePath;
private final String _localFilePath;
private SimplePackageContentImpl(OleBlobImpl blob, String prettyName,
String className, String typeName,
int position, int length,
String fileName, String filePath,
String localFilePath)
{
super(blob, prettyName, className, typeName, position, length);
_fileName = fileName;
_filePath = filePath;
_localFilePath = localFilePath;
}
public ContentType getType() {
return ContentType.SIMPLE_PACKAGE;
}
public String getFileName() {
return _fileName;
}
public String getFilePath() {
return _filePath;
}
public String getLocalFilePath() {
return _localFilePath;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("fileName", _fileName)
.append("filePath", _filePath)
.append("localFilePath", _localFilePath)
.toString();
}
}
private static final class OtherContentImpl
extends EmbeddedPackageContentImpl
implements OtherContent
{
private OtherContentImpl(
OleBlobImpl blob, String prettyName, String className,
String typeName, int position, int length)
{
super(blob, prettyName, className, typeName, position, length);
}
public ContentType getType() {
return ContentType.OTHER;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.toString();
}
}
private static final class UnknownContentImpl extends ContentImpl
{
private UnknownContentImpl(OleBlobImpl blob) {
super(blob);
}
public ContentType getType() {
return ContentType.UNKNOWN;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("content", _blob._bytes)
.toString();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy