All downloads are free. Search and download functionality uses the official Maven repository.

loci.poi.hpsf.VariantSupport Maven / Gradle / Ivy

Go to download

Java API to handle Microsoft OLE 2 Compound Document format (Word, Excel). Based on poi-2.5.1-final-20040804.jar, with bugfixes for OLE v2 and memory efficiency improvements. Used by Bio-Formats for OLE support (cxd, ipw, oib, zvi). Used by VisBio overlays logic for XLS export feature.

The newest version!
/*
 * #%L
 * Fork of Apache Jakarta POI.
 * %%
 * Copyright (C) 2008 - 2016 Open Microscopy Environment:
 *   - Board of Regents of the University of Wisconsin-Madison
 *   - Glencoe Software, Inc.
 *   - University of Dundee
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package loci.poi.hpsf;

import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import loci.poi.util.LittleEndian;
import loci.poi.util.LittleEndianConsts;

/**
 * 

Supports reading and writing of variant data.

* *

FIXME (3): Reading and writing should be made more * uniform than it is now. The following items should be resolved: * *

    * *
  • Reading requires a length parameter that is 4 byte greater than the * actual data, because the variant type field is included.

  • * *
  • Reading reads from a byte array while writing writes to an byte array * output stream.

  • * *
* * @author Rainer Klute <[email protected]> * @since 2003-08-08 * @version $Id: VariantSupport.java 489730 2006-12-22 19:18:16Z bayard $ */ public class VariantSupport extends Variant { private static boolean logUnsupportedTypes = false; /** *

Specifies whether warnings about unsupported variant types are to be * written to System.err or not.

* * @param logUnsupportedTypes If true warnings will be written, * if false they won't. */ public static void setLogUnsupportedTypes(final boolean logUnsupportedTypes) { VariantSupport.logUnsupportedTypes = logUnsupportedTypes; } /** *

Checks whether logging of unsupported variant types warning is turned * on or off.

* * @return true if logging is turned on, else * false. */ public static boolean isLogUnsupportedTypes() { return logUnsupportedTypes; } /** *

Keeps a list of the variant types an "unsupported" message has already * been issued for.

*/ protected static List unsupportedMessage; /** *

Writes a warning to System.err that a variant type is * unsupported by HPSF. Such a warning is written only once for each variant * type. Log messages can be turned on or off by

* * @param ex The exception to log */ protected static void writeUnsupportedTypeMessage (final UnsupportedVariantTypeException ex) { if (isLogUnsupportedTypes()) { if (unsupportedMessage == null) unsupportedMessage = new LinkedList(); Long vt = new Long(ex.getVariantType()); if (!unsupportedMessage.contains(vt)) { System.err.println(ex.getMessage()); unsupportedMessage.add(vt); } } } /** *

Reads a variant type from a byte array.

* * @param src The byte array * @param offset The offset in the byte array where the variant * starts * @param length The length of the variant including the variant * type field * @param type The variant type to read * @param codepage The codepage to use to write non-wide strings * @return A Java object that corresponds best to the variant * field. For example, a VT_I4 is returned as a {@link Long}, a * VT_LPSTR as a {@link String}. * @exception ReadingNotSupportedException if a property is to be written * who's variant type HPSF does not yet support * @exception UnsupportedEncodingException if the specified codepage is not * supported. * * @see Variant */ public static Object read(final byte[] src, final int offset, final int length, final long type, final int codepage) throws ReadingNotSupportedException, UnsupportedEncodingException { Object value; int o1 = offset; int l1 = length - LittleEndian.INT_SIZE; long lType = type; /* Instead of trying to read 8-bit characters from a Unicode string, * read 16-bit characters. */ if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR) lType = Variant.VT_LPWSTR; switch ((int) lType) { case Variant.VT_EMPTY: { value = null; break; } case Variant.VT_I2: { /* * Read a short. In Java it is represented as an * Integer object. */ value = new Integer(LittleEndian.getShort(src, o1)); break; } case Variant.VT_I4: { /* * Read a word. In Java it is represented as an * Integer object. */ value = new Integer(LittleEndian.getInt(src, o1)); break; } case Variant.VT_I8: { /* * Read a double word. In Java it is represented as a * Long object. */ value = new Long(LittleEndian.getLong(src, o1)); break; } case Variant.VT_R8: { /* * Read an eight-byte double value. In Java it is represented as * a Double object. */ value = new Double(LittleEndian.getDouble(src, o1)); break; } case Variant.VT_FILETIME: { /* * Read a FILETIME object. In Java it is represented * as a Date object. 
*/ final long low = LittleEndian.getUInt(src, o1); o1 += LittleEndian.INT_SIZE; final long high = LittleEndian.getUInt(src, o1); value = Util.filetimeToDate((int) high, (int) low); break; } case Variant.VT_LPSTR: { /* * Read a byte string. In Java it is represented as a * String object. The 0x00 bytes at the end must be * stripped. */ final int first = o1 + LittleEndian.INT_SIZE; long last = first + LittleEndian.getUInt(src, o1) - 1; o1 += LittleEndian.INT_SIZE; while (src[(int) last] == 0 && first <= last) last--; final int l = (int) (last - first + 1); value = codepage != -1 ? new String(src, first, l, codepageToEncoding(codepage)) : new String(src, first, l); break; } case Variant.VT_LPWSTR: { /* * Read a Unicode string. In Java it is represented as * a String object. The 0x00 bytes at the end must be * stripped. */ final int first = o1 + LittleEndian.INT_SIZE; long last = first + LittleEndian.getUInt(src, o1) - 1; long l = last - first; o1 += LittleEndian.INT_SIZE; StringBuffer b = new StringBuffer((int) (last - first)); for (int i = 0; i <= l; i++) { final int i1 = o1 + (i * 2); final int i2 = i1 + 1; final int high = src[i2] << 8; final int low = src[i1] & 0x00ff; final char c = (char) (high | low); b.append(c); } /* Strip 0x00 characters from the end of the string: */ while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00) b.setLength(b.length() - 1); value = b.toString(); break; } case Variant.VT_CF: { final byte[] v = new byte[l1]; for (int i = 0; i < l1; i++) v[i] = src[(o1 + i)]; value = v; break; } case Variant.VT_BOOL: { /* * The first four bytes in src, from src[offset] to * src[offset + 3] contain the DWord for VT_BOOL, so * skip it, we don't need it. 
*/ // final int first = offset + LittleEndian.INT_SIZE; long bool = LittleEndian.getUInt(src, o1); if (bool != 0) value = Boolean.TRUE; else value = Boolean.FALSE; break; } default: { final byte[] v = new byte[l1]; for (int i = 0; i < l1; i++) v[i] = src[(o1 + i)]; throw new ReadingNotSupportedException(type, v); } } return value; } /** *

Turns a codepage number into the equivalent character encoding's * name.

* * @param codepage The codepage number * * @return The character encoding's name. If the codepage number is 65001, * the encoding name is "UTF-8". All other positive numbers are mapped to * "cp" followed by the number, e.g. if the codepage number is 1252 the * returned character encoding name will be "cp1252". * * @exception UnsupportedEncodingException if the specified codepage is * less than zero. */ public static String codepageToEncoding(final int codepage) throws UnsupportedEncodingException { if (codepage <= 0) throw new UnsupportedEncodingException ("Codepage number may not be " + codepage); switch (codepage) { case Constants.CP_UTF16: return "UTF-16"; case Constants.CP_UTF16_BE: return "UTF-16BE"; case Constants.CP_UTF8: return "UTF-8"; case Constants.CP_037: return "cp037"; case Constants.CP_GBK: return "GBK"; case Constants.CP_MS949: return "ms949"; case Constants.CP_WINDOWS_1250: return "windows-1250"; case Constants.CP_WINDOWS_1251: return "windows-1251"; case Constants.CP_WINDOWS_1252: return "windows-1252"; case Constants.CP_WINDOWS_1253: return "windows-1253"; case Constants.CP_WINDOWS_1254: return "windows-1254"; case Constants.CP_WINDOWS_1255: return "windows-1255"; case Constants.CP_WINDOWS_1256: return "windows-1256"; case Constants.CP_WINDOWS_1257: return "windows-1257"; case Constants.CP_WINDOWS_1258: return "windows-1258"; case Constants.CP_JOHAB: return "johab"; case Constants.CP_MAC_ROMAN: return "MacRoman"; case Constants.CP_MAC_JAPAN: return "SJIS"; case Constants.CP_MAC_CHINESE_TRADITIONAL: return "Big5"; case Constants.CP_MAC_KOREAN: return "EUC-KR"; case Constants.CP_MAC_ARABIC: return "MacArabic"; case Constants.CP_MAC_HEBREW: return "MacHebrew"; case Constants.CP_MAC_GREEK: return "MacGreek"; case Constants.CP_MAC_CYRILLIC: return "MacCyrillic"; case Constants.CP_MAC_CHINESE_SIMPLE: return "EUC_CN"; case Constants.CP_MAC_ROMANIA: return "MacRomania"; case Constants.CP_MAC_UKRAINE: return "MacUkraine"; case Constants.CP_MAC_THAI: 
return "MacThai"; case Constants.CP_MAC_CENTRAL_EUROPE: return "MacCentralEurope"; case Constants.CP_MAC_ICELAND: return "MacIceland"; case Constants.CP_MAC_TURKISH: return "MacTurkish"; case Constants.CP_MAC_CROATIAN: return "MacCroatian"; case Constants.CP_US_ACSII: case Constants.CP_US_ASCII2: return "US-ASCII"; case Constants.CP_KOI8_R: return "KOI8-R"; case Constants.CP_ISO_8859_1: return "ISO-8859-1"; case Constants.CP_ISO_8859_2: return "ISO-8859-2"; case Constants.CP_ISO_8859_3: return "ISO-8859-3"; case Constants.CP_ISO_8859_4: return "ISO-8859-4"; case Constants.CP_ISO_8859_5: return "ISO-8859-5"; case Constants.CP_ISO_8859_6: return "ISO-8859-6"; case Constants.CP_ISO_8859_7: return "ISO-8859-7"; case Constants.CP_ISO_8859_8: return "ISO-8859-8"; case Constants.CP_ISO_8859_9: return "ISO-8859-9"; case Constants.CP_ISO_2022_JP1: case Constants.CP_ISO_2022_JP2: case Constants.CP_ISO_2022_JP3: return "ISO-2022-JP"; case Constants.CP_ISO_2022_KR: return "ISO-2022-KR"; case Constants.CP_EUC_JP: return "EUC-JP"; case Constants.CP_EUC_KR: return "EUC-KR"; case Constants.CP_GB2312: return "GB2312"; case Constants.CP_GB18030: return "GB18030"; case Constants.CP_SJIS: return "SJIS"; default: return "cp" + codepage; } } /** *

Writes a variant value to an output stream. This method ensures that * always a multiple of 4 bytes is written.

* *

If the codepage is UTF-16, which is encouraged, strings * must always be written as {@link Variant#VT_LPWSTR} * strings, not as {@link Variant#VT_LPSTR} strings. This method ensure this * by converting strings appropriately, if needed.

* * @param out The stream to write the value to. * @param type The variant's type. * @param value The variant's value. * @param codepage The codepage to use to write non-wide strings * @return The number of entities that have been written. In many cases an * "entity" is a byte but this is not always the case. * @exception IOException if an I/O exceptions occurs * @exception WritingNotSupportedException if a property is to be written * who's variant type HPSF does not yet support */ public static int write(final OutputStream out, final long type, final Object value, final int codepage) throws IOException, WritingNotSupportedException { int length = 0; switch ((int) type) { case Variant.VT_BOOL: { int trueOrFalse; if (((Boolean) value).booleanValue()) trueOrFalse = 1; else trueOrFalse = 0; length = TypeWriter.writeUIntToStream(out, trueOrFalse); break; } case Variant.VT_LPSTR: { final byte[] bytes = (codepage == -1 ? ((String) value).getBytes() : ((String) value).getBytes(codepageToEncoding(codepage))); length = TypeWriter.writeUIntToStream(out, bytes.length + 1); final byte[] b = new byte[bytes.length + 1]; System.arraycopy(bytes, 0, b, 0, bytes.length); b[b.length - 1] = 0x00; out.write(b); length += b.length; break; } case Variant.VT_LPWSTR: { final int nrOfChars = ((String) value).length() + 1; length += TypeWriter.writeUIntToStream(out, nrOfChars); char[] s = Util.pad4((String) value); for (int i = 0; i < s.length; i++) { final int high = ((s[i] & 0x0000ff00) >> 8); final int low = (s[i] & 0x000000ff); final byte highb = (byte) high; final byte lowb = (byte) low; out.write(lowb); out.write(highb); length += 2; } out.write(0x00); out.write(0x00); length += 2; break; } case Variant.VT_CF: { final byte[] b = (byte[]) value; out.write(b); length = b.length; break; } case Variant.VT_EMPTY: { TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY); length = LittleEndianConsts.INT_SIZE; break; } case Variant.VT_I2: { TypeWriter.writeToStream(out, ((Integer) 
value).shortValue()); length = LittleEndianConsts.SHORT_SIZE; break; } case Variant.VT_I4: { if (!(value instanceof Integer)) { throw new ClassCastException("Could not cast an object to " + Integer.class.toString() + ": " + value.getClass().toString() + ", " + value.toString()); } length += TypeWriter.writeToStream(out, ((Integer) value).intValue()); break; } case Variant.VT_I8: { TypeWriter.writeToStream(out, ((Long) value).longValue()); length = LittleEndianConsts.LONG_SIZE; break; } case Variant.VT_R8: { length += TypeWriter.writeToStream(out, ((Double) value).doubleValue()); break; } case Variant.VT_FILETIME: { long filetime = Util.dateToFileTime((Date) value); int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL); int low = (int) (filetime & 0x00000000FFFFFFFFL); length += TypeWriter.writeUIntToStream (out, 0x0000000FFFFFFFFL & low); length += TypeWriter.writeUIntToStream (out, 0x0000000FFFFFFFFL & high); break; } default: { /* The variant type is not supported yet. However, if the value * is a byte array we can write it nevertheless. */ if (value instanceof byte[]) { final byte[] b = (byte[]) value; out.write(b); length = b.length; writeUnsupportedTypeMessage (new WritingNotSupportedException(type, value)); } else throw new WritingNotSupportedException(type, value); break; } } return length; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy