jxl.read.biff.SSTRecord Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of jxl Show documentation
JExcelApi is a Java library that provides the ability to read, write, and modify Microsoft Excel spreadsheets.
The newest version!
/*********************************************************************
*
*      Copyright (C) 2002 Andrew Khan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
***************************************************************************/

package jxl.read.biff;

import jxl.common.Assert;

import jxl.WorkbookSettings;
import jxl.biff.IntegerHelper;
import jxl.biff.RecordData;
import jxl.biff.StringHelper;

/**
 * Holds all the strings in the shared string table
 */
class SSTRecord extends RecordData
{
  /**
   * The total string count, read from the first four bytes of the data
   */
  private int totalStrings;
  /**
   * The unique string count, read from bytes four to seven of the data.
   * This is also the size of the strings array
   */
  private int uniqueStrings;
  /**
   * The decoded shared strings, in table order
   */
  private String[] strings;
  /**
   * The offsets within the concatenated data at which each continuation
   * record starts
   */
  private int[] continuationBreaks;

  /**
   * A mutable wrapper around a byte array, used as an in/out parameter
   */
  private static class ByteArrayHolder
  {
    /**
     * the wrapped bytes
     */
    public byte[] bytes;
  }

  /**
   * A mutable wrapper around a boolean, used as an in/out parameter
   */
  private static class BooleanHolder
  {
    /**
     * the wrapped flag
     */
    public boolean value;
  }

  /**
   * Builds the string table from the SST record and its continuations.
   * The data of all the records is joined into a single array, the start
   * offset of every continuation is remembered, and then the strings are
   * decoded from that array
   *
   * @param t the raw data
   * @param continuations the continuations
   * @param ws the workbook settings
   */
  public SSTRecord(Record t, Record[] continuations, WorkbookSettings ws)
  {
    super(t);

    // When a continuation starts part way through a string, the
    // encoding flag byte is repeated at the start of the continuation

    Record base = getRecord();
    int baseLength = base.getLength();

    // Work out how much room the joined data needs
    int combinedLength = baseLength;
    for (int c = 0; c < continuations.length; c++)
    {
      combinedLength += continuations[c].getLength();
    }

    byte[] data = new byte[combinedLength];

    // The original record goes in first
    System.arraycopy(base.getData(), 0, data, 0, baseLength);

    // Followed by each continuation, noting where each one begins
    continuationBreaks = new int[continuations.length];
    int writePos = baseLength;
    for (int c = 0; c < continuations.length; c++)
    {
      Record cont = continuations[c];
      byte[] contData = cont.getData();
      int contLength = cont.getLength();
      System.arraycopy(contData, 0, data, writePos, contLength);
      continuationBreaks[c] = writePos;
      writePos += contLength;
    }

    totalStrings = IntegerHelper.getInt(data[0], data[1],
                                        data[2], data[3]);
    uniqueStrings = IntegerHelper.getInt(data[4], data[5],
                                         data[6], data[7]);

    strings = new String[uniqueStrings];
    readStrings(data, 8, ws);
  }

  /**
   * Decodes every unique string from the joined data and stores it in
   * the strings array
   *
   * @param data the raw data
   * @param offset the offset
   * @param ws the workbook settings
   */
  private void readStrings(byte[] data, int offset, WorkbookSettings ws)
  {
    int cursor = offset;

    for (int index = 0; index < uniqueStrings; index++)
    {
      // Each entry starts with a two byte character count and a one
      // byte set of option flags
      int charCount = IntegerHelper.getInt(data[cursor], data[cursor + 1]);
      byte flags = data[cursor + 2];
      cursor += 3;

      // Bit 0x04 marks an extended string, bit 0x08 a string carrying
      // formatting information
      boolean hasExtendedData = (flags & 0x04) != 0;
      boolean hasFormatting = (flags & 0x08) != 0;

      // The number of formatting runs (crun), present for rich strings
      int runCount = 0;
      if (hasFormatting)
      {
        runCount = IntegerHelper.getInt(data[cursor], data[cursor + 1]);
        cursor += 2;
      }

      // The extended data length (cchExtRst), present for extended strings
      int extendedLength = 0;
      if (hasExtendedData)
      {
        extendedLength = IntegerHelper.getInt
          (data[cursor], data[cursor + 1], data[cursor + 2], data[cursor + 3]);
        cursor += 4;
      }

      // A clear 0x01 bit means one byte per character, otherwise two.
      // getChars may change the flag if the encoding switches at a
      // continuation break
      ByteArrayHolder chars = new ByteArrayHolder();
      BooleanHolder encoding = new BooleanHolder();
      encoding.value = ((flags & 0x01) == 0);
      cursor += getChars(data, chars, cursor, encoding, charCount);

      if (encoding.value)
      {
        strings[index] = StringHelper.getString(chars.bytes, charCount, 0, ws);
      }
      else
      {
        strings[index] =
          StringHelper.getUnicodeString(chars.bytes, charCount, 0);
      }

      // Step over the formatting runs (four bytes each) and the extended
      // data.  Both amounts are zero when the matching flag was not set
      cursor += 4 * runCount + extendedLength;

      Assert.verify(cursor <= data.length, "pos exceeds record length");
    }
  }

  /**
   * Copies the character bytes of one string out of the source, coping
   * with a string that runs across one or more continuation breaks
   *
   * @param source the original source
   * @param bah holder for the new byte array
   * @param pos the current position in the source
   * @param ascii holder for a return ascii flag
   * @param numChars the number of chars in the string
   * @return the number of bytes read from the source
   */
  private int getChars(byte[] source,
                       ByteArrayHolder bah,
                       int pos,
                       BooleanHolder ascii,
                       int numChars)
  {
    // One byte per char for ascii, two otherwise
    int byteCount = ascii.value ? numChars : numChars * 2;
    bah.bytes = new byte[byteCount];

    // Find the first continuation break lying inside the string's bytes
    int breakIndex = -1;
    for (int k = 0; k < continuationBreaks.length; k++)
    {
      int candidate = continuationBreaks[k];
      if (pos <= candidate && pos + byteCount > candidate)
      {
        breakIndex = k;
        break;
      }
    }

    // No break inside the string, so a single straight copy is enough
    if (breakIndex < 0)
    {
      System.arraycopy(source, pos, bah.bytes, 0, byteCount);
      return byteCount;
    }

    // Take the bytes which precede the break
    int breakpos = continuationBreaks[breakIndex];
    int headBytes = breakpos - pos;
    System.arraycopy(source, pos, bah.bytes, 0, headBytes);

    int headChars = ascii.value ? headBytes : headBytes / 2;

    // The remainder comes from the continuation(s)
    return headBytes + getContinuedString(source,
                                          bah,
                                          headBytes,
                                          breakIndex,
                                          ascii,
                                          numChars - headChars);
  }

  /**
   * Reads the part of a string which lies beyond a continuation break.
   * The byte at each break is an encoding flag for the segment which
   * follows: zero means one byte per character, anything else means two
   * bytes per character.  The destination array is converted to the two
   * byte form if a one byte string turns into a two byte string part way
   * through
   *
   * @param source the original bytes
   * @param bah the holder for the new bytes
   * @param destPos the des pos
   * @param contBreakIndex the index of the continuation break
   * @param ascii the ascii flag holder
   * @param charsLeft the number of chars left in the array
   * @return the number of bytes read in the continued string
   */
  private int getContinuedString(byte[] source,
                                 ByteArrayHolder bah,
                                 int destPos,
                                 int contBreakIndex,
                                 BooleanHolder ascii,
                                 int charsLeft)
  {
    int breakpos = continuationBreaks[contBreakIndex];
    int consumed = 0;
    int lastBreak = continuationBreaks.length - 1;

    while (charsLeft > 0)
    {
      Assert.verify(contBreakIndex < continuationBreaks.length,
                    "continuation break index");

      boolean segmentIsAscii = (source[breakpos] == 0);

      // The number of source bytes wanted from this segment, capped by
      // the room before the next break unless this is the final segment
      int wanted = segmentIsAscii ? charsLeft : charsLeft * 2;
      int take = wanted;
      if (contBreakIndex != lastBreak)
      {
        take = Math.min
          (wanted,
           continuationBreaks[contBreakIndex + 1] - breakpos - 1);
      }

      if (segmentIsAscii)
      {
        if (ascii.value)
        {
          // One byte per char before and after the break
          System.arraycopy(source,
                           breakpos + 1,
                           bah.bytes,
                           destPos,
                           take);
          destPos += take;
        }
        else
        {
          // The string began as two bytes per char but carries on as
          // one byte per char, so spread each byte over a two byte slot
          int from = breakpos + 1;
          for (int n = 0; n < take; n++)
          {
            bah.bytes[destPos] = source[from + n];
            destPos += 2;
          }
        }

        charsLeft -= take;
      }
      else
      {
        if (ascii.value)
        {
          // The string began as one byte per char but carries on as two
          // bytes per char.  Rebuild the destination in the two byte
          // form, moving what has been read so far into the even slots
          byte[] narrow = bah.bytes;
          bah.bytes = new byte[destPos * 2 + charsLeft * 2];
          for (int n = 0; n < destPos; n++)
          {
            bah.bytes[n * 2] = narrow[n];
          }

          destPos *= 2;
        }

        System.arraycopy(source,
                         breakpos + 1,
                         bah.bytes,
                         destPos,
                         take);
        destPos += take;
        charsLeft -= take / 2;
        ascii.value = false;
      }

      // The flag byte counts towards the bytes read from the source
      consumed += take + 1;

      contBreakIndex++;

      if (contBreakIndex < continuationBreaks.length)
      {
        breakpos = continuationBreaks[contBreakIndex];
      }
    }

    return consumed;
  }

  /**
   * Looks up a string in the table
   *
   * @param index the index of the string to return
   * @return the string held at the index
   */
  public String getString(int index)
  {
    boolean withinTable = index < uniqueStrings;
    Assert.verify(withinTable);
    return strings[index];
  }
}