jxl.read.biff.SSTRecord Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jxl Show documentation
Show all versions of jxl Show documentation
JExcelApi is a Java library which provides the ability to read, write, and modify Microsoft Excel spreadsheets.
The newest version!
/*********************************************************************
*
* Copyright (C) 2002 Andrew Khan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
***************************************************************************/
package jxl.read.biff;
import jxl.common.Assert;
import jxl.WorkbookSettings;
import jxl.biff.IntegerHelper;
import jxl.biff.RecordData;
import jxl.biff.StringHelper;
/**
 * Holds all the strings in the shared string table.
 *
 * The record and all of its continuations are joined into a single byte
 * array and the strings are decoded from it one after another.  Whenever a
 * string's characters run across the start of a continuation, the encoding
 * byte is repeated at that point and has to be skipped (and honoured) while
 * the characters are collected.
 */
class SSTRecord extends RecordData
{
  /**
   * Option flag bit: clear when the string is ASCII (compressed), set when
   * it is unicode
   */
  private static final int UNICODE_FLAG = 0x01;

  /**
   * Option flag bit: set when the string is an extended string
   */
  private static final int EXTENDED_FLAG = 0x04;

  /**
   * Option flag bit: set when the string contains formatting information
   */
  private static final int RICH_FLAG = 0x08;

  /**
   * The total number of strings in this table
   */
  private int totalStrings;

  /**
   * The number of unique strings
   */
  private int uniqueStrings;

  /**
   * The shared strings
   */
  private String[] strings;

  /**
   * The offsets, within the combined byte array, at which each continuation
   * begins
   */
  private int[] continuationBreaks;

  /**
   * A mutable holder used to pass a byte array back out of a method
   */
  private static class ByteArrayHolder
  {
    /**
     * the held bytes
     */
    public byte[] bytes;
  }

  /**
   * A mutable holder used to pass a boolean back out of a method
   */
  private static class BooleanHolder
  {
    /**
     * the held value
     */
    public boolean value;
  }

  /**
   * Constructs this object from the raw data
   *
   * @param t the raw data
   * @param continuations the continuations
   * @param ws the workbook settings
   */
  public SSTRecord(Record t, Record[] continuations, WorkbookSettings ws)
  {
    super(t);

    Record head = getRecord();

    // Work out how large the combined buffer has to be: every continuation
    // plus the record itself
    int combinedLength = 0;
    for (int c = 0; c < continuations.length; c++)
    {
      combinedLength += continuations[c].getLength();
    }
    combinedLength += head.getLength();

    byte[] data = new byte[combinedLength];

    // The record's own data occupies the front of the buffer
    System.arraycopy(head.getData(), 0, data, 0, head.getLength());
    int writeAt = head.getLength();

    // Append each continuation, remembering the offset at which it starts
    continuationBreaks = new int[continuations.length];
    for (int c = 0; c < continuations.length; c++)
    {
      Record part = continuations[c];
      byte[] partData = part.getData();
      System.arraycopy(partData, 0, data, writeAt, part.getLength());
      continuationBreaks[c] = writeAt;
      writeAt += part.getLength();
    }

    // Bytes 0-3 hold the total count, bytes 4-7 the unique count
    totalStrings = IntegerHelper.getInt(data[0], data[1], data[2], data[3]);
    uniqueStrings = IntegerHelper.getInt(data[4], data[5], data[6], data[7]);

    strings = new String[uniqueStrings];

    // The string data follows the two counts
    readStrings(data, 8, ws);
  }

  /**
   * Reads in all the strings from the raw data
   *
   * @param data the raw data
   * @param offset the offset
   * @param ws the workbook settings
   */
  private void readStrings(byte[] data, int offset, WorkbookSettings ws)
  {
    int cursor = offset;

    for (int index = 0; index < uniqueStrings; index++)
    {
      // Two bytes of character count followed by one byte of option flags
      int charCount = IntegerHelper.getInt(data[cursor], data[cursor + 1]);
      byte flags = data[cursor + 2];
      cursor += 3;

      boolean extended = (flags & EXTENDED_FLAG) != 0;
      boolean rich = (flags & RICH_FLAG) != 0;

      // The crun, present only for strings with formatting information
      int runCount = 0;
      if (rich)
      {
        runCount = IntegerHelper.getInt(data[cursor], data[cursor + 1]);
        cursor += 2;
      }

      // The cchExtRst, present only for extended strings
      int extendedLength = 0;
      if (extended)
      {
        extendedLength = IntegerHelper.getInt(data[cursor],
                                              data[cursor + 1],
                                              data[cursor + 2],
                                              data[cursor + 3]);
        cursor += 4;
      }

      // Collect the characters.  The encoding holder starts out with what
      // the option flags say and comes back with the encoding of the bytes
      // actually collected, which may have changed at a continuation
      BooleanHolder encoding = new BooleanHolder();
      encoding.value = (flags & UNICODE_FLAG) == 0;
      ByteArrayHolder collected = new ByteArrayHolder();
      cursor += getChars(data, collected, cursor, encoding, charCount);

      strings[index] = encoding.value ?
        StringHelper.getString(collected.bytes, charCount, 0, ws) :
        StringHelper.getUnicodeString(collected.bytes, charCount, 0);

      // Step over the formatting runs (four bytes each) and the extended
      // string data; both amounts are zero when the data is absent
      cursor += 4 * runCount;
      cursor += extendedLength;

      Assert.verify(cursor <= data.length, "pos exceeds record length");
    }
  }

  /**
   * Gets the chars in the ascii array, taking into account continuation
   * breaks
   *
   * @param source the original source
   * @param bah holder for the new byte array
   * @param pos the current position in the source
   * @param ascii holder for a return ascii flag
   * @param numChars the number of chars in the string
   * @return the number of bytes read from the source
   */
  private int getChars(byte[] source,
                       ByteArrayHolder bah,
                       int pos,
                       BooleanHolder ascii,
                       int numChars)
  {
    // One byte per character for ascii, two for unicode
    int byteCount = ascii.value ? numChars : numChars * 2;
    bah.bytes = new byte[byteCount];

    // Look for the first continuation that starts inside the character data
    int breakIndex = -1;
    for (int k = 0; k < continuationBreaks.length; k++)
    {
      int candidate = continuationBreaks[k];
      if (pos <= candidate && pos + byteCount > candidate)
      {
        breakIndex = k;
        break;
      }
    }

    // No continuation in the way: a straight copy does the job
    if (breakIndex < 0)
    {
      System.arraycopy(source, pos, bah.bytes, 0, byteCount);
      return byteCount;
    }

    // Copy what lies ahead of the break, then hand the remainder over
    int headBytes = continuationBreaks[breakIndex] - pos;
    System.arraycopy(source, pos, bah.bytes, 0, headBytes);

    int headChars = ascii.value ? headBytes : headBytes / 2;

    return headBytes + getContinuedString(source,
                                          bah,
                                          headBytes,
                                          breakIndex,
                                          ascii,
                                          numChars - headChars);
  }

  /**
   * Gets the rest of the string after a continuation break
   *
   * @param source the original bytes
   * @param bah the holder for the new bytes
   * @param destPos the position in the new bytes at which to carry on writing
   * @param contBreakIndex the index of the continuation break
   * @param ascii the ascii flag holder
   * @param charsLeft the number of chars left in the array
   * @return the number of bytes read in the continued string
   */
  private int getContinuedString(byte[] source,
                                 ByteArrayHolder bah,
                                 int destPos,
                                 int contBreakIndex,
                                 BooleanHolder ascii,
                                 int charsLeft)
  {
    int segmentStart = continuationBreaks[contBreakIndex];
    int consumed = 0;

    while (charsLeft > 0)
    {
      Assert.verify(contBreakIndex < continuationBreaks.length,
                    "continuation break index");

      // The first byte of the continuation is the repeated encoding byte
      boolean segmentIsAscii = source[segmentStart] == 0;
      boolean finalSegment =
        contBreakIndex == continuationBreaks.length - 1;
      int payloadStart = segmentStart + 1;

      if (segmentIsAscii)
      {
        // One byte per character in this segment; never read beyond the
        // start of the following continuation
        int chars = charsLeft;
        if (!finalSegment)
        {
          chars = Math.min
            (charsLeft,
             continuationBreaks[contBreakIndex + 1] - segmentStart - 1);
        }

        if (ascii.value)
        {
          // Ascii so far and ascii here: copy straight across
          System.arraycopy(source, payloadStart, bah.bytes, destPos, chars);
          destPos += chars;
        }
        else
        {
          // Unicode so far but ascii here: drop each byte into the first
          // byte of a two byte slot, leaving the second byte untouched
          for (int k = 0; k < chars; k++)
          {
            bah.bytes[destPos] = source[payloadStart + k];
            destPos += 2;
          }
        }

        consumed += chars + 1;
        charsLeft -= chars;
      }
      else
      {
        if (ascii.value)
        {
          // Ascii so far but unicode here: rebuild the collected bytes
          // with two bytes per character, sized for the whole string
          byte[] narrow = bah.bytes;
          bah.bytes = new byte[destPos * 2 + charsLeft * 2];
          for (int k = 0; k < destPos; k++)
          {
            bah.bytes[k * 2] = narrow[k];
          }
          destPos = destPos * 2;
        }

        // Two bytes per character in this segment; never read beyond the
        // start of the following continuation
        int byteCount = charsLeft * 2;
        if (!finalSegment)
        {
          byteCount = Math.min
            (charsLeft * 2,
             continuationBreaks[contBreakIndex + 1] - segmentStart - 1);
        }

        System.arraycopy(source, payloadStart, bah.bytes, destPos, byteCount);
        destPos += byteCount;
        consumed += byteCount + 1;
        charsLeft -= byteCount / 2;
        ascii.value = false;
      }

      // Move on to the next continuation, if there is one
      contBreakIndex++;
      if (contBreakIndex < continuationBreaks.length)
      {
        segmentStart = continuationBreaks[contBreakIndex];
      }
    }

    return consumed;
  }

  /**
   * Gets the string at the specified position
   *
   * @param index the index of the string to return
   * @return the strings
   */
  public String getString(int index)
  {
    boolean withinTable = index < uniqueStrings;
    Assert.verify(withinTable);
    return strings[index];
  }
}