org.apache.tika.parser.chm.lzx.ChmLzxBlock Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.chm.lzx;
import java.math.BigInteger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.parser.chm.core.ChmCommons;
import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
/**
* Decompresses a chm block. Depending on the chm block type, chooses the most
* relevant decompression method. A chm block type can be one of the following:
* UNDEFINED - no action taken, i.e. the block is skipped; VERBATIM;
* ALIGNED_OFFSET; UNCOMPRESSED - the simplest one. In addition there are
* unknown types (4-7). Currently such a type is replaced by the type of the
* previous chm block. We need to invent a more appropriate way to handle
* such types.
*
*/
public class ChmLzxBlock {
/** Upper bound, in bytes, on the content buffer allocated for a single block. */
private static final int MAX_CONTENT_SIZE = 50 * 1024 * 1024;
/** Block number passed to the constructor. */
private int block_number;
/** Block length passed to the constructor; decoding stops once this many bytes are produced. */
private long block_length;
/** Decoder state; created fresh or cloned from the previous block. */
private ChmLzxState state;
/** Output buffer holding the decompressed bytes of this block. */
private byte[] content = null;
/** Bit-stream reader over the compressed data segment. */
private ChmSection chmSection = null;
/** Number of bytes of {@code content} filled so far. */
private int contentLength = 0;
// trying to find solution for bad blocks ...
/** Block type taken from the previous block's state, or -1 when there is none. */
private int previousBlockType = -1;
/**
 * Creates a block and immediately decompresses {@code dataSegment} into it.
 *
 * @param blockNumber number of this block; must not be negative
 * @param dataSegment compressed bytes of this block; must be non-null and non-empty
 * @param blockLength number of bytes to produce; must be positive and must
 *        not exceed {@code MAX_CONTENT_SIZE}
 * @param prevBlock the previously decoded block whose state is continued,
 *        or {@code null} when decoding starts here
 * @throws TikaException if a parameter is invalid, the block is too large
 *         or the data cannot be decoded
 */
public ChmLzxBlock(int blockNumber, byte[] dataSegment, long blockLength,
        ChmLzxBlock prevBlock) throws TikaException {
    if (!validateConstructorParams(blockNumber, dataSegment, blockLength)) {
        throw new TikaException("Check your chm lzx block parameters");
    }
    // Check the limit on the long value: the (int) cast below would
    // otherwise truncate an oversized length into an acceptable-looking one.
    if (blockLength > MAX_CONTENT_SIZE) {
        throw new TikaMemoryLimitException("content length (" + blockLength
                + " bytes) is > MAX_CONTENT_SIZE");
    }
    setBlockNumber(blockNumber);
    if (prevBlock != null
            && prevBlock.getState().getBlockLength() > prevBlock
                    .getState().getBlockRemaining()) {
        // Hand the previous block's content to the section as well, so
        // matches can reach back into it.
        setChmSection(new ChmSection(dataSegment, prevBlock.getContent()));
    } else {
        setChmSection(new ChmSection(dataSegment));
    }
    setBlockLength(blockLength);
    // ============================================
    // we need to take care of previous context
    // ============================================
    checkLzxBlock(prevBlock);
    setContent((int) blockLength);
    if (prevBlock != null && prevBlock.getState() != null) {
        previousBlockType = prevBlock.getState().getBlockType();
    }
    extractContent();
}
/**
 * @return the number of content bytes produced so far
 */
protected int getContentLength() {
    return this.contentLength;
}
/**
 * Records how many content bytes have been produced so far.
 *
 * @param contentLength the new count of produced bytes
 */
protected void setContentLength(int contentLength) {
    this.contentLength = contentLength;
}
/**
 * @return the section wrapping this block's compressed data
 */
private ChmSection getChmSection() {
    return this.chmSection;
}
/**
 * Stores the section wrapping this block's compressed data.
 *
 * @param chmSection the section to read from
 */
private void setChmSection(ChmSection chmSection) {
    this.chmSection = chmSection;
}
/**
 * Fails fast when no decoder state has been set.
 *
 * @throws TikaException (a {@code ChmParsingException}) if the state is null
 */
private void assertStateNotNull() throws TikaException {
    if (null != getState()) {
        return;
    }
    throw new ChmParsingException("state is null");
}
/**
* Decodes the section's data into {@code content}. Whenever the state reports
* no bytes remaining in the current lzx block, a new block header is read
* (type, length and the tables the type needs); then the matching decompress
* method is called. The loop ends when the content is full or when a pass
* produces no new bytes.
*
* @throws TikaException if the state is null or decoding fails
*/
private void extractContent() throws TikaException {
assertStateNotNull();
if (getChmSection().getData() != null) {
boolean continueLoop = true;
// Stop when the content is full or the last pass made no progress.
while (continueLoop && getContentLength() < getBlockLength()) {
// Nothing left of the current lzx block: read the next block header.
if (getState() != null && getState().getBlockRemaining() == 0) {
if (getState().getHadStarted() == LzxState.NOT_STARTED_DECODING) {
getState().setHadStarted(LzxState.STARTED_DECODING);
// One flag bit; when set, two 16-bit reads follow that are
// combined into the intel file size.
if (getChmSection().getSyncBits(1) == 1) {
int intelSizeTemp = (getChmSection()
.getSyncBits(16) << 16)
+ getChmSection().getSyncBits(16);
// A negative (overflowed) size is stored as 0.
if (intelSizeTemp >= 0)
getState().setIntelFileSize(intelSizeTemp);
else
getState().setIntelFileSize(0);
}
}
// Header: 3 bits of block type, then a length built from a 16-bit
// and an 8-bit read.
getState().setBlockType(getChmSection().getSyncBits(3));
getState().setBlockLength(
(getChmSection().getSyncBits(16) << 8)
+ getChmSection().getSyncBits(8));
getState().setBlockRemaining(getState().getBlockLength());
// ----------------------------------------
// Trying to handle 3 - 7 block types
// ----------------------------------------
// NOTE(review): the test below is "> 3", so type 3 itself is not
// replaced; a previous type is reused only when it is 0, 1 or 2.
if (getState().getBlockType() > 3) {
if (previousBlockType >= 0 && previousBlockType < 3)
getState().setBlockType(previousBlockType);
}
switch (getState().getBlockType()) {
case ChmCommons.ALIGNED_OFFSET:
// Aligned blocks need the aligned tree in addition to the
// main and length trees built by the VERBATIM case.
createAlignedTreeTable();
//fall through
case ChmCommons.VERBATIM:
/* Creates mainTreeTable */
createMainTreeTable();
createLengthTreeTable();
// A non-zero code length for symbol 0xe8 switches the intel
// state to STARTED.
if (getState().getMainTreeLengtsTable()[0xe8] != 0)
getState().setIntelState(IntelState.STARTED);
break;
case ChmCommons.UNCOMPRESSED:
getState().setIntelState(IntelState.STARTED);
if (getChmSection().getTotal() > 16)
getChmSection().setSwath(
getChmSection().getSwath() - 1);
// R0, R1 and R2 are each read as 4 bytes whose order is reversed
// before being turned into a number.
getState().setR0(
(new BigInteger(getChmSection()
.reverseByteOrder(
getChmSection().unmarshalBytes(
4))).longValue()));
getState().setR1(
(new BigInteger(getChmSection()
.reverseByteOrder(
getChmSection().unmarshalBytes(
4))).longValue()));
getState().setR2(
(new BigInteger(getChmSection()
.reverseByteOrder(
getChmSection().unmarshalBytes(
4))).longValue()));
break;
default:
break;
}
} //end of if BlockRemaining == 0
// tempLen is the content position this pass should decode up to.
int tempLen;
if (getContentLength() + getState().getBlockRemaining() > getBlockLength()) {
// The lzx block extends past this content: keep the overshoot as
// the remaining count and decode only up to the end of the content.
getState().setBlockRemaining(
getContentLength() + getState().getBlockRemaining()
- (int) getBlockLength());
tempLen = (int) getBlockLength();
} else {
tempLen = getContentLength()
+ getState().getBlockRemaining();
getState().setBlockRemaining(0);
}
int lastLength = getContentLength();
// Each decoder gets the previous block's content when there is one,
// otherwise this section's own data.
switch (getState().getBlockType()) {
case ChmCommons.ALIGNED_OFFSET:
// if(prevblock.lzxState.length>prevblock.lzxState.remaining)
decompressAlignedBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent());// prevcontext
break;
case ChmCommons.VERBATIM:
decompressVerbatimBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent());
break;
case ChmCommons.UNCOMPRESSED:
decompressUncompressedBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent());
break;
}
getState().increaseFramesRead();
// E8 post-processing runs only for the first 32767 frames and only
// when an intel file size was read from the header.
if ((getState().getFramesRead() < 32768)
&& getState().getIntelFileSize() != 0)
intelE8Decoding();
// No new bytes in this pass means no further progress is possible.
continueLoop = getContentLength() > lastLength;
}
}
}
/**
* Post-processes the decoded content for 0xE8 sequences. In both branches
* the block length is subtracted from the state's remaining count; the scan
* of {@code content} happens only when the block length is greater than
* {@code ChmConstants.LZX_PRETREE_TABLEBITS} and the intel state is not
* {@code NOT_STARTED}.
*/
protected void intelE8Decoding() {
if (getBlockLength() <= ChmConstants.LZX_PRETREE_TABLEBITS
|| (getState().getIntelState() == IntelState.NOT_STARTED)) {
getState().setBlockRemaining(
getState().getBlockRemaining() - (int) getBlockLength());
} else {
// curpos is taken from the remaining count before it is reduced below.
long curpos = getState().getBlockRemaining();
getState().setBlockRemaining(
getState().getBlockRemaining() - (int) getBlockLength());
int i = 0;
// The last 10 bytes of the block are never examined.
while (i < getBlockLength() - 10) {
// NOTE(review): content[i] is a signed byte (-128..127) and 0xe8 is the
// int 232, so this comparison is always true and the code after the
// 'continue' is never reached. Confirm whether (byte) 0xe8 was intended
// before changing it, since that would alter the decoded output.
if (content[i] != 0xe8) {
i++;
continue;
}
// Bytes i..i+3 are put in reverse order so BigInteger reads them as a
// signed big-endian value.
byte[] b = new byte[4];
b[0] = getContent()[i + 3];
b[1] = getContent()[i + 2];
b[2] = getContent()[i + 1];
b[3] = getContent()[i + 0];
long absoff = (new BigInteger(b)).longValue();
// Only offsets within [-curpos, intelFileSize) are rewritten.
if ((absoff >= -curpos)
&& (absoff < getState().getIntelFileSize())) {
long reloff = (absoff >= 0) ? absoff - curpos : absoff
+ getState().getIntelFileSize();
// Write the result back into bytes i..i+3, lowest byte first.
getContent()[i + 0] = (byte) reloff;
getContent()[i + 1] = (byte) (reloff >>> 8);
getContent()[i + 2] = (byte) (reloff >>> 16);
getContent()[i + 3] = (byte) (reloff >>> 24);
}
i += 4;
curpos += 5;
}
}
}
/**
 * Reads one code length per pre-tree symbol from the bit stream.
 *
 * @return a new array of {@code LZX_PRETREE_MAXSYMBOLS} code lengths
 */
private short[] createPreLenTable() {
    final int symbolCount = ChmConstants.LZX_PRETREE_MAXSYMBOLS;
    short[] lengths = new short[symbolCount];
    int slot = 0;
    while (slot < symbolCount) {
        lengths[slot] = (short) getChmSection().getSyncBits(
                ChmConstants.LZX_PRETREE_NUM_ELEMENTS_BITS);
        slot++;
    }
    return lengths;
}
/**
 * Builds the length tree: reads a pre-tree, uses it to decode the length
 * tree's code lengths, then stores the resulting decode table in the state.
 *
 * @throws TikaException if a table cannot be built
 */
private void createLengthTreeTable() throws TikaException {
    final int preBits = ChmConstants.LZX_PRETREE_TABLEBITS;
    final int preSymbols = ChmConstants.LZX_PRETREE_MAXSYMBOLS;

    // Step 1: code lengths of the pre-tree, straight from the stream.
    short[] preLengths = createPreLenTable();
    if (null == preLengths) {
        throw new ChmParsingException("pretreetable is null");
    }

    // Step 2: decode table for the pre-tree.
    int preTableSize = (1 << preBits) + (preSymbols << 1);
    short[] preTree = createTreeTable2(preLengths, preTableSize, preBits, preSymbols);
    if (null == preTree) {
        throw new ChmParsingException("pretreetable is null");
    }

    // Step 3: code lengths of the length tree, decoded via the pre-tree.
    createLengthTreeLenTable(0, ChmConstants.LZX_NUM_SECONDARY_LENGTHS,
            preTree, preLengths);

    // Step 4: decode table for the length tree itself.
    int lengthTableSize = (1 << ChmConstants.LZX_LENGTH_TABLEBITS)
            + (ChmConstants.LZX_LENGTH_MAXSYMBOLS << 1);
    short[] lengthTree = createTreeTable2(
            getState().getLengthTreeLengtsTable(),
            lengthTableSize,
            ChmConstants.LZX_LENGTH_TABLEBITS,
            ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
    getState().setLengthTreeTable(lengthTree);
}
/**
 * Copies raw bytes from the section into {@code content}. The byte count
 * comes from the state's remaining count, capped at the end of the content;
 * neither parameter is read.
 *
 * @param len unused
 * @param prevcontent unused
 */
private void decompressUncompressedBlock(int len, byte[] prevcontent) {
    final int start = getContentLength();
    final int pending = getState().getBlockRemaining();
    if (start + pending <= getBlockLength()) {
        // Everything pending fits into the content.
        final int end = start + pending;
        for (int pos = start; pos < end; pos++) {
            content[pos] = getChmSection().getByte();
        }
        setContentLength(end);
        getState().setBlockRemaining(0);
    } else {
        // Fill up to the end of the content only.
        for (int pos = start; pos < getBlockLength(); pos++) {
            content[pos] = getChmSection().getByte();
        }
        final int blockEnd = (int) getBlockLength();
        getState().setBlockRemaining(blockEnd - start);
        setContentLength(blockEnd);
    }
}
/**
* Decodes an aligned-offset block into {@code content}, from the current
* content length up to {@code len}. Each iteration decodes one main tree
* symbol: values below {@code LZX_NUM_CHARS} are literal bytes, anything
* else is a match that is copied from earlier output or, when it reaches
* before the start of this content, from the tail of {@code prevcontent}.
*
* @param len content position to decode up to (exclusive)
* @param prevcontent bytes used as the source for matches that start before
*        this block's content
* @throws TikaException if the section, the state or the main tree table is
*         null, or if a peeked main tree index is outside the table
*/
private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException {
if ((getChmSection() == null) || (getState() == null)
|| (getState().getMainTreeTable() == null))
throw new ChmParsingException("chm section is null");
short s;
int x, i, border;
int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
int matchoffset = 0;
for (i = getContentLength(); i < len; i++) {
/* new code */
//read huffman tree from main tree
border = getChmSection().peekBits(
ChmConstants.LZX_MAINTREE_TABLEBITS);
if (border >= getState().mainTreeTable.length)
throw new ChmParsingException("error decompressing aligned block.");
//break;
/* end new code */
s = getState().mainTreeTable[getChmSection().peekBits(
ChmConstants.LZX_MAINTREE_TABLEBITS)];
// A value at or above the element count is not a symbol yet: follow the
// table one extra bit at a time until a symbol is reached.
if (s >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_MAINTREE_TABLEBITS;
do {
x++;
s <<= 1;
s += getChmSection().checkBit(x);
} while ((s = getState().mainTreeTable[s]) >= getState()
.getMainTreeElements());
}
//System.out.printf("%d,", s);
//?getChmSection().getSyncBits(getState().mainTreeTable[s]);
// Consume as many bits as the decoded symbol's code length.
getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
if (s < ChmConstants.LZX_NUM_CHARS) {
// Literal byte.
content[i] = (byte) s;
} else {
// Match: the low bits of s carry the length, s >>> 3 the offset slot.
s -= ChmConstants.LZX_NUM_CHARS;
matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
// The maximum primary value means a length tree symbol follows.
if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
matchfooter = getState().lengthTreeTable[getChmSection()
.peekBits(ChmConstants.LZX_LENGTH_TABLEBITS)];//.LZX_MAINTREE_TABLEBITS)];
if (matchfooter >= ChmConstants.LZX_LENGTH_MAXSYMBOLS/*?LZX_LENGTH_TABLEBITS*/) {
x = ChmConstants.LZX_LENGTH_TABLEBITS;
do {
x++;
matchfooter <<= 1;
matchfooter += getChmSection().checkBit(x);
} while ((matchfooter = getState().lengthTreeTable[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
}
getChmSection().getSyncBits(
getState().lengthTreeLengtsTable[matchfooter]);
matchlen += matchfooter;
}
matchlen += ChmConstants.LZX_MIN_MATCH;
matchoffset = s >>> 3;
if (matchoffset > 2) {
// Slots above 2 carry an explicit offset: a base from POSITION_BASE
// plus extra bits read from the stream and/or an aligned tree symbol.
extra = ChmConstants.EXTRA_BITS[matchoffset];
matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
if (extra > 3) {
// More than 3 extra bits: the upper ones are read verbatim and
// shifted left by 3, the low part comes from the aligned tree.
extra -= 3;
long verbatim_bits = getChmSection().getSyncBits(extra);
matchoffset += (verbatim_bits << 3);
//READ HUFF SYM in Aligned Tree
int aligned_bits = getChmSection().peekBits(
ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[aligned_bits];
if (t >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS; //?LZX_ALIGNED_TABLEBITS
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >= getState()
.getMainTreeElements());
}
getChmSection().getSyncBits(
getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra == 3) {
// Exactly 3 extra bits: only an aligned tree symbol is added.
int g = getChmSection().peekBits(
ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[g];
if (t >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS;
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >= getState()
.getMainTreeElements());
}
getChmSection().getSyncBits(
getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra > 0) {
// 1 or 2 extra bits: read them directly.
long l = getChmSection().getSyncBits(extra);
matchoffset += l;
} else
matchoffset = 1;
// A new explicit offset shifts R0 -> R1 -> R2 and becomes R0.
getState().setR2(getState().getR1());
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else if (matchoffset == 0) {
// Slot 0: reuse R0 unchanged.
matchoffset = (int) getState().getR0();
} else if (matchoffset == 1) {
// Slot 1: use R1 and swap it with R0.
matchoffset = (int) getState().getR1();
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else /** match_offset == 2 */
{
// Slot 2: use R2 and swap it with R0.
matchoffset = (int) getState().getR2();
getState().setR2(getState().getR0());
getState().setR0(matchoffset);
}
rundest = i;
runsrc = rundest - matchoffset;
i += (matchlen - 1);
// A match that would run past len ends the decoding loop without copying.
if (i > len)
break;
if (runsrc < 0) {
// The match starts before this block's content.
if (matchlen + runsrc <= 0) {
// The whole match is taken from the tail of prevcontent.
runsrc = prevcontent.length + runsrc;
while (matchlen-- > 0)
content[rundest++] = prevcontent[runsrc++];
} else {
// The match starts in prevcontent and continues at content[0].
runsrc = prevcontent.length + runsrc;
while (runsrc < prevcontent.length)
content[rundest++] = prevcontent[runsrc++];
matchlen = matchlen + runsrc - prevcontent.length;
runsrc = 0;
while (matchlen-- > 0)
content[rundest++] = content[runsrc++];
}
} else {
/* copies any wrappes around source data */
// NOTE(review): runsrc is >= 0 in this branch, so this loop never runs.
while ((runsrc < 0) && (matchlen-- > 0)) {
content[rundest++] = content[(int) (runsrc + getBlockLength())];
runsrc++;
}
/* copies match data - no worries about destination wraps */
while (matchlen-- > 0)
content[rundest++] = content[runsrc++];
}
}
}
// The content length is set to len even when the loop ended early.
setContentLength(len);
}
/**
 * Fails when the given table is missing.
 *
 * @param array the table to check
 * @throws TikaException (a {@code ChmParsingException}) if {@code array} is null
 */
private void assertShortArrayNotNull(short[] array) throws TikaException {
    if (null != array) {
        return;
    }
    throw new ChmParsingException("short[] is null");
}
/**
 * Decodes a verbatim block into {@code content}, from the current content
 * length up to {@code len}. Each iteration decodes one main tree symbol:
 * values below {@code LZX_NUM_CHARS} are literal bytes, anything else is a
 * match copied from earlier output or, when it reaches before the start of
 * this content, from the tail of {@code prevcontent}.
 *
 * @param len content position to decode up to (exclusive)
 * @param prevcontent bytes used as the source for matches that start before
 *        this block's content
 * @throws TikaException if the main tree table is null, or if a match that
 *         straddles {@code prevcontent} and {@code content} cannot be copied
 *         within the buffer bounds
 */
private void decompressVerbatimBlock(int len, byte[] prevcontent) throws TikaException {
    short s;
    int x, i;
    int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
    int matchoffset = 0;
    for (i = getContentLength(); i < len; i++) {
        int f = getChmSection().peekBits(
                ChmConstants.LZX_MAINTREE_TABLEBITS);
        assertShortArrayNotNull(getState().getMainTreeTable());
        s = getState().getMainTreeTable()[f];
        // A value at or above the symbol limit is not a symbol yet: follow
        // the table one extra bit at a time until a symbol is reached.
        if (s >= ChmConstants.LZX_MAIN_MAXSYMBOLS) {
            x = ChmConstants.LZX_MAINTREE_TABLEBITS;
            do {
                x++;
                s <<= 1;
                s += getChmSection().checkBit(x);
            } while ((s = getState().getMainTreeTable()[s]) >= ChmConstants.LZX_MAIN_MAXSYMBOLS);
        }
        // Consume as many bits as the decoded symbol's code length.
        getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
        if (s < ChmConstants.LZX_NUM_CHARS) {
            // Literal byte.
            content[i] = (byte) s;
        } else {
            // Match: the low bits of s carry the length, s >>> 3 the offset slot.
            s -= ChmConstants.LZX_NUM_CHARS;
            matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
            // The maximum primary value means a length tree symbol follows.
            if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
                matchfooter = getState().getLengthTreeTable()[getChmSection()
                        .peekBits(ChmConstants.LZX_LENGTH_TABLEBITS)];
                if (matchfooter >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS) {
                    x = ChmConstants.LZX_LENGTH_TABLEBITS;
                    do {
                        x++;
                        matchfooter <<= 1;
                        matchfooter += getChmSection().checkBit(x);
                    } while ((matchfooter = getState().getLengthTreeTable()[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
                }
                getChmSection().getSyncBits(
                        getState().getLengthTreeLengtsTable()[matchfooter]);
                matchlen += matchfooter;
            }
            matchlen += ChmConstants.LZX_MIN_MATCH;
            // shorter than 2
            matchoffset = s >>> 3;
            if (matchoffset > 2) {
                if (matchoffset != 3) { // should get other bits to retrieve
                    // offset
                    extra = ChmConstants.EXTRA_BITS[matchoffset];
                    long l = getChmSection().getSyncBits(extra);
                    matchoffset = (int) (ChmConstants.POSITION_BASE[matchoffset] - 2 + l);
                } else {
                    matchoffset = 1;
                }
                // A new explicit offset shifts R0 -> R1 -> R2 and becomes R0.
                getState().setR2(getState().getR1());
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else if (matchoffset == 0) {
                // Slot 0: reuse R0 unchanged.
                matchoffset = (int) getState().getR0();
            } else if (matchoffset == 1) {
                // Slot 1: use R1 and swap it with R0.
                matchoffset = (int) getState().getR1();
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else /* match_offset == 2 */
            {
                // Slot 2: use R2 and swap it with R0.
                matchoffset = (int) getState().getR2();
                getState().setR2(getState().getR0());
                getState().setR0(matchoffset);
            }
            rundest = i;
            runsrc = rundest - matchoffset;
            i += (matchlen - 1);
            // A match that would run past len ends the decoding loop without copying.
            if (i > len)
                break;
            if (runsrc < 0) {
                // The match starts before this block's content.
                if (matchlen + runsrc <= 0) {
                    // The whole match is taken from the tail of prevcontent;
                    // positions failing the bounds guard are skipped.
                    runsrc = prevcontent.length + runsrc;
                    while ((matchlen-- > 0) && (prevcontent != null)
                            && ((runsrc + 1) > 0))
                        if ((rundest < content.length)
                                && (runsrc < content.length))
                            content[rundest++] = prevcontent[runsrc++];
                } else {
                    // The match starts in prevcontent and continues at content[0].
                    runsrc = prevcontent.length + runsrc;
                    while (runsrc < prevcontent.length) {
                        if ((rundest < content.length)
                                && (runsrc < content.length)) {
                            content[rundest++] = prevcontent[runsrc++];
                        } else {
                            // Neither index advances once the guard fails, so
                            // the loop could never end; report the corrupt
                            // data instead of hanging.
                            throw new ChmParsingException(
                                    "error decompressing verbatim block: match copy exceeds content bounds");
                        }
                    }
                    matchlen = matchlen + runsrc - prevcontent.length;
                    runsrc = 0;
                    while (matchlen-- > 0)
                        content[rundest++] = content[runsrc++];
                }
            } else {
                /* copies any wrapped source data */
                // runsrc is >= 0 in this branch, so this loop never runs.
                while ((runsrc < 0) && (matchlen-- > 0)) {
                    content[rundest++] = content[(int) (runsrc + getBlockLength())];
                    runsrc++;
                }
                /* copies match data - no worries about destination wraps */
                while (matchlen-- > 0) {
                    if ((rundest < content.length)
                            && (runsrc < content.length))
                        content[rundest++] = content[runsrc++];
                }
            }
        }
    }
    // The content length is set to len even when the loop ended early.
    setContentLength(len);
}
/**
* Fills entries {@code offset} (inclusive) to {@code tablelen} (exclusive) of
* the state's length tree code-length table by decoding pre-tree symbols
* from the bit stream. Symbols 0-16 update one entry; 17, 18 and 19 write a
* run of entries.
*
* @param offset first table index to fill
* @param tablelen index at which filling stops
* @param pretreetable decode table of the pre-tree
* @param prelentable code lengths of the pre-tree symbols
* @throws TikaException if a table or the chm section is null
*/
private void createLengthTreeLenTable(int offset, int tablelen,
short[] pretreetable, short[] prelentable) throws TikaException {
// NOTE(review): prelentable is tested twice in this condition.
if (prelentable == null || getChmSection() == null
|| pretreetable == null || prelentable == null)
throw new ChmParsingException("is null");
int i = offset; // represents offset
int z, y, x;// local counters
while (i < tablelen) {
//Read HUFF sym to z
z = pretreetable[getChmSection().peekBits(
ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) {// 1 bug, should be
// 20
// Not a symbol yet: follow the table one extra bit at a time.
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS);
}
// Consume the bits of the decoded symbol.
getChmSection().getSyncBits(prelentable[z]);
if (z < 17) {
// Symbols 0-16: new length = (existing length - z), wrapped by 17.
z = getState().getLengthTreeLengtsTable()[i] - z;
if (z < 0)
z = z + 17;
getState().getLengthTreeLengtsTable()[i] = (short) z;
i++;
} else if (z == 17) {
// Run of (4-bit value + 4) zeros; writes past the table end are skipped.
y = getChmSection().getSyncBits(4);
y += 4;
for (int j = 0; j < y; j++)
if (i < getState().getLengthTreeLengtsTable().length)
getState().getLengthTreeLengtsTable()[i++] = 0;
} else if (z == 18) {
// Run of (5-bit value + 20) zeros; no bounds check here.
y = getChmSection().getSyncBits(5);
y += 20;
for (int j = 0; j < y; j++)
//no tolerate //if (i < getState().getLengthTreeLengtsTable().length)
getState().getLengthTreeLengtsTable()[i++] = 0;
} else if (z == 19) {
// Run of (1-bit value + 4) copies of one value computed from a
// second pre-tree symbol; no bounds check here.
y = getChmSection().getSyncBits(1);
y += 4;
z = pretreetable[getChmSection().peekBits(
ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) {// 20
x = ChmConstants.LZX_PRETREE_TABLEBITS;// 6
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS);//LZX_MAINTREE_TABLEBITS);
}
getChmSection().getSyncBits(prelentable[z]);
z = getState().getLengthTreeLengtsTable()[i] - z;
if (z < 0)
z = z + 17;
for (int j = 0; j < y; j++)
getState().getLengthTreeLengtsTable()[i++] = (short) z;
}
}
}
/**
 * Builds the main tree. Its code lengths are decoded in two parts, each
 * with its own pre-tree read from the stream: first the entries below
 * {@code LZX_NUM_CHARS}, then the rest of the table. The finished decode
 * table is stored in the state.
 *
 * @throws TikaException if a table cannot be built
 */
private void createMainTreeTable() throws TikaException {
    final int preBits = ChmConstants.LZX_PRETREE_TABLEBITS;
    final int preSymbols = ChmConstants.LZX_PRETREE_MAXSYMBOLS;
    final int firstMatchSymbol = ChmConstants.LZX_NUM_CHARS;

    // Part one: entries [0, LZX_NUM_CHARS).
    short[] preLengths = createPreLenTable();
    short[] preTree = createTreeTable2(preLengths,
            (1 << preBits) + (preSymbols << 1), preBits, preSymbols);
    createMainTreeLenTable(0, firstMatchSymbol, preTree, preLengths);

    // Part two: entries [LZX_NUM_CHARS, table length), with a fresh pre-tree.
    preLengths = createPreLenTable();
    preTree = createTreeTable2(preLengths,
            (1 << preBits) + (preSymbols << 1), preBits, preSymbols);
    createMainTreeLenTable(firstMatchSymbol,
            getState().mainTreeLengtsTable.length, preTree, preLengths);

    // Decode table for the main tree.
    final int mainBits = ChmConstants.LZX_MAINTREE_TABLEBITS;
    short[] mainTree = createTreeTable2(getState().mainTreeLengtsTable,
            (1 << mainBits) + (ChmConstants.LZX_MAINTREE_MAXSYMBOLS << 1),
            mainBits, getState().getMainTreeElements());
    getState().setMainTreeTable(mainTree);
}
/**
* Fills entries {@code offset} (inclusive) to {@code tablelen} (exclusive) of
* the state's main tree code-length table by decoding pre-tree symbols from
* the bit stream. Symbols 0-16 update one entry; 17, 18 and 19 write a run
* of entries.
*
* @param offset first table index to fill
* @param tablelen index at which filling stops
* @param pretreetable decode table of the pre-tree
* @param prelentable code lengths of the pre-tree symbols
* @throws TikaException if {@code pretreetable} is null or a zero run would
*         write past the end of the table
*/
private void createMainTreeLenTable(int offset, int tablelen,
short[] pretreetable, short[] prelentable) throws TikaException {
if (pretreetable == null)
throw new ChmParsingException("pretreetable is null");
int i = offset;
int z, y, x;
while (i < tablelen) {
int f = getChmSection().peekBits(
ChmConstants.LZX_PRETREE_TABLEBITS);
z = pretreetable[f];
// Not a symbol yet: follow the table one extra bit at a time.
if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) {
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS);
}
// Consume the bits of the decoded symbol.
getChmSection().getSyncBits(prelentable[z]);
if (z < 17) {
// Symbols 0-16: new length = (existing length - z), wrapped by 17.
z = getState().getMainTreeLengtsTable()[i] - z;
if (z < 0)
z = z + 17;
getState().mainTreeLengtsTable[i] = (short) z;
i++;
} else if (z == 17) {
// Run of (4-bit value + 4) zeros; running off the table is an error.
y = getChmSection().getSyncBits(4);
y += 4;
for (int j = 0; j < y; j++) {
assertInRange(getState().getMainTreeLengtsTable(), i);
getState().mainTreeLengtsTable[i++] = 0;
}
} else if (z == 18) {
// Run of (5-bit value + 20) zeros; running off the table is an error.
y = getChmSection().getSyncBits(5);
y += 20;
for (int j = 0; j < y; j++) {
assertInRange(getState().getMainTreeLengtsTable(), i);
getState().mainTreeLengtsTable[i++] = 0;
}
} else if (z == 19) {
// Run of (1-bit value + 4) copies of one value computed from a
// second pre-tree symbol; writes past the table end are skipped.
y = getChmSection().getSyncBits(1);
y += 4;
z = pretreetable[getChmSection().peekBits(
ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) {
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS);
}
getChmSection().getSyncBits(prelentable[z]);
z = getState().mainTreeLengtsTable[i] - z;
if (z < 0)
z = z + 17;
for (int j = 0; j < y; j++)
if (i < getState().getMainTreeLengtsTable().length)
getState().mainTreeLengtsTable[i++] = (short) z;
}
}
}
/**
 * Verifies that {@code index} is below the length of {@code array}.
 *
 * @param array the array about to be indexed
 * @param index the index about to be used
 * @throws ChmParsingException if {@code index} is at or beyond the array length
 */
private void assertInRange(short[] array, int index) throws ChmParsingException {
    final int size = array.length;
    if (index < size) {
        return;
    }
    throw new ChmParsingException(index + " is bigger than " + size);
}
/**
 * Reads the aligned tree's code lengths from the bit stream.
 *
 * @return a new array of {@code LZX_ALIGNED_NUM_ELEMENTS} code lengths
 */
private short[] createAlignedLenTable() {
    // The value of LZX_BLOCKTYPE_UNCOMPRESSED is used here as the bit count per entry.
    final int bitsPerEntry = ChmConstants.LZX_BLOCKTYPE_UNCOMPRESSED;
    final int entryCount = ChmConstants.LZX_ALIGNED_NUM_ELEMENTS;
    short[] lengths = new short[entryCount];
    int slot = 0;
    while (slot < entryCount) {
        lengths[slot] = (short) getChmSection().getSyncBits(bitsPerEntry);
        slot++;
    }
    return lengths;
}
/**
 * Reads the aligned tree's code lengths, builds its decode table and stores
 * both in the state.
 *
 * @throws ChmParsingException if the decode table overflows
 */
private void createAlignedTreeTable() throws ChmParsingException {
    final int tableBits = ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
    final int symbolCount = ChmConstants.LZX_ALIGNED_MAXSYMBOLS;

    short[] lengths = createAlignedLenTable();
    getState().setAlignedLenTable(lengths);

    int tableSize = (1 << tableBits) + (symbolCount << 1);
    short[] tree = createTreeTable2(getState().getAlignedLenTable(),
            tableSize, tableBits, symbolCount);
    getState().setAlignedTreeTable(tree);
}
/**
 * Builds a decode table from a list of code lengths. Codes of at most
 * {@code bits} bits are entered directly in the first {@code 1 << bits}
 * slots; longer codes (up to 16 bits) are reached through pairs of slots
 * allocated after the direct part.
 *
 * @param lentable code length per symbol
 * @param tablelen size of the table to allocate
 * @param bits number of bits covered by the direct part
 * @param maxsymbol number of symbols to consider
 * @return the decode table
 * @throws ChmParsingException if the code lengths overflow the table
 */
private short[] createTreeTable2(short[] lentable, int tablelen, int bits,
        int maxsymbol) throws ChmParsingException {
    short[] table = new short[tablelen];
    /* the current position in the decode table */
    int pos = 0;
    long tableMask = (1 << bits);
    long bitMask = (tableMask >> 1);
    long nextSymbol = bitMask;
    int bitNum;

    /* fills entries for short codes for a direct mapping */
    for (bitNum = 1; bitNum <= bits; bitNum++) {
        for (short sym = 0; sym < maxsymbol; sym++) {
            if (!(lentable.length > sym && lentable[sym] == bitNum)) {
                continue;
            }
            int leaf = pos;
            pos += bitMask;
            if (pos > tableMask) {
                /* table overflow */
                throw new ChmParsingException("Table overflow");
            }
            for (long fill = bitMask; fill > 0; fill--) {
                table[leaf++] = sym;
            }
        }
        bitMask >>= 1;
    }

    /* if there are any codes longer than nbits */
    if (pos != tableMask) {
        /* clears the remainder of the table */
        for (int leaf = pos; leaf < tableMask; leaf++) {
            table[leaf] = 0;
        }
        /* gives ourselves room for codes to grow by up to 16 more bits */
        pos <<= 16;
        tableMask <<= 16;
        bitMask = 1 << 15;
        for (; bitNum <= 16; bitNum++) {
            for (short sym = 0; sym < maxsymbol; sym++) {
                if (!((lentable.length > sym) && (lentable[sym] == bitNum))) {
                    continue;
                }
                int leaf = pos >> 16;
                for (long fill = 0; fill < bitNum - bits; fill++) {
                    /* if this path hasn't been taken yet, 'allocate' two entries */
                    if (table[leaf] == 0
                            && ((nextSymbol << 1) + 1) < table.length) {
                        table[(int) (nextSymbol << 1)] = 0;
                        table[(int) (nextSymbol << 1) + 1] = 0;
                        table[leaf] = (short) nextSymbol++;
                    }
                    /* follows the path and select either left or right for next bit */
                    leaf = table[leaf] << 1;
                    if (((pos >> (15 - fill)) & 1) != 0) {
                        leaf++;
                    }
                }
                table[leaf] = sym;
                pos += bitMask;
                if (pos > tableMask) {
                    /* table overflow */
                    throw new ChmParsingException("Table overflow");
                }
            }
            bitMask >>= 1;
        }
    }
    /* the table is returned whether or not it is full */
    return table;
}
/**
 * @return the decompressed content buffer itself (not a copy)
 */
public byte[] getContent() {
    return this.content;
}
/**
 * Returns a copy of a slice of the content.
 *
 * @param startOffset first offset passed to {@code ChmCommons.copyOfRange}
 * @param endOffset second offset passed to {@code ChmCommons.copyOfRange}
 * @return the copied slice, or a one-byte array when there is no content
 * @throws TikaException if the range cannot be copied
 */
public byte[] getContent(int startOffset, int endOffset) throws TikaException {
    if (getContent() == null) {
        return new byte[1];
    }
    return ChmCommons.copyOfRange(getContent(), startOffset, endOffset);
}
/**
 * Returns a copy of the content from {@code start} to its end.
 *
 * @param start first offset passed to {@code ChmCommons.copyOfRange}
 * @return the copied tail, or a one-byte array when there is no content
 * @throws TikaException if the range cannot be copied
 */
public byte[] getContent(int start) throws TikaException {
    if (getContent() == null) {
        return new byte[1];
    }
    return ChmCommons.copyOfRange(getContent(), start, getContent().length);
}
/**
 * Allocates the content buffer.
 *
 * @param contentLength size of the buffer in bytes
 * @throws TikaMemoryLimitException if {@code contentLength} is negative or
 *         greater than {@code MAX_CONTENT_SIZE}
 */
private void setContent(int contentLength) throws TikaMemoryLimitException {
    // A negative value means the caller's long length overflowed the int
    // cast; reject it here rather than let the allocation below fail with
    // an unchecked NegativeArraySizeException.
    if (contentLength < 0) {
        throw new TikaMemoryLimitException("content length (" + contentLength
                + " bytes) is negative; the requested size overflowed an int");
    }
    if (contentLength > MAX_CONTENT_SIZE) {
        throw new TikaMemoryLimitException("content length (" + contentLength +
                " bytes) is > MAX_CONTENT_SIZE");
    }
    this.content = new byte[contentLength];
}
/**
 * Initialises the decoder state: a clone of the previous block's state when
 * there is one, otherwise a fresh state sized by the block length.
 *
 * @param chmPrevLzxBlock the previous block, or {@code null}
 * @throws TikaException if there is no previous block and the block length
 *         does not fit a fresh state (it is {@code Integer.MAX_VALUE} or more)
 */
private void checkLzxBlock(ChmLzxBlock chmPrevLzxBlock) throws TikaException {
    if (chmPrevLzxBlock != null) {
        //use clone to avoid changing a cached or to be cached block
        setState(chmPrevLzxBlock.getState().clone());
        return;
    }
    // With no previous block there is nothing to clone, so an oversized
    // length must be rejected here; it used to fall through to a
    // NullPointerException.
    if (getBlockLength() >= Integer.MAX_VALUE) {
        throw new ChmParsingException("block length (" + getBlockLength()
                + ") is too large for an lzx block without a previous block");
    }
    setState(new ChmLzxState((int) getBlockLength()));
}
/**
 * Checks the constructor arguments, rejecting the first invalid one.
 *
 * @param blockNumber must not be negative
 * @param dataSegment must be non-null and non-empty
 * @param blockLength must be greater than zero
 * @return {@code true} when all three arguments are valid
 * @throws TikaException (a {@code ChmParsingException}) naming the first
 *         invalid argument
 */
private boolean validateConstructorParams(int blockNumber,
        byte[] dataSegment, long blockLength) throws TikaException {
    if (blockNumber < 0) {
        throw new ChmParsingException("block number should be possitive");
    }
    if (dataSegment == null || dataSegment.length <= 0) {
        throw new ChmParsingException("data segment should not be null");
    }
    if (blockLength <= 0) {
        throw new ChmParsingException(
                "block length should be more than zero");
    }
    return true;
}
/**
 * @return the block number given to the constructor
 */
public int getBlockNumber() {
    return this.block_number;
}
/**
 * Stores the block number.
 *
 * @param block_number the number of this block
 */
private void setBlockNumber(int block_number) {
    this.block_number = block_number;
}
/**
 * @return the block length given to the constructor
 */
private long getBlockLength() {
    return this.block_length;
}
/**
 * Stores the block length.
 *
 * @param block_length the length of this block
 */
private void setBlockLength(long block_length) {
    this.block_length = block_length;
}
/**
 * @return the decoder state of this block (the object itself, not a copy)
 */
public ChmLzxState getState() {
    return this.state;
}
/**
 * Stores the decoder state.
 *
 * @param state the state to use for this block
 */
private void setState(ChmLzxState state) {
    this.state = state;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy