All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.chm.lzx.ChmLzxBlock Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.chm.lzx;

import java.math.BigInteger;

import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.parser.chm.core.ChmCommons;
import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;

/**
 * Decompresses a chm block. Depending on chm block type chooses most relevant
 * decompressing method. A chm block type can be as follows:
  • UNDEFINED * - no action taken, i.e. skipping the block
  • VERBATIM
  • ALIGNED_OFFSET *
  • UNCOMPRESSED the most simplest In addition there are unknown types (4-7). * Currently relying on previous chm block these types changing according to the * previous chm block type. We need to invent more appropriate way to handle * such types. * */ public class ChmLzxBlock { private static int MAX_CONTENT_SIZE = 50 * 1024 * 1024; private int block_number; private long block_length; private ChmLzxState state; private byte[] content = null; private ChmSection chmSection = null; private int contentLength = 0; // trying to find solution for bad blocks ... private int previousBlockType = -1; public ChmLzxBlock(int blockNumber, byte[] dataSegment, long blockLength, ChmLzxBlock prevBlock) throws TikaException { try { if (validateConstructorParams(blockNumber, dataSegment, blockLength)) { setBlockNumber(blockNumber); if (prevBlock != null && prevBlock.getState().getBlockLength() > prevBlock .getState().getBlockRemaining()) setChmSection(new ChmSection(dataSegment, prevBlock.getContent())); else setChmSection(new ChmSection(dataSegment)); setBlockLength(blockLength); // ============================================ // we need to take care of previous context // ============================================ checkLzxBlock(prevBlock); if (prevBlock == null || blockLength < (int) getBlockLength()) { setContent((int) getBlockLength()); } else { setContent((int) blockLength); } if (prevBlock != null && prevBlock.getState() != null) previousBlockType = prevBlock.getState().getBlockType(); extractContent(); } else throw new TikaException("Check your chm lzx block parameters"); } catch (TikaException e) { throw e; } } protected int getContentLength() { return contentLength; } protected void setContentLength(int contentLength) { this.contentLength = contentLength; } private ChmSection getChmSection() { return chmSection; } private void setChmSection(ChmSection chmSection) { this.chmSection = chmSection; } private void assertStateNotNull() throws TikaException { if (getState() == null) throw new ChmParsingException("state is null"); } private void extractContent() throws TikaException { assertStateNotNull(); if (getChmSection().getData() != null) { boolean continueLoop = true; while (continueLoop && getContentLength() < getBlockLength()) { if (getState() != null && getState().getBlockRemaining() == 0) { if (getState().getHadStarted() == LzxState.NOT_STARTED_DECODING) { getState().setHadStarted(LzxState.STARTED_DECODING); if (getChmSection().getSyncBits(1) == 1) { int intelSizeTemp = (getChmSection() .getSyncBits(16) << 16) + getChmSection().getSyncBits(16); if (intelSizeTemp >= 0) getState().setIntelFileSize(intelSizeTemp); else getState().setIntelFileSize(0); } } getState().setBlockType(getChmSection().getSyncBits(3)); getState().setBlockLength( (getChmSection().getSyncBits(16) << 8) + getChmSection().getSyncBits(8)); getState().setBlockRemaining(getState().getBlockLength()); // ---------------------------------------- // Trying to handle 3 - 7 block types // ---------------------------------------- if (getState().getBlockType() > 3) { if (previousBlockType >= 0 && previousBlockType < 3) getState().setBlockType(previousBlockType); } switch (getState().getBlockType()) { case ChmCommons.ALIGNED_OFFSET: createAlignedTreeTable(); //fall through case ChmCommons.VERBATIM: /* Creates mainTreeTable */ createMainTreeTable(); createLengthTreeTable(); if (getState().getMainTreeLengtsTable()[0xe8] != 0) getState().setIntelState(IntelState.STARTED); break; case ChmCommons.UNCOMPRESSED: getState().setIntelState(IntelState.STARTED); if (getChmSection().getTotal() > 16) getChmSection().setSwath( getChmSection().getSwath() - 1); getState().setR0( (new BigInteger(getChmSection() .reverseByteOrder( getChmSection().unmarshalBytes( 4))).longValue())); getState().setR1( (new BigInteger(getChmSection() .reverseByteOrder( getChmSection().unmarshalBytes( 4))).longValue())); getState().setR2( (new BigInteger(getChmSection() .reverseByteOrder( getChmSection().unmarshalBytes( 4))).longValue())); break; default: break; } } //end of if BlockRemaining == 0 int tempLen; if (getContentLength() + getState().getBlockRemaining() > getBlockLength()) { getState().setBlockRemaining( getContentLength() + getState().getBlockRemaining() - (int) getBlockLength()); tempLen = (int) getBlockLength(); } else { tempLen = getContentLength() + getState().getBlockRemaining(); getState().setBlockRemaining(0); } int lastLength = getContentLength(); switch (getState().getBlockType()) { case ChmCommons.ALIGNED_OFFSET: // if(prevblock.lzxState.length>prevblock.lzxState.remaining) decompressAlignedBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent());// prevcontext break; case ChmCommons.VERBATIM: decompressVerbatimBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent()); break; case ChmCommons.UNCOMPRESSED: decompressUncompressedBlock(tempLen, getChmSection().getPrevContent() == null ? getChmSection().getData() : getChmSection().getPrevContent()); break; } getState().increaseFramesRead(); if ((getState().getFramesRead() < 32768) && getState().getIntelFileSize() != 0) intelE8Decoding(); continueLoop = getContentLength() > lastLength; } } } protected void intelE8Decoding() { if (getBlockLength() <= ChmConstants.LZX_PRETREE_TABLEBITS || (getState().getIntelState() == IntelState.NOT_STARTED)) { getState().setBlockRemaining( getState().getBlockRemaining() - (int) getBlockLength()); } else { long curpos = getState().getBlockRemaining(); getState().setBlockRemaining( getState().getBlockRemaining() - (int) getBlockLength()); int i = 0; while (i < getBlockLength() - 10) { if (content[i] != 0xe8) { i++; continue; } byte[] b = new byte[4]; b[0] = getContent()[i + 3]; b[1] = getContent()[i + 2]; b[2] = getContent()[i + 1]; b[3] = getContent()[i + 0]; long absoff = (new BigInteger(b)).longValue(); if ((absoff >= -curpos) && (absoff < getState().getIntelFileSize())) { long reloff = (absoff >= 0) ? absoff - curpos : absoff + getState().getIntelFileSize(); getContent()[i + 0] = (byte) reloff; getContent()[i + 1] = (byte) (reloff >>> 8); getContent()[i + 2] = (byte) (reloff >>> 16); getContent()[i + 3] = (byte) (reloff >>> 24); } i += 4; curpos += 5; } } } private short[] createPreLenTable() { short[] tmp = new short[ChmConstants.LZX_PRETREE_MAXSYMBOLS]; for (int i = 0; i < ChmConstants.LZX_PRETREE_MAXSYMBOLS; i++) { tmp[i] = (short) getChmSection().getSyncBits( ChmConstants.LZX_PRETREE_NUM_ELEMENTS_BITS); } return tmp; } private void createLengthTreeTable() throws TikaException { //Read Pre Tree Table short[] prelentable = createPreLenTable(); if (prelentable == null) { throw new ChmParsingException("pretreetable is null"); } short[] pretreetable = createTreeTable2(prelentable, (1 << ChmConstants.LZX_PRETREE_TABLEBITS) + (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1), ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS); if (pretreetable == null) { throw new ChmParsingException("pretreetable is null"); } //Build Length Tree createLengthTreeLenTable(0, ChmConstants.LZX_NUM_SECONDARY_LENGTHS, pretreetable, prelentable); getState().setLengthTreeTable( createTreeTable2(getState().getLengthTreeLengtsTable(), (1 << ChmConstants.LZX_LENGTH_TABLEBITS) + (ChmConstants.LZX_LENGTH_MAXSYMBOLS << 1), ChmConstants.LZX_LENGTH_TABLEBITS, ChmConstants.LZX_NUM_SECONDARY_LENGTHS)); } private void decompressUncompressedBlock(int len, byte[] prevcontent) { if (getContentLength() + getState().getBlockRemaining() <= getBlockLength()) { for (int i = getContentLength(); i < (getContentLength() + getState() .getBlockRemaining()); i++) content[i] = getChmSection().getByte(); setContentLength(getContentLength() + getState().getBlockRemaining()); getState().setBlockRemaining(0); } else { for (int i = getContentLength(); i < getBlockLength(); i++) content[i] = getChmSection().getByte(); getState().setBlockRemaining( (int) getBlockLength() - getContentLength());// = blockLen - // contentlen; setContentLength((int) getBlockLength()); } } private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException { if ((getChmSection() == null) || (getState() == null) || (getState().getMainTreeTable() == null)) throw new ChmParsingException("chm section is null"); short s; int x, i, border; int matchlen = 0, matchfooter = 0, extra, rundest, runsrc; int matchoffset = 0; for (i = getContentLength(); i < len; i++) { /* new code */ //read huffman tree from main tree border = getChmSection().peekBits( ChmConstants.LZX_MAINTREE_TABLEBITS); if (border >= getState().mainTreeTable.length) throw new ChmParsingException("error decompressing aligned block."); //break; /* end new code */ s = getState().mainTreeTable[getChmSection().peekBits( ChmConstants.LZX_MAINTREE_TABLEBITS)]; if (s >= getState().getMainTreeElements()) { x = ChmConstants.LZX_MAINTREE_TABLEBITS; do { x++; s <<= 1; s += getChmSection().checkBit(x); } while ((s = getState().mainTreeTable[s]) >= getState() .getMainTreeElements()); } //System.out.printf("%d,", s); //?getChmSection().getSyncBits(getState().mainTreeTable[s]); getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]); if (s < ChmConstants.LZX_NUM_CHARS) { content[i] = (byte) s; } else { s -= ChmConstants.LZX_NUM_CHARS; matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS; if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) { matchfooter = getState().lengthTreeTable[getChmSection() .peekBits(ChmConstants.LZX_LENGTH_TABLEBITS)];//.LZX_MAINTREE_TABLEBITS)]; if (matchfooter >= ChmConstants.LZX_LENGTH_MAXSYMBOLS/*?LZX_LENGTH_TABLEBITS*/) { x = ChmConstants.LZX_LENGTH_TABLEBITS; do { x++; matchfooter <<= 1; matchfooter += getChmSection().checkBit(x); } while ((matchfooter = getState().lengthTreeTable[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS); } getChmSection().getSyncBits( getState().lengthTreeLengtsTable[matchfooter]); matchlen += matchfooter; } matchlen += ChmConstants.LZX_MIN_MATCH; matchoffset = s >>> 3; if (matchoffset > 2) { extra = ChmConstants.EXTRA_BITS[matchoffset]; matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2); if (extra > 3) { extra -= 3; long verbatim_bits = getChmSection().getSyncBits(extra); matchoffset += (verbatim_bits << 3); //READ HUFF SYM in Aligned Tree int aligned_bits = getChmSection().peekBits( ChmConstants.LZX_NUM_PRIMARY_LENGTHS); int t = getState().getAlignedTreeTable()[aligned_bits]; if (t >= getState().getMainTreeElements()) { x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS; //?LZX_ALIGNED_TABLEBITS do { x++; t <<= 1; t += getChmSection().checkBit(x); } while ((t = getState().getAlignedTreeTable()[t]) >= getState() .getMainTreeElements()); } getChmSection().getSyncBits( getState().getAlignedLenTable()[t]); matchoffset += t; } else if (extra == 3) { int g = getChmSection().peekBits( ChmConstants.LZX_NUM_PRIMARY_LENGTHS); int t = getState().getAlignedTreeTable()[g]; if (t >= getState().getMainTreeElements()) { x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS; do { x++; t <<= 1; t += getChmSection().checkBit(x); } while ((t = getState().getAlignedTreeTable()[t]) >= getState() .getMainTreeElements()); } getChmSection().getSyncBits( getState().getAlignedLenTable()[t]); matchoffset += t; } else if (extra > 0) { long l = getChmSection().getSyncBits(extra); matchoffset += l; } else matchoffset = 1; getState().setR2(getState().getR1()); getState().setR1(getState().getR0()); getState().setR0(matchoffset); } else if (matchoffset == 0) { matchoffset = (int) getState().getR0(); } else if (matchoffset == 1) { matchoffset = (int) getState().getR1(); getState().setR1(getState().getR0()); getState().setR0(matchoffset); } else /** match_offset == 2 */ { matchoffset = (int) getState().getR2(); getState().setR2(getState().getR0()); getState().setR0(matchoffset); } rundest = i; runsrc = rundest - matchoffset; i += (matchlen - 1); if (i > len) break; if (runsrc < 0) { if (matchlen + runsrc <= 0) { runsrc = prevcontent.length + runsrc; while (matchlen-- > 0) content[rundest++] = prevcontent[runsrc++]; } else { runsrc = prevcontent.length + runsrc; while (runsrc < prevcontent.length) content[rundest++] = prevcontent[runsrc++]; matchlen = matchlen + runsrc - prevcontent.length; runsrc = 0; while (matchlen-- > 0) content[rundest++] = content[runsrc++]; } } else { /* copies any wrappes around source data */ while ((runsrc < 0) && (matchlen-- > 0)) { content[rundest++] = content[(int) (runsrc + getBlockLength())]; runsrc++; } /* copies match data - no worries about destination wraps */ while (matchlen-- > 0) content[rundest++] = content[runsrc++]; } } } setContentLength(len); } private void assertShortArrayNotNull(short[] array) throws TikaException { if (array == null) throw new ChmParsingException("short[] is null"); } private void decompressVerbatimBlock(int len, byte[] prevcontent) throws TikaException { short s; int x, i; int matchlen = 0, matchfooter = 0, extra, rundest, runsrc; int matchoffset = 0; for (i = getContentLength(); i < len; i++) { int f = getChmSection().peekBits( ChmConstants.LZX_MAINTREE_TABLEBITS); assertShortArrayNotNull(getState().getMainTreeTable()); s = getState().getMainTreeTable()[f]; if (s >= ChmConstants.LZX_MAIN_MAXSYMBOLS) { x = ChmConstants.LZX_MAINTREE_TABLEBITS; do { x++; s <<= 1; s += getChmSection().checkBit(x); } while ((s = getState().getMainTreeTable()[s]) >= ChmConstants.LZX_MAIN_MAXSYMBOLS); } getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]); if (s < ChmConstants.LZX_NUM_CHARS) { content[i] = (byte) s; } else { s -= ChmConstants.LZX_NUM_CHARS; matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS; if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) { matchfooter = getState().getLengthTreeTable()[getChmSection() .peekBits(ChmConstants.LZX_LENGTH_TABLEBITS)]; if (matchfooter >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS) { x = ChmConstants.LZX_LENGTH_TABLEBITS; do { x++; matchfooter <<= 1; matchfooter += getChmSection().checkBit(x); } while ((matchfooter = getState().getLengthTreeTable()[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS); } getChmSection().getSyncBits( getState().getLengthTreeLengtsTable()[matchfooter]); matchlen += matchfooter; } matchlen += ChmConstants.LZX_MIN_MATCH; // shorter than 2 matchoffset = s >>> 3; if (matchoffset > 2) { if (matchoffset != 3) { // should get other bits to retrieve // offset extra = ChmConstants.EXTRA_BITS[matchoffset]; long l = getChmSection().getSyncBits(extra); matchoffset = (int) (ChmConstants.POSITION_BASE[matchoffset] - 2 + l); } else { matchoffset = 1; } getState().setR2(getState().getR1()); getState().setR1(getState().getR0()); getState().setR0(matchoffset); } else if (matchoffset == 0) { matchoffset = (int) getState().getR0(); } else if (matchoffset == 1) { matchoffset = (int) getState().getR1(); getState().setR1(getState().getR0()); getState().setR0(matchoffset); } else /* match_offset == 2 */ { matchoffset = (int) getState().getR2(); getState().setR2(getState().getR0()); getState().setR0(matchoffset); } rundest = i; runsrc = rundest - matchoffset; i += (matchlen - 1); if (i > len) break; if (runsrc < 0) { if (matchlen + runsrc <= 0) { runsrc = prevcontent.length + runsrc; while ((matchlen-- > 0) && (prevcontent != null) && ((runsrc + 1) > 0)) if ((rundest < content.length) && (runsrc < content.length)) content[rundest++] = prevcontent[runsrc++]; } else { runsrc = prevcontent.length + runsrc; while (runsrc < prevcontent.length) if ((rundest < content.length) && (runsrc < content.length)) content[rundest++] = prevcontent[runsrc++]; matchlen = matchlen + runsrc - prevcontent.length; runsrc = 0; while (matchlen-- > 0) content[rundest++] = content[runsrc++]; } } else { /* copies any wrapped source data */ while ((runsrc < 0) && (matchlen-- > 0)) { content[rundest++] = content[(int) (runsrc + getBlockLength())]; runsrc++; } /* copies match data - no worries about destination wraps */ while (matchlen-- > 0) { if ((rundest < content.length) && (runsrc < content.length)) content[rundest++] = content[runsrc++]; } } } } setContentLength(len); } private void createLengthTreeLenTable(int offset, int tablelen, short[] pretreetable, short[] prelentable) throws TikaException { if (prelentable == null || getChmSection() == null || pretreetable == null || prelentable == null) throw new ChmParsingException("is null"); int i = offset; // represents offset int z, y, x;// local counters while (i < tablelen) { //Read HUFF sym to z z = pretreetable[getChmSection().peekBits( ChmConstants.LZX_PRETREE_TABLEBITS)]; if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) {// 1 bug, should be // 20 x = ChmConstants.LZX_PRETREE_TABLEBITS; do { x++; z <<= 1; z += getChmSection().checkBit(x); } while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS); } getChmSection().getSyncBits(prelentable[z]); if (z < 17) { z = getState().getLengthTreeLengtsTable()[i] - z; if (z < 0) z = z + 17; getState().getLengthTreeLengtsTable()[i] = (short) z; i++; } else if (z == 17) { y = getChmSection().getSyncBits(4); y += 4; for (int j = 0; j < y; j++) if (i < getState().getLengthTreeLengtsTable().length) getState().getLengthTreeLengtsTable()[i++] = 0; } else if (z == 18) { y = getChmSection().getSyncBits(5); y += 20; for (int j = 0; j < y; j++) //no tolerate //if (i < getState().getLengthTreeLengtsTable().length) getState().getLengthTreeLengtsTable()[i++] = 0; } else if (z == 19) { y = getChmSection().getSyncBits(1); y += 4; z = pretreetable[getChmSection().peekBits( ChmConstants.LZX_PRETREE_TABLEBITS)]; if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) {// 20 x = ChmConstants.LZX_PRETREE_TABLEBITS;// 6 do { x++; z <<= 1; z += getChmSection().checkBit(x); } while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS);//LZX_MAINTREE_TABLEBITS); } getChmSection().getSyncBits(prelentable[z]); z = getState().getLengthTreeLengtsTable()[i] - z; if (z < 0) z = z + 17; for (int j = 0; j < y; j++) getState().getLengthTreeLengtsTable()[i++] = (short) z; } } } private void createMainTreeTable() throws TikaException { //Read Pre Tree Table short[] prelentable = createPreLenTable(); short[] pretreetable = createTreeTable2(prelentable, (1 << ChmConstants.LZX_PRETREE_TABLEBITS) + (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1), ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS); createMainTreeLenTable(0, ChmConstants.LZX_NUM_CHARS, pretreetable, prelentable); //Read Pre Tree Table prelentable = createPreLenTable(); pretreetable = createTreeTable2(prelentable, (1 << ChmConstants.LZX_PRETREE_TABLEBITS) + (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1), ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS); createMainTreeLenTable(ChmConstants.LZX_NUM_CHARS, getState().mainTreeLengtsTable.length, pretreetable, prelentable); getState().setMainTreeTable( createTreeTable2(getState().mainTreeLengtsTable, (1 << ChmConstants.LZX_MAINTREE_TABLEBITS) + (ChmConstants.LZX_MAINTREE_MAXSYMBOLS << 1), ChmConstants.LZX_MAINTREE_TABLEBITS, getState() .getMainTreeElements())); } private void createMainTreeLenTable(int offset, int tablelen, short[] pretreetable, short[] prelentable) throws TikaException { if (pretreetable == null) throw new ChmParsingException("pretreetable is null"); int i = offset; int z, y, x; while (i < tablelen) { int f = getChmSection().peekBits( ChmConstants.LZX_PRETREE_TABLEBITS); z = pretreetable[f]; if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) { x = ChmConstants.LZX_PRETREE_TABLEBITS; do { x++; z <<= 1; z += getChmSection().checkBit(x); } while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS); } getChmSection().getSyncBits(prelentable[z]); if (z < 17) { z = getState().getMainTreeLengtsTable()[i] - z; if (z < 0) z = z + 17; getState().mainTreeLengtsTable[i] = (short) z; i++; } else if (z == 17) { y = getChmSection().getSyncBits(4); y += 4; for (int j = 0; j < y; j++) { assertInRange(getState().getMainTreeLengtsTable(), i); getState().mainTreeLengtsTable[i++] = 0; } } else if (z == 18) { y = getChmSection().getSyncBits(5); y += 20; for (int j = 0; j < y; j++) { assertInRange(getState().getMainTreeLengtsTable(), i); getState().mainTreeLengtsTable[i++] = 0; } } else if (z == 19) { y = getChmSection().getSyncBits(1); y += 4; z = pretreetable[getChmSection().peekBits( ChmConstants.LZX_PRETREE_TABLEBITS)]; if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) { x = ChmConstants.LZX_PRETREE_TABLEBITS; do { x++; z <<= 1; z += getChmSection().checkBit(x); } while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS); } getChmSection().getSyncBits(prelentable[z]); z = getState().mainTreeLengtsTable[i] - z; if (z < 0) z = z + 17; for (int j = 0; j < y; j++) if (i < getState().getMainTreeLengtsTable().length) getState().mainTreeLengtsTable[i++] = (short) z; } } } private void assertInRange(short[] array, int index) throws ChmParsingException { if (index >= array.length) throw new ChmParsingException(index + " is bigger than " + array.length); } private short[] createAlignedLenTable() { int tablelen = ChmConstants.LZX_ALIGNED_NUM_ELEMENTS;//LZX_BLOCKTYPE_UNCOMPRESSED;// int bits = ChmConstants.LZX_BLOCKTYPE_UNCOMPRESSED; short[] tmp = new short[tablelen]; for (int i = 0; i < tablelen; i++) { tmp[i] = (short) getChmSection().getSyncBits(bits); } return tmp; } private void createAlignedTreeTable() throws ChmParsingException { getState().setAlignedLenTable(createAlignedLenTable()); getState().setAlignedTreeTable(//setAlignedLenTable( createTreeTable2(getState().getAlignedLenTable(), (1 << ChmConstants.LZX_NUM_PRIMARY_LENGTHS) + (ChmConstants.LZX_ALIGNED_MAXSYMBOLS << 1), ChmConstants.LZX_NUM_PRIMARY_LENGTHS, ChmConstants.LZX_ALIGNED_MAXSYMBOLS)); } private short[] createTreeTable2(short[] lentable, int tablelen, int bits, int maxsymbol) throws ChmParsingException { short[] tmp = new short[tablelen]; short sym; int leaf; int bit_num = 1; long fill; int pos = 0; /* the current position in the decode table */ long table_mask = (1 << bits); long bit_mask = (table_mask >> 1); long next_symbol = bit_mask; /* fills entries for short codes for a direct mapping */ while (bit_num <= bits) { for (sym = 0; sym < maxsymbol; sym++) { if (lentable.length > sym && lentable[sym] == bit_num) { leaf = pos; if ((pos += bit_mask) > table_mask) { /* table overflow */ throw new ChmParsingException("Table overflow"); } fill = bit_mask; while (fill-- > 0) tmp[leaf++] = sym; } } bit_mask >>= 1; bit_num++; } /* if there are any codes longer than nbits */ if (pos != table_mask) { /* clears the remainder of the table */ for (leaf = pos; leaf < table_mask; leaf++) tmp[leaf] = 0; /* gives ourselves room for codes to grow by up to 16 more bits */ pos <<= 16; table_mask <<= 16; bit_mask = 1 << 15; while (bit_num <= 16) { for (sym = 0; sym < maxsymbol; sym++) { if ((lentable.length > sym) && (lentable[sym] == bit_num)) { leaf = pos >> 16; for (fill = 0; fill < bit_num - bits; fill++) { /* * if this path hasn't been taken yet, 'allocate' * two entries */ if (tmp[leaf] == 0) { if (((next_symbol << 1) + 1) < tmp.length) { tmp[(int) (next_symbol << 1)] = 0; tmp[(int) (next_symbol << 1) + 1] = 0; tmp[leaf] = (short) next_symbol++; } } /* * follows the path and select either left or right * for next bit */ leaf = tmp[leaf] << 1; if (((pos >> (15 - fill)) & 1) != 0) leaf++; } tmp[leaf] = sym; if ((pos += bit_mask) > table_mask) { /* table overflow */ throw new ChmParsingException("Table overflow"); } } } bit_mask >>= 1; bit_num++; } } /* is it full table? */ if (pos == table_mask) return tmp; return tmp; } public byte[] getContent() { return content; } public byte[] getContent(int startOffset, int endOffset) throws TikaException { return (getContent() != null) ? ChmCommons.copyOfRange(getContent(), startOffset, endOffset) : new byte[1]; } public byte[] getContent(int start) throws TikaException { return (getContent() != null) ? ChmCommons.copyOfRange(getContent(), start, getContent().length) : new byte[1]; } private void setContent(int contentLength) throws TikaMemoryLimitException { if (contentLength > MAX_CONTENT_SIZE) { throw new TikaMemoryLimitException("content length (" + contentLength + " bytes) is > MAX_CONTENT_SIZE"); } this.content = new byte[contentLength]; } private void checkLzxBlock(ChmLzxBlock chmPrevLzxBlock) throws TikaException { if (chmPrevLzxBlock == null && getBlockLength() < Integer.MAX_VALUE) setState(new ChmLzxState((int) getBlockLength())); else //use clone to avoid changing a cached or to be cached block setState(chmPrevLzxBlock.getState().clone()); } private boolean validateConstructorParams(int blockNumber, byte[] dataSegment, long blockLength) throws TikaException { int goodParameter = 0; if (blockNumber >= 0) ++goodParameter; else throw new ChmParsingException("block number should be possitive"); if (dataSegment != null && dataSegment.length > 0) ++goodParameter; else throw new ChmParsingException("data segment should not be null"); if (blockLength > 0) ++goodParameter; else throw new ChmParsingException( "block length should be more than zero"); return (goodParameter == 3); } public int getBlockNumber() { return block_number; } private void setBlockNumber(int block_number) { this.block_number = block_number; } private long getBlockLength() { return block_length; } private void setBlockLength(long block_length) { this.block_length = block_length; } public ChmLzxState getState() { return state; } private void setState(ChmLzxState state) { this.state = state; } }




  • © 2015 - 2024 Weber Informatics LLC | Privacy Policy