All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.microsoft.chm.ChmLzxcControlData Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.microsoft.chm;

import static java.nio.charset.StandardCharsets.UTF_8;

import org.apache.tika.exception.TikaException;

/**
 * ::DataSpace/Storage//ControlData This file contains $20 bytes of
 * information on the compression. The information is partially known: 0000:
 * DWORD 6 (unknown) 0004: ASCII 'LZXC' Compression type identifier 0008: DWORD
 * 2 (Possibly numeric code for LZX) 000C: DWORD The Huffman reset interval in
 * $8000-byte blocks 0010: DWORD The window size in $8000-byte blocks 0014:
 * DWORD unknown (sometimes 2, sometimes 1, sometimes 0) 0018: DWORD 0 (unknown)
 * 001C: DWORD 0 (unknown)
 */
public class ChmLzxcControlData implements ChmAccessor {
    private static final long serialVersionUID = -7897854774939631565L;
    /* class' members */
    private long size; /* 0 */
    private byte[] signature;
    private long version; /* 8 */
    private long resetInterval; /* c */
    private long windowSize; /* 10 */
    private long windowsPerReset; /* 14 */
    private long unknown_18; /* 18 */

    /* local usage */
    private int dataRemained;
    private int currentPlace = 0;

    public ChmLzxcControlData() {
        signature = ChmConstants.LZXC.getBytes(UTF_8); /*
         * 4
         * (LZXC
         * )
         */
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
    }

    /**
     * Returns a remained data
     *
     * @return dataRemained
     */
    private int getDataRemained() {
        return dataRemained;
    }

    /**
     * Sets a remained data
     *
     * @param dataRemained
     */
    private void setDataRemained(int dataRemained) {
        this.dataRemained = dataRemained;
    }

    /**
     * Returns a place holder
     *
     * @return current_place
     */
    private int getCurrentPlace() {
        return currentPlace;
    }

    /**
     * Sets a place holder
     *
     * @param currentPlace
     */
    private void setCurrentPlace(int currentPlace) {
        this.currentPlace = currentPlace;
    }

    /**
     * Returns a size of control data
     *
     * @return size
     */
    public long getSize() {
        return size;
    }

    /**
     * Sets a size of control data
     *
     * @param size
     */
    protected void setSize(long size) {
        this.size = size;
    }

    /**
     * Returns a signature of control data block
     *
     * @return signature
     */
    public byte[] getSignature() {
        return signature;
    }

    /**
     * Sets a signature of control data block
     *
     * @param signature
     */
    protected void setSignature(byte[] signature) {
        this.signature = signature;
    }

    /**
     * Returns a version of control data block
     *
     * @return version
     */
    public long getVersion() {
        return version;
    }

    /**
     * Sets version of control data block
     *
     * @param version
     */
    protected void setVersion(long version) {
        this.version = version;
    }

    /**
     * Returns reset interval
     *
     * @return reset_interval
     */
    public long getResetInterval() {
        return resetInterval;
    }

    /**
     * Sets a reset interval
     *
     * @param resetInterval
     */
    protected void setResetInterval(long resetInterval) {
        this.resetInterval = resetInterval;
    }

    /**
     * Returns a window size
     *
     * @return window_size
     */
    public long getWindowSize() {
        return windowSize;
    }

    /**
     * Sets a window size
     *
     * @param windowSize
     */
    protected void setWindowSize(long windowSize) {
        this.windowSize = windowSize;
    }

    /**
     * Returns windows per reset
     *
     * @return
     */
    public long getWindowsPerReset() {
        return windowsPerReset;
    }

    /**
     * Sets windows per reset
     *
     * @param windowsPerReset
     */
    protected void setWindowsPerReset(long windowsPerReset) {
        this.windowsPerReset = windowsPerReset;
    }

    /**
     * Returns unknown 18 bytes
     *
     * @return unknown_18
     */
    public long getUnknown_18() {
        return unknown_18;
    }

    /**
     * Sets unknown 18 bytes
     *
     * @param unknown_18
     */
    protected void setUnknown_18(long unknown_18) {
        this.unknown_18 = unknown_18;
    }

    private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException {
        assert (data != null && data.length > 0);
        if (4 > getDataRemained()) {
            throw new ChmParsingException("4 > dataLenght");
        }
        dest = data[this.getCurrentPlace()] | data[this.getCurrentPlace() + 1] << 8 |
                data[this.getCurrentPlace() + 2] << 16 | data[this.getCurrentPlace() + 3] << 24;

        setDataRemained(this.getDataRemained() - 4);
        this.setCurrentPlace(this.getCurrentPlace() + 4);
        return dest;
    }

    private void unmarshalCharArray(byte[] data, ChmLzxcControlData chmLzxcControlData, int count)
            throws TikaException {
        ChmAssert.assertByteArrayNotNull(data);
        ChmAssert.assertChmAccessorNotNull(chmLzxcControlData);
        ChmAssert.assertPositiveInt(count);
        System.arraycopy(data, 4, chmLzxcControlData.getSignature(), 0, count);
        this.setCurrentPlace(this.getCurrentPlace() + count);
        this.setDataRemained(this.getDataRemained() - count);
    }

    /**
     * Returns textual representation of ChmLzxcControlData
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("size(unknown):=")
                .append(this.getSize())
                .append(", ");
        sb.append("signature(Compression type identifier):=")
                .append(new String(this.getSignature(), UTF_8))
                .append(", ");
        sb.append("version(Possibly numeric code for LZX):=")
                .append(this.getVersion())
                .append(System.getProperty("line.separator"));
        sb.append("resetInterval(The Huffman reset interval):=")
                .append(this.getResetInterval())
                .append(", ");
        sb.append("windowSize:=")
                .append(this.getWindowSize())
                .append(", ");
        sb.append("windowsPerReset(unknown (sometimes 2, sometimes 1, sometimes 0):=")
                .append(this.getWindowsPerReset())
                .append(", ");
        sb.append("unknown_18:=")
                .append(this.getUnknown_18())
                .append(System.getProperty("line.separator"));
        return sb.toString();
    }

    // @Override
    public void parse(byte[] data, ChmLzxcControlData chmLzxcControlData) throws TikaException {
        if (data == null || (data.length < ChmConstants.CHM_LZXC_MIN_LEN)) {
            throw new ChmParsingException("we want at least 0x18 bytes");
        }
        chmLzxcControlData.setDataRemained(data.length);
        chmLzxcControlData.setSize(unmarshalUInt32(data, chmLzxcControlData.getSize()));
        chmLzxcControlData
                .unmarshalCharArray(data, chmLzxcControlData, ChmConstants.CHM_SIGNATURE_LEN);
        chmLzxcControlData.setVersion(unmarshalUInt32(data, chmLzxcControlData.getVersion()));
        chmLzxcControlData
                .setResetInterval(unmarshalUInt32(data, chmLzxcControlData.getResetInterval()));
        chmLzxcControlData.setWindowSize(unmarshalUInt32(data, chmLzxcControlData.getWindowSize()));
        chmLzxcControlData
                .setWindowsPerReset(unmarshalUInt32(data, chmLzxcControlData.getWindowsPerReset()));

        if (data.length >= ChmConstants.CHM_LZXC_V2_LEN) {
            chmLzxcControlData
                    .setUnknown_18(unmarshalUInt32(data, chmLzxcControlData.getUnknown_18()));
        } else {
            chmLzxcControlData.setUnknown_18(0);
        }

        if (chmLzxcControlData.getVersion() == 2) {
            chmLzxcControlData.setWindowSize(getWindowSize() * ChmConstants.CHM_WINDOW_SIZE_BLOCK);
        }

        if (chmLzxcControlData.getWindowSize() == 0 || chmLzxcControlData.getResetInterval() == 0) {
            throw new ChmParsingException("window size / resetInterval should be more than zero");
        }

        if (chmLzxcControlData.getWindowSize() == 1) {
            throw new ChmParsingException("window size / resetInterval should be more than 1");
        }

        /* checks a signature */
        if (!new String(chmLzxcControlData.getSignature(), UTF_8).equals(ChmConstants.LZXC)) {
            throw new ChmParsingException("the signature does not seem to be correct");
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy