All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.chm.accessor.ChmLzxcControlData Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.chm.accessor;

import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * ::DataSpace/Storage//ControlData. This file contains $20 bytes of
 * information on the compression. The information is partially known:
 *
 * <pre>
 * 0000: DWORD 6 (unknown)
 * 0004: ASCII 'LZXC' Compression type identifier
 * 0008: DWORD 2 (Possibly numeric code for LZX)
 * 000C: DWORD The Huffman reset interval in $8000-byte blocks
 * 0010: DWORD The window size in $8000-byte blocks
 * 0014: DWORD unknown (sometimes 2, sometimes 1, sometimes 0)
 * 0018: DWORD 0 (unknown)
 * 001C: DWORD 0 (unknown)
 * </pre>
 *
 * @see <a href="http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?page=2">
 *      Description of the CHM format (incomplete)</a>
 */
// NOTE(review): ChmAccessor is referenced here without type arguments; if the
// interface is generic, confirm whether this should be parameterized.
public class ChmLzxcControlData implements ChmAccessor {
    private static final long serialVersionUID = -7897854774939631565L;

    /** Number of bytes occupied by one little-endian DWORD in the header. */
    private static final int DWORD_LEN = 4;

    /* Parsed header fields; the trailing comment is the hex offset in the block. */
    private long size; /* 0 */
    private byte[] signature; /* 4 */
    private long version; /* 8 */
    private long resetInterval; /* c */
    private long windowSize; /* 10 */
    private long windowsPerReset; /* 14 */
    private long unknown_18; /* 18 */

    /* Parsing cursor state, only meaningful while parse(...) is running. */
    private int dataRemained;
    private int currentPlace = 0;

    /**
     * Creates an instance whose signature buffer is pre-filled with the bytes
     * of {@link ChmConstants#LZXC}; {@code parse} later overwrites this buffer
     * with the bytes actually found in the data.
     */
    public ChmLzxcControlData() {
        signature = ChmConstants.LZXC.getBytes(UTF_8);
    }

    /**
     * Returns the number of bytes that have not been consumed yet.
     *
     * @return the remaining byte count
     */
    private int getDataRemained() {
        return dataRemained;
    }

    /**
     * Sets the number of bytes that have not been consumed yet.
     *
     * @param dataRemained the remaining byte count
     */
    private void setDataRemained(int dataRemained) {
        this.dataRemained = dataRemained;
    }

    /**
     * Returns the read cursor (index of the next byte to consume).
     *
     * @return the current read position
     */
    private int getCurrentPlace() {
        return currentPlace;
    }

    /**
     * Sets the read cursor (index of the next byte to consume).
     *
     * @param currentPlace the new read position
     */
    private void setCurrentPlace(int currentPlace) {
        this.currentPlace = currentPlace;
    }

    /**
     * Returns the size field of the control data.
     *
     * @return size
     */
    public long getSize() {
        return size;
    }

    /**
     * Sets the size field of the control data.
     *
     * @param size the size value
     */
    protected void setSize(long size) {
        this.size = size;
    }

    /**
     * Returns the signature of the control data block. The returned array is
     * the internal buffer, not a copy.
     *
     * @return signature bytes
     */
    public byte[] getSignature() {
        return signature;
    }

    /**
     * Sets the signature of the control data block.
     *
     * @param signature the signature bytes
     */
    protected void setSignature(byte[] signature) {
        this.signature = signature;
    }

    /**
     * Returns the version of the control data block.
     *
     * @return version
     */
    public long getVersion() {
        return version;
    }

    /**
     * Sets the version of the control data block.
     *
     * @param version the version value
     */
    protected void setVersion(long version) {
        this.version = version;
    }

    /**
     * Returns the reset interval.
     *
     * @return reset interval
     */
    public long getResetInterval() {
        return resetInterval;
    }

    /**
     * Sets the reset interval.
     *
     * @param resetInterval the reset interval
     */
    protected void setResetInterval(long resetInterval) {
        this.resetInterval = resetInterval;
    }

    /**
     * Returns the window size.
     *
     * @return window size
     */
    public long getWindowSize() {
        return windowSize;
    }

    /**
     * Sets the window size.
     *
     * @param windowSize the window size
     */
    protected void setWindowSize(long windowSize) {
        this.windowSize = windowSize;
    }

    /**
     * Returns the windows-per-reset value.
     *
     * @return windows per reset
     */
    public long getWindowsPerReset() {
        return windowsPerReset;
    }

    /**
     * Sets the windows-per-reset value.
     *
     * @param windowsPerReset the windows-per-reset value
     */
    protected void setWindowsPerReset(long windowsPerReset) {
        this.windowsPerReset = windowsPerReset;
    }

    /**
     * Returns the unknown DWORD read from offset 0x18.
     *
     * @return unknown_18
     */
    public long getUnknown_18() {
        return unknown_18;
    }

    /**
     * Sets the unknown DWORD read from offset 0x18.
     *
     * @param unknown_18 the value to store
     */
    protected void setUnknown_18(long unknown_18) {
        this.unknown_18 = unknown_18;
    }

    /**
     * Reads one unsigned little-endian 32-bit value at the current read
     * position and advances the cursor by four bytes.
     *
     * @param data the raw control data
     * @param dest ignored on input; kept for signature compatibility
     * @return the value read, in the range 0 .. 0xFFFFFFFF
     * @throws ChmParsingException if fewer than four bytes remain
     */
    private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException {
        assert (data != null && data.length > 0);
        if (DWORD_LEN > getDataRemained()) {
            throw new ChmParsingException("4 > dataLenght");
        }
        int place = getCurrentPlace();
        // Mask every byte before shifting: Java bytes are signed, so without the
        // mask any byte >= 0x80 sign-extends and clobbers the higher-order bits.
        // Using a long mask also keeps bit 31 from turning the result negative.
        dest = (data[place] & 0xFFL)
                | (data[place + 1] & 0xFFL) << 8
                | (data[place + 2] & 0xFFL) << 16
                | (data[place + 3] & 0xFFL) << 24;

        setDataRemained(getDataRemained() - DWORD_LEN);
        setCurrentPlace(place + DWORD_LEN);
        return dest;
    }

    /**
     * Copies {@code count} bytes from the current read position into the
     * signature buffer of {@code chmLzxcControlData} and advances the cursor.
     *
     * @param data the raw control data
     * @param chmLzxcControlData the instance whose signature buffer is filled
     * @param count number of bytes to copy
     * @throws TikaException if an argument check fails or fewer than
     *         {@code count} bytes remain
     */
    private void unmarshalCharArray(byte[] data,
            ChmLzxcControlData chmLzxcControlData, int count) throws TikaException {
        ChmAssert.assertByteArrayNotNull(data);
        ChmAssert.assertChmAccessorNotNull(chmLzxcControlData);
        ChmAssert.assertPositiveInt(count);
        if (count > getDataRemained()) {
            throw new ChmParsingException("4 > dataLenght");
        }
        // Read from the cursor rather than a fixed offset so the copy and the
        // cursor update below can never disagree.
        int place = getCurrentPlace();
        System.arraycopy(data, place, chmLzxcControlData.getSignature(), 0, count);
        setCurrentPlace(place + count);
        setDataRemained(getDataRemained() - count);
    }

    /**
     * Returns a textual representation of this control data.
     */
    @Override
    public String toString() {
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder sb = new StringBuilder();
        sb.append("size(unknown):=").append(getSize()).append(", ");
        sb.append("signature(Compression type identifier):=")
                .append(new String(getSignature(), UTF_8)).append(", ");
        sb.append("version(Possibly numeric code for LZX):=")
                .append(getVersion()).append(lineSeparator);
        sb.append("resetInterval(The Huffman reset interval):=")
                .append(getResetInterval()).append(", ");
        sb.append("windowSize:=").append(getWindowSize()).append(", ");
        sb.append("windowsPerReset(unknown (sometimes 2, sometimes 1, sometimes 0):=")
                .append(getWindowsPerReset()).append(", ");
        sb.append("unknown_18:=").append(getUnknown_18()).append(lineSeparator);
        return sb.toString();
    }

    /**
     * Parses the raw control data into {@code chmLzxcControlData}.
     *
     * <p>Fields are read in order: size, signature, version, reset interval,
     * window size, windows per reset and, when the data is at least
     * {@link ChmConstants#CHM_LZXC_V2_LEN} bytes long, the DWORD at offset
     * 0x18 (otherwise that field is set to 0). For version 2 the window size
     * is multiplied by {@link ChmConstants#CHM_WINDOW_SIZE_BLOCK}.
     *
     * <p>All cursor state and all results live on {@code chmLzxcControlData},
     * and the cursor is reset on entry, so the same instance can be parsed
     * more than once.
     *
     * @param data the raw control data bytes
     * @param chmLzxcControlData the instance to populate
     * @throws TikaException if the data is null or too short, the target is
     *         null, the window size is 0 or 1, the reset interval is 0, or
     *         the signature does not equal {@link ChmConstants#LZXC}
     */
    public void parse(byte[] data, ChmLzxcControlData chmLzxcControlData) throws TikaException {
        if (data == null || (data.length < ChmConstants.CHM_LZXC_MIN_LEN)) {
            throw new ChmParsingException("we want at least 0x18 bytes");
        }
        ChmAssert.assertChmAccessorNotNull(chmLzxcControlData);

        // Start from a clean cursor; a stale position from an earlier parse
        // would make every read land past the header.
        chmLzxcControlData.setCurrentPlace(0);
        chmLzxcControlData.setDataRemained(data.length);

        chmLzxcControlData.setSize(
                chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getSize()));
        chmLzxcControlData.unmarshalCharArray(data, chmLzxcControlData,
                ChmConstants.CHM_SIGNATURE_LEN);
        chmLzxcControlData.setVersion(
                chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getVersion()));
        chmLzxcControlData.setResetInterval(
                chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getResetInterval()));
        chmLzxcControlData.setWindowSize(
                chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getWindowSize()));
        chmLzxcControlData.setWindowsPerReset(
                chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getWindowsPerReset()));

        if (data.length >= ChmConstants.CHM_LZXC_V2_LEN) {
            chmLzxcControlData.setUnknown_18(
                    chmLzxcControlData.unmarshalUInt32(data, chmLzxcControlData.getUnknown_18()));
        } else {
            chmLzxcControlData.setUnknown_18(0);
        }

        if (chmLzxcControlData.getVersion() == 2) {
            // Scale the value that was just parsed into the target, not this.windowSize.
            chmLzxcControlData.setWindowSize(chmLzxcControlData.getWindowSize()
                    * ChmConstants.CHM_WINDOW_SIZE_BLOCK);
        }

        if (chmLzxcControlData.getWindowSize() == 0
                || chmLzxcControlData.getResetInterval() == 0) {
            throw new ChmParsingException(
                    "window size / resetInterval should be more than zero");
        }

        if (chmLzxcControlData.getWindowSize() == 1) {
            throw new ChmParsingException(
                    "window size / resetInterval should be more than 1");
        }

        /* checks a signature */
        if (!new String(chmLzxcControlData.getSignature(), UTF_8)
                .equals(ChmConstants.LZXC)) {
            throw new ChmParsingException(
                    "the signature does not seem to be correct");
        }
    }

    /**
     * Empty entry point; performs no work.
     *
     * @param args unused
     */
    public static void main(String[] args) {
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy