All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.microsoft.chm.ChmItsfHeader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.microsoft.chm;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.math.BigInteger;
import java.util.Arrays;

import org.apache.tika.exception.TikaException;

/**
 * The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD
 * Total header length, including header section table and following data. 000C:
 * DWORD 1 (unknown) 0010: DWORD a timestamp 0014: DWORD Windows Language ID
 * 0018: GUID {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} 0028: GUID
 * {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} Note: a GUID is $10 bytes, arranged
 * as 1 DWORD, 2 WORDs, and 8 BYTEs. 0000: QWORD Offset of section from
 * beginning of file 0008: QWORD Length of section Following the header section
 * table is 8 bytes of additional header data. In Version 2 files, this data is
 * not there and the content section starts immediately after the directory.
 */
/* structure of ITSF headers */
public class ChmItsfHeader implements ChmAccessor {
    private static final long serialVersionUID = 2215291838533213826L;
    private byte[] signature;
    private int version; /* 4 */
    private int header_len; /* 8 */
    private int unknown_000c; /* c */
    private long last_modified; /* 10 */
    private long lang_id; /* 14 */
    private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
    private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
    private long unknown_offset; /* 38 */
    private long unknown_len; /* 40 */
    private long dir_offset; /* 48 */
    private long dir_len; /* 50 */
    private long data_offset; /* 58 (Not present before V3) */

    /* local usage */
    private int dataRemained;
    private int currentPlace = 0;

    public ChmItsfHeader() {
        signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
    }

    public static void main(String[] args) {
    }

    /**
     * Prints the values of ChmfHeader
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(new String(getSignature(), UTF_8)).append(" ");
        sb.append(getVersion()).append(" ");
        sb.append(getHeaderLen()).append(" ");
        sb.append(getUnknown_000c()).append(" ");
        sb.append(getLastModified()).append(" ");
        sb.append(getLangId()).append(" ");
        sb.append(Arrays.toString(getDir_uuid())).append(" ");
        sb.append(Arrays.toString(getStream_uuid())).append(" ");
        sb.append(getUnknownOffset()).append(" ");
        sb.append(getUnknownLen()).append(" ");
        sb.append(getDirOffset()).append(" ");
        sb.append(getDirLen()).append(" ");
        sb.append(getDataOffset()).append(" ");
        return sb.toString();
    }

    /**
     * Returns a signature of itsf header
     *
     * @return itsf header
     */
    public byte[] getSignature() {
        return signature;
    }

    /**
     * Sets itsf header signature
     *
     * @param signature
     */
    protected void setSignature(byte[] signature) {
        this.signature = signature;
    }

    /**
     * Returns itsf header version
     *
     * @return itsf version
     */
    public int getVersion() {
        return version;
    }

    /**
     * Sets itsf version
     *
     * @param version
     */
    protected void setVersion(int version) {
        this.version = version;
    }

    /**
     * Returns itsf header length
     *
     * @return length
     */
    public int getHeaderLen() {
        return header_len;
    }

    /**
     * Sets itsf header length
     *
     * @param header_len
     */
    protected void setHeaderLen(int header_len) {
        this.header_len = header_len;
    }

    /**
     * Returns unknown_00c value
     *
     * @return unknown_00c
     */
    public int getUnknown_000c() {
        return unknown_000c;
    }

    /**
     * Sets unknown_00c
     *
     * @param unknown_000c
     */
    protected void setUnknown_000c(int unknown_000c) {
        this.unknown_000c = unknown_000c;
    }

    /**
     * Returns last modified date of the chm file
     *
     * @return last modified date as long
     */
    public long getLastModified() {
        return last_modified;
    }

    /**
     * Sets last modified date of the chm file
     *
     * @param last_modified
     */
    protected void setLastModified(long last_modified) {
        this.last_modified = last_modified;
    }

    /**
     * Returns language ID
     *
     * @return language_id
     */
    public long getLangId() {
        return lang_id;
    }

    /**
     * Sets language_id
     *
     * @param lang_id
     */
    protected void setLangId(long lang_id) {
        this.lang_id = lang_id;
    }

    /**
     * Returns directory uuid
     *
     * @return dir_uuid
     */
    public byte[] getDir_uuid() {
        return dir_uuid;
    }

    /**
     * Sets directory uuid
     *
     * @param dir_uuid
     */
    protected void setDir_uuid(byte[] dir_uuid) {
        this.dir_uuid = dir_uuid;
    }

    /**
     * Returns stream uuid
     *
     * @return stream_uuid
     */
    public byte[] getStream_uuid() {
        return stream_uuid;
    }

    /**
     * Sets stream uuid
     *
     * @param stream_uuid
     */
    protected void setStream_uuid(byte[] stream_uuid) {
        this.stream_uuid = stream_uuid;
    }

    /**
     * Returns unknown offset
     *
     * @return unknown_offset
     */
    public long getUnknownOffset() {
        return unknown_offset;
    }

    /**
     * Sets unknown offset
     *
     * @param unknown_offset
     */
    protected void setUnknownOffset(long unknown_offset) {
        this.unknown_offset = unknown_offset;
    }

    /**
     * Returns unknown length
     *
     * @return unknown_length
     */
    public long getUnknownLen() {
        return unknown_len;
    }

    /**
     * Sets unknown length
     *
     * @param unknown_len
     */
    protected void setUnknownLen(long unknown_len) {
        this.unknown_len = unknown_len;
    }

    /**
     * Returns directory offset
     *
     * @return directory_offset
     */
    public long getDirOffset() {
        return dir_offset;
    }

    /**
     * Sets directory offset
     *
     * @param dir_offset
     */
    protected void setDirOffset(long dir_offset) {
        this.dir_offset = dir_offset;
    }

    /**
     * Returns directory length
     *
     * @return directory_offset
     */
    public long getDirLen() {
        return dir_len;
    }

    /**
     * Sets directory length
     *
     * @param dir_len
     */
    protected void setDirLen(long dir_len) {
        this.dir_len = dir_len;
    }

    /**
     * Returns data offset
     *
     * @return data_offset
     */
    public long getDataOffset() {
        return data_offset;
    }

    /**
     * Sets data offset
     *
     * @param data_offset
     */
    protected void setDataOffset(long data_offset) {
        this.data_offset = data_offset;
    }

    /**
     * Copies 4 first bytes of the byte[]
     *
     * @param data
     * @param chmItsfHeader
     * @param count
     * @throws TikaException
     */
    private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader, int count)
            throws TikaException {
        ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
        System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
        this.setCurrentPlace(this.getCurrentPlace() + count);
        this.setDataRemained(this.getDataRemained() - count);
    }

    /**
     * Copies X bytes of source byte[] to the dest byte[]
     *
     * @param data
     * @param dest
     * @param count
     * @return
     */
    private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
        System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
        this.setCurrentPlace(this.getCurrentPlace() + count);
        this.setDataRemained(this.getDataRemained() - count);
        return dest;
    }

    /**
     * Takes 8 bytes and reverses them
     *
     * @param data
     * @param dest
     * @return
     * @throws TikaException
     */
    private long unmarshalUint64(byte[] data, long dest) throws TikaException {
        byte[] temp = new byte[8];
        int i, j;

        if (8 > this.getDataRemained()) {
            throw new TikaException("8 > this.getDataRemained()");
        }

        for (i = 8, j = 7; i > 0; i--) {
            temp[j--] = data[this.getCurrentPlace()];
            this.setCurrentPlace(this.getCurrentPlace() + 1);
        }

        dest = new BigInteger(temp).longValue();
        this.setDataRemained(this.getDataRemained() - 8);
        return dest;
    }

    private int unmarshalInt32(byte[] data, int dest) throws TikaException {
        ChmAssert.assertByteArrayNotNull(data);

        if (4 > this.getDataRemained()) {
            throw new TikaException("4 > dataLenght");
        }
        dest = (data[this.getCurrentPlace()] & 0xff) |
                (data[this.getCurrentPlace() + 1] & 0xff) << 8 |
                (data[this.getCurrentPlace() + 2] & 0xff) << 16 |
                (data[this.getCurrentPlace() + 3] & 0xff) << 24;

        this.setCurrentPlace(this.getCurrentPlace() + 4);
        this.setDataRemained(this.getDataRemained() - 4);
        return dest;
    }

    private long unmarshalUInt32(byte[] data, long dest) throws TikaException {
        ChmAssert.assertByteArrayNotNull(data);
        if (4 > getDataRemained()) {
            throw new TikaException("4 > dataLenght");
        }
        dest = data[this.getCurrentPlace()] | data[this.getCurrentPlace() + 1] << 8 |
                data[this.getCurrentPlace() + 2] << 16 | data[this.getCurrentPlace() + 3] << 24;

        setDataRemained(this.getDataRemained() - 4);
        this.setCurrentPlace(this.getCurrentPlace() + 4);
        return dest;
    }

    /**
     * Returns data remained
     *
     * @return data_remainned
     */
    private int getDataRemained() {
        return dataRemained;
    }

    /**
     * Sets data remained to be processed
     *
     * @param dataRemained
     */
    private void setDataRemained(int dataRemained) {
        this.dataRemained = dataRemained;
    }

    /**
     * Returns current place in the byte[]
     *
     * @return current place
     */
    private int getCurrentPlace() {
        return currentPlace;
    }

    /**
     * Sets current place in the byte[]
     *
     * @param currentPlace
     */
    private void setCurrentPlace(int currentPlace) {
        this.currentPlace = currentPlace;
    }

    // @Override
    public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
        if (data.length < ChmConstants.CHM_ITSF_V2_LEN ||
                data.length > ChmConstants.CHM_ITSF_V3_LEN) {
            throw new TikaException(
                    "we only know how to deal with the 0x58 and 0x60 byte structures");
        }

        chmItsfHeader.setDataRemained(data.length);
        chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
        chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
        chmItsfHeader
                .setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
        chmItsfHeader.setUnknown_000c(
                chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
        chmItsfHeader.setLastModified(
                chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
        chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
        chmItsfHeader
                .setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
        chmItsfHeader.setStream_uuid(
                chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
        chmItsfHeader.setUnknownOffset(
                chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
        chmItsfHeader
                .setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
        chmItsfHeader
                .setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
        chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
        if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF)) {
            throw new TikaException("seems not valid file");
        }
        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) {
                throw new TikaException("something wrong with header");
            }
        } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) {
                throw new TikaException("unknown v3 header lenght");
            }
        } else {
            throw new ChmParsingException("unsupported chm format");
        }

        /*
         * now, if we have a V3 structure, unmarshal the rest, otherwise,
         * compute it
         */
        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
            if (chmItsfHeader.getDataRemained() >= 0) {
                chmItsfHeader
                        .setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
            } else {
                throw new TikaException("cannot set data offset, no data remained");
            }
        } else {
            chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy