All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.microsoft.chm.ChmCommons Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.microsoft.chm;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.tika.exception.TikaException;

/**
 * Static helpers and shared constants used by the CHM parser classes in this
 * package. The class only holds static members and cannot be instantiated.
 */
public class ChmCommons {

    /*
     * LZX block types. Per the original note on these constants, the block
     * type is what decides how a block gets decompressed.
     */
    public static final int UNDEFINED = 0;
    public static final int VERBATIM = 1;
    public static final int ALIGNED_OFFSET = 2;
    public static final int UNCOMPRESSED = 3;

    private static final Logger LOG = LoggerFactory.getLogger(ChmCommons.class);

    /** Prefix an entry name must carry to be matched by the list based search. */
    private static final String DATA_SPACE_STORAGE_PREFIX = "::DataSpace/Storage";

    /** Text returned by {@link #getLanguage(long)} for ids that are not in its table. */
    private static final String UNKNOWN_LANGUAGE =
            "unknown - http://msdn.microsoft.com/en-us/library/bb165625%28VS.80%29.aspx";

    /* Prevents initialization */
    private ChmCommons() {
    }

    /**
     * Fails when the given array reference is {@code null}.
     *
     * @param data array to check
     * @throws TikaException if {@code data} is {@code null}
     */
    public static void assertByteArrayNotNull(byte[] data) throws TikaException {
        if (data == null) {
            throw new TikaException("byte[] is null");
        }
    }

    /**
     * LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb). Returns X,
     * i.e. the exponent in 2^X.
     * <p>
     * The value is obtained by counting how many times {@code window} can be
     * shifted right before it drops to 1, so any {@code window <= 1}
     * (including negative values) yields 0.
     *
     * @param window chmLzxControlData.getWindowSize()
     * @return the exponent X such that 2^X is the largest power of two not
     * exceeding {@code window}, or 0 when {@code window <= 1}
     */
    public static int getWindowSize(int window) {
        int exponent = 0;
        for (int remaining = window; remaining > 1; remaining >>>= 1) {
            exponent++;
        }
        return exponent;
    }

    /**
     * Copies the bytes of one compressed block out of {@code data}.
     * <p>
     * The block starts at {@code lzxcBlockOffset + blockAddress[blockNumber]}.
     * It ends where the next block starts or, for the last entry of the reset
     * table, at {@code lzxcBlockOffset + lzxcBlockLength}.
     *
     * @param data            bytes the block is copied from
     * @param resetTable      reset table supplying the block addresses
     * @param blockNumber     index into the reset table's block addresses
     * @param lzxcBlockOffset offset added to every block address
     * @param lzxcBlockLength end position used for the last block, relative to
     *                        {@code lzxcBlockOffset}
     * @return a new array holding the block's bytes
     * @throws TikaException            if the arguments are rejected by
     *                                  {@link ChmAssert#assertChmBlockSegment}, if
     *                                  {@code blockNumber} is outside the reset table, or
     *                                  if a computed position does not fit into an
     *                                  {@code int}
     * @throws IllegalArgumentException if the computed range is not a valid
     *                                  range of {@code data}
     */
    public static byte[] getChmBlockSegment(byte[] data, ChmLzxcResetTable resetTable,
                                            int blockNumber, int lzxcBlockOffset,
                                            int lzxcBlockLength) throws TikaException {
        ChmAssert.assertChmBlockSegment(data, resetTable, blockNumber, lzxcBlockOffset,
                lzxcBlockLength);

        final int blockCount = resetTable.getBlockAddress().length;
        // Indexing the address table with an out-of-range block number used to
        // escape as an unchecked ArrayIndexOutOfBoundsException; report it
        // through the declared exception type instead.
        if (blockNumber < 0 || blockNumber >= blockCount) {
            throw new TikaException("block number " + blockNumber +
                    " is outside of the reset table (" + blockCount + " entries)");
        }

        final boolean lastBlock = blockNumber == blockCount - 1;
        final int blockStart =
                toIntChecked(resetTable.getBlockAddress()[blockNumber], "block start address");
        final int blockEnd = lastBlock ? lzxcBlockLength :
                toIntChecked(resetTable.getBlockAddress()[blockNumber + 1], "block end address");

        // Sum in long so the narrowing can be verified rather than silently
        // wrapping around.
        final int from = toIntChecked((long) lzxcBlockOffset + blockStart, "block start offset");
        final int to = toIntChecked((long) lzxcBlockOffset + blockEnd, "block end offset");
        return copyOfRange(data, from, to);
    }

    /**
     * Narrows {@code value} to an {@code int}, refusing values that do not fit.
     *
     * @param value       value to narrow
     * @param description name of the value, used in the error message
     * @return {@code value} as an {@code int}
     * @throws TikaException if {@code value} is outside the {@code int} range
     */
    private static int toIntChecked(long value, String description) throws TikaException {
        if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
            throw new TikaException(description + " does not fit into an int: " + value);
        }
        return (int) value;
    }

    /**
     * Returns textual representation of LangID
     *
     * @param langID numeric language id
     * @return language name, or a text starting with "unknown" when the id is
     * not in the table below
     */
    public static String getLanguage(long langID) {
        // A switch cannot operate on long. Narrowing blindly would make an
        // out-of-range id alias a table entry (e.g. 2^32 + 1025 would read as
        // 1025), so such ids are rejected up front.
        if (langID < Integer.MIN_VALUE || langID > Integer.MAX_VALUE) {
            return UNKNOWN_LANGUAGE;
        }
        switch ((int) langID) {
            case 1025:
                return "Arabic";
            case 1069:
                return "Basque";
            case 1027:
                return "Catalan";
            case 2052:
                return "Chinese (Simplified)";
            case 1028:
                return "Chinese (Traditional)";
            case 1029:
                return "Czech";
            case 1030:
                return "Danish";
            case 1043:
                return "Dutch";
            case 1033:
                return "English (United States)";
            case 1035:
                return "Finnish";
            case 1036:
                return "French";
            case 1031:
                return "German";
            case 1032:
                return "Greek";
            case 1037:
                return "Hebrew";
            case 1038:
                return "Hungarian";
            case 1040:
                return "Italian";
            case 1041:
                return "Japanese";
            case 1042:
                return "Korean";
            case 1044:
                return "Norwegian";
            case 1045:
                return "Polish";
            case 2070:
                return "Portuguese";
            case 1046:
                return "Portuguese (Brazil)";
            case 1049:
                return "Russian";
            case 1051:
                return "Slovakian";
            case 1060:
                return "Slovenian";
            case 3082:
                return "Spanish";
            case 1053:
                return "Swedish";
            case 1055:
                return "Turkish";
            default:
                return UNKNOWN_LANGUAGE;
        }
    }

    /**
     * Checks skippable patterns: an entry is skippable when its name starts
     * with "/$", "/#" or "::".
     *
     * @param directoryListingEntry entry whose name is inspected; neither the
     *                              entry nor its name may be {@code null}
     * @return {@code true} if the entry name starts with one of the prefixes
     */
    public static boolean hasSkip(DirectoryListingEntry directoryListingEntry) {
        String name = directoryListingEntry.getName();
        return name.startsWith("/$") || name.startsWith("/#") || name.startsWith("::");
    }

    /**
     * Writes byte[][] to the file, row by row in array order.
     * <p>
     * Nothing is written when {@code buffer} is {@code null} or the file name
     * is {@code null} or empty. {@code null} rows are skipped. A failure while
     * writing is only logged; a failure to open the file is reported to the
     * caller.
     *
     * @param buffer        rows to write
     * @param fileToBeSaved file name
     * @throws TikaException if the file cannot be opened for writing
     */
    public static void writeFile(byte[][] buffer, String fileToBeSaved) throws TikaException {
        if (buffer == null || isEmpty(fileToBeSaved)) {
            return;
        }
        try (FileOutputStream output = new FileOutputStream(fileToBeSaved)) {
            for (byte[] bufferEntry : buffer) {
                if (bufferEntry != null) {
                    output.write(bufferEntry);
                }
            }
        } catch (FileNotFoundException e) {
            // keep the original exception as the cause so the stack trace survives
            throw new TikaException(e.getMessage(), e);
        } catch (IOException e) {
            LOG.warn("problem writing tmp file", e);
        }
    }

    /**
     * Reverses the order of given array in place. A {@code null} array is
     * ignored.
     *
     * @param array array to reverse
     */
    public static void reverse(byte[] array) {
        if (array == null) {
            return;
        }
        for (int low = 0, high = array.length - 1; low < high; low++, high--) {
            byte swapped = array[high];
            array[high] = array[low];
            array[low] = swapped;
        }
    }

    /**
     * Returns an index of the reset table: the position of the first
     * occurrence of {@code pattern} in {@code text}, minus 4.
     * <p>
     * Note that the "not found" result of the underlying search (-1) is
     * shifted as well, so this method returns -5 when the pattern does not
     * occur, and a value between -4 and -1 when it occurs within the first
     * four bytes.
     *
     * @param text    bytes to search in
     * @param pattern bytes to search for
     * @return index of the reset table
     * @throws ChmParsingException if either argument is {@code null} or the
     *                             pattern is empty
     */
    public static final int indexOfResetTableBlock(byte[] text, byte[] pattern)
            throws ChmParsingException {
        return indexOfDataSpaceStorageElement(text, pattern) - 4;
    }

    /**
     * Searches some pattern in byte[] using a precomputed failure table, so
     * the text is scanned without ever stepping backwards.
     *
     * @param text    byte[]
     * @param pattern byte[]
     * @return the index of the first occurrence, if nothing found returns -1
     * @throws ChmParsingException if either argument is {@code null} or the
     *                             pattern is empty
     */
    public static int indexOfDataSpaceStorageElement(byte[] text, byte[] pattern)
            throws ChmParsingException {
        if (pattern == null || text == null) {
            throw new ChmParsingException("pattern and/or text should not be null");
        }
        if (pattern.length == 0) {
            // the failure table below needs at least one slot
            throw new ChmParsingException("pattern should not be empty");
        }

        /* Computes a failure function */
        final int[] next = new int[pattern.length];
        next[0] = -1;
        int i = 0;
        int j = -1;
        while (i < pattern.length - 1) {
            if (j == -1 || pattern[i] == pattern[j]) {
                i++;
                j++;
                // When both positions hold the same byte, a mismatch at i would
                // be repeated at j, so reuse j's fallback directly.
                next[i] = (pattern[i] != pattern[j]) ? j : next[j];
            } else {
                j = next[j];
            }
        }

        /* Matching */
        i = 0;
        j = 0;
        while (i < text.length && j < pattern.length) {
            if (j == -1 || pattern[j] == text[i]) {
                i++;
                j++;
            } else {
                j = next[j];
            }
        }
        // a full match ends at i, so it started pattern.length bytes earlier
        return (j == pattern.length) ? i - j : -1;
    }

    /**
     * Searches for some pattern in the directory listing entry list.
     * This requires that the entry name start with "::DataSpace/Storage"
     * and contain the pattern. See TIKA-4204
     * <p>
     * A {@code null} list or pattern, and entries that are {@code null} or
     * have a {@code null} name, never match.
     *
     * @param list    entries to search
     * @param pattern text the entry name has to contain
     * @return the position of the first matching entry, if nothing found
     * returns -1
     */
    public static int indexOfDataSpaceStorageElement(List<DirectoryListingEntry> list,
                                                     String pattern) {
        if (list == null || pattern == null) {
            return -1;
        }
        int place = 0;
        for (DirectoryListingEntry directoryListingEntry : list) {
            String name = (directoryListingEntry == null) ? null : directoryListingEntry.getName();
            if (name != null && name.startsWith(DATA_SPACE_STORAGE_PREFIX) &&
                    name.contains(pattern)) {
                return place;
            }
            ++place;
        }
        return -1; // not found
    }

    /**
     * Copies the range {@code [from, to)} of {@code original} into a new
     * array.
     *
     * @param original array to copy from
     * @param from     index of the first byte to copy, inclusive
     * @param to       index after the last byte to copy; must not exceed
     *                 {@code original.length}
     * @return a new array of length {@code to - from}
     * @throws NullPointerException     if {@code original} is {@code null}
     * @throws IllegalArgumentException if {@code from} or {@code to} is
     *                                  negative, {@code to} is beyond the array,
     *                                  or {@code from > to}
     * @throws TikaException            declared for callers; not thrown by the current
     *                                  implementation
     */
    public static byte[] copyOfRange(byte[] original, int from, int to) throws TikaException {
        checkCopyOfRangeParams(original, from, to);
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }

        byte[] copy = new byte[newLength];
        // to <= original.length was verified above, so the whole range is available
        System.arraycopy(original, from, copy, 0, newLength);
        return copy;
    }

    /**
     * Validates the arguments of {@link #copyOfRange(byte[], int, int)}.
     */
    private static void checkCopyOfRangeParams(byte[] original, int from, int to) {
        if (original == null) {
            throw new NullPointerException("array is null");
        }
        if (from < 0) {
            throw new IllegalArgumentException(from + " should be >= 0");
        }
        if (to < 0) {
            throw new IllegalArgumentException(to + " should be >= 0");
        }
        if (to > original.length) {
            throw new IllegalArgumentException("can't copy beyond array length");
        }
    }

    /**
     * Tells whether a string carries no characters.
     *
     * @param str string to check
     * @return {@code true} if {@code str} is {@code null} or has length 0
     */
    public static boolean isEmpty(String str) {
        return str == null || str.length() == 0;
    }

    /**
     * Represents entry types: uncompressed, compressed
     */
    public enum EntryType {
        UNCOMPRESSED, COMPRESSED
    }

    /**
     * Represents lzx states: started decoding, not started decoding
     */
    public enum LzxState {
        STARTED_DECODING, NOT_STARTED_DECODING
    }

    /**
     * Represents intel file states during decompression
     */
    public enum IntelState {
        STARTED, NOT_STARTED
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy