All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.detect.zip.StreamingZipContainerDetector Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.detect.zip;


import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;

import org.apache.tika.config.LoadErrorHandler;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.io.BoundedInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;

/**
 * Currently only used in tests.  Unlike {@link DefaultZipContainerDetector},
 * this will never try to open the File as a ZipFile; this relies solely
 * on streaming detection.
 */
public class StreamingZipContainerDetector extends DefaultZipContainerDetector {


    /**
     * Serial version UID
     */
    private static final long serialVersionUID = 2891763938430295453L;


    public StreamingZipContainerDetector() {
        this(new ServiceLoader(StreamingZipContainerDetector.class.getClassLoader(),
                LoadErrorHandler.WARN, false));
    }

    public StreamingZipContainerDetector(ServiceLoader loader) {
        super(loader);
    }

    public StreamingZipContainerDetector(List zipDetectors) {
        //in initial development, OPCBased needed to be last.
        //this should have been fixed.
        super(zipDetectors);
    }


    @Override
    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
        // Check if we have access to the document
        if (input == null) {
            return MediaType.OCTET_STREAM;
        }

        byte[] prefix = new byte[1024]; // enough for all known archive formats
        input.mark(1024);
        int length = -1;
        try {
            length = IOUtils.read(input, prefix, 0, 1024);
        } finally {
            input.reset();
        }

        MediaType type = detectArchiveFormat(prefix, length);

        if (type == TIFF) {
            return TIFF;
        } else if (isZipArchive(type)) {
            input.mark(markLimit);
            try (BoundedInputStream lookahead = new BoundedInputStream(markLimit, input)) {
                return detectStreaming(lookahead, metadata);
            } finally {
                input.reset();
            }
        } else if (!type.equals(MediaType.OCTET_STREAM)) {
            return type;
        } else {
            return detectCompressorFormat(prefix, length);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy