All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.jackrabbit.oak.plugins.tika.BinaryStats Maven / Gradle / Ivy

There is a newer version: 1.72.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.plugins.tika;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import com.google.common.base.Strings;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;
import org.codehaus.groovy.runtime.StringGroovyMethods;

import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;

/**
 * Collects per-MIME-type statistics (count and cumulative size) for the
 * binaries returned by a {@link BinaryResourceProvider} and renders them
 * as a plain-text report.
 *
 * <p>All statistics are gathered once, in the constructor. Binaries whose
 * MIME type is {@code null} are skipped entirely and contribute to neither
 * the per-type nor the overall totals.
 */
class BinaryStats {
    private final TikaHelper tika;
    /** Per-MIME-type statistics, sorted with indexed types first, then by descending size. */
    private final List<MimeTypeStats> stats;
    private long totalSize;
    private long totalCount;
    private long indexedSize;
    private long indexedCount;

    /**
     * Creates the statistics by iterating over all binaries the provider
     * returns for the path {@code "/"}.
     *
     * @param tikaConfig Tika configuration file handed to {@link TikaHelper}
     * @param provider   source of the binaries to analyse
     * @throws IOException if creating the helper or reading a binary's size fails
     */
    public BinaryStats(File tikaConfig, BinaryResourceProvider provider) throws IOException {
        // tika must be initialised before collectStats(), which uses it via createStat()
        this.tika = new TikaHelper(tikaConfig);
        this.stats = collectStats(provider);
    }

    /** @return cumulative size in bytes of all binaries that had a MIME type */
    public long getTotalSize() {
        return totalSize;
    }

    /** @return number of binaries that had a MIME type */
    public long getTotalCount() {
        return totalCount;
    }

    /** @return cumulative size in bytes of the binaries whose MIME type is indexed */
    public long getIndexedSize() {
        return indexedSize;
    }

    /** @return number of binaries whose MIME type is indexed */
    public long getIndexedCount() {
        return indexedCount;
    }

    /**
     * Renders the collected statistics as a multi-line text report.
     *
     * @return the report: overall totals followed by one row per MIME type
     * @throws IOException declared for API compatibility; the current
     *                     implementation writes to an in-memory buffer only
     */
    public String getSummary() throws IOException {
        return getSummary(stats);
    }

    /**
     * Walks all binaries under {@code "/"}, aggregates them by MIME type and
     * updates the overall and indexed totals of this instance as a side effect.
     *
     * @param provider source of the binaries
     * @return per-type statistics sorted in reverse natural order
     *         (see {@link MimeTypeStats#compareTo(MimeTypeStats)})
     * @throws IOException if the size of a binary cannot be determined
     */
    private List<MimeTypeStats> collectStats(BinaryResourceProvider provider) throws IOException {
        Map<String, MimeTypeStats> stats = Maps.newHashMap();
        for (BinaryResource binary : provider.getBinaries("/")) {
            String mimeType = binary.getMimeType();
            if (mimeType != null) {
                MimeTypeStats mimeStats = stats.get(mimeType);
                if (mimeStats == null) {
                    // First binary of this type: resolve indexed/supported flags once
                    mimeStats = createStat(mimeType);
                    stats.put(mimeType, mimeStats);
                }

                long size = binary.getByteSource().size();
                mimeStats.addSize(size);
                totalSize += size;
                totalCount++;

                if (mimeStats.isIndexed()) {
                    indexedSize += size;
                    indexedCount++;
                }
            }
        }

        List<MimeTypeStats> result = new ArrayList<MimeTypeStats>(stats.values());
        // Reverse natural order: indexed types first, largest total size first
        Collections.sort(result, Collections.reverseOrder());
        return result;
    }

    /**
     * Formats the given statistics as a text table preceded by the overall totals.
     *
     * @param stats per-type statistics, printed in the given order
     * @return the formatted report
     */
    private String getSummary(List<MimeTypeStats> stats) {
        // Width of the "Type" column: longest MIME type name plus padding
        int maxWidth = 0;
        for (MimeTypeStats s : stats) {
            maxWidth = Math.max(maxWidth, s.getName().length());
        }

        maxWidth += 5;

        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        pw.println("MimeType Stats");
        pw.printf("\tTotal size          : %s%n", humanReadableByteCount(totalSize));
        pw.printf("\tTotal indexed size  : %s%n", humanReadableByteCount(indexedSize));
        pw.printf("\tTotal count         : %d%n", totalCount);
        pw.printf("\tTotal indexed count : %d%n", indexedCount);
        pw.println();

        String header = center("Type", maxWidth) + " " +
                center("Indexed", 10) + " " +
                center("Supported", 10) + " " +
                center("Count", 10) + " " +
                center("Size", 10);

        pw.println(header);
        pw.println(Strings.repeat("_", header.length() + 5));

        for (MimeTypeStats s : stats) {
            pw.printf("%-" + maxWidth + "s|%10s|%10s|  %-8d|%10s%n",
                    s.getName(),
                    s.isIndexed(),
                    s.isSupported(),
                    s.getCount(),
                    humanReadableByteCount(s.getTotalSize()));
        }
        return sw.toString();
    }

    /**
     * Creates an empty statistics entry for the given MIME type with its
     * indexed/supported flags taken from the {@link TikaHelper}.
     *
     * @param mimeType MIME type the entry is for
     * @return a new entry with zero count and size
     */
    private MimeTypeStats createStat(String mimeType) {
        MimeTypeStats stats = new MimeTypeStats(mimeType);
        stats.setIndexed(tika.isIndexed(mimeType));
        stats.setSupported(tika.isSupportedMediaType(mimeType));
        return stats;
    }

    /** Centers {@code s} within {@code width} characters by delegating to Groovy's string helper. */
    private static String center(String s, int width) {
        return StringGroovyMethods.center(s, width);
    }

    /**
     * Mutable accumulator holding the count and cumulative size of all
     * binaries of one MIME type, together with its indexed/supported flags.
     */
    private static class MimeTypeStats implements Comparable<MimeTypeStats> {
        private final String mimeType;
        private int count;
        private long totalSize;
        private boolean supported;
        private boolean indexed;

        public MimeTypeStats(String mimeType) {
            this.mimeType = mimeType;
        }

        /**
         * Records one more binary of this type.
         *
         * @param size size of the binary in bytes
         */
        public void addSize(long size) {
            count++;
            totalSize += size;
        }

        public void setSupported(boolean supported) {
            this.supported = supported;
        }

        public void setIndexed(boolean indexed) {
            this.indexed = indexed;
        }

        public long getTotalSize() {
            return totalSize;
        }

        public int getCount() {
            return count;
        }

        public String getName() {
            return mimeType;
        }

        public boolean isIndexed() {
            return indexed;
        }

        public boolean isSupported() {
            return supported;
        }

        /**
         * Orders non-indexed types before indexed ones, and within each group
         * by ascending total size. Entries that tie on both compare as equal
         * even when their MIME types differ, so this ordering is not
         * consistent with {@code equals}.
         */
        @Override
        public int compareTo(MimeTypeStats o) {
            return ComparisonChain.start()
                    .compareFalseFirst(indexed, o.indexed)
                    .compare(totalSize, o.totalSize)
                    .result();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy