All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.eval.db.MimeBuffer Maven / Gradle / Ivy

There is a newer version: 3.0.0-BETA2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.eval.db;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;

import org.apache.tika.config.TikaConfig;
import org.apache.tika.eval.AbstractProfiler;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;


public class MimeBuffer extends AbstractDBBuffer {

    private final PreparedStatement st;
    private final TikaConfig config;
    private final Connection connection;


    public MimeBuffer(Connection connection, TikaConfig config) throws SQLException {
        st = connection.prepareStatement("insert into " + AbstractProfiler.MIME_TABLE.getName() + "( " +
                Cols.MIME_ID.name() + ", " +
                Cols.MIME_STRING.name() + ", " +
                Cols.FILE_EXTENSION.name() + ") values (?,?,?)");
        this.config = config;
        this.connection = connection;
    }

    @Override
    public void write(int id, String value) throws RuntimeException {
        try {
            st.clearParameters();
            st.setInt(1, id);
            st.setString(2, value);
            try {
                String ext = MimeUtil.getExtension(value, config);
                if (ext == null || ext.length() == 0) {
                    st.setNull(3, Types.VARCHAR);
                } else {
                    st.setString(3, ext);
                }
            } catch (MimeTypeException e) {
                st.setNull(3, Types.VARCHAR);
            }
            st.execute();

        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void close() throws SQLException {
        st.close();
        connection.commit();
        connection.close();
    }

    private static class MimeUtil {
        //TODO: see if MimeType now works for these
        private static final String APPLICATION = "application";
        private static final String TEXT = "text";
        private static final String HTML = "html";
        private static final String XML = "xml";
        private static final String XHTML_XML = "xhtml+xml";
        private static final String CSS = "css";
        private static final String CSV = "csv";
        private static final String PLAIN = "plain";
        private static final String EMPTY_STRING = "";

        /**
         * Utility method to convert from a string value representing a content type
         * (e.g. "application/pdf") into the most common extension for that file type
         * (e.g. "pdf").
         * 

* This will has special handling for texty filetypes whose MimeTypes * don't currently return anything for {@link org.apache.tika.mime.MimeType#getExtension}; * * @param contentType string representing a content type, for example: "application/pdf" * @param config config from which to get MimeRepository * @return extension or empty string * @throws org.apache.tika.mime.MimeTypeException thrown if MimeTypes can't parse the contentType */ public static String getExtension(String contentType, TikaConfig config) throws MimeTypeException { MimeTypes types = config.getMimeRepository(); MimeType mime = types.forName(contentType); return getExtension(mime); } public static String getExtension(MimeType mime) { String ext = mime.getExtension(); if (ext.startsWith(".")) { ext = ext.substring(1); } //special handling for text/html/xml if (ext.length() == 0) { ext = tryTextyTypes(mime.getType()); } return ext; } private static String tryTextyTypes(MediaType mediaType) { String type = mediaType.getType(); String subtype = mediaType.getSubtype(); if (type.equals(TEXT)) { if (subtype.equals(HTML)) { return HTML; } else if (subtype.equals(PLAIN)) { return "txt"; } else if (subtype.equals(CSS)) { return CSS; } else if (subtype.equals(CSV)) { return CSV; } } else if (type.equals(APPLICATION)) { if (subtype.equals(XML)) { return XML; } else if (subtype.equals(XHTML_XML)) { return "html"; } } return EMPTY_STRING; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy