All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.eval.io.DBWriter Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.eval.io;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.tika.eval.db.ColInfo;
import org.apache.tika.eval.db.Cols;
import org.apache.tika.eval.db.JDBCUtil;
import org.apache.tika.eval.db.MimeBuffer;
import org.apache.tika.eval.db.TableInfo;
import org.apache.tika.io.IOExceptionWithCause;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Writes rows to database tables through batched JDBC inserts.
 * <p>
 * This is still in its early stages.  The idea is to
 * get something working with h2 and then add to that
 * as necessary.
 * <p>
 * Beware, this deletes the db file with each initialization.
 * <p>
 * Each thread must construct its own DBWriter because each
 * DBWriter creates its own PreparedStatements at initialization.
 */
public class DBWriter implements IDBWriter {

    private static final Logger LOG = LoggerFactory.getLogger(DBWriter.class);

    /** Source of the per-writer ids that appear in log messages. */
    private static final AtomicInteger WRITER_ID = new AtomicInteger();

    /**
     * Number of rows added to a table's batch before the batch is executed
     * and the connection is committed.  A time-based trigger existed at one
     * point but is currently disabled; only the row count triggers a commit.
     */
    private static final long COMMIT_EVERY_X_ROWS = 10000L;

    private final Connection conn;
    private final JDBCUtil dbUtil;
    private final MimeBuffer mimeBuffer;
    private final int myId = WRITER_ID.getAndIncrement();

    /** Prepared INSERT statement for each table, keyed by table name. */
    private final Map<String, PreparedStatement> inserts = new HashMap<>();
    /** Commit bookkeeping for each table, keyed by table name. */
    private final Map<String, LastInsert> lastInsertMap = new HashMap<>();

    /**
     * Creates a writer and prepares one INSERT statement per table.
     *
     * @param connection connection used to prepare the statements and to commit
     * @param tableInfos tables this writer will be able to write to
     * @param dbUtil     utility used to add a row to a statement's batch
     * @param mimeBuffer buffer used to resolve mime strings to ids
     * @throws RuntimeException if an insert statement cannot be prepared; the
     *                          underlying {@link SQLException} is the cause
     */
    public DBWriter(Connection connection, List<TableInfo> tableInfos, JDBCUtil dbUtil,
                    MimeBuffer mimeBuffer) throws IOException, SQLException {

        this.conn = connection;
        this.mimeBuffer = mimeBuffer;
        this.dbUtil = dbUtil;
        for (TableInfo tableInfo : tableInfos) {
            try {
                PreparedStatement st = createPreparedInsert(tableInfo);
                inserts.put(tableInfo.getName(), st);
                lastInsertMap.put(tableInfo.getName(), new LastInsert());
            } catch (SQLException e) {
                // Don't leak the statements that were successfully prepared
                // for earlier tables.
                SQLException closeFailure = closeStatements();
                if (closeFailure != null) {
                    e.addSuppressed(closeFailure);
                }
                // Historically surfaced as unchecked; kept so for existing callers.
                throw new RuntimeException(
                        "Failed to prepare insert for table: " + tableInfo.getName(), e);
            }
        }
    }

    /**
     * Looks up the id for a mime string by delegating to the {@link MimeBuffer}.
     *
     * @param mimeString mime string to look up
     * @return the id returned by the mime buffer
     */
    public int getMimeId(String mimeString) {
        return mimeBuffer.getId(mimeString);
    }

    /**
     * Builds and prepares
     * {@code INSERT INTO <table>(<col1>, <col2>, ...) VALUES(?, ?, ...)}
     * with one placeholder per column of the table.
     */
    private PreparedStatement createPreparedInsert(TableInfo tableInfo) throws SQLException {
        StringBuilder sb = new StringBuilder();
        sb.append("INSERT INTO ").append(tableInfo.getName());
        sb.append("(");
        int i = 0;
        for (ColInfo c : tableInfo.getColInfos()) {
            if (i++ > 0) {
                sb.append(", ");
            }
            sb.append(c.getName());
        }
        sb.append(") ");

        sb.append("VALUES");
        sb.append("(");
        // i now holds the number of columns appended above
        for (int j = 0; j < i; j++) {
            if (j > 0) {
                sb.append(", ");
            }
            sb.append("?");
        }
        sb.append(")");

        return conn.prepareStatement(sb.toString());
    }

    /**
     * Adds a row to the batch for the given table.  Every
     * {@link #COMMIT_EVERY_X_ROWS} rows written to a table, that table's
     * batch is executed and the connection is committed.
     *
     * @param table table to write to; must be one of the tables passed to the constructor
     * @param data  column values for the row
     * @throws IOException      if the underlying JDBC calls fail
     * @throws RuntimeException if no prepared statement exists for the table
     */
    public void writeRow(TableInfo table, Map<Cols, String> data) throws IOException {
        try {
            PreparedStatement p = inserts.get(table.getName());
            if (p == null) {
                throw new RuntimeException("Failed to create prepared statement for: "+
                        table.getName());
            }
            dbUtil.batchInsert(p, table, data);
            LastInsert lastInsert = lastInsertMap.get(table.getName());
            lastInsert.rowCount++;
            // elapsed is only used for the log message below
            long elapsed = System.currentTimeMillis() - lastInsert.lastInsert;
            if (lastInsert.rowCount % COMMIT_EVERY_X_ROWS == 0) {
                LOG.info("writer ({}) on table ({}) is committing after {} rows and {} ms", myId,
                        table.getName(),
                        lastInsert.rowCount, elapsed);
                p.executeBatch();
                conn.commit();
                lastInsert.lastInsert = System.currentTimeMillis();
            }
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }

    /**
     * This closes the writer by executing every pending batch, committing
     * changes and closing this writer's prepared statements.
     * This DOES NOT close the connection.
     * <p>
     * If executing a batch or committing fails, the remaining batches are
     * not executed; the prepared statements are still closed.
     *
     * @throws IOException if executing a batch, committing or closing a statement fails
     */
    public void close() throws IOException {
        SQLException failure = null;
        try {
            for (PreparedStatement p : inserts.values()) {
                p.executeBatch();
            }
            conn.commit();
        } catch (SQLException e) {
            failure = e;
        }
        // Always release the statements, whether or not the flush succeeded.
        SQLException closeFailure = closeStatements();
        if (failure == null) {
            failure = closeFailure;
        } else if (closeFailure != null) {
            failure.addSuppressed(closeFailure);
        }
        if (failure != null) {
            throw new IOExceptionWithCause(failure);
        }
    }

    /**
     * Closes every prepared statement held by this writer and empties the
     * statement map so that a statement is never closed or flushed twice.
     *
     * @return the first failure encountered (later failures are attached to
     * it as suppressed exceptions), or {@code null} if every close succeeded
     */
    private SQLException closeStatements() {
        SQLException first = null;
        for (PreparedStatement p : inserts.values()) {
            try {
                p.close();
            } catch (SQLException e) {
                if (first == null) {
                    first = e;
                } else {
                    first.addSuppressed(e);
                }
            }
        }
        inserts.clear();
        return first;
    }

    /** Per-table bookkeeping: time of the last commit and total rows written. */
    private static class LastInsert {
        // initialised to creation time; reset after each row-count-triggered commit
        private long lastInsert = System.currentTimeMillis();
        private long rowCount = 0;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy