All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.output.AbstractMetaModelOutputWriter Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.output;

import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.metamodel.UpdateableDataContext;
import org.apache.metamodel.insert.RowInsertionBuilder;
import org.apache.metamodel.schema.Table;
import org.datacleaner.api.InputColumn;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Abstract {@link OutputWriter} implementation for all
 * {@link UpdateableDataContext}s.
 *
 * This implementation holds a buffer of records to write, to avoid hitting the
 * executeUpdate() method for every single record.
 */
public abstract class AbstractMetaModelOutputWriter implements OutputWriter {

    private static final Logger logger = LoggerFactory.getLogger(AbstractMetaModelOutputWriter.class);

    private final UpdateableDataContext _dataContext;
    private final Queue _buffer;
    private final InputColumn[] _columns;

    /**
     * Creates a new {@link OutputWriter} based on a data context, a set of
     * columns and a buffer size.
     *
     * @param dataContext
     * @param columns
     * @param bufferSize
     *            the size of the write buffer. If 0 or negative, an unlimited
     *            buffer will be used, meaning that the complete dataset will be
     *            held in memory.
     */
    public AbstractMetaModelOutputWriter(final UpdateableDataContext dataContext, final InputColumn[] columns,
            final int bufferSize) {
        _dataContext = dataContext;
        _columns = columns;
        if (bufferSize > 0) {
            _buffer = new ArrayBlockingQueue<>(bufferSize);
        } else {
            _buffer = new ConcurrentLinkedQueue<>();
        }
    }

    @Override
    public final OutputRow createRow() {
        return new MetaModelOutputRow(this, _columns);
    }

    protected final void addToBuffer(final Object[] rowData) {
        while (!_buffer.offer(rowData)) {
            flushBuffer();
        }
    }

    private synchronized void flushBuffer() {
        if (!_buffer.isEmpty()) {
            logger.info("Flushing {} rows in write buffer", _buffer.size());
            _dataContext.executeUpdate(callback -> {
                for (Object[] rowData = _buffer.poll(); rowData != null; rowData = _buffer.poll()) {
                    RowInsertionBuilder insertBuilder = callback.insertInto(getTable());
                    for (int i = 0; i < _columns.length; i++) {
                        final Object value = rowData[i];
                        insertBuilder = insertBuilder.value(i, value);
                    }
                    insertBuilder.execute();
                }
            });
        }
    }

    protected abstract Table getTable();

    @Override
    public final void close() {
        flushBuffer();
        afterClose();
    }

    protected void afterClose() {
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy