All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.e6tech.elements.cassandra.etl.TimeBatchStrategy Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017 Futeh Kao
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.e6tech.elements.cassandra.etl;

import net.e6tech.elements.cassandra.Sibyl;

import java.util.List;

/**
 * This interface is designed to extract data from relation database into Cassandra
 * @param  source table from relation database
 */
public interface TimeBatchStrategy extends BatchStrategy {

    /**
     * Subclasses should not override this method.  Instead, they should override
     * extractUpdate.
     *
     */
    @Override
    default List extract(TimeBatch context) {
        LastUpdate lastUpdate = context.getLastUpdate();
        return extractUpdate(context, lastUpdate);
    }

    /**
     * When overriding this method, the implementation MUST update the context's start and lastUpdate's lastUpdate
     * with the last entry's modified time.  For example,
     *     context.setStart(s.getModifiedTime());
     *     lastUpdate.update(s.getModifiedTime());
     */
    List extractUpdate(TimeBatch context, LastUpdate lastUpdate);

    @Override
    default int run(TimeBatch context) {
        if (context.getSourceClass() == null)
            throw new IllegalStateException("sourceClass not set in context");
        return context.open().apply(Sibyl.class, sibyl -> {
            initializeContext(context);
            LastUpdate lastUpdate = context.getLastUpdate();
            int importedCount = 0;
            List batchResults;
            // NOTE, extract must call lastUpdate.update
            logger.info("Loading Class {}", getClass());
            while (!(batchResults = extract(context)).isEmpty()) {
                int n = context.getImportedCount();
                int processedCount = load(context, batchResults);
                context.setImportedCount(n + processedCount);
                context.saveLastUpdate(lastUpdate);
                logger.info("Processed {} instance of {}", processedCount, getClass());
                importedCount += processedCount;
            }
            logger.info("Done loading {} instance of {}", importedCount, getClass());
            return importedCount;
        });
    }

    default void initializeContext(TimeBatch context) {
        context.initialize();
        LastUpdate lastUpdate = context.getLastUpdate();
        long lastImport = Long.parseLong(lastUpdate.getLastUpdate());
        long start = System.currentTimeMillis();
        context.setEnd(start - context.getTimeLag());
        context.setStart(lastImport);
    }
}