All downloads are free. Search and download functionality uses the official Maven repository.

net.e6tech.elements.cassandra.etl.ETLContext Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017 Futeh Kao
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.e6tech.elements.cassandra.etl;

import net.e6tech.elements.cassandra.SessionProvider;
import net.e6tech.elements.cassandra.Sibyl;
import net.e6tech.elements.cassandra.generator.Generator;
import net.e6tech.elements.common.inject.Inject;
import net.e6tech.elements.common.resources.Provision;
import net.e6tech.elements.common.resources.Resources;
import net.e6tech.elements.common.resources.UnitOfWork;
import net.e6tech.elements.common.util.SystemException;

import java.time.Instant;
import java.util.UUID;
import java.util.concurrent.TimeUnit;

/**
 * Mutable context for one extractor: carries the {@link ETLSettings}, the source class being
 * extracted, and the cached {@link LastUpdate} record keyed by {@link #extractor()}.
 *
 * <p>All time constants and time arithmetic in this class are expressed in milliseconds
 * (start times come from {@link System#currentTimeMillis()} / {@link Instant#toEpochMilli()}).
 *
 * <p>NOTE(review): the class holds unsynchronized mutable state; it appears to be intended for
 * use by a single thread at a time - confirm against callers.
 */
public class ETLContext {
    /** Milliseconds in one day. */
    public static final long DAY = 24 * 60 * 60 * 1000L;
    /** Milliseconds in one hour. */
    public static final long HOUR = 60 * 60 * 1000L;
    /** Milliseconds in one minute. */
    public static final long MINUTE = 60 * 1000L;
    /** Milliseconds in one second. */
    public static final long SECOND = 1000L;
    /** Thirty days in milliseconds. */
    public static final long MONTH = DAY * 30;  // not to be used for deriving a partition key
    /** 365 days in milliseconds. */
    public static final long YEAR = DAY * 365;  // not to be used for deriving a partition key
    /** Default time lag subtracted from the start time when computing a cutoff. */
    public static final long TIME_LAG = 5 * 60 * 1000L; // 5 minutes
    /** Default value for {@code asyncMaxNumOfChunks}. */
    public static final int ASYNC_MAX_NUM_OF_CHUNKS = 100;
    /** Default batch size. */
    public static final int BATCH_SIZE = 2000;

    private Provision provision;
    private ETLSettings settings = new ETLSettings();
    private int importedCount;
    // Optional explicit extractor name; extractor() falls back to the source class name.
    private String extractorName;
    // Optional explicit starting value used when no LastUpdate record exists yet.
    private String initialUpdate;
    private Class sourceClass;
    // Lazily resolved by initialize() / getTimeUnit() unless set explicitly.
    private TimeUnit timeUnit;
    private boolean initialized = false;
    // Lazily resolved from the SessionProvider unless set explicitly.
    private Class lastUpdateClass;
    // Cached record; populated by lookupLastUpdate(), getLastUpdate() or saveLastUpdate().
    private LastUpdate lastUpdate;
    // Manual override of the last-update value. No setter is visible in this class; when it is
    // non-null, saveLastUpdate() does not write to the database.
    private String useLastUpdate;
    // How far back from "now" the initial last-update value is placed when extracting everything.
    private long timeOffset =  2 * YEAR;

    /**
     * Creates a context whose settings are pre-populated with defaults: batch size
     * {@link #BATCH_SIZE}, time lag {@link #TIME_LAG}, max past of two {@link #YEAR}s, async
     * chunking disabled (step size {@code null}) with at most {@link #ASYNC_MAX_NUM_OF_CHUNKS}
     * chunks, no futures, no retries, a retry sleep of 100, extract-all enabled, and the start
     * time set to the current time.
     */
    public ETLContext() {
        settings.batchSize(BATCH_SIZE)
                .timeLag(TIME_LAG)
                .maxPast(2 * YEAR)
                .asyncTimeUnitStepSize(null) // disable by default
                .asyncMaxNumOfChunks(ASYNC_MAX_NUM_OF_CHUNKS)
                .asyncUseFutures(false)
                .retries(0)
                .retrySleep(100L)
                .extractAll(true)
                .startTime(System.currentTimeMillis());

    }

    public Provision getProvision() {
        return provision;
    }

    @Inject
    public void setProvision(Provision provision) {
        this.provision = provision;
    }

    /** @return the {@link Generator} instance obtained from the provision */
    public Generator getGenerator() {
        return getProvision().getInstance(Generator.class);
    }

    /** @return a new {@link UnitOfWork} opened on the provision */
    public UnitOfWork open() {
        return getProvision().open();
    }

    public int getBatchSize() {
        return settings.getBatchSize();
    }

    public void setBatchSize(int batchSize) {
        settings.setBatchSize(batchSize);
    }

    /** @return the async step size, or {@code null} when async chunking is disabled */
    public Integer getAsyncTimeUnitStepSize() {
        return settings.getAsyncTimeUnitStepSize();
    }

    public void setAsyncTimeUnitStepSize(Integer asyncTimeUnitStepSize) {
        settings.setAsyncTimeUnitStepSize(asyncTimeUnitStepSize);
    }

    public Integer getAsyncMaxNumOfChunks() {
        return settings.getAsyncMaxNumOfChunks();
    }

    public void setAsyncMaxNumOfChunks(Integer asyncMaxNumOfChunks) {
        settings.setAsyncMaxNumOfChunks(asyncMaxNumOfChunks);
    }

    public boolean isAsyncUseFutures() {
        return settings.isAsyncUseFutures();
    }

    public void setAsyncUseFutures(boolean asyncUseFutures) {
        settings.setAsyncUseFutures(asyncUseFutures);
    }

    /** @return the time lag, in milliseconds, subtracted from the start time by the cutoff */
    public long getTimeLag() {
        return settings.getTimeLag();
    }

    public void setTimeLag(long timeLag) {
        settings.setTimeLag(timeLag);
    }

    public long getMaxPast() {
        return settings.getMaxPast();
    }

    public void setMaxPast(long maxPast) {
        settings.setMaxPast(maxPast);
    }

    public int getRetries() {
        return settings.getRetries();
    }

    public void setRetries(int retries) {
        settings.setRetries(retries);
    }

    public long getRetrySleep() {
        return settings.getRetrySleep();
    }

    public void setRetrySleep(long sleep) {
        settings.setRetrySleep(sleep);
    }


    public int getImportedCount() {
        return importedCount;
    }

    public void setImportedCount(int importedCount) {
        this.importedCount = importedCount;
    }

    public Class getSourceClass() {
        return sourceClass;
    }

    public void setSourceClass(Class sourceClass) {
        this.sourceClass = sourceClass;
    }

    /** @return the start time in epoch milliseconds */
    public long getStartTime() {
        return settings.getStartTime();
    }

    public void setStartTime(long startTime) {
        settings.setStartTime(startTime);
    }

    public boolean isExtractAll() {
        return settings.getExtractAll();
    }

    public void setExtractAll(boolean extractAll) {
        settings.setExtractAll(extractAll);
    }

    public String getInitialUpdate() {
        return initialUpdate;
    }

    public void setInitialUpdate(String initialUpdate) {
        this.initialUpdate = initialUpdate;
    }

    public String getExtractorName() {
        return extractorName;
    }

    public void setExtractorName(String extractorName) {
        this.extractorName = extractorName;
    }

    /**
     * Returns the name identifying this extractor: the explicit extractor name when one is set,
     * otherwise the fully qualified name of the source class.
     *
     * @return the extractor name
     * @throws NullPointerException if neither an extractor name nor a source class is set
     */
    public String extractor() {
        return getExtractorName() != null ? getExtractorName() : getSourceClass().getName();
    }

    public Class getLastUpdateClass() {
        return lastUpdateClass;
    }

    public void setLastUpdateClass(Class lastUpdateClass) {
        this.lastUpdateClass = lastUpdateClass;
    }

    public long getTimeOffset() {
        return timeOffset;
    }

    public void setTimeOffset(long timeOffset) {
        this.timeOffset = timeOffset;
    }

    /** @return the class of the first partition key of the source class, per its inspector */
    public Class getPartitionKeyType() {
        initialize();
        return getInspector(getSourceClass()).getPartitionKeyClass(0);
    }

    public void reset() {
        // to be overridden by subclass
    }

    /**
     * Returns the time unit of the source class's partition key.
     *
     * <p>An explicitly set unit, or the one reported by the inspector during
     * {@link #initialize()}, wins. Otherwise the unit is derived from the suffix of the first
     * partition key column name and cached when a suffix matches.
     *
     * @return the time unit, or {@code null} when none can be determined
     */
    public TimeUnit getTimeUnit() {
        initialize();
        if (timeUnit == null) {
            String partitionKeyColumn = getInspector(getSourceClass()).getPartitionKeyColumn(0);
            if (partitionKeyColumn != null) {
                timeUnit = timeUnitForColumn(partitionKeyColumn);
            }
        }
        return timeUnit;
    }

    /** Maps a partition key column name to a time unit by suffix; {@code null} if no suffix matches. */
    private static TimeUnit timeUnitForColumn(String column) {
        if (column.endsWith("_day")) {
            return TimeUnit.DAYS;
        }
        if (column.endsWith("_hour")) {
            return TimeUnit.HOURS;
        }
        if (column.endsWith("_minute")) {
            return TimeUnit.MINUTES;
        }
        if (column.endsWith("_second")) {
            return TimeUnit.SECONDS;
        }
        if (column.endsWith("_milli") || column.endsWith("_time") || column.endsWith("_epoch")) {
            return TimeUnit.MILLISECONDS;
        }
        return null;
    }

    public void setTimeUnit(TimeUnit timeUnit) {
        this.timeUnit = timeUnit;
    }

    /** @return the table name of the source class, per its inspector */
    public String tableName() {
        return getInspector(getSourceClass()).tableName();
    }

    /**
     * Saves the given record through {@link Sibyl} and caches it on this context.
     *
     * <p>Does nothing when a manual last-update override is in effect, so the manually supplied
     * value never reaches the database.
     *
     * @param lastUpdate the record to save
     */
    @SuppressWarnings("unchecked")
    public void saveLastUpdate(LastUpdate lastUpdate) {
        if (useLastUpdate != null)
            return;  // lastUpdate value was set manually so do not update the database.
        open().accept(Sibyl.class, sibyl -> {
            if (lastUpdateClass == null)
                lastUpdateClass = getProvision().open().apply(Resources.class,
                        resources -> (Class) resources.getInstance(SessionProvider.class).getLastUpdateClass());
            sibyl.save(lastUpdateClass, lastUpdate);
            this.lastUpdate = lastUpdate;
        });
    }

    /**
     * Returns the cached record if there is one; otherwise loads the record keyed by
     * {@link #extractor()} through {@link Sibyl} and caches the result.
     *
     * @return the record, or {@code null} when the lookup yields nothing
     */
    @SuppressWarnings("unchecked")
    public LastUpdate lookupLastUpdate() {
        if (lastUpdate != null)
            return lastUpdate;
        if (lastUpdateClass == null)
            lastUpdateClass = (Class) getProvision().getInstance(SessionProvider.class).getLastUpdateClass();

        lastUpdate = open().apply(Sibyl.class, sibyl -> sibyl.get(lastUpdateClass, new PrimaryKey(extractor())));
        return lastUpdate;
    }

    /**
     * Returns the record for this extractor, creating one in memory when the lookup finds none.
     *
     * <p>A newly created record is given the extractor name, a starting value (see
     * {@link #initialLastUpdateValue()}), the data type of the partition key and the time unit
     * name ({@code "1"} when there is no time unit). A manual last-update override, if present,
     * replaces the starting value. The new record is cached but not saved by this method.
     *
     * @return the existing or newly created record, never {@code null}
     * @throws SystemException if the record class cannot be instantiated
     */
    public LastUpdate getLastUpdate() {
        String name = extractor();
        lookupLastUpdate();
        if (lastUpdate != null) {
            return lastUpdate;
        }

        // Build into a local so that a failure part-way through does not leave a
        // half-populated record cached in the field for later callers.
        LastUpdate created = newLastUpdateInstance();
        created.setExtractor(name);
        created.setLastUpdate(initialLastUpdateValue());
        created.setDataType(getGenerator().getDataType(getPartitionKeyType()));
        TimeUnit unit = getTimeUnit();
        created.setUnit(unit != null ? unit.toString() : "1");
        if (useLastUpdate != null) {
            created.setLastUpdate(useLastUpdate);
        }
        lastUpdate = created;
        return lastUpdate;
    }

    /**
     * Instantiates the record class via its no-argument constructor. Uses the same class that
     * {@link #lookupLastUpdate()} and {@link #saveLastUpdate(LastUpdate)} use, so an explicitly
     * configured class is honoured; falls back to the {@link SessionProvider}'s class.
     */
    @SuppressWarnings("unchecked")
    private LastUpdate newLastUpdateInstance() {
        try {
            if (lastUpdateClass == null) {
                lastUpdateClass = (Class) getProvision().getInstance(SessionProvider.class).getLastUpdateClass();
            }
            return (LastUpdate) lastUpdateClass.getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new SystemException(e);
        }
    }

    /**
     * Computes the starting value for a record that does not exist yet: the configured initial
     * update if set; otherwise, when extracting everything, the minimum UUID for a UUID
     * partition key or the update value for "now minus time offset"; otherwise the update value
     * for the configured start time with one extra millisecond of lag.
     */
    private String initialLastUpdateValue() {
        if (initialUpdate != null) {
            return getInitialUpdate();
        }
        if (!settings.getExtractAll()) {
            return "" + cutoffOrUpdate(false, settings.getStartTime(), 1);
        }
        if (UUID.class.isAssignableFrom(getPartitionKeyType())) {
            return new UUID(Long.MIN_VALUE, Long.MIN_VALUE).toString();
        }
        return "" + cutoffOrUpdate(false, Instant.now().toEpochMilli() - timeOffset, 0);
    }

    /**
     * Sets the cached record without touching the database.
     *
     * @param lastUpdate the record to cache
     * @return this context
     */
    protected ETLContext lastUpdate(LastUpdate lastUpdate) {
        this.lastUpdate = lastUpdate;
        return this;
    }

    /** @return the record's value converted by the {@link Generator} according to its data type */
    public Object getLastUpdateValue() {
        LastUpdate l = getLastUpdate();
        return getGenerator().getDataValue(l.getDataType(), l.getLastUpdate());
    }

    /** @return the cutoff for the configured start time with no additional lag */
    public Comparable getCutoff() {
        return cutoffOrUpdate(true, settings.getStartTime(), 0);
    }

    /**
     * @param startTime     start time in epoch milliseconds
     * @param additionalLag extra lag, in milliseconds, added to the configured time lag
     * @return the cutoff for the given start time
     */
    public Comparable getCutoff(long startTime, long additionalLag) {
        return cutoffOrUpdate(true, startTime, additionalLag);
    }

    /**
     * Converts {@code startTime - (timeLag + additionalLag)} into the partition key's time unit.
     *
     * <p>When the partition key has no supported time unit, a sentinel is returned instead: for
     * a cutoff the maximum value ({@code UUID(MAX, MAX)} or {@link Long#MAX_VALUE}), for an
     * update the minimum ({@code UUID(MIN, MIN)} or {@code 0}), chosen by whether the partition
     * key type is a {@link UUID}.
     *
     * @param cutoff        {@code true} to compute a cutoff, {@code false} an update value
     * @param startTime     start time in epoch milliseconds
     * @param additionalLag extra lag in milliseconds
     * @return a {@link Long} or a {@link UUID}
     */
    @SuppressWarnings("squid:S3776")
    private Comparable cutoffOrUpdate(boolean cutoff, long startTime, long additionalLag) {
        long lagged = startTime - (settings.getTimeLag() + additionalLag);
        TimeUnit unit = getTimeUnit();
        if (unit != null) {
            switch (unit) {
                case DAYS:
                    return lagged / DAY;
                case HOURS:
                    return lagged / HOUR;
                case MINUTES:
                    return lagged / MINUTE;
                case SECONDS:
                    return lagged / SECOND;
                case MILLISECONDS:
                    return lagged;
                default:
                    break; // other units are not time-bucketed here; use the sentinels below
            }
        }

        boolean uuidKey = UUID.class.isAssignableFrom(getPartitionKeyType());
        if (cutoff) {
            if (uuidKey) {
                return new UUID(Long.MAX_VALUE, Long.MAX_VALUE);
            }
            return Long.MAX_VALUE;
        }
        if (uuidKey) {
            return new UUID(Long.MIN_VALUE, Long.MIN_VALUE);
        }
        return 0L;
    }

    /** @return the inspector for the source class */
    public Inspector getInspector() {
        return getInspector(getSourceClass());
    }

    /**
     * @param cls the class to inspect
     * @return the inspector the {@link SessionProvider} supplies for {@code cls}
     */
    public Inspector getInspector(Class cls) {
        // Go through the accessor, like getGenerator() and open() do.
        return getProvision().getInstance(SessionProvider.class).getInspector(cls);
    }

    /**
     * Resolves the time unit from the source class's inspector unless one was set explicitly.
     * Runs at most once successfully; a failed attempt is retried on the next call.
     */
    public void initialize() {
        if (initialized) {
            return;
        }
        Inspector inspector = getInspector(getSourceClass());

        if (timeUnit == null) {
            timeUnit = inspector.getTimeUnit();
        }
        // Mark as initialized only after the lookup succeeded, so an early failure
        // (e.g. provision not yet injected) does not permanently disable initialization.
        initialized = true;
    }

    /**
     * Copies the provision and all settings values from another context. Source class, extractor
     * name, time unit, time offset and last-update state are not copied.
     *
     * @param context the context to copy from
     */
    public void copy(ETLContext context) {
        setStartTime(context.getStartTime());
        setProvision(context.getProvision());
        setBatchSize(context.getBatchSize());
        setExtractAll(context.isExtractAll());
        setTimeLag(context.getTimeLag());
        setMaxPast(context.getMaxPast());
        setAsyncTimeUnitStepSize(context.getAsyncTimeUnitStepSize());
        setAsyncMaxNumOfChunks(context.getAsyncMaxNumOfChunks());
        setAsyncUseFutures(context.isAsyncUseFutures());
        setRetries(context.getRetries());
        setRetrySleep(context.getRetrySleep());
    }

    /**
     * Applies every non-null value of the given settings to this context; values that are
     * {@code null} leave the current setting untouched.
     *
     * @param s the settings to apply; {@code null} is ignored
     */
    public void copy(ETLSettings s) {
        if (s == null) {
            return;
        }

        if (s.getStartTime() != null) {
            setStartTime(s.getStartTime());
        }
        if (s.getBatchSize() != null) {
            setBatchSize(s.getBatchSize());
        }
        if (s.getExtractAll() != null) {
            setExtractAll(s.getExtractAll());
        }
        if (s.getTimeLag() != null) {
            setTimeLag(s.getTimeLag());
        }
        if (s.getMaxPast() != null) {
            setMaxPast(s.getMaxPast());
        }
        if (s.getAsyncTimeUnitStepSize() != null) {
            setAsyncTimeUnitStepSize(s.getAsyncTimeUnitStepSize());
        }
        if (s.getAsyncMaxNumOfChunks() != null) {
            setAsyncMaxNumOfChunks(s.getAsyncMaxNumOfChunks());
        }
        if (s.isAsyncUseFutures() != null) {
            setAsyncUseFutures(s.isAsyncUseFutures());
        }
        if (s.getRetries() != null) {
            setRetries(s.getRetries());
        }
        if (s.getRetrySleep() != null) {
            setRetrySleep(s.getRetrySleep());
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy