All downloads are FREE. Search and download functionality uses the official Maven repository.

net.e6tech.elements.cassandra.etl.Transformer Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017 Futeh Kao
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.e6tech.elements.cassandra.etl;

import net.e6tech.elements.cassandra.*;
import net.e6tech.elements.common.resources.Resources;
import net.e6tech.elements.common.util.SystemException;
import net.e6tech.elements.common.util.datastructure.Pair;

import java.util.*;
import java.util.function.BiConsumer;
import java.util.stream.Stream;

/**
 * This is a convenient class for helping to transform an array of extracted entities of type E to another set of entities of
 * type T.
 *
 * NOTE, the entities to be transformed need to be in ascending order.
 *
 * @param  Transformed type
 * @param  Extracted type
 */
public class Transformer {
    private Resources resources;
    private Map map = new HashMap<>();
    private Class tableClass;
    private List> entries = Collections.synchronizedList(new LinkedList<>());
    private boolean hasCheckpoint;
    private Inspector tableInspector;
    private Consistency readConsistency = null;
    private Consistency writeConsistency = null;
    private long timeout = 0;  // ie disable

    public Transformer(Resources resources, Class cls) {
        this.resources = resources;
        tableClass = cls;
        tableInspector = getInspector(tableClass);
        hasCheckpoint = tableInspector.getCheckpointColumn(0) != null;
    }

    public Transformer transform(Stream stream, BiConsumer, E> consumer) {
        stream.forEach(e -> consumer.accept(this, e));
        load();
        return this;
    }

    public Transformer transform(E[] array, BiConsumer, E> consumer) {
        // Do not run this asynchronously.  We need to follw the arry order.
        for (E e : array) {
            consumer.accept(this, e);
        }
        load();
        return this;
    }

    public Transformer transform(Collection collection, BiConsumer, E> consumer) {
        for (E e : collection) {
            consumer.accept(this, e);
        }
        load();
        return this;
    }

    public Consistency getReadConsistency() {
        return readConsistency;
    }

    public void setReadConsistency(Consistency readConsistency) {
        this.readConsistency = readConsistency;
    }

    public Consistency getWriteConsistency() {
        return writeConsistency;
    }

    public void setWriteConsistency(Consistency writeConsistency) {
        this.writeConsistency = writeConsistency;
    }

    public long getTimeout() {
        return timeout;
    }

    public void setTimeout(long timeout) {
        this.timeout = timeout;
    }

    public Transformer timeout(long timeout) {
        setTimeout(timeout);
        return this;
    }

    public Transformer addPrimaryKey(PrimaryKey key, E e) {
        if (key == null)
            return this;

        int keyColumns = tableInspector.getPartitionKeySize() + tableInspector.getClusteringKeySize();
        if (keyColumns != key.getKeys().length) {
            throw new IllegalArgumentException("Mismatch key columns.  The primary key for " + tableClass + " consists of " +  keyColumns + " column(s): " +
                    "the provided primary key has " + key.getKeys().length + " componenets.");
        }

        entries.add(new Pair<>(key, e));
        return this;
    }

    private Transformer load() {
        Sibyl s = resources.getInstance(Sibyl.class);
        map = new HashMap<>(Math.max((int) (entries.size()/.75f) + 1, 16));
        Set keys = new HashSet<>(Math.max((int) (entries.size()/.75f) + 1, 16));
        for (Pair e : entries()) {
            keys.add(e.key());
        }

        ReadOptions readOptions = new ReadOptions();
        readOptions.consistency = readConsistency;
        s.get(keys, tableClass, readOptions)
                    .timeout(timeout)
                    .inExecutionOrder(map::put);
        return this;
    }

    private Collection> entries() {
        return entries;
    }

    private Inspector getInspector(Class cls) {
        return resources.getInstance(SessionProvider.class).getInspector(cls);
    }

    private T computeIfAbsent(PrimaryKey key) {
        return map.computeIfAbsent(key, k -> {
            try {
                T t = tableClass.getDeclaredConstructor().newInstance();
                tableInspector.setPrimaryKey(key, t);
                return t;
            } catch (Exception e) {
                throw new SystemException(e);
            }
        });
    }

    @SuppressWarnings("squid:S3776")
    public Transformer forEachCreateIfNotExist(BiConsumer consumer) {
        forEachNewOrExisting(consumer, consumer);
        return this;
    }

    @SuppressWarnings({"unchecked", "squid:S3776"})
    public Transformer forEachNewOrExisting(BiConsumer newItems, BiConsumer existing) {
        Inspector extractedInspector = null;

        // this cannot be run asynchronously
        for (Pair e : entries()) {
            boolean existingItem = map.get(e.key()) != null;
            T t = computeIfAbsent(e.key());
            E extracted = e.value();
            if (extractedInspector == null)
                extractedInspector = getInspector(extracted.getClass());

            boolean duplicate = false;
            if (hasCheckpoint) {
                Comparable extractedPartitionKey = (Comparable) extractedInspector.getPartitionKey(extracted, 0);
                if (extractedPartitionKey != null) {
                    Comparable checkPoint = tableInspector.getCheckpoint(t, 0);
                    // extracted partition key need to be larger that checkpoint.  Otherwise, it means
                    // it is a duplicate because of failure conditions.
                    duplicate = checkPoint != null && extractedPartitionKey.compareTo(checkPoint) <= 0;
                }
            }
            if (!duplicate) {
                if (existingItem) {
                    existing.accept(extracted, t);
                } else {
                    newItems.accept(extracted, t);
                }
            }
        }

        // update checkpoints.
        for (Pair e : entries()) {
            E extracted = e.value();
            if (hasCheckpoint) {
                T t = computeIfAbsent(e.key());
                Comparable extractedPartitionKey = (Comparable) extractedInspector.getPartitionKey(extracted, 0);
                if (extractedPartitionKey != null)
                    tableInspector.setCheckpoint(t, 0, extractedPartitionKey);
            }
        }
        return this;
    }

    public Transformer save() {
        return save(null);
    }

    public Transformer save(WriteOptions userOptions) {
        WriteOptions options = WriteOptions.from(userOptions);

        if (options.consistency == null && writeConsistency != null) {
            options.consistency = writeConsistency;
        }

        options.timeout = timeout;

        Sibyl s = resources.getInstance(Sibyl.class);

        s.save(values(), tableClass, options);

        return this;
    }

    public Collection values() {
        return map.values();
    }

    public Set keySet() {
        return map.keySet();
    }

    public int size() {
        return map.size();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy