/*
 * Copyright 2017 Futeh Kao
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.e6tech.elements.cassandra.etl;

import net.e6tech.elements.cassandra.*;
import net.e6tech.elements.common.resources.Resources;
import net.e6tech.elements.common.util.SystemException;
import net.e6tech.elements.common.util.datastructure.Pair;

import java.util.*;
import java.util.function.BiConsumer;
import java.util.stream.Stream;

/**
 * This is a convenience class that helps transform a stream, array, or collection of extracted entities of type E
 * into another set of entities of type T.
 *
 * NOTE: the entities to be transformed need to be supplied in ascending order.
 *
 * @param <T> the transformed type
 * @param <E> the extracted type
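 *
 * A minimal usage sketch; {@code SourceRow}, {@code TargetRow}, and their accessors are illustrative, and the
 * varargs {@code PrimaryKey} construction is assumed:
 * <pre>{@code
 * Transformer<TargetRow, SourceRow> transformer = new Transformer<>(resources, TargetRow.class);
 * transformer.transform(extractedRows, (t, source) -> t.addPrimaryKey(new PrimaryKey(source.getAccountId()), source))
 *         .forEachCreateIfNotExist((source, target) -> target.setTotal(target.getTotal() + source.getAmount()))
 *         .save();
 * }</pre>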
 */
public class Transformer<T, E> {

    private Resources resources;
    private Map<PrimaryKey, T> map = new HashMap<>();
    private Class<T> tableClass;
    private List<Pair<PrimaryKey, E>> entries = Collections.synchronizedList(new LinkedList<>());
    private boolean hasCheckpoint;
    private Inspector tableInspector;
    private Consistency readConsistency = null;
    private Consistency writeConsistency = null;
    private long timeout = 0; // i.e. disabled

    public Transformer(Resources resources, Class<T> cls) {
        this.resources = resources;
        tableClass = cls;
        tableInspector = getInspector(tableClass);
        hasCheckpoint = tableInspector.getCheckpointColumn(0) != null;
    }

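    /**
     * Applies the consumer to each extracted entity in the stream, typically so that the consumer can register a
     * primary key via addPrimaryKey, and then loads the corresponding existing rows.
     */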
    public Transformer<T, E> transform(Stream<E> stream, BiConsumer<Transformer<T, E>, E> consumer) {
        stream.forEach(e -> consumer.accept(this, e));
        load();
        return this;
    }

    public Transformer<T, E> transform(E[] array, BiConsumer<Transformer<T, E>, E> consumer) {
        // Do not run this asynchronously. We need to follow the array order.
        for (E e : array) {
            consumer.accept(this, e);
        }
        load();
        return this;
    }

    public Transformer<T, E> transform(Collection<E> collection, BiConsumer<Transformer<T, E>, E> consumer) {
        for (E e : collection) {
            consumer.accept(this, e);
        }
        load();
        return this;
    }

    public Consistency getReadConsistency() {
        return readConsistency;
    }

    public void setReadConsistency(Consistency readConsistency) {
        this.readConsistency = readConsistency;
    }

    public Consistency getWriteConsistency() {
        return writeConsistency;
    }

    public void setWriteConsistency(Consistency writeConsistency) {
        this.writeConsistency = writeConsistency;
    }

    public long getTimeout() {
        return timeout;
    }

    public void setTimeout(long timeout) {
        this.timeout = timeout;
    }

    public Transformer<T, E> timeout(long timeout) {
        setTimeout(timeout);
        return this;
    }

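    /**
     * Registers an extracted entity together with the primary key of the transformed row it maps to.  The key must
     * supply a value for every partition and clustering column of the target table.
     */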
    public Transformer<T, E> addPrimaryKey(PrimaryKey key, E e) {
        if (key == null)
            return this;
        int keyColumns = tableInspector.getPartitionKeySize() + tableInspector.getClusteringKeySize();
        if (keyColumns != key.getKeys().length) {
            throw new IllegalArgumentException("Mismatched key columns. The primary key for " + tableClass + " consists of " + keyColumns + " column(s); " +
                    "the provided primary key has " + key.getKeys().length + " components.");
        }
        entries.add(new Pair<>(key, e));
        return this;
    }

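    // Fetches the existing transformed rows for all registered primary keys in one batch read, honoring the
    // configured read consistency and timeout.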
    private Transformer<T, E> load() {
        Sibyl s = resources.getInstance(Sibyl.class);
        map = new HashMap<>(Math.max((int) (entries.size() / .75f) + 1, 16));
        Set<PrimaryKey> keys = new HashSet<>(Math.max((int) (entries.size() / .75f) + 1, 16));
        for (Pair<PrimaryKey, E> e : entries()) {
            keys.add(e.key());
        }
        ReadOptions readOptions = new ReadOptions();
        readOptions.consistency = readConsistency;
        s.get(keys, tableClass, readOptions)
                .timeout(timeout)
                .inExecutionOrder(map::put);
        return this;
    }

    private Collection<Pair<PrimaryKey, E>> entries() {
        return entries;
    }

    private Inspector getInspector(Class<?> cls) {
        return resources.getInstance(SessionProvider.class).getInspector(cls);
    }

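    // Returns the transformed row for the given key, instantiating a new one (with its primary key populated)
    // when the row does not yet exist.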
    private T computeIfAbsent(PrimaryKey key) {
        return map.computeIfAbsent(key, k -> {
            try {
                T t = tableClass.getDeclaredConstructor().newInstance();
                tableInspector.setPrimaryKey(key, t);
                return t;
            } catch (Exception e) {
                throw new SystemException(e);
            }
        });
    }

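    /**
     * Applies the same consumer to both newly created and existing transformed rows.  Equivalent to calling
     * forEachNewOrExisting with the same consumer for both cases.
     */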
    @SuppressWarnings("squid:S3776")
    public Transformer<T, E> forEachCreateIfNotExist(BiConsumer<E, T> consumer) {
        forEachNewOrExisting(consumer, consumer);
        return this;
    }

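    /**
     * Iterates over the registered entries in insertion order, invoking newItems for rows that did not already
     * exist and existing for rows that were loaded.  When the table declares a checkpoint column, entries whose
     * partition key is not greater than the stored checkpoint are treated as duplicates and skipped; afterwards
     * the checkpoint is advanced to the latest partition key seen.
     */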
    @SuppressWarnings({"unchecked", "squid:S3776"})
    public Transformer<T, E> forEachNewOrExisting(BiConsumer<E, T> newItems, BiConsumer<E, T> existing) {
        Inspector extractedInspector = null;
        // This cannot be run asynchronously; entries must be processed in insertion order.
        for (Pair<PrimaryKey, E> e : entries()) {
            boolean existingItem = map.get(e.key()) != null;
            T t = computeIfAbsent(e.key());
            E extracted = e.value();
            if (extractedInspector == null)
                extractedInspector = getInspector(extracted.getClass());
            boolean duplicate = false;
            if (hasCheckpoint) {
                Comparable extractedPartitionKey = (Comparable) extractedInspector.getPartitionKey(extracted, 0);
                if (extractedPartitionKey != null) {
                    Comparable checkPoint = tableInspector.getCheckpoint(t, 0);
                    // The extracted partition key needs to be larger than the checkpoint.  Otherwise, the entry is
                    // a duplicate, typically replayed because of an earlier failure.
                    duplicate = checkPoint != null && extractedPartitionKey.compareTo(checkPoint) <= 0;
                }
            }
            if (!duplicate) {
                if (existingItem) {
                    existing.accept(extracted, t);
                } else {
                    newItems.accept(extracted, t);
                }
            }
        }
        // update checkpoints.
        for (Pair<PrimaryKey, E> e : entries()) {
            E extracted = e.value();
            if (hasCheckpoint) {
                T t = computeIfAbsent(e.key());
                Comparable extractedPartitionKey = (Comparable) extractedInspector.getPartitionKey(extracted, 0);
                if (extractedPartitionKey != null)
                    tableInspector.setCheckpoint(t, 0, extractedPartitionKey);
            }
        }
        return this;
    }

    public Transformer<T, E> save() {
        return save(null);
    }

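    /**
     * Saves all transformed rows using the supplied write options; falls back to the transformer's write
     * consistency when the options do not specify one, and applies the configured timeout.
     */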
    public Transformer<T, E> save(WriteOptions userOptions) {
        WriteOptions options = WriteOptions.from(userOptions);
        if (options.consistency == null && writeConsistency != null) {
            options.consistency = writeConsistency;
        }
        options.timeout = timeout;
        Sibyl s = resources.getInstance(Sibyl.class);
        s.save(values(), tableClass, options);
        return this;
    }

    public Collection<T> values() {
        return map.values();
    }

    public Set<PrimaryKey> keySet() {
        return map.keySet();
    }

    public int size() {
        return map.size();
    }
}