// Downloading a project consumes significant server resources, so we ask for a small fee to cover our server costs. Thank you in advance. The project price is only $1.
// Once purchased, you can download and modify this project as often as you like.
/*
* Copyright (C) 2015 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.aggregation;
import io.datakernel.aggregation.fieldtype.FieldType;
import io.datakernel.aggregation.ot.AggregationDiff;
import io.datakernel.aggregation.ot.AggregationStructure;
import io.datakernel.async.Stage;
import io.datakernel.codegen.ClassBuilder;
import io.datakernel.codegen.DefiningClassLoader;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.jmx.EventloopJmxMBeanEx;
import io.datakernel.jmx.JmxAttribute;
import io.datakernel.serializer.BufferSerializer;
import io.datakernel.stream.StreamConsumer;
import io.datakernel.stream.StreamProducer;
import io.datakernel.stream.processor.*;
import io.datakernel.util.Initializable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static io.datakernel.aggregation.AggregationUtils.*;
import static io.datakernel.codegen.Expressions.arg;
import static io.datakernel.codegen.Expressions.cast;
import static io.datakernel.stream.DataStreams.stream;
import static io.datakernel.util.CollectionUtils.difference;
import static io.datakernel.util.Preconditions.checkArgument;
import static java.lang.Math.min;
import static java.util.Collections.singletonList;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.toList;
/**
* Represents an aggregation, which aggregates data using custom reducer and preaggregator.
* Provides methods for loading and querying data.
*/
@SuppressWarnings("unchecked")
public class Aggregation implements IAggregation, Initializable, EventloopJmxMBeanEx {
// Per-instance logger, named after the runtime class of this object.
private final Logger logger = LoggerFactory.getLogger(this.getClass());
// Default values for the tunable settings declared further down.
public static final int DEFAULT_CHUNK_SIZE = 1_000_000;
public static final int DEFAULT_REDUCER_BUFFER_SIZE = StreamReducer.DEFAULT_BUFFER_SIZE;
public static final int DEFAULT_SORTER_ITEMS_IN_MEMORY = 1_000_000;
public static final Duration DEFAULT_MAX_INCREMENTAL_RELOAD_PERIOD = Duration.ofMinutes(10);
public static final int DEFAULT_MAX_CHUNKS_TO_CONSOLIDATE = 1000;
// Collaborators supplied through the constructor.
private final Eventloop eventloop;
private final ExecutorService executorService;
private final DefiningClassLoader classLoader;
private final AggregationChunkStorage aggregationChunkStorage;
// Directory handed to the sorter storage; when still null, sortStream() creates a temporary directory on first use.
private Path temporarySortDir;
private final AggregationStructure structure;
// Not final: replaced by setState() and cleared by detachState().
private AggregationState state;
// settings
private int chunkSize = DEFAULT_CHUNK_SIZE;
private int reducerBufferSize = DEFAULT_REDUCER_BUFFER_SIZE;
private int sorterItemsInMemory = DEFAULT_SORTER_ITEMS_IN_MEMORY;
private Duration maxIncrementalReloadPeriod = DEFAULT_MAX_INCREMENTAL_RELOAD_PERIOD;
private boolean ignoreChunkReadingExceptions = false;
private int maxChunksToConsolidate = DEFAULT_MAX_CHUNKS_TO_CONSOLIDATE;
// jmx
private AggregationStats stats = new AggregationStats();
// NOTE(review): the consolidation fields below are not read or written in the visible part of this file;
// presumably they are maintained by the consolidation code and exposed through JMX - confirm.
private long consolidationStarted;
private long consolidationLastTimeMillis;
private int consolidations;
private Throwable consolidationLastError;
/**
 * Wires a new aggregation to its collaborators and initial state.
 * Private: instances are obtained through the static {@code create} factory.
 */
private Aggregation(Eventloop eventloop, ExecutorService executorService, DefiningClassLoader classLoader,
        AggregationChunkStorage aggregationChunkStorage, AggregationStructure structure,
        AggregationState state) {
    // data model
    this.structure = structure;
    this.state = state;

    // storage and code generation
    this.aggregationChunkStorage = aggregationChunkStorage;
    this.classLoader = classLoader;

    // execution
    this.executorService = executorService;
    this.eventloop = eventloop;
}
/**
 * Creates an aggregation over the given structure, starting from a fresh
 * {@link AggregationState} built from that structure.
 * <p>
 * All tunable settings start at their defaults and can be changed with the {@code with...} methods:
 * chunk size {@link #DEFAULT_CHUNK_SIZE} (1,000,000),
 * sorter items kept in memory {@link #DEFAULT_SORTER_ITEMS_IN_MEMORY} (1,000,000),
 * reducer buffer size {@link #DEFAULT_REDUCER_BUFFER_SIZE},
 * maximum incremental reload period {@link #DEFAULT_MAX_INCREMENTAL_RELOAD_PERIOD} (10 minutes),
 * maximum chunks to consolidate {@link #DEFAULT_MAX_CHUNKS_TO_CONSOLIDATE} (1000).
 *
 * @param eventloop               event loop, in which the aggregation is to run
 * @param executorService         executor passed to the sorter storage when streams are sorted
 * @param classLoader             class loader for defining dynamic classes
 * @param aggregationChunkStorage storage for data chunks
 * @param structure               structure (keys, measures, partitioning key) of the aggregation
 * @return a new aggregation with default settings
 */
public static Aggregation create(Eventloop eventloop, ExecutorService executorService, DefiningClassLoader classLoader,
        AggregationChunkStorage aggregationChunkStorage, AggregationStructure structure) {
    return new Aggregation(eventloop, executorService, classLoader, aggregationChunkStorage, structure, new AggregationState(structure));
}
/**
 * Sets the chunk size that {@code consume} passes to the group reducer it creates.
 * Defaults to {@link #DEFAULT_CHUNK_SIZE}.
 *
 * @param chunkSize new chunk size
 * @return this aggregation, for call chaining
 */
public Aggregation withChunkSize(int chunkSize) {
this.chunkSize = chunkSize;
return this;
}
/**
 * Sets the reducer buffer size. Defaults to {@link #DEFAULT_REDUCER_BUFFER_SIZE}.
 *
 * @param reducerBufferSize new reducer buffer size
 * @return this aggregation, for call chaining
 */
public Aggregation withReducerBufferSize(int reducerBufferSize) {
this.reducerBufferSize = reducerBufferSize;
return this;
}
/**
 * Sets the items-in-memory value handed to the stream sorter created in {@code sortStream}.
 * Defaults to {@link #DEFAULT_SORTER_ITEMS_IN_MEMORY}.
 *
 * @param sorterItemsInMemory new items-in-memory value for the sorter
 * @return this aggregation, for call chaining
 */
public Aggregation withSorterItemsInMemory(int sorterItemsInMemory) {
this.sorterItemsInMemory = sorterItemsInMemory;
return this;
}
/**
 * Sets the maximum incremental reload period.
 * Defaults to {@link #DEFAULT_MAX_INCREMENTAL_RELOAD_PERIOD}.
 *
 * @param maxIncrementalReloadPeriod new maximum incremental reload period
 * @return this aggregation, for call chaining
 */
public Aggregation withMaxIncrementalReloadPeriod(Duration maxIncrementalReloadPeriod) {
this.maxIncrementalReloadPeriod = maxIncrementalReloadPeriod;
return this;
}
/**
 * Sets the flag controlling whether chunk reading exceptions are ignored. Defaults to {@code false}.
 *
 * @param ignoreChunkReadingExceptions new flag value
 * @return this aggregation, for call chaining
 */
public Aggregation withIgnoreChunkReadingExceptions(boolean ignoreChunkReadingExceptions) {
this.ignoreChunkReadingExceptions = ignoreChunkReadingExceptions;
return this;
}
/**
 * Sets the maximum number of chunks to consolidate.
 * Defaults to {@link #DEFAULT_MAX_CHUNKS_TO_CONSOLIDATE}.
 *
 * @param maxChunksToConsolidate new maximum number of chunks to consolidate
 * @return this aggregation, for call chaining
 */
public Aggregation withMaxChunksToConsolidate(int maxChunksToConsolidate) {
this.maxChunksToConsolidate = maxChunksToConsolidate;
return this;
}
/**
 * Sets the directory given to the sorter storage in {@code sortStream}.
 * If it is left {@code null}, {@code sortStream} creates a temporary directory the first time it runs.
 *
 * @param temporarySortDir directory for the sorter storage
 * @return this aggregation, for call chaining
 */
public Aggregation withTemporarySortDir(Path temporarySortDir) {
this.temporarySortDir = temporarySortDir;
return this;
}
/**
 * Replaces the statistics object of this aggregation (a new {@code AggregationStats} by default).
 *
 * @param stats statistics object to use
 * @return this aggregation, for call chaining
 */
public Aggregation withStats(AggregationStats stats) {
this.stats = stats;
return this;
}
/**
 * @return the structure this aggregation was created with
 */
public AggregationStructure getStructure() {
return structure;
}
/**
 * @return the current state; {@code null} after {@code detachState()} until a new state is set
 */
public AggregationState getState() {
return state;
}
/**
 * Replaces the current state of this aggregation.
 *
 * @param state new state
 */
public void setState(AggregationState state) {
this.state = state;
}
/**
 * Removes the state from this aggregation and hands it to the caller.
 * Afterwards this aggregation holds no state ({@code null}) until {@code setState} is called.
 *
 * @return the state held before the call
 */
public AggregationState detachState() {
    AggregationState detached = this.state;
    this.state = null;
    return detached;
}
/**
 * @return the keys of the underlying structure (delegates to {@code structure.getKeys()})
 */
public List getKeys() {
return structure.getKeys();
}
/**
 * @return the measures of the underlying structure (delegates to {@code structure.getMeasures()})
 */
public List getMeasures() {
return structure.getMeasures();
}
/**
 * @return the key types of the underlying structure (delegates to {@code structure.getKeyTypes()})
 */
public Map getKeyTypes() {
return structure.getKeyTypes();
}
/**
 * @return the measure types of the underlying structure (delegates to {@code structure.getMeasureTypes()})
 */
public Map getMeasureTypes() {
return structure.getMeasureTypes();
}
/**
 * @return the partitioning key of the underlying structure (delegates to {@code structure.getPartitioningKey()})
 */
public List getPartitioningKey() {
return structure.getPartitioningKey();
}
public StreamReducers.Reducer aggregationReducer(Class> inputClass, Class> outputClass,
List keys, List measures,
DefiningClassLoader classLoader) {
return AggregationUtils.aggregationReducer(structure, inputClass, outputClass,
keys, measures, classLoader);
}
/**
 * Streams records from {@code producer} into this aggregation.
 * <p>
 * The key set of {@code keyFields} must equal the aggregation's keys, and the key set of
 * {@code measureFields} must be contained in the aggregation's measures; otherwise the
 * {@code checkArgument} precondition fails. Only the measures named in {@code measureFields}
 * are aggregated, in the order the aggregation declares them.
 *
 * @param producer      producer of the input records
 * @param inputClass    class of input records
 * @param keyFields     map whose key set names the aggregation keys
 * @param measureFields map whose key set names the measures to aggregate
 * @return stage that completes with an {@code AggregationDiff} built from the chunks
 * reported by the group reducer
 */
@SuppressWarnings("unchecked")
public Stage consume(StreamProducer producer,
        Class inputClass, Map keyFields, Map measureFields) {
    checkArgument(new HashSet<>(getKeys()).equals(keyFields.keySet()), "Expected keys: %s, actual keyFields: %s", getKeys(), keyFields);
    checkArgument(getMeasureTypes().keySet().containsAll(measureFields.keySet()), "Unknown measures: %s", difference(measureFields.keySet(), getMeasureTypes().keySet()));
    logger.info("Started consuming data in aggregation {}. Keys: {} Measures: {}", this, keyFields.keySet(), measureFields.keySet());
    Class<?> keyClass = createKeyClass(structure, getKeys(), classLoader);
    Set measureFieldKeys = measureFields.keySet();
    // Filter the aggregation's own measure list so the result keeps the aggregation's ordering.
    List measures = this.getMeasureTypes().keySet().stream().filter(measureFieldKeys::contains).collect(toList());
    Class<?> accumulatorClass = createRecordClass(structure, getKeys(), measures, classLoader);
    Aggregate aggregate = createPreaggregator(structure, inputClass, accumulatorClass,
            keyFields, measureFields,
            classLoader);
    AggregationGroupReducer groupReducer = new AggregationGroupReducer<>(aggregationChunkStorage,
            structure, measures,
            accumulatorClass,
            createPartitionPredicate(accumulatorClass, getPartitioningKey(), classLoader),
            createKeyFunction(inputClass, keyClass, getKeys(), classLoader),
            aggregate, chunkSize, classLoader);
    return producer.streamTo(groupReducer)
            .getConsumerResult()
            .thenApply(chunks -> AggregationDiff.of(new HashSet<>(chunks)));
}
/**
 * Streams records into this aggregation, taking the key and measure field mappings from
 * {@code scanKeyFields(inputClass)} and {@code scanMeasureFields(inputClass)}.
 *
 * @param producer   producer of the input records
 * @param inputClass class of input records, scanned for key and measure fields
 * @return the stage returned by the four-argument {@code consume}
 */
public Stage consume(StreamProducer producer, Class inputClass) {
    Map scannedKeyFields = scanKeyFields(inputClass);
    Map scannedMeasureFields = scanMeasureFields(inputClass);
    return consume(producer, inputClass, scannedKeyFields, scannedMeasureFields);
}
/**
 * Estimates the cost of a query as the number of chunks the current state finds for it.
 * Only the query measures that this aggregation also declares are taken into account.
 *
 * @param query query to estimate
 * @return number of matching chunks
 */
public double estimateCost(AggregationQuery query) {
    List ownMeasures = getMeasures();
    List requestedOwnMeasures = query.getMeasures().stream().filter(ownMeasures::contains).collect(toList());
    List matchingChunks = state.findChunks(query.getPredicate(), requestedOwnMeasures);
    return matchingChunks.size();
}
/**
 * Runs the query using this aggregation's own class loader for the generated classes.
 *
 * @param query       query
 * @param outputClass class of output records
 * @return producer that streams query results
 */
public StreamProducer query(AggregationQuery query, Class outputClass) {
return query(query, outputClass, classLoader);
}
/**
 * Returns a {@link StreamProducer} of the records retrieved from this aggregation for the given query.
 * <p>
 * {@code queryClassLoader} must be this aggregation's class loader or one of its descendants
 * in the parent chain; otherwise the {@code checkArgument} precondition fails.
 *
 * @param query            query
 * @param outputClass      class of output records
 * @param queryClassLoader class loader used for this query
 * @return producer that streams query results
 */
@SuppressWarnings("unchecked")
@Override
public StreamProducer query(AggregationQuery query, Class outputClass, DefiningClassLoader queryClassLoader) {
    // Climb the parent chain until we meet our own class loader or run out of parents.
    ClassLoader ancestor = queryClassLoader;
    while (ancestor != null && ancestor != this.classLoader) {
        ancestor = ancestor.getParent();
    }
    checkArgument(ancestor != null, "Unrelated queryClassLoader");

    List requestedMeasures = query.getMeasures();
    // Keep only the requested measures this aggregation declares, in the aggregation's order.
    List fields = getMeasures().stream().filter(requestedMeasures::contains).collect(toList());
    List matchingChunks = state.findChunks(query.getPredicate(), fields);
    return consolidatedProducer(query.getKeys(),
            fields, outputClass, query.getPredicate(), matchingChunks, queryClassLoader);
}
/**
 * Appends a {@code StreamSorter} to the given stream.
 * <p>
 * The comparator is built from {@code allKeys}, while the serializer used by the sorter storage
 * is built from this aggregation's own keys plus {@code measures}. If no temporary sort directory
 * has been configured, one is created with {@link Files#createTempDirectory} and remembered for
 * later calls; an {@link IOException} from that call is rethrown as {@link UncheckedIOException}.
 *
 * @param unsortedStream stream to sort
 * @param resultClass    class of the streamed records
 * @param allKeys        keys the comparator is built from
 * @param measures       measures included in the serializer
 * @param classLoader    class loader for the generated comparator and serializer
 * @return the stream with the sorter applied
 */
private StreamProducer sortStream(StreamProducer unsortedStream, Class resultClass,
        List allKeys, List measures, DefiningClassLoader classLoader) {
    Comparator byKey = createKeyComparator(resultClass, allKeys, classLoader);
    BufferSerializer recordSerializer = createBufferSerializer(structure, resultClass,
            getKeys(), measures, classLoader);

    // Lazily pick a sort directory; done after the code generation above, as before.
    if (temporarySortDir == null) {
        try {
            temporarySortDir = Files.createTempDirectory("aggregation_sort_dir");
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    return unsortedStream.with(
            StreamSorter.create(
                    StreamSorterStorageImpl.create(executorService, recordSerializer, temporarySortDir),
                    Function.identity(),
                    byKey,
                    false,
                    sorterItemsInMemory));
}
private Stage> doConsolidation(List chunksToConsolidate) {
Set aggregationFields = new HashSet<>(getMeasures());
Set chunkFields = new HashSet<>();
for (AggregationChunk chunk : chunksToConsolidate) {
for (String measure : chunk.getMeasures()) {
if (aggregationFields.contains(measure))
chunkFields.add(measure);
}
}
List measures = getMeasures().stream().filter(chunkFields::contains).collect(toList());
Class resultClass = createRecordClass(structure, getKeys(), measures, classLoader);
StreamProducer