// org.qcri.rheem.profiler.spark.SparkSourceProfiler Maven / Gradle / Ivy
// The newest version!
package org.qcri.rheem.profiler.spark;
import org.qcri.rheem.core.api.Configuration;
import org.qcri.rheem.core.platform.ChannelInstance;
import org.qcri.rheem.profiler.util.ProfilingUtils;
import org.qcri.rheem.spark.channels.RddChannel;
import org.qcri.rheem.spark.operators.SparkExecutionOperator;
import java.util.function.Supplier;
/**
* {@link SparkOperatorProfiler} implementation for {@link SparkExecutionOperator}s with one input and one output.
*/
public abstract class SparkSourceProfiler extends SparkOperatorProfiler {
public SparkSourceProfiler(Supplier operatorGenerator,
Configuration configuration,
Supplier> dataQuantumGenerator) {
super(operatorGenerator, configuration, dataQuantumGenerator);
}
@Override
protected Result executeOperator() {
final RddChannel.Instance outputChannelInstance = createChannelInstance(this.sparkExecutor);
// Let the operator execute.
ProfilingUtils.sleep(this.executionPaddingTime); // Pad measurement with some idle time.
final long startTime = System.currentTimeMillis();
this.evaluate(
this.operator,
new ChannelInstance[]{},
new ChannelInstance[]{outputChannelInstance}
);
// Force the execution of the operator.
outputChannelInstance.provideRdd().foreach(dataQuantum -> {
});
final long endTime = System.currentTimeMillis();
ProfilingUtils.sleep(this.executionPaddingTime); // Pad measurement with some idle time.
// Yet another run to count the output cardinality.
final long outputCardinality = outputChannelInstance.provideRdd().count();
// Gather and assemble all result metrics.
return new Result(
this.inputCardinalities,
outputCardinality,
endTime - startTime,
this.provideDiskBytes(startTime, endTime),
this.provideNetworkBytes(startTime, endTime),
this.provideCpuCycles(startTime, endTime),
this.numMachines,
this.numCoresPerMachine
);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy