
// org.apache.asterix.api.common.APIFramework (source-listing header left over from web scraping; not code)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.api.common;
import java.io.PrintWriter;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.List;
import org.apache.asterix.api.common.Job.SubmissionMode;
import org.apache.asterix.aql.base.Statement.Kind;
import org.apache.asterix.aql.expression.FunctionDecl;
import org.apache.asterix.aql.expression.Query;
import org.apache.asterix.aql.expression.visitor.AQLPrintVisitor;
import org.apache.asterix.aql.rewrites.AqlRewriter;
import org.apache.asterix.common.config.AsterixCompilerProperties;
import org.apache.asterix.common.config.AsterixExternalProperties;
import org.apache.asterix.common.config.OptimizationConfUtil;
import org.apache.asterix.common.exceptions.ACIDException;
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.dataflow.data.common.AqlExpressionTypeComputer;
import org.apache.asterix.dataflow.data.common.AqlMergeAggregationExpressionFactory;
import org.apache.asterix.dataflow.data.common.AqlNullableTypeComputer;
import org.apache.asterix.dataflow.data.common.AqlPartialAggregationTypeComputer;
import org.apache.asterix.formats.base.IDataFormat;
import org.apache.asterix.jobgen.AqlLogicalExpressionJobGen;
import org.apache.asterix.metadata.MetadataManager;
import org.apache.asterix.metadata.MetadataTransactionContext;
import org.apache.asterix.metadata.declared.AqlMetadataProvider;
import org.apache.asterix.metadata.entities.Dataverse;
import org.apache.asterix.om.util.AsterixAppContextInfo;
import org.apache.asterix.optimizer.base.RuleCollections;
import org.apache.asterix.runtime.job.listener.JobEventListenerFactory;
import org.apache.asterix.transaction.management.service.transaction.JobIdFactory;
import org.apache.asterix.translator.AqlExpressionToPlanTranslator;
import org.apache.asterix.translator.CompiledStatements.ICompiledDmlStatement;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder;
import org.apache.hyracks.algebricks.compiler.api.ICompiler;
import org.apache.hyracks.algebricks.compiler.api.ICompilerFactory;
import org.apache.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController;
import org.apache.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialOnceRuleController;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionEvalSizeComputer;
import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
import org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
import org.apache.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
import org.apache.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.PlanPlotter;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
import org.apache.hyracks.algebricks.core.rewriter.base.AbstractRuleController;
import org.apache.hyracks.algebricks.core.rewriter.base.AlgebricksOptimizationContext;
import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
import org.apache.hyracks.algebricks.core.rewriter.base.IOptimizationContextFactory;
import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;
import org.json.JSONException;
/**
* Provides helper methods for compilation of a query into a JobSpec and submission
* to Hyracks through the Hyracks client interface.
*/
public class APIFramework {
public static final String HTML_STATEMENT_SEPARATOR = "";
private static List>> buildDefaultLogicalRewrites() {
List>> defaultLogicalRewrites = new ArrayList>>();
SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);
SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);
SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);
defaultLogicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.buildInitialTranslationRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.buildTypeInferenceRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.buildAutogenerateIDRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlFullDfs,
RuleCollections.buildNormalizationRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlNoDfs,
RuleCollections.buildCondPushDownAndJoinInferenceRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlFullDfs,
RuleCollections.buildLoadFieldsRuleCollection()));
// fj
defaultLogicalRewrites.add(new Pair>(seqCtrlFullDfs,
RuleCollections.buildFuzzyJoinRuleCollection()));
//
defaultLogicalRewrites.add(new Pair>(seqCtrlFullDfs,
RuleCollections.buildNormalizationRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlNoDfs,
RuleCollections.buildCondPushDownAndJoinInferenceRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlFullDfs,
RuleCollections.buildLoadFieldsRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.buildDataExchangeRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlNoDfs,
RuleCollections.buildConsolidationRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlNoDfs,
RuleCollections.buildAccessMethodRuleCollection()));
defaultLogicalRewrites.add(new Pair>(seqCtrlNoDfs,
RuleCollections.buildPlanCleanupRuleCollection()));
//put TXnRuleCollection!
return defaultLogicalRewrites;
}
private static List>> buildDefaultPhysicalRewrites() {
List>> defaultPhysicalRewrites = new ArrayList>>();
SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);
SequentialOnceRuleController seqOnceTopLevel = new SequentialOnceRuleController(false);
defaultPhysicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.buildPhysicalRewritesAllLevelsRuleCollection()));
defaultPhysicalRewrites.add(new Pair>(seqOnceTopLevel,
RuleCollections.buildPhysicalRewritesTopLevelRuleCollection()));
defaultPhysicalRewrites.add(new Pair>(seqOnceCtrl,
RuleCollections.prepareForJobGenRuleCollection()));
return defaultPhysicalRewrites;
}
private static class AqlOptimizationContextFactory implements IOptimizationContextFactory {
public static final AqlOptimizationContextFactory INSTANCE = new AqlOptimizationContextFactory();
private AqlOptimizationContextFactory() {
}
@Override
public IOptimizationContext createOptimizationContext(int varCounter,
IExpressionEvalSizeComputer expressionEvalSizeComputer,
IMergeAggregationExpressionFactory mergeAggregationExpressionFactory,
IExpressionTypeComputer expressionTypeComputer, INullableTypeComputer nullableTypeComputer,
PhysicalOptimizationConfig physicalOptimizationConfig) {
return new AlgebricksOptimizationContext(varCounter, expressionEvalSizeComputer,
mergeAggregationExpressionFactory, expressionTypeComputer, nullableTypeComputer,
physicalOptimizationConfig);
}
}
public static Pair reWriteQuery(List declaredFunctions,
AqlMetadataProvider metadataProvider, Query q, SessionConfig conf) throws AsterixException {
if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_EXPR_TREE)) {
conf.out().println();
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("Expression tree:
");
conf.out().println("");
} else {
conf.out().println("----------Expression tree:");
}
if (q != null) {
q.accept(new AQLPrintVisitor(conf.out()), 0);
}
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("
");
}
}
AqlRewriter rw = new AqlRewriter(declaredFunctions, q, metadataProvider);
rw.rewrite();
Query rwQ = rw.getExpr();
return new Pair(rwQ, rw.getVarCounter());
}
public static JobSpecification compileQuery(List declaredFunctions,
AqlMetadataProvider queryMetadataProvider, Query rwQ, int varCounter, String outputDatasetName,
SessionConfig conf, ICompiledDmlStatement statement) throws AsterixException, AlgebricksException,
JSONException, RemoteException, ACIDException {
if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_REWRITTEN_EXPR_TREE)) {
conf.out().println();
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("Rewritten expression tree:
");
conf.out().println("");
} else {
conf.out().println("----------Rewritten expression:");
}
if (rwQ != null) {
rwQ.accept(new AQLPrintVisitor(conf.out()), 0);
}
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("
");
}
}
org.apache.asterix.common.transactions.JobId asterixJobId = JobIdFactory.generateJobId();
queryMetadataProvider.setJobId(asterixJobId);
AqlExpressionToPlanTranslator t = new AqlExpressionToPlanTranslator(queryMetadataProvider, varCounter,
outputDatasetName, statement);
ILogicalPlan plan;
// statement = null when it's a query
if (statement == null || statement.getKind() != Kind.LOAD) {
plan = t.translate(rwQ);
} else {
plan = t.translateLoad();
}
LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
if (!conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS) && conf.is(SessionConfig.OOB_LOGICAL_PLAN)) {
conf.out().println();
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("Logical plan:
");
conf.out().println("");
} else {
conf.out().println("----------Logical plan:");
}
if (rwQ != null || statement.getKind() == Kind.LOAD) {
StringBuilder buffer = new StringBuilder();
PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
conf.out().print(buffer);
}
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("
");
}
}
//print the plot for the logical plan
AsterixExternalProperties xProps = AsterixAppContextInfo.getInstance().getExternalProperties();
Boolean plot = xProps.getIsPlottingEnabled();
if (plot) {
PlanPlotter.printLogicalPlan(plan);
}
AsterixCompilerProperties compilerProperties = AsterixAppContextInfo.getInstance().getCompilerProperties();
int frameSize = compilerProperties.getFrameSize();
int sortFrameLimit = (int) (compilerProperties.getSortMemorySize() / frameSize);
int groupFrameLimit = (int) (compilerProperties.getGroupMemorySize() / frameSize);
int joinFrameLimit = (int) (compilerProperties.getJoinMemorySize() / frameSize);
OptimizationConfUtil.getPhysicalOptimizationConfig().setFrameSize(frameSize);
OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalSort(sortFrameLimit);
OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesExternalGroupBy(groupFrameLimit);
OptimizationConfUtil.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinFrameLimit);
HeuristicCompilerFactoryBuilder builder = new HeuristicCompilerFactoryBuilder(
AqlOptimizationContextFactory.INSTANCE);
builder.setPhysicalOptimizationConfig(OptimizationConfUtil.getPhysicalOptimizationConfig());
builder.setLogicalRewrites(buildDefaultLogicalRewrites());
builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());
IDataFormat format = queryMetadataProvider.getFormat();
ICompilerFactory compilerFactory = builder.create();
builder.setExpressionEvalSizeComputer(format.getExpressionEvalSizeComputer());
builder.setIMergeAggregationExpressionFactory(new AqlMergeAggregationExpressionFactory());
builder.setPartialAggregationTypeComputer(new AqlPartialAggregationTypeComputer());
builder.setExpressionTypeComputer(AqlExpressionTypeComputer.INSTANCE);
builder.setNullableTypeComputer(AqlNullableTypeComputer.INSTANCE);
ICompiler compiler = compilerFactory.createCompiler(plan, queryMetadataProvider, t.getVarCounter());
if (conf.isOptimize()) {
compiler.optimize();
//plot optimized logical plan
if (plot)
PlanPlotter.printOptimizedLogicalPlan(plan);
if (conf.is(SessionConfig.OOB_OPTIMIZED_LOGICAL_PLAN)) {
if (conf.is(SessionConfig.FORMAT_ONLY_PHYSICAL_OPS)) {
// For Optimizer tests.
StringBuilder buffer = new StringBuilder();
PlanPrettyPrinter.printPhysicalOps(plan, buffer, 0);
conf.out().print(buffer);
} else {
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("Optimized logical plan:
");
conf.out().println("");
} else {
conf.out().println("----------Optimized logical plan:");
}
if (rwQ != null || statement.getKind() == Kind.LOAD) {
StringBuilder buffer = new StringBuilder();
PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
conf.out().print(buffer);
}
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("
");
}
}
}
}
if (!conf.isGenerateJobSpec()) {
return null;
}
AlgebricksPartitionConstraint clusterLocs = queryMetadataProvider.getClusterLocations();
builder.setBinaryBooleanInspectorFactory(format.getBinaryBooleanInspectorFactory());
builder.setBinaryIntegerInspectorFactory(format.getBinaryIntegerInspectorFactory());
builder.setClusterLocations(clusterLocs);
builder.setComparatorFactoryProvider(format.getBinaryComparatorFactoryProvider());
builder.setExpressionRuntimeProvider(new LogicalExpressionJobGenToExpressionRuntimeProviderAdapter(
AqlLogicalExpressionJobGen.INSTANCE));
builder.setHashFunctionFactoryProvider(format.getBinaryHashFunctionFactoryProvider());
builder.setHashFunctionFamilyProvider(format.getBinaryHashFunctionFamilyProvider());
builder.setNullWriterFactory(format.getNullWriterFactory());
builder.setPredicateEvaluatorFactoryProvider(format.getPredicateEvaluatorFactoryProvider());
switch (conf.fmt()) {
case JSON:
builder.setPrinterProvider(format.getJSONPrinterFactoryProvider());
break;
case CSV:
builder.setPrinterProvider(format.getCSVPrinterFactoryProvider());
break;
case ADM:
builder.setPrinterProvider(format.getPrinterFactoryProvider());
break;
default:
throw new RuntimeException("Unexpected OutputFormat!");
}
builder.setSerializerDeserializerProvider(format.getSerdeProvider());
builder.setTypeTraitProvider(format.getTypeTraitProvider());
builder.setNormalizedKeyComputerFactoryProvider(format.getNormalizedKeyComputerFactoryProvider());
JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(asterixJobId,
queryMetadataProvider.isWriteTransaction());
JobSpecification spec = compiler.createJob(AsterixAppContextInfo.getInstance(), jobEventListenerFactory);
if (conf.is(SessionConfig.OOB_HYRACKS_JOB)) {
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("Hyracks job:
");
conf.out().println("");
} else {
conf.out().println("----------Hyracks job:");
}
if (rwQ != null) {
conf.out().println(spec.toJSON().toString(1));
conf.out().println(spec.getUserConstraints());
}
if (conf.is(SessionConfig.FORMAT_HTML)) {
conf.out().println("
");
}
}
return spec;
}
public static void executeJobArray(IHyracksClientConnection hcc, JobSpecification[] specs, PrintWriter out)
throws Exception {
for (int i = 0; i < specs.length; i++) {
specs[i].setMaxReattempts(0);
JobId jobId = hcc.startJob(specs[i]);
long startTime = System.currentTimeMillis();
hcc.waitForCompletion(jobId);
long endTime = System.currentTimeMillis();
double duration = (endTime - startTime) / 1000.00;
out.println("Duration: " + duration + " sec
");
}
}
public static void executeJobArray(IHyracksClientConnection hcc, Job[] jobs, PrintWriter out) throws Exception {
for (int i = 0; i < jobs.length; i++) {
jobs[i].getJobSpec().setMaxReattempts(0);
long startTime = System.currentTimeMillis();
try {
JobId jobId = hcc.startJob(jobs[i].getJobSpec());
if (jobs[i].getSubmissionMode() == SubmissionMode.ASYNCHRONOUS) {
continue;
}
hcc.waitForCompletion(jobId);
} catch (Exception e) {
e.printStackTrace();
continue;
}
long endTime = System.currentTimeMillis();
double duration = (endTime - startTime) / 1000.00;
out.println("Duration: " + duration + " sec
");
}
}
private static IDataFormat getDataFormat(MetadataTransactionContext mdTxnCtx, String dataverseName)
throws AsterixException {
Dataverse dataverse = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, dataverseName);
IDataFormat format;
try {
format = (IDataFormat) Class.forName(dataverse.getDataFormat()).newInstance();
} catch (Exception e) {
throw new AsterixException(e);
}
return format;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (page footer left over from web scraping; not code)