org.apache.pig.newplan.logical.optimizer.SchemaResetter Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.pig.newplan.logical.optimizer;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.pig.PigException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.PlanValidationException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.DependencyOrderWalker;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanWalker;
import org.apache.pig.newplan.ReverseDependencyOrderWalker;
import org.apache.pig.newplan.logical.expression.AllSameExpressionVisitor;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOCross;
import org.apache.pig.newplan.logical.relational.LOCube;
import org.apache.pig.newplan.logical.relational.LODistinct;
import org.apache.pig.newplan.logical.relational.LOFilter;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOGenerate;
import org.apache.pig.newplan.logical.relational.LOInnerLoad;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLimit;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LORank;
import org.apache.pig.newplan.logical.relational.LOSort;
import org.apache.pig.newplan.logical.relational.LOSplit;
import org.apache.pig.newplan.logical.relational.LOSplitOutput;
import org.apache.pig.newplan.logical.relational.LOStore;
import org.apache.pig.newplan.logical.relational.LOStream;
import org.apache.pig.newplan.logical.relational.LOUnion;
import org.apache.pig.newplan.logical.relational.LogicalRelationalNodesVisitor;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;
public class SchemaResetter extends LogicalRelationalNodesVisitor {
// uid duplicates are removed only after optimizer rule
// DuplicateForEachColumnRewrite has run. So disable it in calls before that
boolean skipDuplicateUidCheck = true;
private void visitAll(Collection lexpPlans) throws FrontendException {
for (LogicalExpressionPlan expPlan : lexpPlans) {
FieldSchemaResetter fsResetter = new FieldSchemaResetter(expPlan);
fsResetter.visit();
}
}
public SchemaResetter(OperatorPlan plan) throws FrontendException {
this(plan, false);
}
public SchemaResetter(OperatorPlan plan, boolean skipDuplicateUidCheck)
throws FrontendException {
super(plan, new DependencyOrderWalker(plan));
this.skipDuplicateUidCheck = skipDuplicateUidCheck;
}
@Override
public void visit(LOLoad load) throws FrontendException {
load.resetSchema();
validate(load.getSchema());
}
@Override
public void visit(LOFilter filter) throws FrontendException {
filter.resetSchema();
FieldSchemaResetter fsResetter = new FieldSchemaResetter(filter.getFilterPlan());
fsResetter.visit();
validate(filter.getSchema());
}
@Override
public void visit(LOStore store) throws FrontendException {
store.resetSchema();
validate(store.getSchema());
}
@Override
public void visit(LOJoin join) throws FrontendException {
join.resetSchema();
visitAll(join.getExpressionPlanValues());
validate(join.getSchema());
}
@Override
public void visit(LOForEach foreach) throws FrontendException {
foreach.resetSchema();
OperatorPlan innerPlan = foreach.getInnerPlan();
PlanWalker newWalker = currentWalker.spawnChildWalker(innerPlan);
pushWalker(newWalker);
currentWalker.walk(this);
popWalker();
validate(foreach.getSchema());
}
@Override
public void visit(LOGenerate gen) throws FrontendException {
gen.resetSchema();
visitAll(gen.getOutputPlans());
validate(gen.getSchema());
}
@Override
public void visit(LOInnerLoad load) throws FrontendException {
load.resetSchema();
load.getProjection().resetFieldSchema();
load.getSchema();
}
@Override
public void visit(LOCube loCube) throws FrontendException {
loCube.resetSchema();
visitAll(loCube.getExpressionPlans().values());
validate(loCube.getSchema());
}
@Override
public void visit(LOCogroup loCogroup) throws FrontendException {
loCogroup.resetSchema();
visitAll(loCogroup.getExpressionPlans().values());
validate(loCogroup.getSchema());
}
@Override
public void visit(LOSplit loSplit) throws FrontendException {
loSplit.resetSchema();
validate(loSplit.getSchema());
}
@Override
public void visit(LOSplitOutput loSplitOutput) throws FrontendException {
loSplitOutput.resetSchema();
FieldSchemaResetter fsResetter = new FieldSchemaResetter(loSplitOutput.getFilterPlan());
fsResetter.visit();
validate(loSplitOutput.getSchema());
}
@Override
public void visit(LOUnion loUnion) throws FrontendException {
loUnion.resetSchema();
validate(loUnion.getSchema());
}
@Override
public void visit(LOSort loSort) throws FrontendException {
loSort.resetSchema();
visitAll(loSort.getSortColPlans());
validate(loSort.getSchema());
}
@Override
public void visit(LORank loRank) throws FrontendException{
loRank.resetSchema();
visitAll(loRank.getRankColPlans());
validate(loRank.getSchema());
}
@Override
public void visit(LODistinct loDistinct) throws FrontendException {
loDistinct.resetSchema();
validate(loDistinct.getSchema());
}
@Override
public void visit(LOLimit loLimit) throws FrontendException {
loLimit.resetSchema();
if (loLimit.getLimitPlan() != null) {
FieldSchemaResetter fsResetter = new FieldSchemaResetter(
loLimit.getLimitPlan());
fsResetter.visit();
}
validate(loLimit.getSchema());
}
@Override
public void visit(LOCross loCross) throws FrontendException {
loCross.resetSchema();
validate(loCross.getSchema());
}
@Override
public void visit(LOStream loStream) throws FrontendException {
loStream.resetSchema();
validate(loStream.getSchema());
}
/**
* Check if schema is valid (ready to be part of a final logical plan)
* @param schema
* @throws PlanValidationException if the if any field in schema has uid -1
* or (skipDuplicateUidCheck is true and there are duplicate uids in schema)
*/
public void validate(LogicalSchema schema)
throws PlanValidationException{
if(schema == null)
return;
Set uidsSeen = new HashSet();
for(LogicalFieldSchema fs : schema.getFields()){
if(!skipDuplicateUidCheck){
//check duplicate uid
if(!uidsSeen.add(fs.uid)){
// uid already seen
String msg = "Logical plan invalid state: duplicate uid in " +
"schema : " + schema;
throw new PlanValidationException(
msg,
2270,
PigException.BUG
);
}
}
if(fs.uid < 0){
String msg = "Logical plan invalid state: invalid uid " + fs.uid +
" in schema : " + schema;
throw new PlanValidationException(
msg,
2271,
PigException.BUG
);
}
}
}
}
class FieldSchemaResetter extends AllSameExpressionVisitor {
protected FieldSchemaResetter(OperatorPlan p) throws FrontendException {
super(p, new ReverseDependencyOrderWalker(p));
}
@Override
protected void execute(LogicalExpression op) throws FrontendException {
op.resetFieldSchema();
op.getFieldSchema();
}
}