All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pig.newplan.logical.optimizer.SchemaResetter Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.pig.newplan.logical.optimizer;

import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.pig.PigException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.PlanValidationException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.DependencyOrderWalker;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanWalker;
import org.apache.pig.newplan.ReverseDependencyOrderWalker;
import org.apache.pig.newplan.logical.expression.AllSameExpressionVisitor;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOCross;
import org.apache.pig.newplan.logical.relational.LOCube;
import org.apache.pig.newplan.logical.relational.LODistinct;
import org.apache.pig.newplan.logical.relational.LOFilter;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOGenerate;
import org.apache.pig.newplan.logical.relational.LOInnerLoad;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLimit;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LORank;
import org.apache.pig.newplan.logical.relational.LOSort;
import org.apache.pig.newplan.logical.relational.LOSplit;
import org.apache.pig.newplan.logical.relational.LOSplitOutput;
import org.apache.pig.newplan.logical.relational.LOStore;
import org.apache.pig.newplan.logical.relational.LOStream;
import org.apache.pig.newplan.logical.relational.LOUnion;
import org.apache.pig.newplan.logical.relational.LogicalRelationalNodesVisitor;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;

/**
 * Relational-plan visitor that, for each operator it visits, calls
 * {@code resetSchema()}, resets the field schemas of the operator's
 * expression plans (where it has any), and then passes the result of
 * {@code getSchema()} to {@link #validate(LogicalSchema)}.
 * <p>
 * The plan is walked with a {@link DependencyOrderWalker}.
 */
public class SchemaResetter extends LogicalRelationalNodesVisitor {

    // uid duplicates are removed only after optimizer rule
    // DuplicateForEachColumnRewrite has run. So disable it in calls before that.
    // Always assigned by the constructors; the one-argument constructor
    // sets it to false (duplicate check enabled).
    boolean skipDuplicateUidCheck;

    /**
     * Resets the field schemas of every expression plan in the collection.
     *
     * @param lexpPlans expression plans to reset
     * @throws FrontendException if visiting any of the plans fails
     */
    private void visitAll(Collection<LogicalExpressionPlan> lexpPlans) throws FrontendException {
        for (LogicalExpressionPlan expPlan : lexpPlans) {
            FieldSchemaResetter fsResetter = new FieldSchemaResetter(expPlan);
            fsResetter.visit();
        }
    }

    /**
     * Creates a resetter that performs the duplicate-uid check during
     * validation.
     *
     * @param plan the plan to walk
     * @throws FrontendException if the visitor cannot be constructed
     */
    public SchemaResetter(OperatorPlan plan) throws FrontendException {
        this(plan, false);
    }

    /**
     * Creates a resetter.
     *
     * @param plan the plan to walk
     * @param skipDuplicateUidCheck if true, {@link #validate(LogicalSchema)}
     *        does not check for duplicate uids
     * @throws FrontendException if the visitor cannot be constructed
     */
    public SchemaResetter(OperatorPlan plan, boolean skipDuplicateUidCheck)
            throws FrontendException {
        super(plan, new DependencyOrderWalker(plan));
        this.skipDuplicateUidCheck = skipDuplicateUidCheck;
    }

    @Override
    public void visit(LOLoad load) throws FrontendException {
        load.resetSchema();
        validate(load.getSchema());
    }

    @Override
    public void visit(LOFilter filter) throws FrontendException {
        filter.resetSchema();
        FieldSchemaResetter fsResetter = new FieldSchemaResetter(filter.getFilterPlan());
        fsResetter.visit();
        validate(filter.getSchema());
    }

    @Override
    public void visit(LOStore store) throws FrontendException {
        store.resetSchema();
        validate(store.getSchema());
    }

    @Override
    public void visit(LOJoin join) throws FrontendException {
        join.resetSchema();
        visitAll(join.getExpressionPlanValues());
        validate(join.getSchema());
    }

    @Override
    public void visit(LOForEach foreach) throws FrontendException {
        foreach.resetSchema();
        OperatorPlan innerPlan = foreach.getInnerPlan();
        PlanWalker newWalker = currentWalker.spawnChildWalker(innerPlan);
        pushWalker(newWalker);
        try {
            currentWalker.walk(this);
        } finally {
            // Restore the outer walker even when the inner walk fails, so the
            // visitor is not left pointing at the inner plan.
            popWalker();
        }
        validate(foreach.getSchema());
    }

    @Override
    public void visit(LOGenerate gen) throws FrontendException {
        gen.resetSchema();
        visitAll(gen.getOutputPlans());
        validate(gen.getSchema());
    }

    @Override
    public void visit(LOInnerLoad load) throws FrontendException {
        load.resetSchema();
        load.getProjection().resetFieldSchema();
        // Schema is requested but, unlike the other operators, not validated.
        load.getSchema();
    }

    @Override
    public void visit(LOCube loCube) throws FrontendException {
        loCube.resetSchema();
        visitAll(loCube.getExpressionPlans().values());
        validate(loCube.getSchema());
    }

    @Override
    public void visit(LOCogroup loCogroup) throws FrontendException {
        loCogroup.resetSchema();
        visitAll(loCogroup.getExpressionPlans().values());
        validate(loCogroup.getSchema());
    }

    @Override
    public void visit(LOSplit loSplit) throws FrontendException {
        loSplit.resetSchema();
        validate(loSplit.getSchema());
    }

    @Override
    public void visit(LOSplitOutput loSplitOutput) throws FrontendException {
        loSplitOutput.resetSchema();
        FieldSchemaResetter fsResetter = new FieldSchemaResetter(loSplitOutput.getFilterPlan());
        fsResetter.visit();
        validate(loSplitOutput.getSchema());
    }

    @Override
    public void visit(LOUnion loUnion) throws FrontendException {
        loUnion.resetSchema();
        validate(loUnion.getSchema());
    }

    @Override
    public void visit(LOSort loSort) throws FrontendException {
        loSort.resetSchema();
        visitAll(loSort.getSortColPlans());
        validate(loSort.getSchema());
    }

    @Override
    public void visit(LORank loRank) throws FrontendException {
        loRank.resetSchema();
        visitAll(loRank.getRankColPlans());
        validate(loRank.getSchema());
    }

    @Override
    public void visit(LODistinct loDistinct) throws FrontendException {
        loDistinct.resetSchema();
        validate(loDistinct.getSchema());
    }

    @Override
    public void visit(LOLimit loLimit) throws FrontendException {
        loLimit.resetSchema();
        // The limit plan may be absent; only reset it when present.
        if (loLimit.getLimitPlan() != null) {
            FieldSchemaResetter fsResetter = new FieldSchemaResetter(
                    loLimit.getLimitPlan());
            fsResetter.visit();
        }
        validate(loLimit.getSchema());
    }

    @Override
    public void visit(LOCross loCross) throws FrontendException {
        loCross.resetSchema();
        validate(loCross.getSchema());
    }

    @Override
    public void visit(LOStream loStream) throws FrontendException {
        loStream.resetSchema();
        validate(loStream.getSchema());
    }


    /**
     * Check if schema is valid (ready to be part of a final logical plan).
     * A null schema is accepted without any check.
     *
     * @param schema the schema to check; may be null
     * @throws PlanValidationException if any field in schema has a negative
     * uid (error code 2271), or if skipDuplicateUidCheck is false and there
     * are duplicate uids in schema (error code 2270)
     */
    public void validate(LogicalSchema schema)
            throws PlanValidationException {

        if (schema == null) {
            return;
        }

        Set<Long> uidsSeen = new HashSet<Long>();
        for (LogicalFieldSchema fs : schema.getFields()) {

            if (!skipDuplicateUidCheck) {
                // check duplicate uid: add() returns false when already present
                if (!uidsSeen.add(fs.uid)) {
                    String msg = "Logical plan invalid state: duplicate uid in " +
                            "schema : " + schema;
                    throw new PlanValidationException(
                            msg,
                            2270,
                            PigException.BUG
                            );
                }
            }

            if (fs.uid < 0) {
                String msg = "Logical plan invalid state: invalid uid " + fs.uid +
                        " in schema : " + schema;
                throw new PlanValidationException(
                        msg,
                        2271,
                        PigException.BUG
                        );
            }
        }
    }

}

/**
 * Expression visitor that applies the same action to every expression it
 * visits: reset the expression's field schema, then request it again.
 * The expression plan is walked with a {@link ReverseDependencyOrderWalker}.
 */
class FieldSchemaResetter extends AllSameExpressionVisitor {

    /**
     * @param expressionPlan the expression plan whose field schemas are reset
     * @throws FrontendException if the visitor cannot be constructed
     */
    protected FieldSchemaResetter(OperatorPlan expressionPlan) throws FrontendException {
        super(expressionPlan, new ReverseDependencyOrderWalker(expressionPlan));
    }

    /**
     * Resets the field schema of one expression and then calls
     * {@code getFieldSchema()} on it; the returned value is discarded.
     *
     * @param expression the expression being visited
     * @throws FrontendException if resetting or fetching the field schema fails
     */
    @Override
    protected void execute(LogicalExpression expression) throws FrontendException {
        expression.resetFieldSchema();
        expression.getFieldSchema();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy