All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pig.newplan.logical.relational.LOCogroup Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.newplan.logical.relational;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanVisitor;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;

/**
 * Logical plan operator for a (co)group.
 * <p>
 * The operator holds, per input index, the expression plans that compute the
 * group key, plus a flag per input telling whether that input is "inner".
 * Its schema, as built by {@link #getSchema()}, is the group key field (named
 * {@code "group"}) followed by one bag field per predecessor operator.
 */
public class LOCogroup extends LogicalRelationalOperator {

    // Flags specifying, per input, whether that cogroup input is inner.
    // Stays null when the operator is built with the rule-pattern constructor
    // or when a null array is passed in.
    private boolean[] mIsInner;

    // Group-key expression plans, keyed by input index.
    private MultiMap<Integer, LogicalExpressionPlan> mExpressionPlans;

    /**
     * Enum for the type of group
     */
    public static enum GROUPTYPE {
        REGULAR,    // Regular (co)group
        COLLECTED,  // Collected group
        MERGE       // Map-side CoGroup on sorted data
    }

    private GROUPTYPE mGroupType;

    // Carried from one getSchema() computation to the next (until resetUid())
    // and merged into the freshly built group key schema via mergeUid().
    private LogicalFieldSchema groupKeyUidOnlySchema;

    /*
     * Maps an input index to the uid generated for that input's bag field.
     * Kept so the uids stay persistent between calls of resetSchema and
     * getSchema; cleared only by resetUid().
     */
    private Map<Integer, Long> generatedInputUids = new HashMap<Integer, Long>();

    final static String GROUP_COL_NAME = "group";

    /**
     * Constructor for use in defining rule patterns. Leaves the expression
     * plans, inner flags and group type unset.
     *
     * @param plan the plan this operator belongs to
     */
    public LOCogroup(LogicalPlan plan) {
        super("LOCogroup", plan);
    }

    /**
     * Creates a {@link GROUPTYPE#REGULAR} cogroup.
     *
     * @param plan the plan this operator belongs to
     * @param expressionPlans group-key expression plans keyed by input index
     * @param isInner per-input inner flags; may be null
     */
    public LOCogroup(OperatorPlan plan, MultiMap<Integer, LogicalExpressionPlan>
    expressionPlans, boolean[] isInner ) {
        this( plan, expressionPlans, GROUPTYPE.REGULAR, isInner, -1 );
    }

    /**
     * Creates a cogroup of the given type.
     *
     * @param plan the plan this operator belongs to
     * @param expressionPlans group-key expression plans keyed by input index
     *        (stored by reference, not copied)
     * @param groupType the kind of group to perform
     * @param isInner per-input inner flags; copied defensively, may be null
     * @param requestedParrellism not read by this constructor
     */
    public LOCogroup(OperatorPlan plan, MultiMap<Integer, LogicalExpressionPlan>
    expressionPlans, GROUPTYPE groupType, boolean[] isInner, int requestedParrellism) {
        super("LOCogroup", plan);
        this.mExpressionPlans = expressionPlans;
        if( isInner != null ) {
            mIsInner = Arrays.copyOf(isInner, isInner.length);
        }
        this.mGroupType = groupType;
        // NOTE(review): requestedParrellism is accepted but never applied here;
        // confirm whether it should be forwarded to the operator.
    }

    /**
     * Given an expression plan this function returns a LogicalFieldSchema
     * that can be generated using this expression plan.
     *
     * @param exprPlan ExpressionPlan which generates this field
     * @return a deep copy of the field schema of the plan's first source
     *         expression with its uid set to -1, or null when that expression
     *         has no field schema
     * @throws FrontendException declared for failures raised while obtaining
     *         the field schema
     */
    private LogicalFieldSchema getPlanSchema( LogicalExpressionPlan exprPlan ) throws FrontendException {
        LogicalExpression sourceExp = (LogicalExpression) exprPlan.getSources().get(0);
        LogicalFieldSchema sourceSchema = sourceExp.getFieldSchema();
        if( sourceSchema == null ) {
            // Callers treat null as "schema cannot be computed yet" and bail out.
            return null;
        }
        LogicalFieldSchema planSchema = sourceSchema.deepCopy();
        planSchema.uid = -1;
        return planSchema;
    }

    /**
     * Builds (and caches in {@code schema}) the output schema: the group key
     * field followed by one bag field per predecessor.
     * <p>
     * If any input contributes more than one key expression, the group key is
     * a tuple built from the key expressions of the first input returned by
     * the expression-plan map; otherwise it is the single key expression of
     * that first input, re-aliased to {@code "group"}.
     *
     * @return the schema, or null when a key expression has no field schema
     * @throws FrontendException with code 2233 when the plan reports no
     *         predecessors, or 2234 when no group key schema could be derived
     */
    @Override
    public LogicalSchema getSchema() throws FrontendException {
        // if schema is calculated before, just return
        if (schema != null) {
            return schema;
        }

        List<Operator> inputs = plan.getPredecessors(this);
        if (inputs == null) {
            throw new FrontendException("Cannot get predecessor for " + this, 2233);
        }

        List<LogicalFieldSchema> fieldSchemaList = new ArrayList<LogicalFieldSchema>();

        // See if we have more than one expression plans, if so the
        // schema of the group column will be a tuple
        boolean hasMultipleKeys = false;
        for( Integer key : mExpressionPlans.keySet() ) {
            if( mExpressionPlans.get(key).size() > 1 ) {
                hasMultipleKeys = true;
                break;
            }
        }

        LogicalFieldSchema groupKeySchema = null;
        // Generate the groupField Schema
        if( hasMultipleKeys ) {
            LogicalSchema keySchema = new LogicalSchema();
            // Only the first key in the map's iteration order is consulted.
            for( Integer key : mExpressionPlans.keySet() ) {
                for( LogicalExpressionPlan exprPlan : mExpressionPlans.get(key) ) {
                    LogicalFieldSchema fieldSchema = getPlanSchema(exprPlan);
                    // if any plan schema is null, that means we can't calculate
                    // further schemas so we bail out
                    if( fieldSchema == null ) {
                        return null;
                    }
                    keySchema.addField(new LogicalFieldSchema(fieldSchema));
                }
                // We only need fields from one input and not all
                break;
            }
            groupKeySchema = new LogicalFieldSchema(GROUP_COL_NAME, keySchema, DataType.TUPLE);
        } else {
            // Single key per input: take the first plan of the first key.
            for( Integer key : mExpressionPlans.keySet() ) {
                for( LogicalExpressionPlan exprPlan : mExpressionPlans.get(key) ) {
                    LogicalFieldSchema keyField = getPlanSchema(exprPlan);
                    // if the plan schema is null, that means we can't calculate
                    // further schemas so we bail out
                    if( keyField == null ) {
                        return null;
                    }
                    groupKeySchema = new LogicalFieldSchema(keyField);
                    // Rename the copied field to the group column name
                    groupKeySchema.alias = GROUP_COL_NAME;
                    break;
                }
                break;
            }
        }

        if (groupKeySchema == null) {
            throw new FrontendException("Cannot get group key schema for " + this, 2234);
        }
        // Merge with the uid-only schema remembered from the previous
        // computation and remember the result for the next one.
        groupKeyUidOnlySchema = groupKeySchema.mergeUid(groupKeyUidOnlySchema);

        fieldSchemaList.add( groupKeySchema );

        // Generate the Bag Schema
        int counter = 0;
        for (Operator op : inputs) {
            LogicalRelationalOperator input = (LogicalRelationalOperator) op;
            LogicalSchema inputSchema = input.getSchema();

            // Reuse the uid already generated for this input position, if any
            Long knownUid = generatedInputUids.get(counter);
            long bagUid;
            if (knownUid != null) {
                bagUid = knownUid;
            } else {
                bagUid = LogicalExpression.getNextUid();
                generatedInputUids.put( counter, bagUid );
            }

            LogicalFieldSchema newBagSchema = new LogicalFieldSchema(
                    input.getAlias(), inputSchema, DataType.BAG, bagUid);

            fieldSchemaList.add( newBagSchema );
            counter++;
        }

        // Assign the cached schema only once every field has been built.
        schema = new LogicalSchema();
        for( LogicalFieldSchema fieldSchema : fieldSchemaList ) {
            schema.addField(fieldSchema);
        }

        return schema;
    }

    /**
     * Dispatches this operator to the visitor.
     *
     * @param v the visitor; must be a {@link LogicalRelationalNodesVisitor}
     * @throws FrontendException with code 2223 when {@code v} is of any other
     *         type, or whatever the visitor itself throws
     */
    @Override
    public void accept(PlanVisitor v) throws FrontendException {
        if (!(v instanceof LogicalRelationalNodesVisitor)) {
            throw new FrontendException("Expected LogicalPlanVisitor", 2223);
        }
        ((LogicalRelationalNodesVisitor)v).visit(this);
    }

    /**
     * Compares this cogroup with another operator: group type, inner flags
     * and, per input index, the key expression plans must all match, after
     * which the inherited {@code checkEquality} has the final say.
     *
     * @param other the operator to compare with; may be null
     * @return true when {@code other} is an equal {@code LOCogroup}
     * @throws FrontendException with code 2235 when the plans stored for a
     *         key are not held in an {@code ArrayList}, or when a nested
     *         comparison fails
     */
    @Override
    public boolean isEqual(Operator other) throws FrontendException {
        if( !(other instanceof LOCogroup) ) {
            return false;
        }
        LOCogroup oc = (LOCogroup) other;

        // Arrays.equals checks length and contents and tolerates null arrays.
        if( mGroupType != oc.mGroupType
                || !Arrays.equals(mIsInner, oc.mIsInner)
                || mExpressionPlans.size() != oc.mExpressionPlans.size() ) {
            return false;
        }

        for( Integer key : mExpressionPlans.keySet() ) {
            if( ! oc.mExpressionPlans.containsKey(key) ) {
                return false;
            }
            Collection<LogicalExpressionPlan> exp1 = mExpressionPlans.get(key);
            Collection<LogicalExpressionPlan> exp2 = oc.mExpressionPlans.get(key);

            // Both sides must be ArrayLists; checking only one of them would
            // let the cast below fail with a ClassCastException.
            if( !(exp1 instanceof ArrayList) || !(exp2 instanceof ArrayList) ) {
                throw new FrontendException( "Expected an ArrayList " +
                "of Expression Plans", 2235 );
            }

            List<LogicalExpressionPlan> expList1 = (List<LogicalExpressionPlan>) exp1;
            List<LogicalExpressionPlan> expList2 = (List<LogicalExpressionPlan>) exp2;

            // Different numbers of key expressions can never be equal; this
            // also keeps the indexed access below in bounds.
            if( expList1.size() != expList2.size() ) {
                return false;
            }
            for (int i = 0; i < expList1.size(); i++) {
                if (!expList1.get(i).isEqual(expList2.get(i))) {
                    return false;
                }
            }
        }
        return checkEquality(oc);
    }

    /**
     * @return the group type given at construction; null for an operator
     *         built with the rule-pattern constructor
     */
    public GROUPTYPE getGroupType() {
        return mGroupType;
    }

    /**
     * Returns an Unmodifiable Map of Input Number to Uid
     * @return Unmodifiable Map
     */
    public Map<Integer, Long> getGeneratedInputUids() {
        return Collections.unmodifiableMap( generatedInputUids );
    }

    /**
     * @return the expression-plan map held by this operator (the same
     *         instance, not a copy)
     */
    public MultiMap<Integer, LogicalExpressionPlan> getExpressionPlans() {
        return mExpressionPlans;
    }

    /**
     * @return the internal inner-flag array itself (not a copy); may be null
     */
    public boolean[] getInner() {
        return mIsInner;
    }

    /**
     * Forgets the remembered group-key uid schema and all generated bag uids,
     * so the next schema computation starts from fresh uids. A new map is
     * installed rather than clearing the old one, so views previously handed
     * out by {@link #getGeneratedInputUids()} keep their old contents.
     */
    @Override
    public void resetUid() {
        groupKeyUidOnlySchema = null;
        generatedInputUids = new HashMap<Integer, Long>();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy