All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pig.impl.logicalLayer.LOProject Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanVisitor;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * LOProject is designed like a singly linked list; A few examples will
 * illustrate the point about the linked list nature of the design;
 * a = load 'input1' as (name, age);
 * b = group a by name;
 * foreach b generate a, a.name;
 * The project operator occurs in two places in the above script:
 * generate a(here) and a.name(here)
 * In the first occurrence, we are trying to project the elements of
 * the bag a; In order to retrieve the bag, we need to project
 * the second column ($1) or column number 1 (using the zero based index)
 * from the input (the relation or bag b)
 * In the second occurrence, we are trying to project the first column
 * ($0) or column number 0 from the bag a which in turn is the column
 * number 1 in the relation b; As you can see, the nested structure or
 * the singly linked list nature is clearly visible;
 * Given that it's a singly linked list, the null pointer or the sentinel
 * is marked explicitly using the boolean variable mSentinel; The sentinel
 * is marked true only when the input is a relational operator; This occurs
 * when we create the innermost operator
 */
public class LOProject extends ExpressionOperator {
    private static final long serialVersionUID = 2L;

    private static final Log log = LogFactory.getLog(LOProject.class);

    /**
     * The operator whose output is projected. It is either another
     * expression operator or a relational operator (see {@link #mSentinel}).
     */
    private LogicalOperator mExp;

    /** Zero-based column numbers to project from {@link #mExp}. */
    private List<Integer> mProjection;

    /** When true the projection is treated as "*" and mProjection is not consulted for the schema. */
    private boolean mIsStar = false;

    /**
     * Set by the constructors to true when the input is not an
     * {@link ExpressionOperator}; can be overridden via {@link #setSentinel(boolean)}.
     */
    private boolean mSentinel;

    /**
     * Set to true by {@link #getFieldSchema()} when the first successor of
     * this operator in the plan is not an expression operator.
     */
    private boolean mOverloaded = false;

    // Only stored and returned by this class; presumably consumed when the
    // logical plan is translated -- TODO confirm against callers.
    private boolean sendEmptyBagOnEOP = false;

    /**
     * 
     * @param plan
     *            Logical plan this operator is a part of.
     * @param key
     *            Operator key to assign to this node.
     * @param exp
     *            the expression which might contain the column to project
     * @param projection
     *            the list of columns to project
     */
    public LOProject(LogicalPlan plan, OperatorKey key, LogicalOperator exp,
            List<Integer> projection) {
        super(plan, key);
        mExp = exp;
        mProjection = projection;
        // The chain of projections ends at the first input that is not an expression.
        mSentinel = !(exp instanceof ExpressionOperator);
    }

    /**
     * 
     * @param plan
     *            Logical plan this operator is a part of.
     * @param key
     *            Operator key to assign to this node.
     * @param exp
     *            the expression which might contain the column to project
     * @param projection
     *            the column to project
     */
    public LOProject(LogicalPlan plan, OperatorKey key, LogicalOperator exp,
            Integer projection) {
        this(plan, key, exp, singleColumn(projection));
    }

    /** Wraps one column number in a fresh, mutable list. */
    private static List<Integer> singleColumn(Integer projection) {
        List<Integer> columns = new ArrayList<Integer>(1);
        columns.add(projection);
        return columns;
    }

    /** @return the operator this projection reads from */
    public LogicalOperator getExpression() {
        return mExp;
    }

    /**
     * Replaces the input operator. The sentinel flag is not recomputed; use
     * {@link #setSentinel(boolean)} if the kind of input changes.
     *
     * @param exp the new input operator
     */
    public void setExpression(LogicalOperator exp) {
        mExp = exp;
    }

    /** @return true if this operator projects "*" */
    public boolean isStar() {
        return mIsStar;
    }

    /** @return the (live, not copied) list of projected column numbers */
    public List<Integer> getProjection() {
        return mProjection;
    }

    /** @param proj the list of column numbers to project; stored without copying */
    public void setProjection(List<Integer> proj) {
        mProjection = proj;
    }

    /**
     * Returns the projected column when exactly one column is projected.
     *
     * @return the single projected column number
     * @throws RuntimeException if the projection does not contain exactly one column
     */
    public int getCol() {
        if (mProjection.size() != 1) {
            throw new RuntimeException(
                    "Internal error: improper use of getCol in "
                            + LOProject.class.getName());
        }
        return mProjection.get(0);
    }

    /** @param b whether this operator projects "*" */
    public void setStar(boolean b) {
        mIsStar = b;
    }

    /** @return the sentinel flag (see {@link #mSentinel}) */
    public boolean getSentinel() {
        return mSentinel;
    }

    /** @param b the new value of the sentinel flag */
    public void setSentinel(boolean b) {
        mSentinel = b;
    }

    /** @return the overloaded flag (see {@link #mOverloaded}) */
    public boolean getOverloaded() {
        return mOverloaded;
    }

    /** @param b the new value of the overloaded flag */
    public void setOverloaded(boolean b) {
        mOverloaded = b;
    }

    @Override
    public String name() {
        return "Project " + mKey.scope + "-" + mKey.id + " Projections: " + (mIsStar? " [*] ": mProjection) + " Overloaded: " + mOverloaded;
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    /**
     * Computes (and caches) the field schema of this projection and then
     * adjusts its type according to the operator's neighbours in the plan.
     *
     * NOTE: when a single column is projected, the call that first computes
     * the schema returns it immediately, before the neighbour-based
     * adjustment; that adjustment is applied on later calls. This mirrors
     * the long-standing behaviour of this method and is kept on purpose.
     *
     * @return the field schema of the projection, possibly null
     * @throws FrontendException if the input operator is null, if a bag
     *         schema does not have the expected single-tuple shape, or if
     *         the input's schema cannot be obtained
     */
    @Override
    public Schema.FieldSchema getFieldSchema() throws FrontendException {
        log.debug("Inside getFieldSchemas");
        log.debug("Number of columns: " + mProjection.size());
        if (log.isDebugEnabled()) {
            for (int i : mProjection) {
                log.debug("Column: " + i);
            }
        }

        if (mExp == null){
            String msg = "The input for a projection operator cannot be null";
            int errCode = 2998;
            throw new FrontendException(msg, errCode, PigException.BUG, false, null);
        }

        LogicalOperator expressionOperator = mExp;
        log.debug("expressionOperator = " + expressionOperator);
        log.debug("mIsStar: " + mIsStar);

        if (!mIsFieldSchemaComputed) {
            if (mIsStar) {
                computeStarFieldSchema(expressionOperator);
            } else {
                // it's a list of columns to project, including a single column
                log.debug("expressionOperator is not null");
                if (mProjection.size() == 1) {
                    computeSingleColumnFieldSchema(expressionOperator);
                    mIsFieldSchemaComputed = true;
                    return mFieldSchema;
                }
                computeMultiColumnFieldSchema(expressionOperator);
                mIsFieldSchemaComputed = true;
            }
        }

        if (null != mFieldSchema) {
            mType = mFieldSchema.type;
        }

        adjustForPlanContext(expressionOperator);

        log.debug("Exiting getFieldSchema");
        return mFieldSchema;
    }

    /**
     * Star projection: copies the input's field schema (expression input) or
     * wraps the input's schema in a tuple field schema (relational input).
     * On failure the cached state is reset before the exception propagates.
     */
    private void computeStarFieldSchema(LogicalOperator input) throws FrontendException {
        log.debug("mIsStar is true");
        try {
            if (!mSentinel) {
                // we have an expression operator and hence a list of field schemas
                Schema.FieldSchema fs = ((ExpressionOperator) input).getFieldSchema();
                mFieldSchema = Schema.FieldSchema.copyAndLink(fs, input);
            } else {
                // we have a relational operator as input and hence a schema
                log.debug("expression operator alias: " + input.getAlias());
                log.debug("expression operator schema: " + input.getSchema());
                log.debug("expression operator type: " + input.getType());
                //TODO
                //the type of the operator will be unknown. when type checking is in place
                //add the type of the operator as a parameter to the fieldschema creation
                mFieldSchema = new Schema.FieldSchema(input.getAlias(), input.getSchema(), DataType.TUPLE);
                mFieldSchema.setParent(null, input);
            }
            mIsFieldSchemaComputed = true;
        } catch (FrontendException fee) {
            mFieldSchema = null;
            mIsFieldSchemaComputed = false;
            throw fee;
        }
    }

    /**
     * Single-column projection: looks the column up in the input's schema,
     * falling back to an alias-less bytearray field when no schema is known.
     */
    private void computeSingleColumnFieldSchema(LogicalOperator input) throws FrontendException {
        log.debug("Only one element");
        if (!mSentinel) {
            log.debug("Input is an expression operator");
            Schema.FieldSchema expOpFs = ((ExpressionOperator) input).getFieldSchema();
            if (null == expOpFs) {
                mFieldSchema = newByteArrayField(null, input);
                return;
            }
            Schema s = expOpFs.schema;
            if (null == s) {
                mFieldSchema = newByteArrayField(expOpFs.canonicalName, input);
                return;
            }
            Schema.FieldSchema fs;
            if (s.isTwoLevelAccessRequired()) {
                // this is the case where the schema is that of
                // a bag which has just one tuple fieldschema which
                // in turn has a list of fieldschemas. So the field
                // schema we are trying to construct would be of the
                // item we are trying to project inside the tuple
                // fieldschema - because currently when we say b.i where
                // b is a bag, we are trying to access the item i
                // present in the tuple in the bag.

                // check that indeed we only have one field schema
                // which is that of a tuple
                if (s.getFields().size() != 1) {
                    int errCode = 1008;
                    String msg = "Expected a bag schema with a single " +
                    "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                    " but got a bag schema with multiple elements.";
                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                }
                Schema.FieldSchema tupleFS = s.getField(0);
                if (tupleFS.type != DataType.TUPLE) {
                    int errCode = 1009;
                    String msg = "Expected a bag schema with a single " +
                    "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                    " but got an element of type " +
                    DataType.findTypeName(tupleFS.type);
                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                }
                fs = tupleFS.schema.getField(mProjection.get(0));
            } else {
                // normal single level access
                fs = s.getField(mProjection.get(0));
            }
            mFieldSchema = FieldSchema.copyAndLink(fs, input);
        } else {
            log.debug("Input is a logical operator");
            Schema s = input.getSchema();
            log.debug("s: " + s);
            if (null != s) {
                Schema.FieldSchema fs = s.getField(mProjection.get(0));
                mFieldSchema = FieldSchema.copyAndLink(fs, input);
                log.debug("mFieldSchema alias: " + mFieldSchema.alias);
                log.debug("mFieldSchema schema: " + mFieldSchema.schema);
            } else {
                mFieldSchema = newByteArrayField(null, input);
            }
            mType = mFieldSchema.type;
        }
    }

    /**
     * Multi-column projection: builds one field schema per projected column
     * and wraps them in a field schema named after the input's alias.
     */
    private void computeMultiColumnFieldSchema(LogicalOperator input) throws FrontendException {
        List<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>(mProjection.size());

        for (int colNum : mProjection) {
            log.debug("Col: " + colNum);
            Schema.FieldSchema fs;
            if (!mSentinel) {
                // Deliberately re-queried on every iteration: the input's
                // getFieldSchema() is not guaranteed to return the same
                // result on repeated calls, so the call count is preserved.
                Schema.FieldSchema expOpFs = ((ExpressionOperator) input).getFieldSchema();
                if (null == expOpFs) {
                    // The field added to the list is the one that receives
                    // the parent link (previously a second, unlinked field
                    // was added instead).
                    fs = newByteArrayField(null, input);
                } else {
                    Schema s = expOpFs.schema;
                    log.debug("Schema s: " + s);
                    if (null != s && colNum < s.size()) {
                        fs = Schema.FieldSchema.copyAndLink(s.getField(colNum), input);
                    } else {
                        fs = newByteArrayField(expOpFs.canonicalName, input);
                    }
                }
            } else {
                Schema s = input.getSchema();
                if (null != s) {
                    fs = Schema.FieldSchema.copyAndLink(s.getField(colNum), input);
                } else {
                    fs = newByteArrayField(null, input);
                }
            }
            fss.add(fs);
        }

        mFieldSchema = new Schema.FieldSchema(input.getAlias(), new Schema(fss));

        // Only an expression input has a field schema to take the canonical
        // name from; a relational input or a missing field schema falls back
        // to null instead of failing with a ClassCastException/NPE.
        String parentCanonicalName = null;
        if (input instanceof ExpressionOperator) {
            Schema.FieldSchema expOpFs = ((ExpressionOperator) input).getFieldSchema();
            if (null != expOpFs) {
                parentCanonicalName = expOpFs.canonicalName;
            }
        }
        mFieldSchema.setParent(parentCanonicalName, input);
    }

    /**
     * Retypes the computed field schema as TUPLE or BAG depending on the
     * operator's successors/predecessors in the plan and on the input type.
     */
    private void adjustForPlanContext(LogicalOperator expressionOperator) throws FrontendException {
        List<LogicalOperator> succList = mPlan.getSuccessors(this);
        List<LogicalOperator> predList = mPlan.getPredecessors(this);
        if ((null != succList) && !(succList.get(0) instanceof ExpressionOperator)) {
            if (!DataType.isSchemaType(mType)) {
                Schema pjSchema = new Schema(mFieldSchema);
                mFieldSchema = new Schema.FieldSchema(getAlias(), pjSchema, DataType.TUPLE);
                mFieldSchema.setParent(null, expressionOperator);
            } else if (null != mFieldSchema) {
                mFieldSchema.type = DataType.TUPLE;
            }
            setOverloaded(true);
            setType(DataType.TUPLE);
        } else if (null != predList) {
            LogicalOperator projectInput = getExpression();
            boolean bagProducingInput =
                    (projectInput instanceof LOProject)
                    || !(predList.get(0) instanceof ExpressionOperator);
            if (bagProducingInput && (projectInput.getType() == DataType.BAG)) {
                if (!DataType.isSchemaType(mType)) {
                    Schema pjSchema = new Schema(mFieldSchema);
                    mFieldSchema = new Schema.FieldSchema(getAlias(), pjSchema, DataType.BAG);
                    // The condition above also admits inputs that are not
                    // LOProject, so the cast has to be guarded.
                    String parentCanonicalName = null;
                    if (expressionOperator instanceof LOProject) {
                        Schema.FieldSchema inputFs = ((LOProject) expressionOperator).mFieldSchema;
                        if (null != inputFs) {
                            parentCanonicalName = inputFs.canonicalName;
                        }
                    }
                    mFieldSchema.setParent(parentCanonicalName, expressionOperator);
                } else if (null != mFieldSchema) {
                    mFieldSchema.type = DataType.BAG;
                }
                setType(DataType.BAG);
            }
        }
    }

    /** Creates an alias-less bytearray field schema linked to the given parent. */
    private static Schema.FieldSchema newByteArrayField(String parentCanonicalName,
            LogicalOperator parent) {
        Schema.FieldSchema fs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
        fs.setParent(parentCanonicalName, parent);
        return fs;
    }

    /** @return true if exactly one column is projected */
    public boolean isSingleProjection()  {
        return mProjection.size() == 1;
    }

    @Override
    public void visit(LOVisitor v) throws VisitorException {
        v.visit(this);
    }

    /**
     * @return the schema nested in this operator's field schema, or null if
     *         no field schema is available
     */
    @Override
    public Schema getSchema() throws FrontendException{
        // Called to make sure we've constructed the field schema before trying
        // to read it.
        getFieldSchema();
        return (mFieldSchema != null) ? mFieldSchema.schema : null;
    }

    /* For debugging only */
    public String toDetailString() {
        StringBuilder sb = new StringBuilder();
        sb.append("LOProject");
        sb.append(" Id=").append(this.mKey.id);
        sb.append(" Projection=");
        for (int i = 0; i < mProjection.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(mProjection.get(i));
        }
        sb.append(" isStart=");
        sb.append(mIsStar);
        sb.append(" isSentinel=");
        sb.append(mSentinel);
        return sb.toString();
    }

    /**
     * @see org.apache.pig.impl.logicalLayer.ExpressionOperator#clone()
     * Do not use the clone method directly. Operators are cloned when logical plans
     * are cloned using {@link LogicalPlanCloner}
     */
    @Override
    protected Object clone() throws CloneNotSupportedException {
        LOProject clone = (LOProject)super.clone();

        // Give the clone its own projection list so that changes to one
        // operator's columns do not leak into the other. Integer is
        // immutable, so copying the references is sufficient.
        clone.mProjection = new ArrayList<Integer>(mProjection);

        return clone;
    }

    /**
     * @param sendEmptyBagOnEOP the sendEmptyBagOnEOP to set
     */
    public void setSendEmptyBagOnEOP(boolean sendEmptyBagOnEOP) {
        this.sendEmptyBagOnEOP = sendEmptyBagOnEOP;
    }

    /**
     * @return the sendEmptyBagOnEOP
     */
    public boolean isSendEmptyBagOnEOP() {
        return sendEmptyBagOnEOP;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy