// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only 1 $.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanVisitor;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* LOProject is designed like a singly linked list; A few examples will
* illustrate the point about the linked list nature of the design;
* a = load 'input1' as (name, age);
* b = group a by name;
* foreach b generate a, a.name;
* The project operator occurs in two places in the above script:
* generate a(here) and a.name(here)
* In the first occurrence, we are trying to project the elements of
* the bag a; In order to retrieve the bag, we need to project
* the second column ($1) or column number 1 (using the zero based index)
* from the input (the relation or bag b)
* In the second occurrence, we are trying to project the first column
* ($0) or column number 0 from the bag a which in turn is the column
* number 1 in the relation b; As you can see, the nested structure or
* the singly linked list nature is clearly visible;
* Given that it's a singly linked list, the null pointer or the sentinel
* is marked explicitly using the boolean variable mSentinel; The sentinel
* is marked true only when the input is a relational operator; This occurs
* when we create the innermost operator
*/
public class LOProject extends ExpressionOperator {
    private static final long serialVersionUID = 2L;

    /**
     * The input whose output is projected: either another expression
     * operator or a relational operator (see {@link #getSentinel()}).
     */
    private LogicalOperator mExp;

    /** Zero-based column numbers to project from the input. */
    private List<Integer> mProjection;

    /** True when this operator projects every column of its input ("*"). */
    private boolean mIsStar = false;

    private static final Log log = LogFactory.getLog(LOProject.class);

    /**
     * True when the input is not an {@link ExpressionOperator}; set by the
     * constructors and overridable through {@link #setSentinel(boolean)}.
     */
    private boolean mSentinel;

    /**
     * Set to true by {@link #getFieldSchema()} when the first successor of
     * this operator in the plan is not an expression operator and the
     * projection is therefore typed as a tuple.
     */
    private boolean mOverloaded = false;

    /** Plain flag exposed through its getter/setter; not read inside this class. */
    private boolean sendEmptyBagOnEOP = false;

    /**
     * Creates a projection of several columns.
     *
     * @param plan
     *            Logical plan this operator is a part of.
     * @param key
     *            Operator key to assign to this node.
     * @param exp
     *            the expression which might contain the column to project
     * @param projection
     *            the list of columns to project; the list is stored as-is,
     *            not copied
     */
    public LOProject(LogicalPlan plan, OperatorKey key, LogicalOperator exp,
            List<Integer> projection) {
        super(plan, key);
        mExp = exp;
        mProjection = projection;
        // The chain of projections ends at the first non-expression input.
        mSentinel = !(mExp instanceof ExpressionOperator);
    }

    /**
     * Creates a projection of a single column.
     *
     * @param plan
     *            Logical plan this operator is a part of.
     * @param key
     *            Operator key to assign to this node.
     * @param exp
     *            the expression which might contain the column to project
     * @param projection
     *            the column to project
     */
    public LOProject(LogicalPlan plan, OperatorKey key, LogicalOperator exp,
            Integer projection) {
        super(plan, key);
        mExp = exp;
        mProjection = new ArrayList<Integer>(1);
        mProjection.add(projection);
        // The chain of projections ends at the first non-expression input.
        mSentinel = !(mExp instanceof ExpressionOperator);
    }

    /** @return the input operator being projected from */
    public LogicalOperator getExpression() {
        return mExp;
    }

    /**
     * Replaces the input operator. The sentinel flag is not recomputed;
     * call {@link #setSentinel(boolean)} if the kind of input changes.
     *
     * @param exp the new input operator
     */
    public void setExpression(LogicalOperator exp) {
        mExp = exp;
    }

    /** @return true if this operator projects all columns of its input */
    public boolean isStar() {
        return mIsStar;
    }

    /** @return the live (not copied) list of projected column numbers */
    public List<Integer> getProjection() {
        return mProjection;
    }

    /** @param proj the list of column numbers to project; stored as-is */
    public void setProjection(List<Integer> proj) {
        mProjection = proj;
    }

    /**
     * Returns the projected column when exactly one column is projected.
     *
     * @return the single projected column number
     * @throws RuntimeException if the projection does not contain exactly
     *             one column
     */
    public int getCol() {
        if (mProjection.size() != 1) {
            throw new RuntimeException(
                    "Internal error: improper use of getCol in "
                            + LOProject.class.getName());
        }
        return mProjection.get(0);
    }

    /** @param b true to mark this operator as projecting all columns */
    public void setStar(boolean b) {
        mIsStar = b;
    }

    /** @return true if the input is treated as a non-expression operator */
    public boolean getSentinel() {
        return mSentinel;
    }

    /** @param b the new value of the sentinel flag */
    public void setSentinel(boolean b) {
        mSentinel = b;
    }

    /** @return the overloaded flag (see {@link #getFieldSchema()}) */
    public boolean getOverloaded() {
        return mOverloaded;
    }

    /** @param b the new value of the overloaded flag */
    public void setOverloaded(boolean b) {
        mOverloaded = b;
    }

    @Override
    public String name() {
        return "Project " + mKey.scope + "-" + mKey.id + " Projections: " + (mIsStar? " [*] ": mProjection) + " Overloaded: " + mOverloaded;
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    /**
     * Builds the placeholder field schema used when the input offers no
     * schema information for a column: an unnamed BYTEARRAY field linked to
     * the given parent.
     */
    private static Schema.FieldSchema newByteArrayField(String parentCanonicalName,
            LogicalOperator parent) throws FrontendException {
        Schema.FieldSchema fs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
        fs.setParent(parentCanonicalName, parent);
        return fs;
    }

    /**
     * Computes (on first use) and returns the field schema of this
     * projection.
     * <p>
     * For a star projection the input's field schema (expression input) or
     * a tuple wrapping the input's schema (relational input) is used. For a
     * single column the matching field of the input schema is copied and
     * linked, and the method returns immediately. For several columns a
     * schema holding one field per projected column is assembled. Columns
     * for which no schema information is available become unnamed
     * BYTEARRAY fields.
     * <p>
     * Except on the single-column early return, the result is then retyped
     * on every call: to a tuple (and the operator is flagged overloaded)
     * when the first successor in the plan is not an expression operator,
     * or to a bag when the input is of bag type and is either another
     * {@code LOProject} or the first predecessor is not an expression
     * operator.
     *
     * @return the field schema of this projection
     * @throws FrontendException if the input is null, if a bag schema does
     *             not have the expected single-tuple shape, or if a schema
     *             lookup on the input fails
     */
    @Override
    public Schema.FieldSchema getFieldSchema() throws FrontendException {
        log.debug("Inside getFieldSchemas");
        log.debug("Number of columns: " + mProjection.size());
        for (int i : mProjection) {
            log.debug("Column: " + i);
        }

        if (mExp == null) {
            String msg = "The input for a projection operator cannot be null";
            int errCode = 2998;
            throw new FrontendException(msg, errCode, PigException.BUG, false, null);
        }

        LogicalOperator expressionOperator = mExp;
        log.debug("expressionOperator = " + expressionOperator);
        log.debug("mIsStar: " + mIsStar);

        if (!mIsFieldSchemaComputed) {
            if (mIsStar) {
                log.debug("mIsStar is true");
                try {
                    if (!mSentinel) {
                        // we have an expression operator and hence a list of field schemas
                        Schema.FieldSchema fs = ((ExpressionOperator) expressionOperator).getFieldSchema();
                        mFieldSchema = Schema.FieldSchema.copyAndLink(fs, expressionOperator);
                    } else {
                        // we have a relational operator as input and hence a schema
                        log.debug("expression operator alias: " + expressionOperator.getAlias());
                        log.debug("expression operator schema: " + expressionOperator.getSchema());
                        log.debug("expression operator type: " + expressionOperator.getType());
                        //TODO
                        //the type of the operator will be unknown. when type checking is in place
                        //add the type of the operator as a parameter to the fieldschema creation
                        mFieldSchema = new Schema.FieldSchema(expressionOperator.getAlias(), expressionOperator.getSchema(), DataType.TUPLE);
                        mFieldSchema.setParent(null, expressionOperator);
                    }
                    mIsFieldSchemaComputed = true;
                } catch (FrontendException fee) {
                    // leave the operator in the "not computed" state on failure
                    mFieldSchema = null;
                    mIsFieldSchemaComputed = false;
                    throw fee;
                }
            } else {
                // it is a list of columns to project, including a single column
                List<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>(mProjection.size());
                log.debug("expressionOperator is not null");
                if (mProjection.size() == 1) {
                    // if there is only one element then extract and return the field schema
                    log.debug("Only one element");
                    if (!mSentinel) {
                        log.debug("Input is an expression operator");
                        Schema.FieldSchema expOpFs = ((ExpressionOperator) expressionOperator).getFieldSchema();
                        if (null != expOpFs) {
                            Schema s = expOpFs.schema;
                            if (null != s) {
                                Schema.FieldSchema fs;
                                if (s.isTwoLevelAccessRequired()) {
                                    // this is the case where the schema is that of
                                    // a bag which has just one tuple fieldschema which
                                    // in turn has a list of fieldschemas. So the field
                                    // schema we are trying to construct would be of the
                                    // item we are trying to project inside the tuple
                                    // fieldschema - because currently when we say b.i where
                                    // b is a bag, we are trying to access the item i
                                    // present in the tuple in the bag.

                                    // check that indeed we only have one field schema
                                    // which is that of a tuple
                                    if (s.getFields().size() != 1) {
                                        int errCode = 1008;
                                        String msg = "Expected a bag schema with a single " +
                                                "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                                                " but got a bag schema with multiple elements.";
                                        throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                                    }
                                    Schema.FieldSchema tupleFS = s.getField(0);
                                    if (tupleFS.type != DataType.TUPLE) {
                                        int errCode = 1009;
                                        String msg = "Expected a bag schema with a single " +
                                                "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                                                " but got an element of type " +
                                                DataType.findTypeName(tupleFS.type);
                                        throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                                    }
                                    fs = tupleFS.schema.getField(mProjection.get(0));
                                } else {
                                    // normal single level access
                                    fs = s.getField(mProjection.get(0));
                                }
                                mFieldSchema = FieldSchema.copyAndLink(fs, expressionOperator);
                            } else {
                                mFieldSchema = newByteArrayField(expOpFs.canonicalName, expressionOperator);
                            }
                        } else {
                            mFieldSchema = newByteArrayField(null, expressionOperator);
                        }
                    } else {
                        log.debug("Input is a logical operator");
                        Schema s = expressionOperator.getSchema();
                        log.debug("s: " + s);
                        if (null != s) {
                            Schema.FieldSchema fs = s.getField(mProjection.get(0));
                            mFieldSchema = FieldSchema.copyAndLink(fs, expressionOperator);
                            log.debug("mFieldSchema alias: " + mFieldSchema.alias);
                            log.debug("mFieldSchema schema: " + mFieldSchema.schema);
                        } else {
                            mFieldSchema = newByteArrayField(null, expressionOperator);
                        }
                        mType = mFieldSchema.type;
                    }
                    mIsFieldSchemaComputed = true;
                    // NOTE: the single-column case returns before the
                    // tuple/bag retyping performed at the end of this method.
                    return mFieldSchema;
                }

                for (int colNum : mProjection) {
                    log.debug("Col: " + colNum);
                    Schema.FieldSchema fs;
                    if (!mSentinel) {
                        Schema.FieldSchema expOpFs = ((ExpressionOperator) expressionOperator).getFieldSchema();
                        if (null != expOpFs) {
                            Schema s = expOpFs.schema;
                            log.debug("Schema s: " + s);
                            if (null != s && colNum < s.size()) {
                                Schema.FieldSchema parentFs = s.getField(colNum);
                                fs = Schema.FieldSchema.copyAndLink(parentFs, expressionOperator);
                            } else {
                                // no schema, or the column lies beyond it
                                fs = newByteArrayField(expOpFs.canonicalName, expressionOperator);
                            }
                        } else {
                            // The field that is added must be the one that
                            // carries the parent link.
                            fs = newByteArrayField(null, expressionOperator);
                        }
                    } else {
                        Schema s = expressionOperator.getSchema();
                        if (null != s) {
                            Schema.FieldSchema parentFs = s.getField(colNum);
                            fs = Schema.FieldSchema.copyAndLink(parentFs, expressionOperator);
                        } else {
                            fs = newByteArrayField(null, expressionOperator);
                        }
                    }
                    fss.add(fs);
                }

                mFieldSchema = new Schema.FieldSchema(expressionOperator.getAlias(), new Schema(fss));
                // Only an expression input has a field schema to take the
                // canonical name from; a relational input gets a null name,
                // matching the per-column handling above.
                String parentCanonicalName = null;
                if (expressionOperator instanceof ExpressionOperator) {
                    Schema.FieldSchema expOpFs = ((ExpressionOperator) expressionOperator).getFieldSchema();
                    if (null != expOpFs) {
                        parentCanonicalName = expOpFs.canonicalName;
                    }
                }
                mFieldSchema.setParent(parentCanonicalName, expressionOperator);
                mIsFieldSchemaComputed = true;
            }
        }

        if (null != mFieldSchema) {
            mType = mFieldSchema.type;
        }

        List<LogicalOperator> succList = mPlan.getSuccessors(this);
        List<LogicalOperator> predList = mPlan.getPredecessors(this);
        if ((null != succList) && !(succList.get(0) instanceof ExpressionOperator)) {
            // Feeding a non-expression operator: present the projection as a tuple.
            if (!DataType.isSchemaType(mType)) {
                Schema pjSchema = new Schema(mFieldSchema);
                mFieldSchema = new Schema.FieldSchema(getAlias(), pjSchema, DataType.TUPLE);
                mFieldSchema.setParent(null, expressionOperator);
            } else {
                if (null != mFieldSchema) {
                    mFieldSchema.type = DataType.TUPLE;
                }
            }
            setOverloaded(true);
            setType(DataType.TUPLE);
        } else if (null != predList) {
            LogicalOperator projectInput = getExpression();
            if (((projectInput instanceof LOProject) || !(predList.get(0) instanceof ExpressionOperator)) && (projectInput.getType() == DataType.BAG)) {
                // Projecting out of a bag: the result is itself a bag.
                if (!DataType.isSchemaType(mType)) {
                    Schema pjSchema = new Schema(mFieldSchema);
                    mFieldSchema = new Schema.FieldSchema(getAlias(), pjSchema, DataType.BAG);
                    // The input is not necessarily an LOProject here (the
                    // condition above also admits other inputs), and its
                    // field schema may not have been computed yet.
                    String parentCanonicalName = null;
                    if (expressionOperator instanceof LOProject) {
                        Schema.FieldSchema inputFs = ((LOProject) expressionOperator).mFieldSchema;
                        if (null != inputFs) {
                            parentCanonicalName = inputFs.canonicalName;
                        }
                    }
                    mFieldSchema.setParent(parentCanonicalName, expressionOperator);
                } else {
                    if (null != mFieldSchema) {
                        mFieldSchema.type = DataType.BAG;
                    }
                }
                setType(DataType.BAG);
            }
        }

        log.debug("Exiting getFieldSchema");
        return mFieldSchema;
    }

    /** @return true if exactly one column is projected */
    public boolean isSingleProjection() {
        return mProjection.size() == 1;
    }

    @Override
    public void visit(LOVisitor v) throws VisitorException {
        v.visit(this);
    }

    /**
     * Returns the schema nested inside this projection's field schema.
     *
     * @return the nested schema, or null if no field schema is available
     * @throws FrontendException if computing the field schema fails
     */
    @Override
    public Schema getSchema() throws FrontendException {
        // Called to make sure we've constructed the field schema before trying
        // to read it.
        getFieldSchema();
        return (mFieldSchema != null) ? mFieldSchema.schema : null;
    }

    /* For debugging only */
    public String toDetailString() {
        StringBuilder sb = new StringBuilder();
        sb.append("LOProject");
        sb.append(" Id=").append(this.mKey.id);
        sb.append(" Projection=");
        for (int i = 0; i < mProjection.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(mProjection.get(i));
        }
        // The " isStart=" label is kept exactly as emitted historically.
        sb.append(" isStart=");
        sb.append(mIsStar);
        sb.append(" isSentinel=");
        sb.append(mSentinel);
        return sb.toString();
    }

    /**
     * @see org.apache.pig.impl.logicalLayer.ExpressionOperator#clone()
     * Do not use the clone method directly. Operators are cloned when logical plans
     * are cloned using {@link LogicalPlanCloner}
     */
    @Override
    protected Object clone() throws CloneNotSupportedException {
        LOProject clone = (LOProject) super.clone();
        // Give the clone its own projection list so later edits to one
        // operator's columns do not leak into the other. Integer is
        // immutable, so copying the references is a full copy.
        clone.mProjection = new ArrayList<Integer>(mProjection);
        return clone;
    }

    /**
     * @param sendEmptyBagOnEOP the sendEmptyBagOnEOP to set
     */
    public void setSendEmptyBagOnEOP(boolean sendEmptyBagOnEOP) {
        this.sendEmptyBagOnEOP = sendEmptyBagOnEOP;
    }

    /**
     * @return the sendEmptyBagOnEOP
     */
    public boolean isSendEmptyBagOnEOP() {
        return sendEmptyBagOnEOP;
    }
}