// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.pig.impl.logicalLayer.schema.Schema Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer.schema;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.CanonicalNamer;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.MultiMap;

/**
 * The Schema class encapsulates the notion of a schema for a relational operator.
 * A schema is a list of columns that describe the output of a relational operator.
 * Each column in the relation is represented as a FieldSchema, a static class inside
 * the Schema. A column by definition has an alias, a type and a possible schema (if the
 * column is a bag or a tuple). In addition, each column in the schema has a unique
 * auto generated name used for tracking the lineage of the column in a sequence of
 * statements.
 *
 * The lineage of the column is tracked using a map of the predecessors' columns to
 * the operators that generate the predecessor columns. The predecessor columns are the
 * columns required in order to generate the column under consideration.  Similarly, a
 * reverse lookup of operators that generate the predecessor column to the predecessor
 * column is maintained.
 */

public class Schema implements Serializable, Cloneable {

    private static final long serialVersionUID = 2L;

    public static class FieldSchema implements Serializable, Cloneable {
        /**
         * Version identifier used by Java serialization for this class.
         */
        private static final long serialVersionUID = 2L;

        /**
         * Alias for this field.
         */
        public String alias;

        /**
         * Datatype, using codes from {@link org.apache.pig.data.DataType}.
         */
        public byte type;

        /**
         * Nested schema of this field, or null if it has none. The
         * three-argument constructor rejects a non-null schema for any type
         * for which {@code DataType.isSchemaType} is false.
         */
        public Schema schema;

        /**
         * Canonical name.  This name uniquely identifies a field throughout
         * the query.  Unlike an alias, it cannot be changed.  It will
         * change when the field is transformed in some way (such as being
         * used in an arithmetic expression or passed to a udf).  At that
         * point a new canonical name will be generated for the field.
         * Every constructor of this class assigns a new name obtained from
         * {@code CanonicalNamer.getNewName()}.
         */
        public String canonicalName = null;

        /**
         * Canonical namer object to generate new canonical names on
         * request. In order to ensure unique and consistent names, across
         * all field schema objects, the object is made static.
         */
        public static final CanonicalNamer canonicalNamer = new CanonicalNamer();
        
        /**
         * Logger used by FieldSchema.
         */
        private static Log log = LogFactory.getLog(Schema.FieldSchema.class);

        /**
         * Creates a field of an arbitrary type that carries no nested schema.
         *
         * @param a
         *            alias of the field, or null when it is not known
         * @param t
         *            type code, one of the constants in
         *            {@link org.apache.pig.data.DataType}
         */
        public FieldSchema(String a, byte t) {
            this.alias = a;
            this.type = t;
            this.schema = null;
            // Each new field asks the namer for its own canonical name.
            this.canonicalName = CanonicalNamer.getNewName();
        }

        /**
         * Creates a tuple field: the type is always set to
         * {@code DataType.TUPLE}.
         *
         * @param a
         *            alias of the field, or null when it is not known
         * @param s
         *            schema describing the contents of the tuple
         */
        public FieldSchema(String a, Schema s) {
            this.alias = a;
            this.schema = s;
            this.type = DataType.TUPLE;
            // Each new field asks the namer for its own canonical name.
            this.canonicalName = CanonicalNamer.getNewName();
        }

        /**
         * Constructor for tuple fields.
         * 
         * @param a
         *            Alias, if known. If unknown leave null.
         * @param s
         *            Schema of this tuple.
         * @param t
         *            Type, using codes from
         *            {@link org.apache.pig.data.DataType}.
         * 
         */
        public FieldSchema(String a, Schema s, byte t)  throws FrontendException {
            alias = a;
            schema = s;
            log.debug("t: " + t + " Bag: " + DataType.BAG + " tuple: " + DataType.TUPLE);
            
            if ((null != s) && !(DataType.isSchemaType(t))) {
                int errCode = 1020;
                throw new FrontendException("Only a BAG, TUPLE or MAP can have schemas. Got "
                        + DataType.findTypeName(t), errCode, PigException.INPUT);
            }
            
            type = t;
            canonicalName = CanonicalNamer.getNewName();
        }

        /**
         * Copy constructor. Alias and type are copied, a nested schema is
         * copied through the Schema copy constructor, and the new field always
         * receives a canonical name of its own.
         *
         * @param fs
         *           field schema to copy; when null the new field has no alias,
         *           no schema and type {@code DataType.UNKNOWN}
         */
        public FieldSchema(FieldSchema fs)  {
            if (fs == null) {
                this.alias = null;
                this.schema = null;
                this.type = DataType.UNKNOWN;
            } else {
                this.alias = fs.alias;
                this.schema = (fs.schema == null) ? null : new Schema(fs.schema);
                this.type = fs.type;
            }
            // Requested last, after any nested fields have been copied.
            this.canonicalName = CanonicalNamer.getNewName();
        }

        /**
         * Strict equality: delegates to
         * {@code equals(FieldSchema, FieldSchema, boolean, boolean)} with both
         * relaxation flags switched off, so type, alias and nested schemas
         * all have to agree.
         *
         * Call the four-argument static variant directly when aliases or
         * inner schemas should be ignored.
         */
        @Override
        public boolean equals(Object other) {
            return (other instanceof FieldSchema)
                    && FieldSchema.equals(this, (FieldSchema) other, false, false);
        }


        /**
         * Combines type, nested schema and alias into a hash code; a missing
         * schema or alias contributes zero.
         */
        @Override
        public int hashCode() {
            final int schemaHash = (schema == null) ? 0 : schema.hashCode();
            final int aliasHash = (alias == null) ? 0 : alias.hashCode();

            int result = this.type * 17;
            result += schemaHash * 23;
            result += aliasHash * 29;
            return result;
        }

        /**
         * Recursively checks whether a value described by the input field
         * schema can be cast to the cast field schema.
         *
         * @param castFs schema of the cast operator
         * @param inputFs schema of the cast input
         * @return true if the cast is allowed; false otherwise, including when
         *         either argument is null
         */
        public static boolean castable(
                Schema.FieldSchema castFs,
                Schema.FieldSchema inputFs) {
            if (castFs == null || inputFs == null) {
                return false;
            }

            final byte inputType = inputFs.type;
            final byte castType = castFs.type;

            if (DataType.isSchemaType(castType)) {
                // A bytearray input is always accepted for a schema type.
                if (inputType == DataType.BYTEARRAY) {
                    return true;
                }
                // Otherwise the types have to be identical.
                if (inputType != castType) {
                    return false;
                }
                // Two absent inner schemas count as compatible; the recursive
                // check is skipped because it would report a mismatch.
                if (castFs.schema == null && inputFs.schema == null) {
                    return true;
                }
                return Schema.castable(castFs.schema, inputFs.schema);
            }

            // Non-schema target: same type, or a bytearray input, is fine.
            if (inputType == castType || inputType == DataType.BYTEARRAY) {
                return true;
            }

            final boolean inputIsNumber = DataType.isNumberType(inputType);
            final boolean castIsNumber = DataType.isNumberType(castType);
            if (inputIsNumber && castIsNumber) {
                return true;
            }

            // Numbers and chararrays may become chararray, bytearray or number.
            final boolean sourceAllowed =
                    inputIsNumber || inputType == DataType.CHARARRAY;
            final boolean targetAllowed = castType == DataType.CHARARRAY
                    || castType == DataType.BYTEARRAY
                    || castIsNumber;
            return sourceAllowed && targetAllowed;
        }

        /**
         * Compares two field schemas for equality.
         *
         * @param fschema first field schema
         * @param fother second field schema
         * @param relaxInner if true, nested schemas are not compared
         * @param relaxAlias if true, aliases are not compared
         * @return true if the field schemas are equal under the given
         *         relaxations; false otherwise, including when either
         *         argument is null
         */
        public static boolean equals(FieldSchema fschema,
                                     FieldSchema fother,
                                     boolean relaxInner,
                                     boolean relaxAlias) {
            if (fschema == null || fother == null) {
                return false;
            }
            if (fschema.type != fother.type) {
                return false;
            }

            if (!relaxAlias) {
                // Aliases match when both are null or both hold equal text.
                final boolean aliasDiffers = (fschema.alias == null)
                        ? fother.alias != null
                        : !fschema.alias.equals(fother.alias);
                if (aliasDiffers) {
                    return false;
                }
            }

            if (relaxInner || !DataType.isSchemaType(fschema.type)) {
                return true;
            }

            // Two absent inner schemas are treated as equal; Schema.equals is
            // not consulted for that case because it would return false.
            if (fschema.schema == null && fother.schema == null) {
                return true;
            }
            return Schema.equals(fschema.schema, fother.schema, false, relaxAlias);
        }

        /**
         * Renders the field as {@code alias: type(schema)}. The alias prefix
         * and the parenthesised schema are left out when absent; the
         * canonical name is not part of the output.
         */
        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder();
            if (alias != null) {
                text.append(alias).append(": ");
            }
            text.append(DataType.findTypeName(type));
            if (schema != null) {
                text.append("(").append(schema.toString()).append(")");
            }
            return text.toString();
        }

        /**
         * Returns a deep copy of this FieldSchema. The alias is shared
         * (strings are immutable), the nested schema is cloned, and the copy
         * gets a canonical name of its own.
         *
         * @return clone of this FieldSchema
         * @throws CloneNotSupportedException if cloning the nested schema fails
         */
        @Override
        public FieldSchema clone() throws CloneNotSupportedException {
            try {
                Schema schemaCopy = null;
                if (schema != null) {
                    schemaCopy = schema.clone();
                }
                final FieldSchema copy = new FieldSchema(alias, schemaCopy, type);
                // The constructor already assigned a name; a further one is
                // requested here, exactly as this method has always done.
                copy.canonicalName = CanonicalNamer.getNewName();
                return copy;
            } catch (FrontendException fe) {
                throw new RuntimeException(
                    "Should never fail to clone a FieldSchema", fe);
            }
        }

        /**
         * Recursively prefix merges this field schema with another one,
         * letting the other side's aliases take precedence and not allowing
         * mergeable types.
         *
         * @param otherFs the other field schema to be merged with
         * @return the prefix merged field schema
         * @throws SchemaMergeException if they cannot be merged
         */
        public Schema.FieldSchema mergePrefixFieldSchema(Schema.FieldSchema otherFs) throws SchemaMergeException {
            final boolean otherAliasWins = true;
            final boolean mergeableTypesAllowed = false;
            return this.mergePrefixFieldSchema(otherFs, otherAliasWins, mergeableTypesAllowed);
        }

        /**
         * Recursively prefix merges this field schema with another one
         * without allowing mergeable types.
         *
         * @param otherFs the other field schema to be merged with
         * @param otherTakesAliasPrecedence true if aliases from the other
         *                                  field schema take precedence
         * @return the prefix merged field schema
         * @throws SchemaMergeException if they cannot be merged
         */
        public Schema.FieldSchema mergePrefixFieldSchema(Schema.FieldSchema otherFs,
                                            boolean otherTakesAliasPrecedence)
                                                throws SchemaMergeException {
            final boolean mergeableTypesAllowed = false;
            return this.mergePrefixFieldSchema(otherFs, otherTakesAliasPrecedence, mergeableTypesAllowed);
        }
         
        /**
         * Recursively prefix merges this field schema with another one.
         *
         * The merged type is chosen as follows, in this order:
         *   1) both types null/unknown: the merge fails (error 1021);
         *   2) identical types, or only the other type is null/unknown: this
         *      field's type is used;
         *   3) otherwise the merge fails (error 1022) unless
         *      allowMergeableTypes is true, in which case
         *      a) if this type is null/unknown, the other type is used;
         *      b) if the other type is bytearray, this type is used;
         *      c) if this field can be cast to the other one, the other type
         *         is used;
         *      d) anything else fails (error 1022).
         *
         * @param otherFs the other field schema to be merged with; when null
         *                this field schema itself is returned
         * @param otherTakesAliasPrecedence true if aliases from the other
         *                                  field schema take precedence
         * @param allowMergeableTypes true if "mergeable" types should be allowed
         * @return the prefix merged field schema
         * @throws SchemaMergeException if they cannot be merged
         */
        public Schema.FieldSchema mergePrefixFieldSchema(Schema.FieldSchema otherFs,
                                            boolean otherTakesAliasPrecedence, boolean allowMergeableTypes)
                                                throws SchemaMergeException {
            if (otherFs == null) {
                return this;
            }

            final boolean mineUntyped = isNullOrUnknownType(this);
            final boolean otherUntyped = isNullOrUnknownType(otherFs);

            final byte mergedType;
            if (mineUntyped && otherUntyped) {
                final int errCode = 1021;
                final String msg = "Type mismatch. No useful type for merging. Field Schema: " + this + ". Other Field Schema: " + otherFs;
                throw new SchemaMergeException(msg, errCode, PigException.INPUT);
            } else if (this.type == otherFs.type || otherUntyped) {
                // Same type, or only this side carries a usable type.
                mergedType = this.type;
            } else if (!allowMergeableTypes) {
                final int errCode = 1022;
                final String msg = "Type mismatch merging schema prefix. Field Schema: " + this + ". Other Field Schema: " + otherFs;
                throw new SchemaMergeException(msg, errCode, PigException.INPUT);
            } else if (mineUntyped) {
                // Only the other side carries a usable type.
                mergedType = otherFs.type;
            } else if (otherFs.type == DataType.BYTEARRAY) {
                // Keep this field's type (which could even be BYTEARRAY).
                mergedType = this.type;
            } else if (castable(otherFs, this)) {
                mergedType = otherFs.type;
            } else {
                final int errCode = 1022;
                final String msg = "Type mismatch for merging schema prefix. Field Schema: " + this + ". Other Field Schema: " + otherFs;
                throw new SchemaMergeException(msg, errCode, PigException.INPUT);
            }

            final String mergedAlias = mergeAlias(this.alias,
                                                  otherFs.alias,
                                                  otherTakesAliasPrecedence);

            if (!DataType.isSchemaType(mergedType)) {
                // Plain type: nothing nested to merge.
                return new FieldSchema(mergedAlias, mergedType);
            }

            final Schema mergedSubSchema;
            if (this.schema == null) {
                // No inner schema here: take the other side's schema as is
                // and default its NULL types to BYTEARRAY.
                mergedSubSchema = otherFs.schema;
                setSchemaDefaultType(mergedSubSchema, DataType.BYTEARRAY);
            } else {
                mergedSubSchema = this.schema.mergePrefixSchema(otherFs.schema,
                                                 otherTakesAliasPrecedence, allowMergeableTypes);
            }

            try {
                return new FieldSchema(mergedAlias, mergedSubSchema, mergedType);
            } catch (FrontendException fee) {
                final int errCode = 1023;
                final String msg = "Unable to create field schema.";
                throw new SchemaMergeException(msg, errCode, PigException.BUG, fee);
            }
        }

        /**
         * Recursively replaces the NULL type with the specified type.
         *
         * @param fs the field schema whose NULL type has to be set; a null
         *           argument is ignored
         * @param t the type to use in place of NULL
         */
        public static void setFieldSchemaDefaultType(Schema.FieldSchema fs, byte t) {
            if (fs != null) {
                if (fs.type == DataType.NULL) {
                    fs.type = t;
                }
                // Checked after the replacement above, so the new type decides
                // whether the nested schema is visited.
                if (DataType.isSchemaType(fs.type)) {
                    setSchemaDefaultType(fs.schema, t);
                }
            }
        }

        
        /**
         * Tells whether the given field schema has type NULL or UNKNOWN.
         */
        private boolean isNullOrUnknownType(FieldSchema fs) {
            final byte fieldType = fs.type;
            return fieldType == DataType.NULL || fieldType == DataType.UNKNOWN;
        }

        /**
         * Find a field schema instance in this FieldSchema hierarchy (including "this")
         * that matches the given canonical name.
         *
         * @param canonicalName canonical name to look for
         * @return the FieldSchema instance found, or null if neither this
         *         field nor its nested schema yields a match
         */
        public FieldSchema findFieldSchema(String canonicalName) {
            // canonicalName is a public, mutable field and may have been set
            // to null; guard the comparison instead of throwing.
            if (this.canonicalName != null && this.canonicalName.equals(canonicalName)) {
                return this;
            }
            if (this.schema != null) {
                return this.schema.findFieldSchema(canonicalName);
            }
            return null;
        }

    }

    // Fields of this schema, in positional order.
    private List<FieldSchema> mFields;
    // Lookup from alias to the field schema registered under that alias.
    private Map<String, FieldSchema> mAliases;
    // Lookup from a field's canonical name to the aliases registered for it.
    private MultiMap<String, String> mFieldSchemas;
    private static Log log = LogFactory.getLog(Schema.class);
    // In bags which have a schema with a tuple which contains
    // the fields present in it, if we access the second field (say)
    // we are actually trying to access the second field in the
    // tuple in the bag. This is currently true for two cases:
    // 1) bag constants - the schema of bag constant has a tuple
    // which internally has the actual elements
    // 2) When bags are loaded from input data, if the user 
    // specifies a schema with the "bag" type, he has to specify
    // the bag as containing a tuple with the actual elements in 
    // the schema declaration. However in both the cases above,
    // the user can still say b.i where b is the bag and i is 
    // an element in the bag's tuple schema. So in these cases,
    // the access should translate to a lookup for "i" in the 
    // tuple schema present in the bag. To indicate this, the
    // flag below is used. It is false by default because, 
    // currently we use bag as the type for relations. However 
    // the schema of a relation does NOT have a tuple fieldschema
    // with items in it. Instead, the schema directly has the 
    // field schema of the items. So for a relation "b", the 
    // above b.i access would be a direct single level access
    // of i in b's schema. This is treated as the "default" case
    private boolean twoLevelAccessRequired = false;

    /**
     * Creates an empty schema: no fields and no registered aliases.
     */
    public Schema() {
        mFields = new ArrayList<FieldSchema>();
        mAliases = new HashMap<String, FieldSchema>();
        mFieldSchemas = new MultiMap<String, String>();
    }

    /**
     * Creates a schema from a list of field schemas. The list is stored as
     * is (not copied). Every non-null field that has an alias is registered
     * in the alias lookup and in the canonical-name-to-alias lookup.
     *
     * @param fields List of field schemas that describes the fields; must
     *               not be null, but may contain null entries.
     */
    public Schema(List<FieldSchema> fields) {
        mFields = fields;
        mAliases = new HashMap<String, FieldSchema>(fields.size());
        mFieldSchemas = new MultiMap<String, String>();
        for (FieldSchema fs : fields) {
            if (fs != null && fs.alias != null) {
                mAliases.put(fs.alias, fs);
                mFieldSchemas.put(fs.canonicalName, fs.alias);
            }
        }
    }

    /**
     * Create a schema with only one field. The field is always added, even
     * when it is null; it is registered in the alias lookups only when it is
     * non-null and has an alias.
     *
     * @param fieldSchema field to put in this schema.
     */
    public Schema(FieldSchema fieldSchema) {
        mFields = new ArrayList<FieldSchema>(1);
        mFields.add(fieldSchema);
        mAliases = new HashMap<String, FieldSchema>(1);
        mFieldSchemas = new MultiMap<String, String>();
        if (fieldSchema != null && fieldSchema.alias != null) {
            mAliases.put(fieldSchema.alias, fieldSchema);
            mFieldSchemas.put(fieldSchema.canonicalName, fieldSchema.alias);
        }
    }

    /**
     * Copy Constructor. Every field of the source is copied with the
     * FieldSchema copy constructor, so a null entry in the source becomes a
     * field without alias and with type UNKNOWN in the copy.
     *
     * @param s source schema; when null an empty schema is created
     */
    public Schema(Schema s) {
        mAliases = new HashMap<String, FieldSchema>();
        mFieldSchemas = new MultiMap<String, String>();

        if (s == null) {
            mFields = new ArrayList<FieldSchema>();
            return;
        }

        twoLevelAccessRequired = s.twoLevelAccessRequired;
        mFields = new ArrayList<FieldSchema>(s.size());
        try {
            for (int i = 0; i < s.size(); ++i) {
                // The copy is never null, so only its alias needs checking.
                FieldSchema fs = new FieldSchema(s.getField(i));
                mFields.add(fs);
                if (fs.alias != null) {
                    mAliases.put(fs.alias, fs);
                    mFieldSchemas.put(fs.canonicalName, fs.alias);
                }
            }
        } catch (FrontendException pe) {
            // Not expected: the index always stays below s.size(). Keep the
            // historical fallback to an empty schema, but leave a trace
            // instead of swallowing the failure silently.
            log.warn("Unable to copy schema; falling back to an empty schema", pe);
            mFields = new ArrayList<FieldSchema>();
            mAliases = new HashMap<String, FieldSchema>();
            mFieldSchemas = new MultiMap<String, String>();
        }
    }

    /**
     * Given an alias name, find the associated FieldSchema. If no field is
     * registered under exactly that alias, registered aliases ending in
     * "::" + alias are considered instead.
     *
     * @param alias Alias to look up.
     * @return FieldSchema, or null if no such alias is in this tuple.
     * @throws FrontendException (error code 1025) if several qualified
     *         aliases match and they do not all resolve to one field schema
     */
    public FieldSchema getField(String alias) throws FrontendException {
        FieldSchema exact = mAliases.get(alias);
        if (exact != null) {
            return exact;
        }

        // Collect the registered aliases that have "::alias" as their suffix.
        // Map keys are unique, so a plain set is enough to hold the matches.
        final String qualifiedSuffix = "::" + alias;
        Set<String> matchingKeys = new HashSet<String>();
        for (String key : mAliases.keySet()) {
            if (key.endsWith(qualifiedSuffix)) {
                matchingKeys.add(key);
            }
        }

        if (matchingKeys.isEmpty()) {
            return null;
        }
        if (matchingKeys.size() == 1) {
            return mAliases.get(matchingKeys.iterator().next());
        }

        // Several aliases matched: that is still fine when they all lead to
        // the same field schema.
        Set<FieldSchema> distinctFields = new HashSet<FieldSchema>();
        for (String key : matchingKeys) {
            distinctFields.add(mAliases.get(key));
        }
        if (distinctFields.size() == 1) {
            return distinctFields.iterator().next();
        }

        StringBuilder sb = new StringBuilder("Found more than one match: ");
        boolean first = true;
        for (String key : matchingKeys) {
            if (!first) {
                sb.append(", ");
            }
            first = false;
            sb.append(key);
        }
        int errCode = 1025;
        throw new FrontendException(sb.toString(), errCode, PigException.INPUT);
    }

    
    /**
     * Given an alias name, find the associated FieldSchema. If exact name is 
     * not found see if any field matches the part of the 'namespaced' alias.
     * eg. if given alias is nm::a , and schema is (a,b). It will return 
     * FieldSchema of a.
     * if given alias is nm::a and schema is (nm2::a, b), it will return null
     * Null fields and fields without an alias never take part in the
     * sub-name match.
     *
     * @param alias Alias to look up.
     * @return FieldSchema, or null if no such alias is in this tuple.
     * @throws FrontendException (error code 1116) if more than one field
     *         matches the sub-name, or if the exact lookup itself fails
     */
    public FieldSchema getFieldSubNameMatch(String alias) throws FrontendException {
        if (alias == null) {
            return null;
        }
        FieldSchema exact = getField(alias);
        if (exact != null) {
            return exact;
        }

        final String sep = "::";
        List<FieldSchema> matchedFieldSchemas = new ArrayList<FieldSchema>();
        if (alias.contains(sep)) {
            for (FieldSchema field : mFields) {
                // Without this guard a null field would throw, and a field
                // without alias would be compared as the text "::null".
                if (field == null || field.alias == null) {
                    continue;
                }
                if (alias.endsWith(sep + field.alias)) {
                    matchedFieldSchemas.add(field);
                }
            }
        }

        if (matchedFieldSchemas.size() > 1) {
            StringBuilder sb = new StringBuilder("Found more than one " +
            "sub alias name match: ");
            boolean first = true;
            for (FieldSchema matchFs : matchedFieldSchemas) {
                if (!first) {
                    sb.append(", ");
                }
                first = false;
                sb.append(matchFs.alias);
            }
            int errCode = 1116;
            throw new FrontendException(sb.toString(), errCode, PigException.INPUT);
        }

        return matchedFieldSchemas.isEmpty() ? null : matchedFieldSchemas.get(0);
    }
    
    
    
    /**
     * Given a field number, find the associated FieldSchema.
     *
     * @param fieldNum
     *            Field number to look up (zero based).
     * @return FieldSchema for this field.
     * @throws FrontendException
     *             (error code 1026) if the field number is negative or
     *             exceeds the number of fields in the tuple.
     */
    public FieldSchema getField(int fieldNum) throws FrontendException {
        // Negative positions are reported the same way as positions past the
        // end, rather than escaping as an IndexOutOfBoundsException.
        if (fieldNum < 0 || fieldNum >= mFields.size()) {
            int errCode = 1026;
            String detailedMsg = "Attempt to access field: " + fieldNum + " from schema: " + this;
            String msg = "Attempt to fetch field " + fieldNum + " from schema of size " + mFields.size();
            throw new FrontendException(msg, errCode, PigException.INPUT, false, detailedMsg);
        }

        return mFields.get(fieldNum);
    }

    /**
     * Reports how many fields this schema holds.
     *
     * @return number of fields.
     */
    public int size() {
        final int fieldCount = mFields.size();
        return fieldCount;
    }

    /**
     * Reconcile this schema with another schema.  The schema being reconciled
     * with should have the same number of columns.  The use case is where a
     * schema already exists but may not have alias and or type information.  If
     * an alias exists in this schema and a new one is given, then the new one
     * will be used.  Similarly with types, though this needs to be used
     * carefully, as types should not be lightly changed.
     *
     * For every position the other field's alias (if non-null), type (if not
     * UNKNOWN) and nested schema (if non-null) overwrite the ones of this
     * schema's field; the nested schema is taken over by reference.
     *
     * @param other Schema to reconcile with; null is ignored.
     * @throws FrontendException (error code 1027) if the schemas differ in size.
     */
    public void reconcile(Schema other) throws FrontendException {
        if (other == null) {
            return;
        }

        if (other.size() != size()) {
            int errCode = 1027;
            String msg = "Cannot reconcile schemas with different "
                + "sizes.  This schema has size " + size() + " other has size "
                + "of " + other.size();
            String detailedMsg = "Schema size mismatch. This schema: " + this + " other schema: " + other;
            throw new FrontendException(msg, errCode, PigException.INPUT, false, detailedMsg);
        }

        Iterator<FieldSchema> i = other.mFields.iterator();
        for (int j = 0; i.hasNext(); j++) {
            FieldSchema otherFs = i.next();
            FieldSchema ourFs = mFields.get(j);
            log.debug("ourFs: " + ourFs + " otherFs: " + otherFs);

            if (otherFs.alias != null) {
                log.debug("otherFs.alias: " + otherFs.alias);
                if (ourFs.alias != null) {
                    // Forget everything registered under the old alias.
                    log.debug("Removing ourFs.alias: " + ourFs.alias);
                    mAliases.remove(ourFs.alias);
                    Collection<String> aliases = mFieldSchemas.get(ourFs.canonicalName);
                    if (aliases != null) {
                        // Iterate over a snapshot while removing; presumably
                        // the returned collection is backed by the multimap.
                        List<String> listAliases = new ArrayList<String>(aliases);
                        for (String alias : listAliases) {
                            log.debug("Removing alias " + alias + " from multimap");
                            mFieldSchemas.remove(ourFs.canonicalName, alias);
                        }
                    }
                }
                // otherFs.alias is known to be non-null here, so the new alias
                // can be registered in both lookups unconditionally.
                ourFs.alias = otherFs.alias;
                log.debug("Setting alias to: " + otherFs.alias);
                mAliases.put(ourFs.alias, ourFs);
                mFieldSchemas.put(ourFs.canonicalName, ourFs.alias);
            }

            if (otherFs.type != DataType.UNKNOWN) {
                ourFs.type = otherFs.type;
                log.debug("Setting type to: "
                        + DataType.findTypeName(otherFs.type));
            }

            if (otherFs.schema != null) {
                ourFs.schema = otherFs.schema;
                log.debug("Setting schema to: " + otherFs.schema);
            }
        }
    }

    /**
     * Deep equality: two schemas are equal only if they match field by field,
     * including aliases and inner schemas.  Anything that is not a
     * {@code Schema} is never equal to this one.
     * Use Schema.equals(Schema schema,
     *                   Schema other,
     *                   boolean relaxInner,
     *                   boolean relaxAlias)
     * if relaxation of aliases is a requirement.
     */
    @Override
    public boolean equals(Object other) {
        if (other instanceof Schema) {
            // strict comparison: neither inner schemas nor aliases are relaxed
            return Schema.equals(this, (Schema) other, false, false);
        }
        return false;
    }

    /**
     * Make a deep copy of a schema.  Every field schema is copied with
     * {@code FieldSchema.clone()}, and the alias and field-schema lookups of
     * the copy are rebuilt so that they refer to the copied field schemas.
     * The twoLevelAccessRequired flag is carried over.
     * @return the new, independent schema.
     * @throws CloneNotSupportedException
     */
    @Override
    public Schema clone() throws CloneNotSupportedException {
        Schema s = new Schema();

        // Build a map between old and new field schemas, so we can properly
        // construct the new alias and field schema maps.  Populate the field
        // list with copies of the existing field schemas.
        Map<FieldSchema, FieldSchema> fsMap =
            new HashMap<FieldSchema, FieldSchema>(size());
        Map<String, FieldSchema> fsCanonicalNameMap =
            new HashMap<String, FieldSchema>(size());
        for (FieldSchema fs : mFields) {
            FieldSchema copy = fs.clone();
            s.mFields.add(copy);
            fsMap.put(fs, copy);
            fsCanonicalNameMap.put(fs.canonicalName, copy);
        }

        // Build the aliases map: same aliases, pointing at the copies
        for (Map.Entry<String, FieldSchema> aliasEntry : mAliases.entrySet()) {
            FieldSchema oldFs = aliasEntry.getValue();
            assert(oldFs != null);
            FieldSchema newFs = fsMap.get(oldFs);
            assert(newFs != null);
            s.mAliases.put(aliasEntry.getKey(), newFs);
        }

        // Build the field schemas map, keyed by the copy's canonical name
        for (String oldFsCanonicalName : mFieldSchemas.keySet()) {
            FieldSchema newFs = fsCanonicalNameMap.get(oldFsCanonicalName);
            assert(newFs != null);
            s.mFieldSchemas.put(newFs.canonicalName, mFieldSchemas.get(oldFsCanonicalName));
        }

        s.twoLevelAccessRequired = twoLevelAccessRequired;
        return s;
    }



    // Per-position multipliers used by hashCode(); positions past the end of
    // the list wrap around.  The list is the consecutive primes from 3 to 113
    // (the last entry used to read 1133, which is 11 * 103 and not prime).
    static int[] primeList = { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37,
                               41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
                               83, 89, 97, 101, 103, 107, 109, 113} ;

    /**
     * Combines the hash codes of the fields in order, weighting the field at
     * each position by an entry of primeList (wrapping around when there are
     * more fields than entries).
     */
    @Override
    public int hashCode() {
        int result = 0;
        int position = 0;
        for (FieldSchema field : this.mFields) {
            int multiplier = primeList[position % primeList.length];
            result += field.hashCode() * multiplier;
            position++;
        }
        return result;
    }

    /**
     * Renders this schema through
     * {@link #stringifySchema(StringBuilder, Schema, byte)} using the bag
     * notation (enclosed in braces).
     * @throws RuntimeException wrapping the underlying FrontendException if
     * the schema cannot be printed.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        try {
            stringifySchema(sb, this, DataType.BAG) ;
        }
        catch (FrontendException fee) {
            // keep the original failure as the cause so it is not lost
            throw new RuntimeException("PROBLEM PRINTING SCHEMA", fee)  ;
        }
        return sb.toString();
    }


    /**
     * Appends a textual form of a schema to the given builder.
     * A TUPLE is wrapped in parentheses and a BAG in braces; any other type
     * gets no enclosing delimiters.  Fields are separated by commas and
     * printed as "alias: type" (the alias part is omitted when it is null).
     * Tuple and bag fields are printed recursively, map fields as the type
     * name followed by their inner schema (if any) in square brackets.
     * A null field prints nothing, but its separating comma is still emitted.
     *
     * @param sb builder the output is appended to
     * @param schema schema to print; may be null, in which case only the
     *               delimiters are written
     * @param type type of the enclosing element, normally BAG or TUPLE
     * @throws FrontendException if a field cannot be retrieved
     */
    public static void stringifySchema(StringBuilder sb,
                                       Schema schema,
                                       byte type)
                                            throws FrontendException{

        // Work out the delimiters once; anything but TUPLE/BAG gets none.
        // TODO: Map Support
        String opening = "";
        String closing = "";
        if (type == DataType.TUPLE) {
            opening = "(";
            closing = ")";
        }
        else if (type == DataType.BAG) {
            opening = "{";
            closing = "}";
        }

        sb.append(opening);

        if (schema != null) {
            for (int pos = 0; pos < schema.size(); pos++) {

                // every field except the first is preceded by a separator
                if (pos > 0) {
                    sb.append(",") ;
                }

                FieldSchema field = schema.getField(pos) ;
                if (field == null) {
                    continue;
                }

                if (field.alias != null) {
                    sb.append(field.alias).append(": ");
                }

                if (DataType.isAtomic(field.type)) {
                    sb.append(DataType.findTypeName(field.type)) ;
                }
                else if (field.type == DataType.TUPLE || field.type == DataType.BAG) {
                    // safety net against a schema that contains itself
                    if (schema == field.schema) {
                        throw new AssertionError("Schema refers to itself "
                                                 + "as inner schema") ;
                    }
                    stringifySchema(sb, field.schema, field.type) ;
                }
                else if (field.type == DataType.MAP) {
                    sb.append(DataType.findTypeName(field.type));
                    sb.append("[");
                    if (field.schema != null) {
                        stringifySchema(sb, field.schema, field.type);
                    }
                    sb.append("]");
                }
                else {
                    sb.append(DataType.findTypeName(field.type)) ;
                }
            }
        }

        sb.append(closing);

    }

    /**
     * Appends a field schema to this schema.  A null field is appended to the
     * field list as-is and nothing else happens.  Otherwise the field's alias
     * (even a null one) is recorded under its canonical name, and a non-null
     * alias is additionally registered in the alias lookup.
     * @param f field schema to append; may be null.
     */
    public void add(FieldSchema f) {
        mFields.add(f);
        if (f == null) {
            return;
        }

        mFieldSchemas.put(f.canonicalName, f.alias);
        if (f.alias != null) {
            mAliases.put(f.alias, f);
        }
    }

    /**
     * Look up the position of the field schema that carries the given alias.
     * Sub-name matching is not used by this variant.
     *
     * @param alias
     *            alias of the FieldSchema.
     * @return position of the FieldSchema.
     */
    public int getPosition(String alias) throws FrontendException{
        final boolean subNameMatch = false;
        return getPosition(alias, subNameMatch);
    }


    /**
     * Look up the position of the field schema that carries the given alias,
     * resolving the alias with getFieldSubNameMatch so that subName matches
     * are found as well.
     * @param alias
     *            alias of the FieldSchema.
     * @return position of the FieldSchema.
     */
    public int getPositionSubName(String alias) throws FrontendException{
        final boolean subNameMatch = true;
        return getPosition(alias, subNameMatch);
    }
    
    
    /**
     * Shared implementation of {@link #getPosition(String)} and
     * {@link #getPositionSubName(String)}.
     * <p>
     * When twoLevelAccessRequired is set, this schema must be a bag schema
     * holding exactly one tuple field; the alias is then resolved inside that
     * tuple's schema.  Otherwise the alias is resolved in this schema (with or
     * without sub-name matching) and the index of the resolved field schema in
     * the field list is returned.
     *
     * @param alias alias of the FieldSchema.
     * @param isSubNameMatch whether to resolve the alias with
     *        getFieldSubNameMatch instead of getField.
     * @return position of the FieldSchema, or -1 if the alias cannot be found.
     * @throws FrontendException if sub-name matching is requested together
     *         with two level access (2248), if the two level access
     *         preconditions are not met (1008, 1009), or if the alias names
     *         the bag's tuple itself (1028).
     */
    private int getPosition(String alias, boolean isSubNameMatch)
    throws FrontendException {
        if(isSubNameMatch && twoLevelAccessRequired){
            // should not happen
            int errCode = 2248;
            String msg = "twoLevelAccessRequired==true is not supported with" +
            " isSubNameMatch==true";
            throw new FrontendException(msg, errCode, PigException.BUG);
        }
        if(twoLevelAccessRequired) {
            // this is the case where "this" schema is that of
            // a bag which has just one tuple fieldschema which
            // in turn has a list of fieldschemas. The alias supplied
            // should be treated as an alias in the tuple's schema

            // check that indeed we only have one field schema
            // which is that of a tuple
            if(mFields.size() != 1) {
                int errCode = 1008;
                String msg = "Expected a bag schema with a single " +
                "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                " but got a bag schema with multiple elements.";
                throw new FrontendException(msg, errCode, PigException.INPUT);
            }
            Schema.FieldSchema tupleFS = mFields.get(0);
            if(tupleFS.type != DataType.TUPLE) {
                int errCode = 1009;
                String msg = "Expected a bag schema with a single " +
                        "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                        " but got an element of type " +
                        DataType.findTypeName(tupleFS.type);
                throw new FrontendException(msg, errCode, PigException.INPUT);
            }

            // check if the alias supplied is that of the tuple
            // itself - then disallow it since we do not allow access
            // to the tuple itself - we only allow access to the fields
            // in the tuple
            if(alias.equals(tupleFS.alias)) {
                int errCode = 1028;
                String msg = "Access to the tuple ("+ alias + ") of " +
                        "the bag is disallowed. Only access to the elements of " +
                        "the tuple in the bag is allowed.";
                throw new FrontendException(msg, errCode, PigException.INPUT);
            }

            // a tuple without an inner schema has no fields to look up;
            // report "not found" instead of failing with a NullPointerException
            if(tupleFS.schema == null) {
                return -1;
            }

            // all is good - get the position from the tuple's schema
            return tupleFS.schema.getPosition(alias);
        }

        FieldSchema fs = isSubNameMatch ? getFieldSubNameMatch(alias) : getField(alias);
        if (null == fs) {
            return -1;
        }

        // Only build the (potentially large) debug strings when they are used.
        final boolean debug = log.isDebugEnabled();
        if (debug) {
            log.debug("fs: " + fs);
        }

        // Locate the field by identity rather than with mFields.indexOf(fs),
        // which would compare with equals(); if the same object is listed
        // more than once the last occurrence wins.
        int index = -1;
        for(int i = 0; i < mFields.size(); ++i) {
            FieldSchema candidate = mFields.get(i);
            if (debug) {
                log.debug("mFields(" + i + "): " + candidate + " alias: " + candidate.alias);
            }
            if(fs == candidate) {
                index = i;
            }
        }

        if (debug) {
            log.debug("index: " + index);
        }
        return index;
    }

    /**
     * Registers an alias for a field schema.  A null alias is ignored.  The
     * alias is always entered in the alias lookup; it is also recorded under
     * the field schema's canonical name when the field schema is not null.
     * @param alias alias to register.
     * @param fs field schema the alias refers to; may be null.
     */
    public void addAlias(String alias, FieldSchema fs) {
        if (alias == null) {
            return;
        }

        mAliases.put(alias, fs);
        if (fs != null) {
            mFieldSchemas.put(fs.canonicalName, alias);
        }
    }

    /**
     * Returns the aliases known to this schema.  The result is the key set of
     * the internal alias map itself, not a copy.
     * @return set of aliases.
     */
    public Set<String> getAliases() {
        return mAliases.keySet();
    }

    /**
     * Writes every alias known to this schema to the debug log, one log
     * statement per alias.
     */
    public void printAliases() {
        for (String alias : mAliases.keySet()) {
            log.debug("Schema Alias: " + alias);
        }
    }

    /**
     * Returns the field schemas of this schema in column order.  The internal
     * list itself is returned, not a copy.
     * @return list of field schemas.
     */
    public List<FieldSchema> getFields() {
        return mFields;
    }

    /**
     * Recursively compare two schemas to check if the input schema
     * can be cast to the cast schema.  Only as many input fields as the cast
     * schema has are examined, so the input schema may have extra trailing
     * fields but must not be shorter than the cast schema.
     * @param cast schema of the cast operator
     * @param  input schema of the cast input
     * @return true if both schemas are null, or if neither is null and every
     *         field of the cast schema is castable from the input field at
     *         the same position; false otherwise
     */
    public static boolean castable(Schema cast, Schema input) {

        // If both of them are null, they are castable
        if ((cast == null) && (input == null)) {
            return true ;
        }

        // otherwise a single null schema can never be cast
        if ((cast == null) || (input == null)) {
            return false ;
        }

        if (cast.size() > input.size()) return false;

        Iterator<FieldSchema> i = cast.mFields.iterator();
        Iterator<FieldSchema> j = input.mFields.iterator();

        //iterate only for the number of fields in cast; the size check above
        //guarantees that the input iterator has at least as many elements
        while (i.hasNext()) {

            FieldSchema castFs = i.next() ;
            FieldSchema inputFs = j.next() ;

            // Compare recursively using field schema
            if (!FieldSchema.castable(castFs, inputFs)) {
                return false ;
            }

        }
        return true;
    }

    /**
     * Recursively compare two schemas for equality.  Two null schemas are
     * equal; a null schema never equals a non-null one.  If either schema has
     * twoLevelAccessRequired set, that schema is replaced by the inner schema
     * of its first field before the comparison is made.
     * @param schema
     * @param other
     * @param relaxInner if true, inner schemas will not be checked
     * @param relaxAlias if true, aliases will not be checked
     * @return true if schemas are equal, false otherwise
     */
    public static boolean equals(Schema schema,
                                 Schema other,
                                 boolean relaxInner,
                                 boolean relaxAlias) {

        // If both of them are null, they are equal
        if ((schema == null) && (other == null)) {
            return true ;
        }

        // otherwise exactly one null means they differ
        if ((schema == null) || (other == null)) {
            return false ;
        }

        /*
         * Need to check for bags with schemas and bags with tuples that in turn have schemas.
         * Retrieve the tuple schema of the bag if twoLevelAccessRequired
         * Assuming that only bags exhibit this behavior and twoLevelAccessRequired is used
         * with the right intentions
         */
        if(schema.isTwoLevelAccessRequired() || other.isTwoLevelAccessRequired()) {
            if(schema.isTwoLevelAccessRequired()) {
                try {
                    schema = schema.getField(0).schema;
                } catch (FrontendException fee) {
                    return false;
                }
            }

            if(other.isTwoLevelAccessRequired()) {
                try {
                    other = other.getField(0).schema;
                } catch (FrontendException fee) {
                    return false;
                }
            }

            return Schema.equals(schema, other, relaxInner, relaxAlias);
        }

        if (schema.size() != other.size()) return false;

        Iterator<FieldSchema> i = schema.mFields.iterator();
        Iterator<FieldSchema> j = other.mFields.iterator();

        while (i.hasNext()) {

            FieldSchema myFs = i.next() ;
            FieldSchema otherFs = j.next() ;

            if (!relaxAlias) {
                // aliases match when both are null or both are equal strings
                boolean sameAlias = (myFs.alias == null)
                        ? (otherFs.alias == null)
                        : myFs.alias.equals(otherFs.alias) ;
                if (!sameAlias) {
                    return false ;
                }
            }

            if (myFs.type != otherFs.type) {
                return false ;
            }

            if (!relaxInner) {
                // Compare recursively using field schema
                if (!FieldSchema.equals(myFs, otherFs, false, relaxAlias)) {
                    return false ;
                }
            }

        }
        return true;
    }


    /**
     * Merge this schema with the other schema; instance-level shortcut for
     * the static three-argument mergeSchema.
     * @param other the other schema to be merged with
     * @param otherTakesAliasPrecedence true if aliases from the other
     *                                  schema take precedence
     * @return the merged schema, null if they are not compatible
     */
    public Schema merge(Schema other, boolean otherTakesAliasPrecedence) {
        Schema merged = mergeSchema(this, other, otherTakesAliasPrecedence) ;
        return merged ;
    }

    /**
     * Recursively merge two schemas position by position, requiring equal
     * sizes and compatible types.  A failed merge is reported by returning
     * null rather than by an exception.
     * @param schema the initial schema
     * @param other the other schema to be merged with
     * @param otherTakesAliasPrecedence true if aliases from the other
     *                                  schema take precedence
     * @return the merged schema, null if they are not compatible
     */
    public static Schema mergeSchema(Schema schema, Schema other,
                               boolean otherTakesAliasPrecedence) {
        final boolean allowDifferentSizeMerge = false ;
        final boolean allowIncompatibleTypes = false ;
        try {
            return mergeSchema(schema,
                               other,
                               otherTakesAliasPrecedence,
                               allowDifferentSizeMerge,
                               allowIncompatibleTypes) ;
        }
        catch(SchemaMergeException sme) {
            // just mean they are not compatible
            return null ;
        }
    }

    /**
     * Recursively merge two schemas, position by position.
     * @param schema the initial schema
     * @param other the other schema to be merged with
     * @param otherTakesAliasPrecedence true if aliases from the other
     *                                  schema take precedence
     * @param allowDifferentSizeMerge allow merging of schemas of different
     *                               sizes; the extra fields of both schemas
     *                               are appended (those of the first schema
     *                               first) after the merged common prefix
     * @param allowIncompatibleTypes 1) if types in schemas are not compatible
     *                               they will be treated as ByteArray (untyped)
     *                               2) if schemas in schemas are not compatible
     *                               and allowIncompatibleTypes is true
     *                               those inner schemas in the output
     *                               will be null.
     * @return the merged schema; this is null if both schemas are null, or if
     *         one schema is null and allowIncompatibleTypes is true
     *
     * @throws SchemaMergeException if they cannot be merged
     */

    public static Schema mergeSchema(Schema schema,
                               Schema other,
                               boolean otherTakesAliasPrecedence,
                               boolean allowDifferentSizeMerge,
                               boolean allowIncompatibleTypes)
                                    throws SchemaMergeException {
        if(schema == null && other == null){
            //if both are null, they are not incompatible
            return null;
        }

        // exactly one of them is null
        if (schema == null || other == null) {
            if (allowIncompatibleTypes) {
                return null ;
            }
            int errCode = 1029;
            String msg = "One of the schemas is null for merging schemas. Schema: " + schema + " Other schema: " + other;
            throw new SchemaMergeException(msg, errCode, PigException.INPUT) ;
        }

        if ( (schema.size() != other.size()) &&
             (!allowDifferentSizeMerge) ) {
            int errCode = 1030;
            String msg = "Different schema sizes for merging schemas. Schema size: " + schema.size() + " Other schema size: " + other.size();
            throw new SchemaMergeException(msg, errCode, PigException.INPUT) ;
        }

        List<FieldSchema> outputList = new ArrayList<FieldSchema>() ;

        List<FieldSchema> mylist = schema.mFields ;
        List<FieldSchema> otherlist = other.mFields ;

        // We iterate up to the smaller one's size
        int iterateLimit = Math.min(mylist.size(), otherlist.size()) ;

        int idx = 0;
        for (; idx< iterateLimit ; idx ++) {

            // Just for readability
            FieldSchema myFs = mylist.get(idx) ;
            FieldSchema otherFs = otherlist.get(idx) ;

            byte mergedType = DataType.mergeType(myFs.type, otherFs.type) ;

            // If the types cannot be merged
            if (mergedType == DataType.ERROR) {
                // the schemas cannot be merged unless incompatible types
                // are allowed ...
                if (!allowIncompatibleTypes) {
                    int errCode = 1031;
                    String msg = "Incompatible types for merging schemas. Field schema type: "
                        + DataType.findTypeName(myFs.type) + " Other field schema type: "
                        + DataType.findTypeName(otherFs.type);
                    throw new SchemaMergeException(msg, errCode, PigException.INPUT) ;
                }
                // ... in which case the field is treated as bytearray
                mergedType = DataType.BYTEARRAY ;
            }

            String mergedAlias = mergeAlias(myFs.alias,
                                            otherFs.alias,
                                            otherTakesAliasPrecedence) ;

            FieldSchema mergedFs ;
            if (!DataType.isSchemaType(mergedType)) {
                // just normal merge
                mergedFs = new FieldSchema(mergedAlias, mergedType) ;
            }
            else {
                // merge the inner schemas; if they are incompatible and
                // allowIncompatibleTypes==false an exception is thrown by
                // mergeSchema.  This call is deliberately kept outside the
                // try block below so that its exception propagates unchanged.
                Schema mergedSubSchema = mergeSchema(myFs.schema,
                                                     otherFs.schema,
                                                     otherTakesAliasPrecedence,
                                                     allowDifferentSizeMerge,
                                                     allowIncompatibleTypes) ;

                // create the merged field
                // the mergedSubSchema can be null if allowIncompatibleTypes
                try {
                    mergedFs = new FieldSchema(mergedAlias, mergedSubSchema, mergedType) ;
                } catch (FrontendException e) {
                    int errCode = 2124;
                    String errMsg = "Internal Error: Unexpected error creating field schema";
                    throw new SchemaMergeException(errMsg, errCode, PigException.BUG, e);
                }

            }
            outputList.add(mergedFs) ;
        }

        // Handle different schema size: at most one of the two schemas has
        // fields left beyond the common prefix, append them as they are
        if (allowDifferentSizeMerge) {
            appendLeftoverFields(outputList, mylist, idx) ;
            appendLeftoverFields(outputList, otherlist, idx) ;
        }

        Schema result = new Schema(outputList);
        if (schema.isTwoLevelAccessRequired()!=other.isTwoLevelAccessRequired()) {
            int errCode = 2124;
            String errMsg = "Cannot merge schema " + schema + " and " + other + ". One with twoLeverAccess flag, the other doesn't.";
            throw new SchemaMergeException(errMsg, errCode, PigException.BUG);
        }
        if (schema.isTwoLevelAccessRequired())
            result.setTwoLevelAccessRequired(true);
        return result;
    }

    /**
     * Appends fresh field schemas for source[from..] to target: alias and
     * type are carried over, and for schema types (TUPLE &amp; BAG) the inner
     * schema reference as well.
     */
    private static void appendLeftoverFields(List<FieldSchema> target,
                                             List<FieldSchema> source,
                                             int from) {
        for (int i = from; i < source.size(); i++) {

            FieldSchema fs = source.get(i) ;

            // for non-schema types
            if (!DataType.isSchemaType(fs.type)) {
                target.add(new FieldSchema(fs.alias, fs.type)) ;
            }
            // for TUPLE & BAG
            else {
                FieldSchema tmp = new FieldSchema(fs.alias, fs.schema) ;
                tmp.type = fs.type ;
                target.add(tmp) ;
            }
        }
    }

    /**
     * Picks the alias of a merged field.  When only one of the two aliases is
     * present that one is used (null if neither is); when both are present
     * the precedence flag decides.
     * @param alias alias from the first schema
     * @param other alias from the other schema
     * @param otherTakesPrecedence true if <code>other</code> wins when both
     *                             aliases are present
     * @return the chosen alias, possibly null
     */
    private static String mergeAlias(String alias, String other
                              ,boolean otherTakesPrecedence) {
        if (alias == null) {
            return other ;
        }
        if (other == null) {
            return alias ;
        }
        return otherTakesPrecedence ? other : alias ;
    }
    
    /**
     * Merges collection of schemas using their column aliases
     * (unlike mergeSchema(..) functions which merge using positions)
     * Schema will not be merged if types are incompatible,
     * as per DataType.mergeType(..)
     * For Tuples and Bags, SubSchemas have to be equal be considered compatible
     * @param schemas - list of schemas to be merged using their column alias
     * @return merged schema; null if the collection is empty
     * @throws SchemaMergeException if one of the schemas cannot be merged
     * into the result built so far; the exception is marked as showable to
     * the user and carries the original exception as its cause
     */
    public static Schema mergeSchemasByAlias(Collection<Schema> schemas)
    throws SchemaMergeException{
        Schema mergedSchema = null;

        // list of schemas that have currently been merged, used in error message
        List<Schema> mergedSchemas = new ArrayList<Schema>(schemas.size());
        for(Schema sch : schemas){
            if(mergedSchema == null){
                // nothing merged yet: start from a copy of this schema
                mergedSchema = new Schema(sch);
            } else {
                try{
                    mergedSchema = mergeSchemaByAlias(mergedSchema, sch);
                }catch(SchemaMergeException e){
                    String msg = "Error merging schema: ("  + sch + ") with "
                    + "merged schema: (" + mergedSchema + ")" + " of schemas : "
                    + mergedSchemas;
                    SchemaMergeException sme = new SchemaMergeException(msg,
                            e.getErrorCode(), e);
                    sme.setMarkedAsShowToUser(true);
                    throw sme;
                }
            }
            mergedSchemas.add(sch);
        }
        return mergedSchema;
    }
    
    /**
     * Merges two schemas using their column aliases
     * (unlike mergeSchema(..) functions which merge using positions)
     * Schema will not be merged if types are incompatible,
     * as per DataType.mergeType(..)
     * For Tuples and Bags, SubSchemas have to be equal be considered compatible.
     * The result lists the fields of schema1 first (merged with their match
     * in schema2 where one exists), followed by copies of the schema2 fields
     * that matched nothing in schema1.
     * @param schema1
     * @param schema2
     * @return Merged Schema
     * @throws SchemaMergeException if schemas cannot be merged, in particular
     * if any field has a null alias
     */
    public static Schema mergeSchemaByAlias(Schema schema1,
            Schema schema2)
    throws SchemaMergeException{
        Schema mergedSchema = new Schema();
        // fields of schema2 that have already been merged into the result
        Set<FieldSchema> schema2colsAdded = new HashSet<FieldSchema>();
        // add/merge fields present in first schema
        for(FieldSchema fs1 : schema1.getFields()){
            checkNullAlias(fs1, schema1);
            FieldSchema fs2 = getFieldSubNameMatchThrowSchemaMergeException(schema2,fs1.alias);
            if(fs2 != null){
                if(schema2colsAdded.contains(fs2)){
                    // alias corresponds to multiple fields in schema1,
                    // just do a lookup on
                    // schema1 , that will throw the appropriate error.
                    getFieldSubNameMatchThrowSchemaMergeException(schema1, fs2.alias);
                }
                schema2colsAdded.add(fs2);
            }
            FieldSchema mergedFs = mergeFieldSchemaFirstLevelSameAlias(fs1,fs2);
            mergedSchema.add(mergedFs);
        }

        //add schemas from 2nd schema, that are not already present in
        // merged schema
        for(FieldSchema fs2 : schema2.getFields()){
            checkNullAlias(fs2, schema2);
            if(! schema2colsAdded.contains(fs2)){
                try {
                    mergedSchema.add(fs2.clone());
                } catch (CloneNotSupportedException e) {
                    throw new SchemaMergeException(
                            "Error encountered while merging schemas", e);
                }
            }
        }
        return mergedSchema;

    }

    /**
     * Guard used by the alias based merge: rejects a field without an alias.
     * @param fs field schema to check
     * @param schema schema the field belongs to, used in the error message
     * @throws SchemaMergeException (error code 1126) if the alias is null
     */
    private static void checkNullAlias(FieldSchema fs, Schema schema)
    throws SchemaMergeException {
        if (fs.alias != null) {
            return;
        }
        final int errCode = 1126;
        String msg = "Schema having field with null alias cannot be merged " +
                "using alias. Schema :" + schema;
        throw new SchemaMergeException(msg, errCode);
    }

    /**
     * Merges two field schemas that are assumed to describe the same column.
     * Schema will not be merged if types are incompatible,
     * as per DataType.mergeType(..)
     * For Tuples and Bags, SubSchemas have to be equal be considered
     * compatible; if one side is a bytearray the inner schema of the other
     * side is used.
     * @param fs1 first field schema; if null, fs2 is returned as is
     * @param fs2 second field schema; if null, fs1 is returned as is
     * @return the merged field schema (the given instance itself when the
     * other argument is null)
     * @throws SchemaMergeException if the types (1031) or the inner schemas
     * (1032) are incompatible, or if the merged field schema cannot be
     * created (2124)
     */
    private static FieldSchema mergeFieldSchemaFirstLevelSameAlias(FieldSchema fs1,
            FieldSchema fs2)
    throws SchemaMergeException {
        if(fs1 == null)
            return fs2;
        if(fs2 == null)
            return fs1;

        Schema innerSchema = null;

        String alias = mergeNameSpacedAlias(fs1.alias, fs2.alias);

        byte mergedType = DataType.mergeType(fs1.type, fs2.type) ;

        // If the types cannot be merged
        if (mergedType == DataType.ERROR) {
            int errCode = 1031;
            String msg = "Incompatible types for merging schemas. Field schema: "
                + fs1 + " Other field schema: " + fs2;
            throw new SchemaMergeException(msg, errCode, PigException.INPUT) ;
        }
        if(DataType.isSchemaType(mergedType)) {
            // if one of them is a bytearray, pick inner schema of other one
            if( fs1.type == DataType.BYTEARRAY ){
                innerSchema = fs2.schema;
            }else if(fs2.type == DataType.BYTEARRAY){
                innerSchema = fs1.schema;
            }
            else {
                //in case of types with inner schema such as bags and tuples
                // the inner schema has to be same
                if(!equals(fs1.schema, fs2.schema, false, false)){
                    int errCode = 1032;
                    String msg = "Incompatible types for merging inner schemas of " +
                    " Field schema type: " + fs1 + " Other field schema type: " + fs2;
                    throw new SchemaMergeException(msg, errCode, PigException.INPUT) ;
                }
                innerSchema = fs1.schema;
            }
        }
        try {
            return new FieldSchema(alias, innerSchema, mergedType) ;
        } catch (FrontendException e) {
            // this exception is not expected; keep it as the cause so the
            // underlying failure is not lost
            int errCode = 2124;
            throw new SchemaMergeException(
                    "Error in creating fieldSchema",
                    errCode,
                    PigException.BUG,
                    e
            );
        }
    }
    
    
    /**
     * Reduces two aliases that name the same column to a single alias.
     * Identical aliases are returned unchanged; if one of the aliases is of
     * form 'nm::str1', and other is of the form 'str1', this returns str1.
     * @param alias1
     * @param alias2
     * @return merged alias, or null if the aliases are different and cannot
     * be merged
     * @throws SchemaMergeException
     */
    private static String mergeNameSpacedAlias(String alias1, String alias2)
    throws SchemaMergeException {
        // stays null when the aliases are different, i.e. cannot be merged
        String merged = null;
        if (alias1.equals(alias2)) {
            merged = alias1;
        } else if (alias1.endsWith("::" + alias2)) {
            merged = alias2;
        } else if (alias2.endsWith("::" + alias1)) {
            merged = alias1;
        }
        return merged;
    }

    /**
     * Utility function that calls schema.getFieldSubNameMatch(alias), and
     * converts {@link FrontendException} to {@link SchemaMergeException}
     * (keeping the error code and the original exception as the cause).
     * @param schema schema to search
     * @param alias alias to look for
     * @return FieldSchema as returned by getFieldSubNameMatch
     * @throws SchemaMergeException if the lookup fails
     */
    private static FieldSchema getFieldSubNameMatchThrowSchemaMergeException(
            Schema schema, String alias) throws SchemaMergeException {
        try {
            return schema.getFieldSubNameMatch(alias);
        } catch (FrontendException lookupFailure) {
            String msg = "Caught exception finding FieldSchema for alias " +
            alias;
            throw new SchemaMergeException(msg, lookupFailure.getErrorCode(), lookupFailure);
        }
    }
    
    
    
    /**
     * Builds a schema holding a single unnamed field of the given top level
     * type. That field's inner schema contains one unnamed field per entry of
     * innerTypes, in the order given.
     * @param topLevelType DataType type of the top level element
     * @param innerTypes DataType types of the inner level element(s)
     * @return nested schema representing type of top level element at first
     *         level and inner schema representing types of inner element(s)
     * @throws FrontendException if creating a field schema fails
     */
    public static Schema generateNestedSchema(byte topLevelType, byte... innerTypes) throws FrontendException{
        Schema inner = new Schema();
        for (byte innerType : innerTypes) {
            inner.add(new Schema.FieldSchema(null, innerType));
        }
        // wrap the inner schema in one field of the requested top level type
        return new Schema(new Schema.FieldSchema(null, inner, topLevelType));
    }

    /**
     * Recursively prefix merge two schemas, with merging of "mergeable"
     * types disabled. This is a convenience overload that delegates to
     * {@link #mergePrefixSchema(Schema, boolean, boolean)} passing
     * allowMergeableTypes = false.
     * @param other the other schema to be merged with
     * @param otherTakesAliasPrecedence true if aliases from the other
     *                                  schema take precedence
     * @return the prefix merged schema; this schema itself if other is null
     *
     * @throws SchemaMergeException if they cannot be merged
     */
    public Schema mergePrefixSchema(Schema other,
                               boolean otherTakesAliasPrecedence)
                                    throws SchemaMergeException {
        final boolean allowMergeableTypes = false;
        return this.mergePrefixSchema(other, otherTakesAliasPrecedence, allowMergeableTypes);
    }
    
    /**
     * Recursively prefix merge two schemas. The first other.size() fields of
     * this schema are merged position by position with the fields of the
     * other schema; any remaining fields of this schema are copied to the
     * result unchanged.
     * @param other the other schema to be merged with
     * @param otherTakesAliasPrecedence true if aliases from the other
     *                                  schema take precedence
     * @param allowMergeableTypes true if "mergeable" types should be allowed.
     *   Two types are mergeable if any of the following conditions is true IN THE
     *   BELOW ORDER of checks:
     *   1) if either one has a type null or unknown and other has a type OTHER THAN
     *   null or unknown, the result type will be the latter non null/unknown type
     *   2) If either type is bytearray, then result type will be the other (possibly non BYTEARRAY) type
     *   3) If current type can be cast to the other type, then the result type will be the
     *   other type
     * @return the prefix merged schema; if other is null, this schema itself
     *         is returned. The result's twoLevelAccessRequired flag is taken
     *         from the other schema.
     *
     * @throws SchemaMergeException if the other schema has more fields than
     *         this schema, or if the fields cannot be merged
     */
    public Schema mergePrefixSchema(Schema other,
                               boolean otherTakesAliasPrecedence, boolean allowMergeableTypes)
                                    throws SchemaMergeException {
        if (other == null) {
            return this;
        }

        if (size() < other.size()) {
            int errCode = 1033;
            String msg = "Schema size mismatch for merging schemas. Other schema size greater than schema size. Schema: " + this + ". Other schema: " + other;
            throw new SchemaMergeException(msg, errCode, PigException.INPUT);
        }

        // Typed lists: with raw List, get(idx) yields Object and the
        // assignments to FieldSchema below do not compile.
        List<FieldSchema> outputList = new ArrayList<FieldSchema>();
        List<FieldSchema> mylist = this.mFields;
        List<FieldSchema> otherlist = other.mFields;

        // The other schema is the smaller (or equal sized) one, so iterate
        // up to its size and merge field by field.
        int iterateLimit = otherlist.size();

        int idx = 0;
        for (; idx < iterateLimit; idx++) {
            FieldSchema myFs = mylist.get(idx);
            FieldSchema otherFs = otherlist.get(idx);

            FieldSchema mergedFs = myFs.mergePrefixFieldSchema(otherFs, otherTakesAliasPrecedence, allowMergeableTypes);
            outputList.add(mergedFs);
        }

        // if this schema has leftover fields, append copies of the rest
        for (int i = idx; i < mylist.size(); i++) {
            FieldSchema fs = mylist.get(i);

            if (!DataType.isSchemaType(fs.type)) {
                // non-schema types carry only alias and type
                outputList.add(new FieldSchema(fs.alias, fs.type));
            } else {
                // schema types (e.g. TUPLE & BAG) also carry their inner schema
                try {
                    outputList.add(new FieldSchema(fs.alias, fs.schema, fs.type));
                } catch (FrontendException fee) {
                    int errCode = 1023;
                    String msg = "Unable to create field schema.";
                    throw new SchemaMergeException(msg, errCode, PigException.INPUT, fee);
                }
            }
        }

        Schema s = new Schema(outputList);
        s.setTwoLevelAccessRequired(other.twoLevelAccessRequired);
        return s;
    }

    /**
     * Recursively set NULL type to the specified type in a schema by applying
     * FieldSchema.setFieldSchemaDefaultType to each of its fields. A null
     * schema is ignored.
     * @param s the schema whose NULL type has to be set
     * @param t the specified type
     */
    public static void setSchemaDefaultType(Schema s, byte t) {
        if (s != null) {
            for (FieldSchema field : s.getFields()) {
                FieldSchema.setFieldSchemaDefaultType(field, t);
            }
        }
    }

    /**
     * Returns the current value of the twoLevelAccessRequired flag.
     * @return the twoLevelAccess
     * @deprecated twoLevelAccess is no longer needed
     */
    @Deprecated
    public boolean isTwoLevelAccessRequired() {
        return this.twoLevelAccessRequired;
    }

    /**
     * Stores the given value in the twoLevelAccessRequired flag.
     * @param twoLevelAccess the twoLevelAccess to set
     * @deprecated twoLevelAccess is no longer needed
     */
    @Deprecated
    public void setTwoLevelAccessRequired(boolean twoLevelAccess) {
        twoLevelAccessRequired = twoLevelAccess;
    }
    
    /**
     * Converts a {@link ResourceSchema} into a {@link Schema}, recursing into
     * the nested schema of each field.
     * A field of type BAG may have no inner schema (partial schema). If it
     * does have one, that schema must consist of exactly one field of type
     * TUPLE; otherwise ResourceFieldSchema.throwInvalidSchemaException() is
     * invoked.
     * @param rSchema the resource schema to convert; may be null
     * @return the converted schema, or null if rSchema is null
     * @throws FrontendException if a field schema cannot be created or a bag
     *         field carries an invalid inner schema
     */
    public static Schema getPigSchema(ResourceSchema rSchema)
    throws FrontendException {
        if (rSchema == null) {
            return null;
        }
        // Typed list avoids the unchecked conversion a raw List would need
        // when handed to the Schema constructor.
        List<FieldSchema> fsList = new ArrayList<FieldSchema>();
        for (ResourceFieldSchema rfs : rSchema.getFields()) {
            FieldSchema fs = new FieldSchema(rfs.getName(),
                    rfs.getSchema() == null ?
                            null : getPigSchema(rfs.getSchema()), rfs.getType());

            // A bag without an inner schema is allowed (partial schema);
            // otherwise the inner schema must be exactly one TUPLE field.
            if (rfs.getType() == DataType.BAG && fs.schema != null) {
                if (fs.schema.size() != 1
                        || fs.schema.getField(0).type != DataType.TUPLE) {
                    ResourceFieldSchema.throwInvalidSchemaException();
                }
            }
            fsList.add(fs);
        }
        return new Schema(fsList);
    }

    /**
     * Look for a FieldSchema instance in the schema hierarchy which has the given canonical name.
     * Fields are examined in order; a field is checked first and then, if it
     * has an inner schema, that schema is searched recursively before moving
     * on to the next field.
     * @param canonicalName canonical name
     * @return the FieldSchema instance found, or null if there is no match
     */
    public FieldSchema findFieldSchema(String canonicalName) {
        for (FieldSchema candidate : mFields) {
            if (candidate.canonicalName.equals(canonicalName)) {
                return candidate;
            }
            Schema nested = candidate.schema;
            if (nested == null) {
                continue;
            }
            FieldSchema match = nested.findFieldSchema(canonicalName);
            if (match != null) {
                return match;
            }
        }
        return null;
    }
    
}







© 2015 - 2025 Weber Informatics LLC | Privacy Policy