Downloading a project consumes considerable server resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download or modify it as often as you like.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.newplan.logical.relational;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
/**
* Schema, from a logical perspective.
*/
public class LogicalSchema {
public static class LogicalFieldSchema {
// Name of the field; may be null (toString prints a null alias as an empty name).
public String alias;
// Type code of the field; compared against DataType constants such as DataType.BAG.
public byte type;
// Unique id of the field; -1 marks an id that has not been assigned yet.
public long uid;
// Nested schema of the field; null when the field carries no inner schema.
public LogicalSchema schema;
/**
 * Creates a field schema whose uid is left unassigned (-1).
 *
 * @param alias  name of the field
 * @param schema nested schema of the field
 * @param type   type code of the field
 */
public LogicalFieldSchema(String alias, LogicalSchema schema, byte type) {
this(alias, schema, type, -1);
}
/**
 * Copy constructor. The copy is shallow: alias, type and uid are copied and
 * the nested schema reference is shared with the source field schema.
 *
 * @param fs field schema to copy from
 */
public LogicalFieldSchema(LogicalFieldSchema fs) {
this(fs.alias, fs.schema, fs.type, fs.uid);
}
/**
 * Creates a field schema with every attribute given explicitly.
 *
 * @param alias  name of the field
 * @param schema nested schema of the field
 * @param type   type code of the field
 * @param uid    unique id of the field
 */
public LogicalFieldSchema(String alias, LogicalSchema schema, byte type, long uid) {
    this.uid = uid;
    this.schema = schema;
    this.type = type;
    this.alias = alias;
}
/**
 * Compares this field schema with another object while ignoring aliases.
 * Two field schemas match when their types are the same and their nested
 * schemas are either both absent or equal. The uid is never compared.
 *
 * @param other object to compare with
 * @return true if equal
 */
public boolean isEqual(Object other) {
    final boolean compareAlias = false;
    return isEqual(other, compareAlias);
}
/**
 * Compares this field schema with another object. Two field schemas match
 * when their types are the same and their nested schemas are either both
 * absent or equal. When compareAlias is true the alias is compared as well,
 * but only if this field schema's own alias is not null.
 * NOTE(review): because of that one-sided null check the alias comparison
 * is not symmetric - confirm that this is intended.
 *
 * @param other object to compare with
 * @param compareAlias whether aliases take part in the comparison
 * @return true if equal
 */
public boolean isEqual(Object other, boolean compareAlias) {
    if (!(other instanceof LogicalFieldSchema)) {
        return false;
    }
    final LogicalFieldSchema that = (LogicalFieldSchema) other;

    // A null alias on this side is never treated as a mismatch.
    final boolean aliasMismatch =
            compareAlias && alias != null && !alias.equals(that.alias);
    if (aliasMismatch || type != that.type) {
        return false;
    }

    if (schema == null) {
        // Equal only when the other side has no nested schema either.
        return that.schema == null;
    }
    return schema.isEqual(that.schema, compareAlias);
}
/**
 * Renders this field schema as "alias[#uid]:type". Bags, tuples and maps
 * additionally print their nested schema inside "{}", "()" and "()"
 * respectively; a map without a nested schema prints as plain ":map".
 *
 * @param verbose when true the uid is appended to the alias as "#uid"
 * @return textual form of this field schema
 */
public String toString(boolean verbose) {
    final StringBuilder text = new StringBuilder();
    if (alias != null) {
        text.append(alias);
    }
    if (verbose) {
        text.append("#").append(uid);
    }

    if (type == DataType.BAG) {
        text.append(":bag{");
        if (schema != null) {
            text.append(schema.toString(verbose));
        }
        text.append("}");
    } else if (type == DataType.TUPLE) {
        text.append(":tuple(");
        if (schema != null) {
            text.append(schema.toString(verbose));
        }
        text.append(")");
    } else if (type == DataType.MAP) {
        text.append(":map");
        if (schema != null) {
            text.append("(").append(schema.toString(verbose)).append(")");
        }
    } else {
        // Every other type prints the name reported by DataType.
        text.append(":").append(DataType.findTypeName(type));
    }
    return text.toString();
}
/**
 * Renders this field schema in verbose form, i.e. including the uid.
 *
 * @return the result of {@code toString(true)}
 */
@Override
public String toString() {
    return toString(true);
}
/**
 * Assigns a fresh uid to this field schema if its uid is still -1, then
 * does the same for every field of the nested schema, if there is one.
 */
public void stampFieldSchema() {
    if (uid == -1) {
        uid = LogicalExpression.getNextUid();
    }
    if (schema == null) {
        return;
    }
    for (LogicalFieldSchema nested : schema.getFields()) {
        nested.stampFieldSchema();
    }
}
private boolean compatible(LogicalFieldSchema uidOnlyFieldSchema) {
if (uidOnlyFieldSchema==null)
return false;
if (this.schema==null && uidOnlyFieldSchema.schema!=null ||
this.schema!=null && uidOnlyFieldSchema.schema==null)
return false;
if (this.schema!=null) {
if (this.schema.size()!=uidOnlyFieldSchema.schema.size())
return false;
for (int i=0;i fields;
/**
 * Creates an empty schema, i.e. one without any fields.
 */
public LogicalSchema() {
    // Typed list instead of a raw ArrayList so element access needs no casts.
    fields = new ArrayList<LogicalFieldSchema>();
}
/**
 * Resets the uid of every field schema held by this schema by calling
 * resetUid() on each of them in order.
 */
public void resetUid() {
    for (LogicalFieldSchema field : fields) {
        field.resetUid();
    }
}
/**
 * Recursively compare two schemas to check if the input schema
 * can be cast to the cast schema. Only the first outSch.size() fields of
 * the input are inspected, so the input may have more fields than the cast.
 *
 * @param inSch schema of the cast input
 * @param outSch schema of the cast operator
 * @return true if castable
 */
public static boolean castable(LogicalSchema inSch, LogicalSchema outSch) {
    // A missing cast schema is never castable. This also covers the case
    // where both schemas are null, which is reported as NOT castable.
    // NOTE(review): the previous comment claimed two null schemas are
    // castable while the code returned false; the behaviour is kept as is.
    if (outSch == null) {
        return false;
    }
    // Cast to a more specific type is good
    if (inSch == null) {
        return true;
    }
    if (outSch.size() > inSch.size()) {
        return false;
    }

    Iterator<LogicalFieldSchema> outIt = outSch.fields.iterator();
    Iterator<LogicalFieldSchema> inIt = inSch.fields.iterator();
    // Iterate only for the number of fields in the cast; the size check
    // above guarantees the input iterator has at least as many elements.
    while (outIt.hasNext()) {
        LogicalFieldSchema outFs = outIt.next();
        LogicalFieldSchema inFs = inIt.next();
        // Compare recursively using field schema
        if (!LogicalFieldSchema.castable(inFs, outFs)) {
            return false;
        }
    }
    return true;
}
/**
 * Appends a field to the end of this schema.
 *
 * @param field field schema to append
 */
public void addField(LogicalFieldSchema field) {
    this.fields.add(field);
}
/**
 * Fetch a field by alias. An exact alias match is preferred; if there is
 * none, fields whose scoped alias ends with "::" followed by the requested
 * alias are considered instead.
 *
 * @param alias alias to look up; a null alias never matches
 * @return field associated with alias, or null if no such field
 * @throws FrontendException (error code 1025) if more than one field matches
 */
public LogicalFieldSchema getField(String alias) throws FrontendException {
    if (alias == null) {
        return null;
    }

    LogicalFieldSchema result = null;
    // first look for an exact match
    for (LogicalFieldSchema fs : fields) {
        if (alias.equals(fs.alias)) {
            if (result != null) {
                throw new FrontendException(
                        "Found more than one match: " + result.alias + ", " + fs.alias, 1025);
            }
            result = fs;
        }
    }
    if (result != null) {
        return result;
    }

    // if no exact match is found, look for matches for scoped aliases.
    // A plain suffix test is used instead of a regular expression so that
    // characters in the alias are never interpreted as regex syntax and no
    // pattern has to be compiled per field.
    final String scopedSuffix = "::" + alias;
    for (LogicalFieldSchema fs : fields) {
        if (fs.alias != null && fs.alias.endsWith(scopedSuffix)) {
            if (result != null) {
                throw new FrontendException(
                        "Found more than one match: " + result.alias + ", " + fs.alias, 1025);
            }
            result = fs;
        }
    }
    return result;
}
/**
 * Given an alias name, find the associated LogicalFieldSchema. If exact name is
 * not found see if any field matches the part of the 'namespaced' alias.
 * eg. if given alias is nm::a , and schema is (a,b). It will return
 * FieldSchema of a.
 * if given alias is nm::a and schema is (nm2::a, b), it will return null
 *
 * @param alias Alias to look up.
 * @return LogicalFieldSchema, or null if no such alias is in this tuple.
 * @throws FrontendException if the lookup through getField(String) is
 *         ambiguous, or (error code 1116) if more than one field matches
 *         the namespaced alias
 */
public LogicalFieldSchema getFieldSubNameMatch(String alias) throws FrontendException {
    if (alias == null) {
        return null;
    }
    LogicalFieldSchema fs = getField(alias);
    if (fs != null) {
        return fs;
    }

    // No direct match: collect the fields whose alias is the trailing part
    // of the given namespaced alias.
    final String sep = "::";
    List<LogicalFieldSchema> matchedFieldSchemas = new ArrayList<LogicalFieldSchema>();
    if (alias.contains(sep)) {
        for (LogicalFieldSchema field : fields) {
            // Fields without an alias are skipped; string concatenation would
            // otherwise turn a null alias into the literal text "null".
            if (field.alias != null && alias.endsWith(sep + field.alias)) {
                matchedFieldSchemas.add(field);
            }
        }
    }

    if (matchedFieldSchemas.size() > 1) {
        StringBuilder sb = new StringBuilder("Found more than one " +
                "sub alias name match: ");
        boolean hasNext = false;
        for (LogicalFieldSchema matchFs : matchedFieldSchemas) {
            if (hasNext) {
                sb.append(", ");
            } else {
                hasNext = true;
            }
            sb.append(matchFs.alias);
        }
        int errCode = 1116;
        throw new FrontendException(sb.toString(), errCode, PigException.INPUT);
    }
    return matchedFieldSchemas.isEmpty() ? null : matchedFieldSchemas.get(0);
}
/**
 * Looks up the position of the field with the given alias, using the same
 * matching rules as getField(String).
 *
 * @param alias alias to look up
 * @return index of the matching field, or -1 if no field matches or the
 *         lookup fails with a FrontendException
 */
public int getFieldPosition(String alias) {
    final LogicalFieldSchema match;
    try {
        match = getField(alias);
    } catch (FrontendException ignored) {
        // A failed lookup is reported the same way as a missing alias.
        return -1;
    }
    return match == null ? -1 : fields.indexOf(match);
}
/**
 * Fetch a field by its position in this schema. No bounds checking is done
 * here; an invalid position is handled by the underlying list.
 *
 * @param fieldNum zero-based position of the field to fetch
 * @return field stored at that position
 */
public LogicalFieldSchema getField(int fieldNum) {
    final LogicalFieldSchema field = fields.get(fieldNum);
    return field;
}
/**
 * Get all fields. The returned list is the schema's own backing list, not
 * a copy, so changes made through it are visible in this schema.
 *
 * @return list of all fields
 */
public List<LogicalFieldSchema> getFields() {
    return fields;
}
/**
 * Reports how many fields this schema currently holds.
 *
 * @return number of fields
 */
public int size() {
    final int fieldCount = fields.size();
    return fieldCount;
}
/**
 * Compares this schema with another object while ignoring field aliases.
 * Two schemas match when they have the same number of fields and the
 * fields at each position are equal.
 *
 * @param other object to compare with
 * @return true if equal
 */
public boolean isEqual(Object other) {
    final boolean compareAlias = false;
    return isEqual(other, compareAlias);
}
/**
 * Compares this schema with another object. Two schemas match when they
 * have the same number of fields and the fields at each position are
 * equal. Field aliases only take part when compareAlias is true.
 *
 * @param other object to compare with; anything that is not a
 *        LogicalSchema (including null) is never equal
 * @param compareAlias whether field aliases are compared as well
 * @return true if equal
 */
public boolean isEqual(Object other, boolean compareAlias) {
    if (!(other instanceof LogicalSchema)) {
        return false;
    }
    final LogicalSchema that = (LogicalSchema) other;

    final int count = size();
    if (count != that.size()) {
        return false;
    }
    for (int idx = 0; idx < count; idx++) {
        final LogicalFieldSchema mine = getField(idx);
        if (!mine.isEqual(that.getField(idx), compareAlias)) {
            return false;
        }
    }
    return true;
}
/**
 * Searches for the top-level field that carries the given uid, either
 * directly or somewhere inside its nested schema.
 *
 * @param uid the uid to look for
 * @return index of the first top-level field that has or contains the uid,
 *         -1 if not found
 */
public int findField(long uid) {
    final int count = size();
    for (int idx = 0; idx < count; idx++) {
        final LogicalFieldSchema candidate = getField(idx);
        // Direct hit on the field itself.
        final boolean direct = candidate.uid == uid;
        // Otherwise descend into the nested schema, when there is one.
        final boolean nested = !direct
                && candidate.schema != null
                && candidate.schema.findField(uid) != -1;
        if (direct || nested) {
            return idx;
        }
    }
    return -1;
}
/**
 * Strategy used when two schemas are merged.
 */
public enum MergeMode {
    /** Load/ForEach merge: the first schema is the user declared one. */
    LoadForEach,
    /** Load/ForEach variant; presumably for nested schemas - TODO confirm. */
    LoadForEachInner,
    /** Union merge: the first schema is the left side, the second the right side. */
    Union,
    /** Union variant; presumably for nested schemas - TODO confirm. */
    UnionInner
}
/**
* Merge two schemas.
* @param s1 In Load/ForEach, s1 is user declared schema; In Union, s1 is left side.
* @param s2 In Load/ForEach, s2 is infered schema; In Union, s2 is right side.
* @param mode We merge schema in Load/Foreach/Union. In Load/Foreach, we always take s1 if compatible (s1 is set to be user defined schema),
* In union, we take more specific type (between numeric and string, we take string). In the case type mismatch in s1/s2,
* we expect TypeCheckingVisitor will fill the gap later.
* @return a merged schema, or null if the merge fails
*/
public static LogicalSchema merge(LogicalSchema s1, LogicalSchema s2, MergeMode mode) throws FrontendException {
// If any of the schema is null, take the other party
if (s1==null || s2==null) {
if (mode==MergeMode.LoadForEach||mode==MergeMode.LoadForEachInner) {
if (s1!=null) return s1.deepCopy();
else if (s2!=null) return s2.deepCopy();
else return null;
}
else // Union/UnionInner, return null
return null;
}
if (s1.size()!=s2.size()) {
if (mode==MergeMode.Union) // In union, incompatible type result a null schema
return null;
else
throw new FrontendException("Incompatable schema: left is \"" + s1.toString(false) + "\", right is \"" + s2.toString(false) + "\"", 1031);
}
LogicalSchema mergedSchema = new LogicalSchema();
for (int i=0;i schemas)
throws FrontendException{
LogicalSchema mergedSchema = null;
// list of schemas that have currently been merged, used in error message
ArrayList mergedSchemas = new ArrayList(schemas.size());
for(LogicalSchema sch : schemas){
if(mergedSchema == null){
mergedSchema = sch.deepCopy();
mergedSchemas.add(sch);
continue;
}
try{
mergedSchema = mergeSchemaByAlias( mergedSchema, sch );
mergedSchemas.add(sch);
}catch(FrontendException e){
String msg = "Error merging schema: (" + sch + ") with "
+ "merged schema: (" + mergedSchema + ")" + " of schemas : "
+ mergedSchemas;
throw new FrontendException(msg, e);
}
}
return mergedSchema;
}
/**
 * Merges two schemas using their column aliases
 * (unlike mergeSchema(..) functions which merge using positions)
 * Schema will not be merged if types are incompatible,
 * as per DataType.mergeType(..)
 * For Tuples and Bags, SubSchemas have to be equal be considered compatible
 *
 * @param schema1 first schema; its fields come first in the result
 * @param schema2 second schema; fields without a counterpart in schema1
 *        are appended after the fields of schema1
 * @return a new schema holding the merged fields
 * @throws FrontendException if a field has a null alias or an alias lookup
 *         is ambiguous
 */
public static LogicalSchema mergeSchemaByAlias(LogicalSchema schema1, LogicalSchema schema2)
        throws FrontendException {
    LogicalSchema mergedSchema = new LogicalSchema();
    // Fields of schema2 that were already merged with a field of schema1.
    HashSet<LogicalFieldSchema> schema2colsAdded = new HashSet<LogicalFieldSchema>();

    // add/merge fields present in first schema
    for (LogicalFieldSchema fs1 : schema1.getFields()) {
        checkNullAlias(fs1, schema1);
        LogicalFieldSchema fs2 = schema2.getFieldSubNameMatch(fs1.alias);
        if (fs2 != null) {
            if (schema2colsAdded.contains(fs2)) {
                // alias corresponds to multiple fields in schema1,
                // just do a lookup on
                // schema1 , that will throw the appropriate error.
                schema1.getFieldSubNameMatch(fs2.alias);
            }
            schema2colsAdded.add(fs2);
            LogicalFieldSchema mergedFs = LogicalFieldSchema.merge(fs1, fs2, MergeMode.Union);
            mergedFs.alias = mergeNameSpacedAlias(fs1.alias, fs2.alias);
            if (mergedFs.alias == null) {
                // The aliases could not be reconciled; keep the alias of schema1.
                mergedFs.alias = fs1.alias;
            }
            mergedSchema.addField(mergedFs);
        } else {
            mergedSchema.addField(new LogicalFieldSchema(fs1));
        }
    }

    // add schemas from 2nd schema, that are not already present in
    // merged schema
    for (LogicalFieldSchema fs2 : schema2.getFields()) {
        checkNullAlias(fs2, schema2);
        if (!schema2colsAdded.contains(fs2)) {
            mergedSchema.addField(new LogicalFieldSchema(fs2));
        }
    }
    return mergedSchema;
}
/**
 * Rejects a field that has no alias, because such a field cannot take part
 * in an alias based merge.
 *
 * @param fs field schema to check
 * @param schema schema the field belongs to; only used in the error message
 * @throws FrontendException if the alias of fs is null
 */
private static void checkNullAlias(LogicalFieldSchema fs, LogicalSchema schema)
        throws FrontendException {
    if (fs.alias != null) {
        return;
    }
    final String message = "Schema having field with null alias cannot be merged "
            + "using alias. Schema :" + schema;
    throw new FrontendException(message);
}
/**
 * Reconciles two aliases. Identical aliases are returned unchanged. If one
 * alias is the other one prefixed by a namespace (for example 'nm::str1'
 * and 'str1'), the shorter, unqualified alias is returned.
 *
 * @param alias1 first alias
 * @param alias2 second alias
 * @return the reconciled alias, or null if the aliases are unrelated
 */
private static String mergeNameSpacedAlias(String alias1, String alias2)
        throws FrontendException {
    final String merged;
    if (alias1.equals(alias2)) {
        merged = alias1;
    } else if (alias1.endsWith("::" + alias2)) {
        // alias1 is the namespaced form of alias2
        merged = alias2;
    } else if (alias2.endsWith("::" + alias1)) {
        // alias2 is the namespaced form of alias1
        merged = alias1;
    } else {
        // the aliases are different, alias cannot be merged
        merged = null;
    }
    return merged;
}
/**
 * Recursively compare two schemas for equality
 *
 * @param schema first schema, may be null
 * @param other second schema, may be null
 * @param relaxInner if true, inner schemas will not be checked
 * @param relaxAlias if true, aliases will not be checked
 * @return true if schemas are equal, false otherwise; two null schemas are
 *         equal, a null and a non-null schema are not
 */
public static boolean equals(LogicalSchema schema,
                             LogicalSchema other,
                             boolean relaxInner,
                             boolean relaxAlias) {
    // If both of them are null, they are equal
    if ((schema == null) && (other == null)) {
        return true;
    }
    // otherwise
    if (schema == null || other == null) {
        return false;
    }
    if (schema.size() != other.size()) {
        return false;
    }

    // Both lists have the same size, so the iterators advance in lock step.
    Iterator<LogicalFieldSchema> i = schema.fields.iterator();
    Iterator<LogicalFieldSchema> j = other.fields.iterator();
    while (i.hasNext()) {
        LogicalFieldSchema myFs = i.next();
        LogicalFieldSchema otherFs = j.next();

        if (!relaxAlias) {
            if (myFs.alias == null && otherFs.alias == null) {
                // good
            } else if (myFs.alias == null) {
                return false;
            } else if (!myFs.alias.equals(otherFs.alias)) {
                return false;
            }
        }

        if (myFs.type != otherFs.type) {
            return false;
        }

        if (!relaxInner && !LogicalFieldSchema.equals(myFs, otherFs, false, relaxAlias)) {
            // Compare recursively using field schema
            return false;
        }
    }
    return true;
}
/**
 * Old Pig schema does not require a tuple schema inside a bag;
 * now it is required to have that. This method fills the gap by
 * calling normalize() on every field of this schema.
 */
public void normalize() {
    for (LogicalFieldSchema field : getFields()) {
        field.normalize();
    }
}
}