Many resources are needed to serve project downloads, so please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and then download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.data;
import java.io.File;
import java.io.IOException;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.List;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.PigConfiguration;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.JavaCompilerHelper;
import org.apache.pig.impl.util.ObjectSerializer;
import com.google.common.collect.Lists;
/**
* This class encapsulates the generation of SchemaTuples, as well as some logic
* around shipping code to the distributed cache.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class SchemaTupleClassGenerator {
/** Logger used by the static generation and compilation helpers of this class. */
private static final Log LOG = LogFactory.getLog(SchemaTupleClassGenerator.class);
/** Private no-op constructor; the members visible in this class are all static. */
private SchemaTupleClassGenerator() {}
/**
* The GenContext mechanism provides a level of control in where SchemaTupleFactories
* are used. By attaching a GenContext enum type to the registration of a Schema,
* the code can express the intent of where a SchemaTupleFactory is intended to be used.
* In this way, if a load func and a join both involve Tuples of the same Schema, it's
* possible to use SchemaTupleFactories in one but not in the other.
*/
public static enum GenContext {
/**
* This context is used in UDF code. Currently, this is only used for
* the inputs to UDF's.
*/
UDF (PigConfiguration.SCHEMA_TUPLE_SHOULD_USE_IN_UDF, true, GenerateUdf.class),
/**
* This context is for POForEach. This will use the expected output of a ForEach
* to return a typed Tuple.
*/
FOREACH (PigConfiguration.SCHEMA_TUPLE_SHOULD_USE_IN_FOREACH, true, GenerateForeach.class),
/**
* This context controls whether or not SchemaTuples will be used in FR joins.
* Currently, they will be used in the HashMap that FR Joins construct.
*/
FR_JOIN (PigConfiguration.SCHEMA_TUPLE_SHOULD_USE_IN_FRJOIN, true, GenerateFrJoin.class),
/**
* This context controls whether or not SchemaTuples will be used in merge joins.
*/
MERGE_JOIN (PigConfiguration.SCHEMA_TUPLE_SHOULD_USE_IN_MERGEJOIN, true, GenerateMergeJoin.class),
/**
* All registered Schemas will also be registered in one additional context.
* This context will allow users to "force" the load of a SchemaTupleFactory
* if one is present in any context.
*/
FORCE_LOAD (PigConfiguration.SCHEMA_TUPLE_SHOULD_ALLOW_FORCE, true, GenerateForceLoad.class);
/**
* These annotations are used to mark a given SchemaTuple with
* the context in which is was intended to be generated.
*/
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface GenerateUdf {}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface GenerateForeach {}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface GenerateFrJoin {}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface GenerateMergeJoin {}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface GenerateForceLoad {}
private String key;
private boolean defaultValue;
private Class> annotation;
GenContext(String key, boolean defaultValue, Class> annotation) {
this.key = key;
this.defaultValue = defaultValue;
this.annotation = annotation;
}
public String key() {
return key;
}
public String getAnnotationCanonicalName() {
return annotation.getCanonicalName();
}
/**
* Checks the generated class to see if the annotation
* associated with this enum is present.
* @param clazz
* @return boolean type value
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public boolean shouldGenerate(Class clazz) {
return clazz.getAnnotation(annotation) != null;
}
/**
* Given a job configuration file, this checks to see
* if the default value has been overriden.
* @param conf
* @return boolean type value
*/
public boolean shouldGenerate(Configuration conf) {
String shouldString = conf.get(key);
if (shouldString == null) {
return defaultValue;
}
return Boolean.parseBoolean(shouldString);
}
}
/**
* This value is used to distinguish all of the generated code.
* The general naming scheme used is SchemaTuple_identifier. Note that
* identifiers are incremented before code is actually generated.
*/
private static int nextGlobalClassIdentifier = 0;
/**
 * Rewinds the global class identifier counter back to zero, so the next
 * identifier handed out is 0 again.
 */
protected static void resetGlobalClassIdentifier() {
    SchemaTupleClassGenerator.nextGlobalClassIdentifier = 0;
}
/**
 * Produces the source for the SchemaTuple class of a given Schema under an
 * explicitly supplied identifier and hands it to the compiler helper.
 * The generated class is named {@code SchemaTuple_<id>}.
 * @param s the Schema to generate a SchemaTuple for
 * @param appendable whether the generated SchemaTuple should be appendable
 * @param id the identifier used in the generated class name
 * @param codeDir the directory passed through to code production and compilation
 * @param contexts the contexts in which the SchemaTuple is intended to be instantiated;
 *        one annotation line is emitted per context
 */
protected static void generateSchemaTuple(Schema s, boolean appendable, int id, File codeDir, GenContext... contexts) {
    final String className = "SchemaTuple_" + id;

    // One "@<canonical annotation name>" line per requested context.
    StringBuilder annotationBlock = new StringBuilder();
    for (int i = 0; i < contexts.length; i++) {
        GenContext current = contexts[i];
        LOG.info("Including context: " + current);
        annotationBlock.append("@");
        annotationBlock.append(current.getAnnotationCanonicalName());
        annotationBlock.append("\n");
    }

    String source = produceCodeString(s, appendable, id, annotationBlock.toString(), codeDir);
    LOG.info("Compiling class " + className + " for Schema: " + s + ", and appendability: " + appendable);
    compileCodeString(className, source, codeDir);
}
/**
 * Draws the next global class identifier, generates the SchemaTuple for the
 * given Schema under it, and reports which identifier was used.
 * @param s the Schema to generate a SchemaTuple for
 * @param appendable whether the generated SchemaTuple should be appendable
 * @param codeDir the directory passed through to generation
 * @param contexts the contexts to annotate the generated class with
 * @return the identifier that was assigned to the generated class
 */
private static int generateSchemaTuple(Schema s, boolean appendable, File codeDir, GenContext... contexts) {
    final int assignedId = getNextGlobalClassIdentifier();
    SchemaTupleClassGenerator.generateSchemaTuple(s, appendable, assignedId, codeDir, contexts);
    return assignedId;
}
/**
 * Builds the source text of the SchemaTuple for the given Schema by feeding
 * every field of the Schema, in order, to a TypeInFunctionStringOutFactory.
 * @param s the Schema whose fields drive the generated code
 * @param appendable whether the class should be appendable
 * @param id the identifier of the class being generated
 * @param contextAnnotations the annotation lines handed to the factory
 * @param codeDir the directory handed to the factory
 * @return the string the factory returns from {@code end()}
 */
private static String produceCodeString(Schema s, boolean appendable, int id, String contextAnnotations, File codeDir) {
    TypeInFunctionStringOutFactory factory =
            new TypeInFunctionStringOutFactory(s, id, appendable, contextAnnotations, codeDir);
    for (Schema.FieldSchema field : s.getFields()) {
        factory.process(field);
    }
    String generated = factory.end();
    return generated;
}
/**
 * Hands out the current global class identifier and advances the counter by one.
 * @return the identifier value before the increment
 */
protected static int getNextGlobalClassIdentifier() {
    int current = nextGlobalClassIdentifier;
    nextGlobalClassIdentifier = current + 1;
    return current;
}
/**
 * Compiles generated source code through a JavaCompilerHelper. The absolute
 * path of {@code codeDir} is added to the helper's classpath and is also passed
 * as the first argument of the compile call (presumably the output directory
 * for the class file; confirm against JavaCompilerHelper).
 *
 * @param className the name of the class being compiled
 * @param generatedCodeString the generated source code
 * @param codeDir the directory for generated code
 */
//TODO in the future, we can use ASM to generate the bytecode directly.
private static void compileCodeString(String className, String generatedCodeString, File codeDir) {
    JavaCompilerHelper helper = new JavaCompilerHelper();
    final String generatedDirPath = codeDir.getAbsolutePath();
    helper.addToClassPath(generatedDirPath);

    LOG.debug("Compiling SchemaTuple code with classpath: " + helper.getClassPath());

    helper.compile(
            generatedDirPath,
            new JavaCompilerHelper.JavaSourceFromString(className, generatedCodeString));

    LOG.info("Successfully compiled class: " + className);
}
/**
 * Emits the {@code generatedCodeCompareToSpecific} method of a generated
 * SchemaTuple: a field-by-field comparison against another instance of the
 * same generated class that returns at the first non-zero result.
 */
static class CompareToSpecificString extends TypeInFunctionStringOut {
    /** Identifier of the generated class, used in the emitted parameter type. */
    private int id;

    public CompareToSpecificString(int id, boolean appendable) {
        super(appendable);
        this.id = id;
    }

    /** Emits the method header and the declaration of the result variable. */
    public void prepare() {
        String signature = "protected int generatedCodeCompareToSpecific(SchemaTuple_" + id + " t) {";
        add("@Override");
        add(signature);
        add(" int i = 0;");
    }

    /** Emits the comparison of one field, followed by an early return on inequality. */
    public void process(int fieldNum, Schema.FieldSchema fs) {
        String nullCheck = "checkIfNull_" + fieldNum + "()";
        String getter = "getPos_" + fieldNum + "()";
        add(" i = compare(" + nullCheck + ", " + getter + ", t." + nullCheck + ", t." + getter + ");");
        add(" if (i != 0) {");
        add(" return i;");
        add(" }");
    }

    /** Emits the final return and closes the method. */
    public void end() {
        add(" return i;");
        add("}");
    }
}
//TODO clear up how it deals with nulls etc. IE is the logic correct
/**
 * Emits the {@code generatedCodeCompareTo} method of a generated SchemaTuple:
 * each field is compared against the element at the same position of another
 * SchemaTuple, returning at the first non-zero result.
 */
static class CompareToString extends TypeInFunctionStringOut {
    private int id;

    // Flags kept as declared; nothing in this class reads or writes them.
    boolean compTup = false;
    boolean compStr = false;
    boolean compIsNull = false;
    boolean compByte = false;

    public CompareToString(int id) {
        this.id = id;
    }

    /** Emits the method header and the declaration of the result variable. */
    public void prepare() {
        add("@Override");
        add("protected int generatedCodeCompareTo(SchemaTuple t, boolean checkType) {");
        add(" int i;");
    }

    /** Emits the positional comparison of one field plus the early return. */
    public void process(int fieldNum, Schema.FieldSchema fs) {
        String nullCheck = "checkIfNull_" + fieldNum + "()";
        String getter = "getPos_" + fieldNum + "()";
        add(" i = compareWithElementAtPos(" + nullCheck + ", " + getter + ", t, " + fieldNum + ");");
        add(" if (i != 0) {");
        add(" return i;");
        add(" }");
    }

    /** Emits the "all fields equal" return and closes the method. */
    public void end() {
        add(" return 0;");
        add("}");
    }
}
/**
 * Emits the {@code generatedCodeHashCode} method of a generated SchemaTuple:
 * the hash starts at 17 and is folded through {@code hashCodePiece} once per field.
 */
static class HashCode extends TypeInFunctionStringOut {
    /** Emits the method header and the seed of the running hash. */
    public void prepare() {
        add("@Override");
        add("public int generatedCodeHashCode() {");
        add(" int h = 17;");
    }

    /** Emits the line folding one field's value and null flag into the hash. */
    public void process(int fieldPos, Schema.FieldSchema fs) {
        String getter = "getPos_" + fieldPos + "()";
        String nullCheck = "checkIfNull_" + fieldPos + "()";
        add(" h = hashCodePiece(h, " + getter + ", " + nullCheck + ");");
    }

    /** Emits the return of the accumulated hash and closes the method. */
    public void end() {
        add(" return h;");
        add("}");
    }
}
static class FieldString extends TypeInFunctionStringOut {
private List> listOfQueuesForIds;
private Schema schema;
private int primitives = 0;
private int isNulls = 0;
private int booleanBytes = 0;
private int booleans = 0;
private File codeDir;
public void prepare() {
String s;
try {
s = ObjectSerializer.serialize(schema);
} catch (IOException e) {
throw new RuntimeException("Unable to serialize schema: " + schema, e);
}
add("private static Schema schema = staticSchemaGen(\"" + s + "\");");
}
public void process(int fieldPos, Schema.FieldSchema fs) {
if (!isTuple()) {
if (isPrimitive() && (primitives++ % 8 == 0)) {
add("private byte isNull_"+ isNulls++ +" = (byte)0xFF;");
}
if (isBoolean()) {
if (booleans++ % 8 == 0) {
add("private byte booleanByte_"+ booleanBytes++ +";");
}
} else {
add("private "+typeName()+" pos_"+fieldPos+";");
}
} else {
int id = SchemaTupleClassGenerator.generateSchemaTuple(fs.schema, isAppendable(), codeDir());
for (Queue q : listOfQueuesForIds) {
q.add(id);
}
add("private SchemaTuple_"+id+" pos_"+fieldPos+";");
}
}
@Override
public void end() {
addBreak();
add("@Override");
add("public Schema getSchema() {");
add(" return schema;");
add("}");
addBreak();
}
public FieldString(File codeDir, List> listOfQueuesForIds, Schema schema, boolean appendable) {
super(appendable);
this.codeDir = codeDir;
this.listOfQueuesForIds = listOfQueuesForIds;
this.schema = schema;
}
public File codeDir() {
return codeDir;
}
}
/**
 * Emits the {@code setPos_N} setters of a generated SchemaTuple.
 *
 * Non-tuple fields get a single typed setter. Tuple fields get three overloads:
 * one taking the nested generated class directly, and one each for
 * {@code SchemaTuple} and {@code Tuple} that copy into a lazily created nested
 * instance via {@code setAndCatch}.
 */
static class SetPosString extends TypeInFunctionStringOut {
    /** Identifiers of nested SchemaTuple classes, consumed in field order. */
    private final Queue<Integer> idQueue;
    private int byteField = 0; //this is for setting booleans
    private int byteIncr = 0; //this is for counting the booleans we've encountered

    /**
     * Emits the setter(s) for one field, followed by a blank line.
     * @param fieldPos position of the field, used as suffix of the emitted names
     * @param fs schema of the field (not read by this generator)
     */
    public void process(int fieldPos, Schema.FieldSchema fs) {
        if (!isTuple()) {
            add("public void setPos_"+fieldPos+"("+typeName()+" v) {");
            if (isPrimitive()) {
                add(" setNull_"+fieldPos+"(false);");
            }
            if (!isBoolean()) {
                add(" pos_"+fieldPos+" = v;");
            } else {
                // Booleans are stored as bits: byteIncr is the bit within the current
                // holder byte and rolls over to the next holder after 8 booleans.
                add(" booleanByte_" + byteField + " = BytesHelper.setBitByPos(booleanByte_" + byteField + ", v, " + byteIncr++ + ");");
                if (byteIncr % 8 == 0) {
                    byteIncr = 0;
                    byteField++;
                }
            }
            add("}");
        } else {
            // Throws NoSuchElementException if no identifier was queued for this field.
            int nestedSchemaTupleId = idQueue.remove();
            add("public void setPos_"+fieldPos+"(SchemaTuple_"+nestedSchemaTupleId+" t) {");
            add(" pos_" + fieldPos + " = t;");
            add("}");
            addBreak();
            addCopyingNestedSetter(fieldPos, "SchemaTuple", nestedSchemaTupleId);
            addBreak();
            addCopyingNestedSetter(fieldPos, "Tuple", nestedSchemaTupleId);
        }
        addBreak();
    }

    /**
     * Emits a setter overload that lazily instantiates the nested SchemaTuple
     * and copies the argument into it with {@code setAndCatch}.
     */
    private void addCopyingNestedSetter(int fieldPos, String paramType, int nestedSchemaTupleId) {
        add("public void setPos_"+fieldPos+"(" + paramType + " t) {");
        add(" if (pos_"+fieldPos+" == null) {");
        add(" pos_"+fieldPos+" = new SchemaTuple_"+nestedSchemaTupleId+"();");
        add(" }");
        add(" pos_" + fieldPos + ".setAndCatch(t);");
        add("}");
    }

    /**
     * @param idQueue identifiers of nested SchemaTuple classes, one per tuple field, in field order
     */
    public SetPosString(Queue<Integer> idQueue) {
        this.idQueue = idQueue;
    }
}
static class ListSetString extends TypeInFunctionStringOut {
public void prepare() {
add("@Override");
add("public void generatedCodeSetIterator(Iterator