// org.apache.pig.data.TupleFactory (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.data;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTupleDefaultRawComparator;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
/**
* A factory to construct tuples. This class is abstract so that users can
* override the tuple factory if they desire to provide their own that
* returns their implementation of a tuple. If the property
* pig.data.tuple.factory.name is set to a class name and
* pig.data.tuple.factory.jar is set to a URL pointing to a jar that
* contains the above named class, then {@link #getInstance()} will create a
* an instance of the named class using the indicated jar. Otherwise, it
* will create an instance of {@link DefaultTupleFactory}.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class TupleFactory implements TupleMaker {
private static TupleFactory gSelf = null;
/**
* Get a reference to the singleton factory.
* @return The TupleFactory to use to construct tuples.
*/
public static TupleFactory getInstance() {
if (gSelf == null) {
String factoryName =
System.getProperty("pig.data.tuple.factory.name");
String factoryJar =
System.getProperty("pig.data.tuple.factory.jar");
if (factoryName != null && factoryJar != null) {
try {
URL[] urls = new URL[1];
urls[0] = new URL(factoryJar);
ClassLoader loader = new URLClassLoader(urls,
TupleFactory.class.getClassLoader());
Class c = Class.forName(factoryName, true, loader);
Object o = c.newInstance();
if (!(o instanceof TupleFactory)) {
throw new RuntimeException("Provided factory " +
factoryName + " does not extend TupleFactory!");
}
gSelf = (TupleFactory)o;
} catch (Exception e) {
if (e instanceof RuntimeException) {
// We just threw this
RuntimeException re = (RuntimeException)e;
throw re;
}
throw new RuntimeException("Unable to instantiate "
+ "tuple factory " + factoryName, e);
}
} else if (factoryName != null) {
try {
Class c = Class.forName(factoryName);
Object o = c.newInstance();
if (!(o instanceof TupleFactory)) {
throw new RuntimeException("Provided factory " +
factoryName + " does not extend TupleFactory!");
}
gSelf = (TupleFactory)o;
} catch (Exception e) {
if (e instanceof RuntimeException) {
// We just threw this
RuntimeException re = (RuntimeException)e;
throw re;
}
throw new RuntimeException("Unable to instantiate "
+ "tuple factory " + factoryName, e);
}
} else {
gSelf = new BinSedesTupleFactory();
}
}
return gSelf;
}
/**
* Create an empty tuple. This should be used as infrequently as
* possible, use newTuple(int) instead.
* @return Empty new tuple.
*/
public abstract Tuple newTuple();
/**
* Create a tuple with size fields. Whenever possible this is preferred
* over the null constructor, as the constructor can preallocate the
* size of the container holding the fields. Once this is called, it
* is legal to call Tuple.set(x, object), where x < size.
* @param size Number of fields in the tuple.
* @return Tuple with size fields
*/
public abstract Tuple newTuple(int size);
/**
* Create a tuple from the provided list of objects. The underlying list
* will be copied.
* @param c List of objects to use as the fields of the tuple.
* @return A tuple with the list objects as its fields
*/
public abstract Tuple newTuple(List c);
/**
* Create a tuple from a provided list of objects, keeping the provided
* list. The new tuple will take over ownership of the provided list.
* @param list List of objects that will become the fields of the tuple.
* @return A tuple with the list objects as its fields
*/
public abstract Tuple newTupleNoCopy(List list);
/**
* Create a tuple with a single element. This is useful because of
* the fact that bags (currently) only take tuples, we often end up
* sticking a single element in a tuple in order to put it in a bag.
* @param datum Datum to put in the tuple.
* @return A tuple with one field
*/
public abstract Tuple newTuple(Object datum);
/**
* Return the actual class representing a tuple that the implementing
* factory will be returning. This is needed because Hadoop needs
* to know the exact class we will be using for input and output.
* @return Class that implements tuple.
*/
public abstract Class extends Tuple> tupleClass();
protected TupleFactory() {
}
/**
* Provided for testing purposes only. This function should never be
* called by anybody but the unit tests.
*/
public static void resetSelf() {
gSelf = null;
}
/**
* Return the actual class implementing the raw comparator for tuples
* that the factory will be returning. Ovverride this to allow Hadoop to
* speed up tuple sorting. The actual returned class should know the
* serialization details for the tuple. The default implementation
* (PigTupleDefaultRawComparator) will serialize the data before comparison
* @return Class that implements tuple raw comparator.
*/
public Class extends TupleRawComparator> tupleRawComparatorClass() {
return PigTupleDefaultRawComparator.class;
}
/**
* This method is used to inspect whether the Tuples created by this factory
* will be of a fixed size when they are created. In practical terms, this means
* whether they support append or not.
* @return where the Tuple is fixed or not
*/
public abstract boolean isFixedSize();
}