All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.pig.data.TupleFactory Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.data;

import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTupleDefaultRawComparator;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;

/**
 * A factory to construct tuples.  This class is abstract so that users can
 * override the tuple factory if they desire to provide their own that
 * returns their implementation of a tuple.
 *
 * <p>{@link #getInstance()} chooses the implementation from two system
 * properties:
 * <ul>
 * <li>If <code>pig.data.tuple.factory.name</code> is set to a class name and
 * <code>pig.data.tuple.factory.jar</code> is set to a URL pointing to a jar
 * that contains the named class, an instance of the named class is created
 * using the indicated jar.</li>
 * <li>If only <code>pig.data.tuple.factory.name</code> is set, the named
 * class is loaded without an additional jar.</li>
 * <li>Otherwise an instance of {@link BinSedesTupleFactory} is created.</li>
 * </ul>
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class TupleFactory implements TupleMaker {
    /** System property naming the TupleFactory implementation class. */
    private static final String FACTORY_NAME_PROPERTY =
        "pig.data.tuple.factory.name";

    /** System property holding the URL of a jar containing that class. */
    private static final String FACTORY_JAR_PROPERTY =
        "pig.data.tuple.factory.jar";

    /** Lazily created singleton; cleared again by {@link #resetSelf()}. */
    private static TupleFactory gSelf = null;

    /**
     * Get a reference to the singleton factory.  The factory is created on
     * the first call (or the first call after {@link #resetSelf()}) and
     * cached afterwards.  This method performs no synchronization.
     * @return The TupleFactory to use to construct tuples.
     * @throws RuntimeException if a factory class name is configured but the
     * class cannot be loaded, cannot be instantiated, or does not extend
     * TupleFactory.
     */
    public static TupleFactory getInstance() {
        if (gSelf == null) {
            String factoryName = System.getProperty(FACTORY_NAME_PROPERTY);
            String factoryJar = System.getProperty(FACTORY_JAR_PROPERTY);
            if (factoryName != null) {
                gSelf = loadFactory(factoryName, factoryJar);
            } else {
                // The jar property alone is ignored: without a class name
                // there is nothing to load from it.
                gSelf = new BinSedesTupleFactory();
            }
        }
        return gSelf;
    }

    /**
     * Load and instantiate a user supplied factory class.
     * @param factoryName Name of the class to instantiate, must not be null.
     * @param factoryJar URL of a jar to load the class from, or null to load
     * the class without an additional jar.
     * @return A new instance of the named class.
     * @throws RuntimeException if the class cannot be loaded or instantiated,
     * or if it does not extend TupleFactory.
     */
    private static TupleFactory loadFactory(String factoryName,
                                            String factoryJar) {
        try {
            Class<?> c;
            if (factoryJar != null) {
                URL[] urls = new URL[] { new URL(factoryJar) };
                // The loader is intentionally left open: the factory class
                // (and anything it references) is loaded through it.
                ClassLoader loader = new URLClassLoader(urls,
                    TupleFactory.class.getClassLoader());
                c = Class.forName(factoryName, true, loader);
            } else {
                c = Class.forName(factoryName);
            }
            Object o = c.newInstance();
            if (!(o instanceof TupleFactory)) {
                throw new RuntimeException("Provided factory " +
                    factoryName + " does not extend TupleFactory!");
            }
            return (TupleFactory)o;
        } catch (RuntimeException re) {
            // Either the type check above or the factory's own constructor
            // threw this; pass it through unchanged.
            throw re;
        } catch (Exception e) {
            throw new RuntimeException("Unable to instantiate "
                + "tuple factory " + factoryName, e);
        }
    }
    
    /**
     * Create an empty tuple.  This should be used as infrequently as
     * possible, use newTuple(int) instead.
     * @return Empty new tuple.
     */
    public abstract Tuple newTuple();

    /**
     * Create a tuple with size fields.  Whenever possible this is preferred
     * over the null constructor, as the constructor can preallocate the
     * size of the container holding the fields.  Once this is called, it
     * is legal to call Tuple.set(x, object), where x &lt; size.
     * @param size Number of fields in the tuple.
     * @return Tuple with size fields
     */
    public abstract Tuple newTuple(int size);
    
    /**
     * Create a tuple from the provided list of objects.  The underlying list
     * will be copied.
     * @param c List of objects to use as the fields of the tuple.
     * @return A tuple with the list objects as its fields
     */
    public abstract Tuple newTuple(List c);

    /**
     * Create a tuple from a provided list of objects, keeping the provided
     * list.  The new tuple will take over ownership of the provided list.
     * @param list List of objects that will become the fields of the tuple.
     * @return A tuple with the list objects as its fields
     */
    public abstract Tuple newTupleNoCopy(List list);

    /**
     * Create a tuple with a single element.  This is useful because of
     * the fact that bags (currently) only take tuples, we often end up
     * sticking a single element in a tuple in order to put it in a bag.
     * @param datum Datum to put in the tuple.
     * @return A tuple with one field
     */
    public abstract Tuple newTuple(Object datum);

    /**
     * Return the actual class representing a tuple that the implementing
     * factory will be returning.  This is needed because Hadoop needs
     * to know the exact class we will be using for input and output.
     * @return Class that implements tuple.
     */
    public abstract Class tupleClass();
    
    /**
     * Only subclasses may construct a factory; everybody else obtains the
     * shared instance through {@link #getInstance()}.
     */
    protected TupleFactory() {
    }

    /**
     * Provided for testing purposes only.  This function should never be
     * called by anybody but the unit tests.  It discards the cached
     * singleton so that the next call to {@link #getInstance()} creates a
     * new factory.
     */
    public static void resetSelf() {
        gSelf = null;
    }
    
    /**
     * Return the actual class implementing the raw comparator for tuples
     * that the factory will be returning. Override this to allow Hadoop to
     * speed up tuple sorting. The actual returned class should know the
     * serialization details for the tuple. The default implementation 
     * (PigTupleDefaultRawComparator) will serialize the data before comparison
     * @return Class that implements tuple raw comparator.
     */
    public Class tupleRawComparatorClass() {
        return PigTupleDefaultRawComparator.class;
    }

    /**
     * This method is used to inspect whether the Tuples created by this factory
     * will be of a fixed size when they are created. In practical terms, this means
     * whether they support append or not.
     * @return whether the Tuple is fixed size or not
     */
    public abstract boolean isFixedSize();

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy