All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pig.LoadPushDown Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.logicalLayer.FrontendException;

/**
 * This interface defines how to communicate to Pig what functionality can
 * be pushed into the loader.  If a given loader does not implement this interface
 * it will be assumed that it is unable to accept any functionality for push down.
 * @since Pig 0.7
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public interface LoadPushDown {

    /**
     * The operations that Pig is able to push down into a loader.
     */
    enum OperatorSet {
        PROJECTION
    }

    /**
     * Determine the operators that can be pushed to the loader.
     * Note that by indicating a loader can accept a certain operator
     * (such as selection) the loader is not promising that it can handle
     * all selections.  When it is passed the actual operators to
     * push down it will still have a chance to reject them.
     * @return list of all features that the loader can support
     */
    List<OperatorSet> getFeatures();

    /**
     * Tell the loader which fields will be needed.  This helps loaders that read
     * data stored in a columnar format, where knowing the columns ahead of time
     * saves scans.  The Pig runtime does not invoke this method when all fields
     * are required, so an implementation should treat "never invoked" as meaning
     * that every field of the input is required.  A loader that cannot make use
     * of this information is free to ignore it by returning an appropriate
     * response.
     * @param requiredFieldList RequiredFieldList indicating which columns will be
     * needed.  This structure is read only; it must not be changed inside
     * pushProjection.
     * @return Indicates which fields will be returned
     * @throws FrontendException
     */
    RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
            throws FrontendException;
    
    /**
     * Describes a field that is required to execute a script.
     */
    @InterfaceAudience.Public
    @InterfaceStability.Evolving
    public static class RequiredField implements Serializable {

        private static final long serialVersionUID = 1L;

        /** Name of the field; {@code null} when no alias was supplied. */
        private String alias;

        /**
         * Zero-based position of the required field; -1 when no index was supplied.
         * Initialised to -1 so that an instance built piece-meal through the
         * no-argument constructor does not silently claim position 0.
         */
        private int index = -1;

        /**
         * Sub fields required within this field (this could be a list of hash keys
         * for example).  {@code null} when the entire field is required and no
         * specific sub fields are required.  In the initial implementation only one
         * level of sub fields is populated.
         */
        private List<RequiredField> subFields;

        /**
         * Type of this field - any current Pig DataType, as specified by the
         * constants in the DataType class.
         */
        private byte type;

        /**
         * Creates an empty description to be filled in piece-meal through the
         * setters.  The alias and sub fields start as {@code null} and the index
         * as -1, i.e. "not supplied".
         */
        public RequiredField() {
            // to allow piece-meal construction
        }

        /**
         * Creates a fully specified description.
         * @param alias name of the field, or null if not known
         * @param index zero-based position of the field, or -1 if not known
         * @param subFields required sub fields, or null if the whole field is required
         * @param type type of the field
         */
        public RequiredField(String alias, int index,
                List<RequiredField> subFields, byte type) {
            this.alias = alias;
            this.index = index;
            this.subFields = subFields;
            this.type = type;
        }

        /**
         * @return the alias, or null if none was supplied
         */
        public String getAlias() {
            return alias;
        }

        /**
         * @return the zero-based index, or -1 if none was supplied
         */
        public int getIndex() {
            return index;
        }

        /**
         * @return the required sub fields. The return value is null if all
         *         subfields are required
         */
        public List<RequiredField> getSubFields() {
            return subFields;
        }

        /**
         * @param subFields the required sub fields; null if all sub fields are required
         */
        public void setSubFields(List<RequiredField> subFields) {
            this.subFields = subFields;
        }

        /**
         * @return the type
         */
        public byte getType() {
            return type;
        }

        /**
         * @param t the type of this field
         */
        public void setType(byte t) {
            type = t;
        }

        /**
         * @param i the zero-based index of this field, or -1 if not known
         */
        public void setIndex(int i) {
            index = i;
        }

        /**
         * @param alias the name of this field, or null if not known
         */
        public void setAlias(String alias) {
            this.alias = alias;
        }

        /**
         * Renders the field as its index when one is set (index != -1), otherwise
         * as its alias, otherwise as the empty string.
         */
        @Override
        public String toString() {
            if (index != -1) {
                return Integer.toString(index);
            }
            if (alias != null) {
                return alias;
            }
            return "";
        }
    }

    /**
     * List of fields that Pig knows to be required to execute a script.
     */
    @InterfaceAudience.Public
    @InterfaceStability.Evolving
    public static class RequiredFieldList implements Serializable {

        private static final long serialVersionUID = 1L;

        /**
         * The required fields.  Starts out as an empty list when the no-argument
         * constructor is used; it is null only when null is passed to the
         * one-argument constructor, which stands for "all fields are required".
         */
        private List<RequiredField> fields = new ArrayList<RequiredField>();

        /**
         * Creates a list with no required fields yet; use {@link #add} to fill it.
         */
        public RequiredFieldList() {
        }

        /**
         * Set the list of required fields.  The given list is stored as is (not
         * copied).
         * @param fields the required fields, or null if all fields are required
         */
        public RequiredFieldList(List<RequiredField> fields) {
            this.fields = fields;
        }

        /**
         * Get all required fields as a list.
         * @return the required fields - this will be null if all fields are
         *         required
         */
        public List<RequiredField> getFields() {
            return fields;
        }

        /**
         * Renders the list as "*" when all fields are required (null list),
         * otherwise as the comma-separated fields enclosed in square brackets.
         */
        @Override
        public String toString() {
            if (fields == null) {
                return "*";
            }
            // A local, single-threaded buffer needs no synchronisation.
            StringBuilder result = new StringBuilder("[");
            boolean first = true;
            for (RequiredField field : fields) {
                if (!first) {
                    result.append(",");
                }
                result.append(field);
                first = false;
            }
            return result.append("]").toString();
        }

        /**
         * Add a field to the list of required fields.
         * @param rf required field to add to the list.
         * @throws NullPointerException if this object was constructed with a null
         *         field list (meaning "all fields are required")
         */
        public void add(RequiredField rf) {
            fields.add(rf);
        }
    }

    /**
     * Indicates whether the loader will return the requested fields or all fields.
     */
    @InterfaceAudience.Public
    @InterfaceStability.Evolving
    public static class RequiredFieldResponse {
        /**
         * True when the loader will return data containing only the list of
         * required fields, in that order; false when it will return all fields
         * in the data.
         */
        private boolean honored;

        /**
         * @param requiredFieldRequestHonored true if only the requested fields will
         * be returned, false if all fields will be returned
         */
        public RequiredFieldResponse(boolean requiredFieldRequestHonored) {
            this.honored = requiredFieldRequestHonored;
        }

        /**
         * Set whether the loader will return only the requested fields or all fields.
         * @param honored if true only requested fields will be returned, else all
         * fields will be returned.
         */
        public void setRequiredFieldResponse(boolean honored) {
            this.honored = honored;
        }

        /**
         * Indicates whether the loader will return only the requested fields or
         * all fields.
         * @return true if only requested fields will be returned, false if all
         * fields will be returned.
         */
        public boolean getRequiredFieldResponse() {
            return this.honored;
        }
    }

    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy