All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pig.impl.logicalLayer.schema.SchemaUtil Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.impl.logicalLayer.schema;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;

/**
 * 
 * A utility class that simplifies schema creation, especially for bag and
 * tuple schemas. Currently it only supports simple schema creation; nested
 * tuples and bags are not supported.
 * 
 */

public class SchemaUtil {

    /**
     * Type codes that may be used for the fields of a schema built by this
     * class. Populated once by the static initializer and never modified
     * afterwards.
     */
    private static final Set<Byte> SUPPORTED_TYPE_SET = new HashSet<Byte>();

    static {
        SUPPORTED_TYPE_SET.add(DataType.INTEGER);
        SUPPORTED_TYPE_SET.add(DataType.LONG);
        SUPPORTED_TYPE_SET.add(DataType.CHARARRAY);
        SUPPORTED_TYPE_SET.add(DataType.BOOLEAN);
        SUPPORTED_TYPE_SET.add(DataType.BYTE);
        SUPPORTED_TYPE_SET.add(DataType.BYTEARRAY);
        SUPPORTED_TYPE_SET.add(DataType.DOUBLE);
        SUPPORTED_TYPE_SET.add(DataType.FLOAT);
        SUPPORTED_TYPE_SET.add(DataType.MAP);
    }

    /**
     * Create a new tuple schema from a tuple name and two parallel lists: the
     * names of the fields and the types of the fields.
     * 
     * @param tupleName
     *            name of the single field that wraps the tuple's fields
     * @param fieldNames
     *            names of the fields, in order
     * @param dataTypes
     *            type codes of the fields, in the same order as
     *            {@code fieldNames}
     * @return a schema holding one field named {@code tupleName} whose nested
     *         schema contains the given fields
     * @throws FrontendException
     *             if a type is not supported or the two lists differ in size
     */
    public static Schema newTupleSchema(String tupleName,
            List<String> fieldNames, List<Byte> dataTypes)
            throws FrontendException {
        checkParameters(fieldNames, dataTypes);

        List<Schema.FieldSchema> tokenSchemas =
                new ArrayList<Schema.FieldSchema>(fieldNames.size());
        for (int i = 0; i < fieldNames.size(); ++i) {
            String name = fieldNames.get(i);
            Byte type = dataTypes.get(i);
            tokenSchemas.add(new Schema.FieldSchema(name, type));
        }

        Schema tupleSchema = new Schema(tokenSchemas);
        Schema.FieldSchema tupleField = new Schema.FieldSchema(tupleName,
                tupleSchema);

        return new Schema(tupleField);
    }

    /**
     * Create a new tuple schema from a tuple name and two parallel arrays: the
     * names of the fields and the types of the fields.
     * 
     * @param tupleName
     *            name of the single field that wraps the tuple's fields
     * @param fieldNames
     *            names of the fields, in order
     * @param dataTypes
     *            type codes of the fields, in the same order as
     *            {@code fieldNames}
     * @return tuple schema
     * @throws FrontendException
     *             if a type is not supported or the two arrays differ in length
     */
    public static Schema newTupleSchema(String tupleName, String[] fieldNames,
            Byte[] dataTypes) throws FrontendException {
        return newTupleSchema(tupleName, Arrays.asList(fieldNames), Arrays
                .asList(dataTypes));
    }

    /**
     * Create a new tuple schema from two parallel lists: the names of the
     * fields and the types of the fields. The tuple name is t.
     * 
     * @param fieldNames
     *            names of the fields, in order
     * @param dataTypes
     *            type codes of the fields, in the same order as
     *            {@code fieldNames}
     * @return tuple schema
     * @throws FrontendException
     *             if a type is not supported or the two lists differ in size
     */
    public static Schema newTupleSchema(List<String> fieldNames,
            List<Byte> dataTypes) throws FrontendException {
        return newTupleSchema("t", fieldNames, dataTypes);
    }

    /**
     * Create a new tuple schema from one list: the types of the fields. The
     * field names are generated as f0,f1,f2..., and the tuple name is t.
     * 
     * @param dataTypes
     *            type codes of the fields, in order
     * @return tuple schema
     * @throws FrontendException
     *             if a type is not supported
     */
    public static Schema newTupleSchema(List<Byte> dataTypes)
            throws FrontendException {
        List<String> names = newNames(dataTypes.size());
        return newTupleSchema("t", names, dataTypes);
    }

    /**
     * Create a new tuple schema from two parallel arrays: the names of the
     * fields and the types of the fields. The tuple name is t.
     * 
     * @param names
     *            names of the fields, in order
     * @param dataTypes
     *            type codes of the fields, in the same order as {@code names}
     * @return tuple schema
     * @throws FrontendException
     *             if a type is not supported or the two arrays differ in length
     */
    public static Schema newTupleSchema(String[] names, Byte[] dataTypes)
            throws FrontendException {
        return newTupleSchema("t", Arrays.asList(names), Arrays
                .asList(dataTypes));
    }

    /**
     * Create a new tuple schema from one array: the types of the fields. The
     * field names are generated as f0,f1,f2..., and the tuple name is t.
     * 
     * @param dataTypes
     *            type codes of the fields, in order
     * @return tuple schema
     * @throws FrontendException
     *             if a type is not supported
     */
    public static Schema newTupleSchema(Byte[] dataTypes)
            throws FrontendException {
        return newTupleSchema(Arrays.asList(dataTypes));
    }

    /**
     * Generate the default field names f0, f1, ..., f(size-1).
     * 
     * @param size
     *            number of names to generate
     * @return a new mutable list of {@code size} names
     */
    private static List<String> newNames(int size) {
        List<String> names = new ArrayList<String>(size);
        for (int i = 0; i < size; ++i) {
            names.add("f" + i);
        }
        return names;
    }

    /**
     * Create a bag schema from a bag name, a tuple name and two parallel
     * lists: the names of the fields and the types of the fields.
     * 
     * @param bagName
     *            name of the bag field
     * @param tupleName
     *            name of the tuple field nested inside the bag
     * @param fieldNames
     *            names of the tuple's fields, in order
     * @param dataTypes
     *            type codes of the tuple's fields, in the same order as
     *            {@code fieldNames}
     * @return a schema holding one {@link DataType#BAG} field named
     *         {@code bagName} whose nested schema is the tuple schema
     * @throws FrontendException
     *             if a type is not supported or the two lists differ in size
     */
    public static Schema newBagSchema(String bagName, String tupleName,
            List<String> fieldNames, List<Byte> dataTypes)
            throws FrontendException {
        // newTupleSchema validates the parameters, so no separate check is
        // needed here.
        Schema tupleSchema = newTupleSchema(tupleName, fieldNames, dataTypes);
        Schema.FieldSchema bagField = new Schema.FieldSchema(bagName,
                tupleSchema, DataType.BAG);

        return new Schema(bagField);
    }

    /**
     * Create a bag schema from a bag name, a tuple name and two parallel
     * arrays: the names of the fields and the types of the fields.
     * 
     * @param bagName
     *            name of the bag field
     * @param tupleName
     *            name of the tuple field nested inside the bag
     * @param fieldNames
     *            names of the tuple's fields, in order
     * @param dataTypes
     *            type codes of the tuple's fields, in the same order as
     *            {@code fieldNames}
     * @return bag schema
     * @throws FrontendException
     *             if a type is not supported or the two arrays differ in length
     */
    public static Schema newBagSchema(String bagName, String tupleName,
            String[] fieldNames, Byte[] dataTypes) throws FrontendException {
        return newBagSchema(bagName, tupleName, Arrays.asList(fieldNames),
                Arrays.asList(dataTypes));
    }

    /**
     * Create a bag schema from two parallel lists: the names of the fields and
     * the types of the fields. The bag name is b and the tuple name is t.
     * 
     * @param names
     *            names of the tuple's fields, in order
     * @param dataTypes
     *            type codes of the tuple's fields, in the same order as
     *            {@code names}
     * @return bag schema
     * @throws FrontendException
     *             if a type is not supported or the two lists differ in size
     */
    public static Schema newBagSchema(List<String> names, List<Byte> dataTypes)
            throws FrontendException {
        return newBagSchema("b", "t", names, dataTypes);
    }

    /**
     * Create a bag schema from one list: the types of the fields. The field
     * names are generated as f0,f1,f2..., the tuple name is t and the bag name
     * is b.
     * 
     * @param dataTypes
     *            type codes of the tuple's fields, in order
     * @return bag schema
     * @throws FrontendException
     *             if a type is not supported
     */
    public static Schema newBagSchema(List<Byte> dataTypes)
            throws FrontendException {
        List<String> names = newNames(dataTypes.size());
        return newBagSchema(names, dataTypes);
    }

    /**
     * Create a bag schema from two parallel arrays: the names of the fields
     * and the types of the fields. The tuple name is t and the bag name is b.
     * 
     * @param names
     *            names of the tuple's fields, in order
     * @param dataTypes
     *            type codes of the tuple's fields, in the same order as
     *            {@code names}
     * @return bag schema
     * @throws FrontendException
     *             if a type is not supported or the two arrays differ in length
     */
    public static Schema newBagSchema(String[] names, Byte[] dataTypes)
            throws FrontendException {
        return newBagSchema(Arrays.asList(names), Arrays.asList(dataTypes));
    }

    /**
     * Create a bag schema from one array: the types of the fields. The field
     * names are generated as f0,f1,f2..., the tuple name is t and the bag name
     * is b.
     * 
     * @param dataTypes
     *            type codes of the tuple's fields, in order
     * @return bag schema
     * @throws FrontendException
     *             if a type is not supported
     */
    public static Schema newBagSchema(Byte[] dataTypes)
            throws FrontendException {
        return newBagSchema(Arrays.asList(dataTypes));
    }

    /**
     * Verify that every type code is one of the supported types.
     * 
     * @param dataTypes
     *            type codes to check
     * @throws FrontendException
     *             naming the first type that is not in the supported set
     */
    private static void checkDataTypes(List<Byte> dataTypes)
            throws FrontendException {
        for (Byte type : dataTypes) {
            if (!SUPPORTED_TYPE_SET.contains(type)) {
                // Unbox explicitly so the lookup is done on the type code
                // itself. Passing the boxed Byte could bind to an
                // Object-accepting overload and describe the wrapper object
                // instead. NOTE(review): confirm against DataType's overloads.
                String typeName = (type == null) ? "null" : DataType
                        .findTypeName(type.byteValue());
                throw new FrontendException(
                        "Currently pig do not support this kind of type using Schema:"
                                + typeName
                                + ". You can write shema by yourself.");
            }
        }

    }

    /**
     * Validate the inputs shared by all schema factory methods: every type
     * must be supported, and there must be exactly one type per name. The
     * types are checked first, so an unsupported type is reported in
     * preference to a size mismatch.
     * 
     * @param names
     *            field names
     * @param dataTypes
     *            field type codes
     * @throws FrontendException
     *             if a type is not supported or the two lists differ in size
     */
    private static void checkParameters(List<String> names,
            List<Byte> dataTypes) throws FrontendException {
        checkDataTypes(dataTypes);
        if (names.size() != dataTypes.size()) {
            throw new FrontendException(
                    "The number of names is not equal to the number of dataTypes");
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy