All downloads are free. Search and download functionality uses the official Maven repository.

org.elasticsearch.hadoop.pig.PigUtils Maven / Gradle / Ivy

There is a newer version: 8.15.1
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.pig;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Properties;

import javax.xml.bind.DatatypeConverter;

import org.apache.commons.logging.LogFactory;
import org.apache.pig.LoadPushDown.RequiredField;
import org.apache.pig.LoadPushDown.RequiredFieldList;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.PropertiesSettings;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.util.DateUtils;
import org.elasticsearch.hadoop.util.FieldAlias;
import org.elasticsearch.hadoop.util.SettingsUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat;


/**
 * Package-private helpers for the Pig integration: date conversion between Pig values and
 * their Elasticsearch representation, field-alias lookup, and construction of comma-separated
 * field projections out of Pig schemas / required-field lists.
 */
class PigUtils {

    static final String MAPPING_NAMES = "es.mapping.names";
    static final String NAMED_TUPLE = "es.mapping.pig.tuple.use.field.names";
    static final boolean NAMED_TUPLE_DEFAULT = false;

    // True when the running Pig registers the type name "datetime" for type code 30.
    // NOTE(review): presumably the code is probed by value (instead of through a DataType
    // constant) so this class still links against Pig versions lacking that type - confirm.
    private static final boolean pig11Available = "datetime".equals(DataType.findTypeName((byte) 30));

    /**
     * Converts a Pig date value into the String handed to Elasticsearch, using the converter
     * that matches the detected Pig version.
     *
     * @param pigDate the Pig-side date value
     * @return the date rendered as a String
     * @throws EsHadoopIllegalArgumentException if the value is not of a supported type
     */
    static String convertDateToES(Object pigDate) {
        return (pig11Available ? Pig11OrHigherConverter.convertToES(pigDate) : PigUpTo10Converter.convertToES(pigDate));
    }

    /**
     * Converts a date String read from Elasticsearch into its Pig-side value.
     *
     * @param esDate the date as a String
     * @return a Joda {@code DateTime} when the Pig datetime type is available, otherwise the time in milliseconds
     */
    static Object convertDateFromES(String esDate) {
        return (pig11Available ? Pig11OrHigherConverter.convertFromES(esDate) : PigUpTo10Converter.convertFromES(esDate));
    }

    /**
     * Converts a numeric (millisecond) date read from Elasticsearch into its Pig-side value.
     *
     * @param esDate the date as milliseconds
     * @return a Joda {@code DateTime} (UTC) when the Pig datetime type is available, otherwise the value itself
     */
    static Object convertDateFromES(Long esDate) {
        return (pig11Available ? Pig11OrHigherConverter.convertFromES(esDate) : PigUpTo10Converter.convertFromES(esDate));
    }

    /**
     * Date conversion used when the Pig datetime type is not available: dates are plain
     * numbers (milliseconds) or Strings. Abstract only to prevent instantiation.
     */
    private static abstract class PigUpTo10Converter {
        static String convertToES(Object pigDate) {
            if (pigDate instanceof Number) {
                // numbers are interpreted as milliseconds and printed through a Calendar
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(((Number) pigDate).longValue());
                return DatatypeConverter.printDateTime(cal);
            }
            if (pigDate instanceof String) {
                // Strings are passed through untouched
                return ((String) pigDate);
            }
            throw new EsHadoopIllegalArgumentException(String.format("Cannot convert [%s] to date", pigDate));
        }

        static Object convertFromES(String esDate) {
            return DateUtils.parseDate(esDate).getTimeInMillis();
        }

        static Object convertFromES(Long esDate) {
            return esDate;
        }
    }

    /**
     * Date conversion used when the Pig datetime type is available: dates are Joda
     * {@code DateTime} instances. Abstract only to prevent instantiation.
     */
    private static abstract class Pig11OrHigherConverter {
        static String convertToES(Object pigDate) {
            // fail the same way as the pre-datetime converter instead of leaking a
            // ClassCastException / NullPointerException on unexpected input
            if (!(pigDate instanceof DateTime)) {
                throw new EsHadoopIllegalArgumentException(String.format("Cannot convert [%s] to date", pigDate));
            }
            // ISODateTimeFormat.dateOptionalTimeParser() throws "printing not supported"
            return ((DateTime) pigDate).toString();
        }

        static Object convertFromES(String esDate) {
            return ISODateTimeFormat.dateOptionalTimeParser().parseDateTime(esDate);
        }

        static Object convertFromES(Long esDate) {
            return new DateTime(esDate, DateTimeZone.UTC);
        }
    }

    /**
     * Builds the {@link FieldAlias} described by the {@link #MAPPING_NAMES} property.
     *
     * @param settings settings holding the (optional) alias definition
     * @return the alias lookup
     */
    static FieldAlias alias(Settings settings) {
        return new FieldAlias(SettingsUtils.aliases(settings.getProperty(MAPPING_NAMES), false), false);
    }

    /**
     * Flattens the given Pig schema into a comma-separated list of field names; nested
     * fields are expressed as dotted paths.
     *
     * @param schema the Pig schema to walk
     * @param props properties from which the field aliases are read
     * @return the comma-separated projection
     */
    static String asProjection(Schema schema, Properties props) {
        List<String> fields = new ArrayList<String>();
        addField(schema, fields, alias(new PropertiesSettings(props)), null);

        return StringUtils.concatenate(fields, ",");
    }

    /**
     * Recursively collects the leaf fields of {@code schema} into {@code fields}.
     *
     * @param schema the (sub)schema to walk
     * @param fields accumulator receiving the resolved field paths
     * @param fa alias lookup applied to each field name
     * @param currentNode dotted path of the enclosing field, or {@code null} at the top level
     */
    private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
        for (FieldSchema field : schema.getFields()) {
            // a field without an alias contributes nothing to the path and inherits its parent's
            String node = currentNode;
            if (field.alias != null) {
                String name = fa.toES(field.alias);
                node = (currentNode != null ? currentNode + "." + name : name);
            }

            if (field.schema != null) {
                addField(field.schema, fields, fa, node);
            }
            else if (StringUtils.hasText(node)) {
                fields.add(fa.toES(node));
            }
            else {
                // nothing usable to project on; report it and keep blank entries out of the result
                LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema " + schema);
            }
        }
    }

    /**
     * Flattens the given list of required fields into a comma-separated list of field names;
     * sub-fields are expressed as dotted paths under their parent.
     *
     * @param list the fields requested by Pig
     * @param props properties from which the field aliases are read
     * @return the comma-separated projection
     */
    static String asProjection(RequiredFieldList list, Properties props) {
        List<String> fields = new ArrayList<String>();
        FieldAlias alias = alias(new PropertiesSettings(props));
        for (RequiredField field : list.getFields()) {
            addField(field, fields, alias, "");
        }

        return StringUtils.concatenate(fields, ",");
    }

    /**
     * Recursively collects {@code field} (or its leaf sub-fields) into {@code fields}.
     *
     * @param field the field to add
     * @param fields accumulator receiving the resolved field paths
     * @param fa alias lookup applied to each field name
     * @param currentNode dotted path of the enclosing field; empty at the top level
     */
    private static void addField(RequiredField field, List<String> fields, FieldAlias fa, String currentNode) {
        String name = fa.toES(field.getAlias());
        // prefix with the parent path so nested leaves are not emitted under their bare name
        String node = (StringUtils.hasText(currentNode) ? currentNode + "." + name : name);

        List<RequiredField> subFields = field.getSubFields();
        if (subFields != null && !subFields.isEmpty()) {
            for (RequiredField subField : subFields) {
                addField(subField, fields, fa, node);
            }
        }
        else {
            fields.add(node);
        }
    }

    /**
     * Indicates whether the given field holds a complex type, that is one whose type code is
     * {@link DataType#MAP} or higher.
     *
     * @param fieldSchema the field to inspect, may be {@code null}
     * @return {@code true} for a non-null field with a complex type, {@code false} otherwise
     */
    static boolean isComplexType(ResourceFieldSchema fieldSchema) {
        return (fieldSchema != null && fieldSchema.getType() >= DataType.MAP);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy