All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hivemall.tools.json.FromJsonUDF Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.json;

import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.hadoop.JsonSerdeUtils;
import hivemall.utils.lang.ArrayUtils;
import hivemall.utils.lang.ExceptionUtils;
import hivemall.utils.lang.StringUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.HCatRecordObjectInspectorFactory;

// @formatter:off
@Description(name = "from_json",
        value = "_FUNC_(string jsonString, const string returnTypes [, const array|const string columnNames])"
                + " - Return Hive object.",
        extended = "SELECT\n" + 
                "  from_json(to_json(map('one',1,'two',2)), 'map'),\n" +
                "  from_json(\n" + 
                "    '{ \"person\" : { \"name\" : \"makoto\" , \"age\" : 37 } }',\n" + 
                "    'struct', \n" + 
                "    array('person')\n" + 
                "  ),\n" + 
                "  from_json(\n" + 
                "    '[0.1,1.1,2.2]',\n" + 
                "    'array'\n" + 
                "  ),\n" + 
                "  from_json(to_json(\n" + 
                "    ARRAY(\n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"tokyo\"), \n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"osaka\")\n" + 
                "    )\n" + 
                "  ),'array>'),\n" + 
                "  from_json(to_json(\n" + 
                "    ARRAY(\n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"tokyo\"), \n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"osaka\")\n" + 
                "    ),\n" + 
                "    array('city')\n" + 
                "  ), 'array>'),\n" + 
                "  from_json(to_json(\n" + 
                "    ARRAY(\n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"tokyo\"), \n" + 
                "      NAMED_STRUCT(\"country\", \"japan\", \"city\", \"osaka\")\n" + 
                "    )\n" + 
                "  ),'array>');\n"
                + "```\n\n" +
                "```\n" +
                " {\"one\":1,\"two\":2}\n" +
                " {\"name\":\"makoto\",\"age\":37}\n" + 
                " [0.1,1.1,2.2]\n" + 
                " [{\"country\":\"japan\",\"city\":\"tokyo\"},{\"country\":\"japan\",\"city\":\"osaka\"}]\n" + 
                " [{\"country\":\"japan\",\"city\":\"tokyo\"},{\"country\":\"japan\",\"city\":\"osaka\"}]\n" + 
                " [{\"city\":\"tokyo\"},{\"city\":\"osaka\"}]")
//@formatter:on
@UDFType(deterministic = true, stateful = false)
public final class FromJsonUDF extends GenericUDF {

    private StringObjectInspector jsonOI;

    private List columnTypes;
    @Nullable
    private List columnNames;

    @Override
    public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        if (argOIs.length != 2 && argOIs.length != 3) {
            throw new UDFArgumentException(
                "from_json takes two or three arguments: " + argOIs.length);
        }

        this.jsonOI = HiveUtils.asStringOI(argOIs[0]);

        String typeString = HiveUtils.getConstString(argOIs[1]);
        this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

        if (argOIs.length == 3) {
            final ObjectInspector argOI2 = argOIs[2];
            if (HiveUtils.isConstString(argOI2)) {
                String names = HiveUtils.getConstString(argOI2);
                this.columnNames = ArrayUtils.asKryoSerializableList(names.split(","));
            } else if (HiveUtils.isConstStringListOI(argOI2)) {
                this.columnNames =
                        ArrayUtils.asKryoSerializableList(HiveUtils.getConstStringArray(argOI2));
            } else {
                throw new UDFArgumentException("Expected `const array` or `const string`"
                        + " but got an unexpected OI type for the third argument: " + argOI2);
            }
        }

        return getObjectInspector(columnTypes, columnNames);
    }

    @Nonnull
    private static ObjectInspector getObjectInspector(@Nonnull final List columnTypes,
            @Nullable List columnNames) throws UDFArgumentException {
        if (columnTypes.isEmpty()) {
            throw new UDFArgumentException("Returning columnTypes MUST NOT be null");
        }

        final ObjectInspector returnOI;
        final int numColumns = columnTypes.size();
        if (numColumns == 1) {
            TypeInfo type = columnTypes.get(0);
            returnOI =
                    HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type);
        } else {
            if (columnNames == null) {
                columnNames = new ArrayList<>(numColumns);
                for (int i = 0; i < numColumns; i++) {
                    columnNames.add("c" + i);
                }
            } else {
                if (columnNames.size() != numColumns) {
                    throw new UDFArgumentException("#columnNames != #columnTypes. #columnName="
                            + columnNames.size() + ", #columnTypes=" + numColumns);
                }
            }
            final ObjectInspector[] fieldOIs = new ObjectInspector[numColumns];
            for (int i = 0; i < fieldOIs.length; i++) {
                TypeInfo type = columnTypes.get(i);
                fieldOIs[i] =
                        HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(
                            type);
            }
            returnOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames,
                Arrays.asList(fieldOIs));
        }
        return returnOI;
    }

    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        Object arg0 = args[0].get();
        if (arg0 == null) {
            return null;
        }
        Text jsonString = jsonOI.getPrimitiveWritableObject(arg0);

        final Object result;
        try {
            if (columnNames == null && columnTypes != null && columnTypes.size() == 1) {
                result = JsonSerdeUtils.deserialize(jsonString, columnTypes.get(0));
            } else {
                result = JsonSerdeUtils.deserialize(jsonString, columnNames, columnTypes);
            }
        } catch (Throwable e) {
            throw new HiveException("Failed to deserialize Json: \n" + jsonString.toString() + '\n'
                    + ExceptionUtils.prettyPrintStackTrace(e),
                e);
        }
        return result;
    }

    @Override
    public String getDisplayString(String[] args) {
        return "from_json(" + StringUtils.join(args, ',') + ")";
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy