/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.ResourceType;
import org.apache.hadoop.hive.metastore.api.ResourceUri;
import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionResource;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.CreateFunctionDesc;
import org.apache.hadoop.hive.ql.plan.DropFunctionDesc;
import org.apache.hadoop.hive.ql.plan.CreateMacroDesc;
import org.apache.hadoop.hive.ql.plan.DropMacroDesc;
import org.apache.hadoop.hive.ql.plan.FunctionWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.apache.hadoop.util.StringUtils;

/**
 * FunctionTask.
 *
 * Executes function- and macro-related DDL work: creating and dropping
 * temporary and permanent functions, reloading the function registry, and
 * creating and dropping temporary macros.
 */
public class FunctionTask extends Task<FunctionWork> {
  private static final long serialVersionUID = 1L;
  private static transient final Logger LOG = LoggerFactory.getLogger(FunctionTask.class);

  public FunctionTask() {
    super();
  }

  @Override
  public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext ctx,
      CompilationOpContext opContext) {
    super.initialize(queryState, queryPlan, ctx, opContext);
  }

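  /**
   * Executes whichever function or macro DDL action the FunctionWork
   * describes. For illustration only, HiveQL statements that compile into
   * this task look like the following (function names, classes, and URIs
   * below are hypothetical):
   *
   *   CREATE TEMPORARY FUNCTION my_lower AS 'com.example.udf.MyLower';
   *   CREATE FUNCTION mydb.my_lower AS 'com.example.udf.MyLower'
   *     USING JAR 'hdfs:///udfs/example-udfs.jar';
   *   DROP FUNCTION mydb.my_lower;
   *   RELOAD FUNCTION;
   *   CREATE TEMPORARY MACRO sigmoid(x DOUBLE) 1.0 / (1.0 + EXP(-x));
   */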
  @Override
  public int execute(DriverContext driverContext) {
    CreateFunctionDesc createFunctionDesc = work.getCreateFunctionDesc();
    if (createFunctionDesc != null) {
      if (createFunctionDesc.isTemp()) {
        return createTemporaryFunction(createFunctionDesc);
      } else {
        try {
          if (createFunctionDesc.getReplicationSpec().isInReplicationScope()) {
            String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(
                    createFunctionDesc.getFunctionName());
            String dbName = qualifiedNameParts[0];
            String funcName = qualifiedNameParts[1];
            Map<String, String> dbProps = Hive.get().getDatabase(dbName).getParameters();
            if (!createFunctionDesc.getReplicationSpec().allowEventReplacementInto(dbProps)) {
              // If the database is newer than the create event, then noop it.
              LOG.debug("FunctionTask: Create Function {} is skipped as database {} " +
                        "is newer than update", funcName, dbName);
              return 0;
            }
          }
          return createPermanentFunction(Hive.get(conf), createFunctionDesc);
        } catch (Exception e) {
          setException(e);
          LOG.error("Failed to create function", e);
          return 1;
        }
      }
    }

    DropFunctionDesc dropFunctionDesc = work.getDropFunctionDesc();
    if (dropFunctionDesc != null) {
      if (dropFunctionDesc.isTemp()) {
        return dropTemporaryFunction(dropFunctionDesc);
      } else {
        try {
          if (dropFunctionDesc.getReplicationSpec().isInReplicationScope()) {
            String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(
                    dropFunctionDesc.getFunctionName());
            String dbName = qualifiedNameParts[0];
            String funcName = qualifiedNameParts[1];
            Map<String, String> dbProps = Hive.get().getDatabase(dbName).getParameters();
            if (!dropFunctionDesc.getReplicationSpec().allowEventReplacementInto(dbProps)) {
              // If the database is newer than the drop event, then noop it.
              LOG.debug("FunctionTask: Drop Function {} is skipped as database {} " +
                        "is newer than update", funcName, dbName);
              return 0;
            }
          }
          return dropPermanentFunction(Hive.get(conf), dropFunctionDesc);
        } catch (Exception e) {
          setException(e);
          LOG.error("Failed to drop function", e);
          return 1;
        }
      }
    }

    if (work.getReloadFunctionDesc() != null) {
      try {
        Hive.get().reloadFunctions();
      } catch (Exception e) {
        setException(e);
        LOG.error("Failed to reload functions", e);
        return 1;
      }
    }

    CreateMacroDesc createMacroDesc = work.getCreateMacroDesc();
    if (createMacroDesc != null) {
      return createMacro(createMacroDesc);
    }

    DropMacroDesc dropMacroDesc = work.getDropMacroDesc();
    if (dropMacroDesc != null) {
      return dropMacro(dropMacroDesc);
    }
    return 0;
  }

  // todo authorization
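  /**
   * Registers a permanent function in the session FunctionRegistry and then
   * persists it to the metastore. Fails if any resource lives on the local
   * filesystem while the warehouse filesystem is non-local.
   */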
  private int createPermanentFunction(Hive db, CreateFunctionDesc createFunctionDesc)
      throws HiveException, IOException {
    String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(
        createFunctionDesc.getFunctionName());
    String dbName = qualifiedNameParts[0];
    String funcName = qualifiedNameParts[1];
    String registeredName = FunctionUtils.qualifyFunctionName(funcName, dbName);
    String className = createFunctionDesc.getClassName();

    List<ResourceUri> resources = createFunctionDesc.getResources();

    // For permanent functions, check for any resources from local filesystem.
    checkLocalFunctionResources(db, resources);

    FunctionInfo registered = null;
    HiveConf oldConf = SessionState.get().getConf();
    try {
      SessionState.get().setConf(conf);
      registered = FunctionRegistry.registerPermanentFunction(
        registeredName, className, true, toFunctionResource(resources));
    } catch (RuntimeException ex) {
      // Unwind to the root cause; registration failed, so "registered" stays
      // null and the failure is reported below.
      Throwable t = ex;
      while (t.getCause() != null) {
        t = t.getCause();
      }
      LOG.error("Failed to register " + registeredName + " using class " + className, t);
    } finally {
      SessionState.get().setConf(oldConf);
    }

    if (registered == null) {
      console.printError("Failed to register " + registeredName
          + " using class " + createFunctionDesc.getClassName());
      return 1;
    }

    // Add to metastore
    Function func = new Function(
        funcName,
        dbName,
        className,
        SessionState.get().getUserName(), // TODO: should this use getUserFromAuthenticator?
        PrincipalType.USER,
        (int) (System.currentTimeMillis() / 1000),
        org.apache.hadoop.hive.metastore.api.FunctionType.JAVA,
        resources
    );
    db.createFunction(func);
    return 0;
  }

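  /**
   * Adds the function's resources to the session and registers the class as a
   * temporary UDF; fails if the class does not implement UDF, GenericUDF, or
   * UDAF.
   */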
  private int createTemporaryFunction(CreateFunctionDesc createFunctionDesc) {
    try {
      // Add any required resources
      FunctionResource[] resources = toFunctionResource(createFunctionDesc.getResources());
      addFunctionResources(resources);

      Class<?> udfClass = getUdfClass(createFunctionDesc);
      FunctionInfo registered = FunctionRegistry.registerTemporaryUDF(
          createFunctionDesc.getFunctionName(), udfClass, resources);
      if (registered != null) {
        return 0;
      }
      console.printError("FAILED: Class " + createFunctionDesc.getClassName()
          + " does not implement UDF, GenericUDF, or UDAF");
      return 1;
    } catch (HiveException e) {
      console.printError("FAILED: " + e.toString());
      LOG.info("create function: ", e);
      return 1;
    } catch (ClassNotFoundException e) {
      console.printError("FAILED: Class " + createFunctionDesc.getClassName() + " not found");
      LOG.info("create function: ", e);
      return 1;
    }
  }

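  /** Registers a temporary macro in the session FunctionRegistry. */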
  private int createMacro(CreateMacroDesc createMacroDesc) {
    FunctionRegistry.registerTemporaryMacro(
      createMacroDesc.getMacroName(),
      createMacroDesc.getBody(),
      createMacroDesc.getColNames(),
      createMacroDesc.getColTypes()
    );
    return 0;
  }

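  /**
   * Drops a temporary macro. Macros share the temporary-function namespace,
   * so removal goes through the same unregisterTemporaryUDF path as
   * temporary functions.
   */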
  private int dropMacro(DropMacroDesc dropMacroDesc) {
    try {
      FunctionRegistry.unregisterTemporaryUDF(dropMacroDesc.getMacroName());
      return 0;
    } catch (HiveException e) {
      LOG.info("drop macro: ", e);
      return 1;
    }
  }

  // todo authorization
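  /**
   * Unregisters the function from the FunctionRegistry and removes it from
   * the metastore.
   */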
  private int dropPermanentFunction(Hive db, DropFunctionDesc dropFunctionDesc) {
    try {
      String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(
          dropFunctionDesc.getFunctionName());
      String dbName = qualifiedNameParts[0];
      String funcName = qualifiedNameParts[1];

      String registeredName = FunctionUtils.qualifyFunctionName(funcName, dbName);
      FunctionRegistry.unregisterPermanentFunction(registeredName);
      db.dropFunction(dbName, funcName);

      return 0;
    } catch (Exception e) {
      LOG.info("drop function: ", e);
      console.printError("FAILED: error during drop function: " + StringUtils.stringifyException(e));
      return 1;
    }
  }

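  /** Unregisters a temporary UDF from the session FunctionRegistry. */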
  private int dropTemporaryFunction(DropFunctionDesc dropFunctionDesc) {
    try {
      FunctionRegistry.unregisterTemporaryUDF(dropFunctionDesc.getFunctionName());
      return 0;
    } catch (HiveException e) {
      LOG.info("drop function: ", e);
      return 1;
    }
  }

  private void checkLocalFunctionResources(Hive db, List<ResourceUri> resources)
      throws HiveException {
    // If this is a non-local warehouse, then adding resources from the local filesystem
    // may mean that other clients will not be able to access the resources.
    // So disallow resources from local filesystem in this case.
    if (resources != null && resources.size() > 0) {
      try {
        String localFsScheme = FileSystem.getLocal(db.getConf()).getUri().getScheme();
        String configuredFsScheme = FileSystem.get(db.getConf()).getUri().getScheme();
        if (configuredFsScheme.equals(localFsScheme)) {
          // Configured warehouse FS is local, don't need to bother checking.
          return;
        }

        for (ResourceUri res : resources) {
          String resUri = res.getUri();
          if (ResourceDownloader.isFileUri(resUri)) {
            throw new HiveException("Hive warehouse is non-local, but "
                + res.getUri() + " specifies file on local filesystem. "
                + "Resources on non-local warehouse should specify a non-local scheme/path");
          }
        }
      } catch (HiveException e) {
        throw e;
      } catch (Exception e) {
        LOG.error("Exception caught in checkLocalFunctionResources", e);
        throw new HiveException(e);
      }
    }
  }

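  /** Converts metastore ResourceUri entries into session FunctionResource descriptors. */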
  public static FunctionResource[] toFunctionResource(List<ResourceUri> resources)
      throws HiveException {
    if (resources == null) {
      return null;
    }
    FunctionResource[] converted = new FunctionResource[resources.size()];
    for (int i = 0; i < converted.length; i++) {
      ResourceUri resource = resources.get(i);
      SessionState.ResourceType type = getResourceType(resource.getResourceType());
      converted[i] = new FunctionResource(type, resource.getUri());
    }
    return converted;
  }

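  /** Maps a metastore ResourceType onto the corresponding SessionState.ResourceType. */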
  public static SessionState.ResourceType getResourceType(ResourceType rt) {
    switch (rt) {
      case JAR:
        return SessionState.ResourceType.JAR;
      case FILE:
        return SessionState.ResourceType.FILE;
      case ARCHIVE:
        return SessionState.ResourceType.ARCHIVE;
      default:
        throw new AssertionError("Unexpected resource type " + rt);
    }
  }

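  /** Adds the given resources (JARs, files, archives) to the current session, grouped by type. */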
  public static void addFunctionResources(FunctionResource[] resources) throws HiveException {
    if (resources != null) {
      Multimap<SessionState.ResourceType, String> mappings = HashMultimap.create();
      for (FunctionResource res : resources) {
        mappings.put(res.getResourceType(), res.getResourceURI());
      }
      // Iterate distinct types (keySet, not keys) so each resource set is added once.
      for (SessionState.ResourceType type : mappings.keySet()) {
        SessionState.get().add_resources(type, mappings.get(type));
      }
    }
  }

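  /** Loads the UDF implementation class via the session-specified class loader. */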
  private Class<?> getUdfClass(CreateFunctionDesc desc) throws ClassNotFoundException {
    // get the session specified class loader from SessionState
    ClassLoader classLoader = Utilities.getSessionSpecifiedClassLoader();
    return Class.forName(desc.getClassName(), true, classLoader);
  }

  @Override
  public StageType getType() {
    return StageType.FUNC;
  }

  @Override
  public String getName() {
    return "FUNCTION";
  }

  /**
   * This task needs access to SessionState resource downloads, which in turn
   * reference per-session Registry objects, so it must not run in parallel
   * with other tasks.
   */
  @Override
  public boolean canExecuteInParallel() {
    return false;
  }
}