All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.exec.AbstractMapOperator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;


/**
 * Abstract Map operator. Common code of MapOperator and VectorMapOperator.
 **/
@SuppressWarnings("deprecation")
public abstract class AbstractMapOperator extends Operator
    implements Serializable, Cloneable {

  private static final long serialVersionUID = 1L;

  /**
   * Initialization call sequence:
   *
   *   (Operator)                     Operator.setConf(MapWork conf);
   *   (Operator)                     Operator.initialize(
   *                                      Configuration hconf, ObjectInspector[] inputOIs);
   *
   *   ([Vector]MapOperator)          @Override setChildren(Configuration hconf)
   *
   *   (Operator)                     Operator.passExecContext(ExecMapperContext execContext)
   *   (Operator)                     Operator.initializeLocalWork(Configuration hconf)
   *
   *   (AbstractMapOperator)          initializeMapOperator(Configuration hconf)
   *
   * [ (AbstractMapOperator)          initializeContexts() ]   // exec.tez.MapRecordProcessor only.
   *
   *   (Operator)                     Operator.setReporter(Reporter rep)
   *
   */

  /**
   * Counter.
   *
   */
  public static enum Counter {
    DESERIALIZE_ERRORS,
    RECORDS_IN
  }

  protected final transient LongWritable deserialize_error_count = new LongWritable();
  protected final transient LongWritable recordCounter = new LongWritable();
  protected transient long numRows = 0;

  private final Map connectedOperators
  = new TreeMap();

  private transient final Map normalizedPaths = new HashMap<>();

  private Path normalizePath(Path onefile, boolean schemaless) {
    //creating Path is expensive, so cache the corresponding
    //Path object in normalizedPaths
    Path path = normalizedPaths.get(onefile);
    if (path == null) {
      path = onefile;
      if (schemaless && path.toUri().getScheme() != null) {
        path = new Path(path.toUri().getPath());
      }
      normalizedPaths.put(onefile, path);
    }
    return path;
  }

  protected String getNominalPath(Path fpath) {
    Path nominal = null;
    boolean schemaless = fpath.toUri().getScheme() == null;
    for (Path onefile : conf.getPathToAliases().keySet()) {
      Path onepath = normalizePath(onefile, schemaless);
      Path curfpath = fpath;
      if(!schemaless && onepath.toUri().getScheme() == null) {
        curfpath = new Path(fpath.toUri().getPath());
      }
      // check for the operators who will process rows coming to this Map Operator
      if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) {
        // not from this
        continue;
      }
      if (nominal != null) {
        throw new IllegalStateException("Ambiguous input path " + fpath);
      }
      nominal = onefile;
      break;
    }
    if (nominal == null) {
      throw new IllegalStateException("Invalid input path " + fpath);
    }
    return nominal.toString();
  }

  public abstract void initEmptyInputChildren(List> children, Configuration hconf)
      throws SerDeException, Exception;


  /** Kryo ctor. */
  protected AbstractMapOperator() {
    super();
  }

  public AbstractMapOperator(CompilationOpContext ctx) {
    super(ctx);
  }

  public abstract void setChildren(Configuration hconf) throws Exception;


  public void initializeMapOperator(Configuration hconf) throws HiveException {
    // set that parent initialization is done and call initialize on children
    state = State.INIT;

    statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count);

    numRows = 0;

    String context = hconf.get(Operator.CONTEXT_NAME_KEY, "");
    if (context != null && !context.isEmpty()) {
      context = "_" + context.replace(" ","_");
    }
    statsMap.put(Counter.RECORDS_IN + context, recordCounter);
  }

  public abstract void initializeContexts() throws HiveException;

  public abstract Deserializer getCurrentDeserializer();

  public abstract void process(Writable value) throws HiveException;

  @Override
  public void closeOp(boolean abort) throws HiveException {
    recordCounter.set(numRows);
    super.closeOp(abort);
  }

  public void clearConnectedOperators() {
    connectedOperators.clear();
  }

  public void setConnectedOperators(int tag, DummyStoreOperator dummyOp) {
    connectedOperators.put(tag, dummyOp);
  }

  public Map getConnectedOperators() {
    return connectedOperators;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy