/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Table;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.VolumeInfo;
import com.aliyun.odps.mapred.bridge.ErrorCode;
import com.aliyun.odps.mapred.bridge.MetaExplorer;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
import com.aliyun.odps.pipeline.Pipeline;
import com.aliyun.odps.pipeline.Pipeline.TransformNode;
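
/**
 * Factory assembling the validators that check a job before submission.
 *
 * A rough usage sketch (illustrative only; how the JobConf and MetaExplorer are obtained is
 * assumed here, not prescribed by this class):
 *
 * <pre>{@code
 * JobConf job = ...;           // the job configuration to check
 * MetaExplorer explorer = ...; // metadata access for tables, volumes and resources
 * Validator validator = ValidatorFactory.getValidator(job, explorer);
 * validator.validate();        // throws OdpsException if the job is mis-configured
 * }</pre>
 */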

public class ValidatorFactory {

  /** Returns true if i lies within the inclusive range [l, h]. */
  private static boolean between(long i, long l, long h) {
    return i >= l && i <= h;
  }

  private static boolean validateColumns(String[] columns, Column[] schema, StringBuilder errorMsg) {
    Set<String> schemaColumns = new HashSet<String>();
    for (int i = 0; i < schema.length; ++i) {
      schemaColumns.add(schema[i].getName());
    }
    for (int i = 0; i < columns.length; ++i) {
      if (!schemaColumns.contains(columns[i])) {
        errorMsg.append("Can't find column " + columns[i] + " in key schema.");
        return false;
      }
    }
    return true;
  }

  static void throwException(ErrorCode e) throws OdpsException {
    throw new OdpsException(e.code, e.msg);
  }

  static void throwException(ErrorCode e, String info) throws OdpsException {
    throw new OdpsException(e.code, e.msg + ":" + info);
  }

  public static void throwException(ErrorCode e, Throwable cause) throws OdpsException {
    throw new OdpsException(e.code, e.msg, cause);
  }

  static class CompositeValidator implements Validator {

    List<Validator> validators = new ArrayList<Validator>();

    public void addValidator(Validator v) {
      validators.add(v);
    }

    @Override
    public void validate() throws OdpsException {
      for (Validator v : validators) {
        v.validate();
      }
    }
  }

  /**
   * Meta Validator.
   *
   * Validates that the input/output tables and volumes are available.
   */
  static class InputOutputValidator implements Validator {

    private JobConf job;
    private MetaExplorer explorer;

    public InputOutputValidator(JobConf job, MetaExplorer explorer) {
      this.job = job;
      this.explorer = explorer;
    }

    /**
     * Validates that the table:
     *
     * 1. exists
     * 2. is not a view
     * 3. has the given partition columns (if a partition spec is set)
     * 4. has the given columns (if selected columns are set)
     *
     * @param table
     *     the table to validate
     * @param distinctTables
     *     cache of tables already validated in the same group
     * @param isInput
     *     whether the table is a job input
     * @throws OdpsException
     */
    private void validateTable(TableInfo table, Map<String, Table> distinctTables, boolean isInput)
        throws OdpsException {

      Table tableDesc = distinctTables.get(
          table.getProjectName() + "." + table.getTableName());
      if (tableDesc == null) {
        // check if table exists
        if (!explorer.existsTable(table.getProjectName(), table.getTableName())) {
          throwException(ErrorCode.TABLE_NOT_FOUND, table.toString());
        }

        // views are only supported as the single input table of a non-pipeline job
        tableDesc = explorer.getTable(table.getProjectName(),
                                      table.getTableName());
        if (tableDesc.isVirtualView() && (!isInput || job.isPipeline() || (InputUtils.getTables(job).length > 1))) {
          throwException(ErrorCode.VIEW_TABLE, table.toString());
        }
        distinctTables.put(table.getProjectName() + "." + table.getTableName(),
                           tableDesc);
      }

      // Check if part spec is valid
      Map<String, String> partSpec = table.getPartSpec();
      if (partSpec != null) {
        List<Column> cols = tableDesc.getSchema().getPartitionColumns();
        List<String> colNames = new ArrayList<String>();
        for (Column c : cols) {
          colNames.add(c.getName());
        }
        for (String key : partSpec.keySet()) {
          if (!colNames.contains(key.toLowerCase())) {
            throwException(ErrorCode.PARTITION_COLUMN_NOT_FOUND, key);
          }
        }
      }

      // Check if select columns exist in source table.
      if (table.getCols() != null) {
        List<Column> schema = tableDesc.getSchema().getColumns();
        HashSet<String> names = new HashSet<String>(Arrays.asList(table.getCols()));
        names.removeAll(Arrays.asList(SchemaUtils.getNames(schema.toArray(new Column[schema
            .size()]))));
        if (!names.isEmpty()) {
          throwException(ErrorCode.COLUMN_NOT_FOUND, StringUtils.join(names, ","));
        }
      }
    }

    /**
     * Validates that the volume exists and is not a duplicate of another volume in the same group.
     *
     * @param volume
     *     the volume to validate
     * @param distinctVolumes
     *     cache of volumes already validated in the same group
     * @throws OdpsException
     */
    private void validateVolume(VolumeInfo volume,
                                Set<String> distinctVolumes) throws OdpsException {
      String volumeDesc =
          volume.getProjectName() + "." + volume.getVolumeName() + "." + volume
              .getPartSpec();
      if (distinctVolumes.contains(volumeDesc)) {
        throwException(ErrorCode.DUPLICATED_VOLUME_FOUND, volume.toString());
      }
      distinctVolumes.add(volumeDesc);
      // Check if volume spec is valid
      if (volume.getProjectName() == null) {
        volume.setProjectName(explorer.getDefaultProject());
      }
      if (volume.getVolumeName() == null || volume.getVolumeName().isEmpty()) {
        throwException(ErrorCode.MALFORMED_VOLUME_SPEC,
                       "volume name not specified");
      }
      if (volume.getPartSpec() == null || volume.getPartSpec().isEmpty()) {
        throwException(ErrorCode.MALFORMED_VOLUME_SPEC,
                       "volume partition not specified");
      }

      // Check if volume exists
      if (!explorer.existsVolume(volume.getProjectName(),
                                 volume.getVolumeName())) {
        throwException(ErrorCode.VOLUME_NOT_FOUND, volume.toString());
      }
      // Check if label is valid
      if (!between(volume.getLabel().length(), 3, 32) || !volume.getLabel()
          .matches("[A-Z,a-z,0-9,_,#,\\.,\\-]*")) {
        throwException(ErrorCode.MALFORMED_VOLUME_SPEC,
                       "illegal label " + volume.getLabel()
                       + ", expecting [A-Z,a-z,0-9,_,#,\\.,\\-]* with length of [3,32]");
      }
    }

    private void validateTables(TableInfo[] tables)
        throws OdpsException {
      Map<String, Table> distinctOutputTables = new HashMap<String, Table>();
      Set<String> labelNames = new HashSet<String>();
      for (TableInfo table : tables) {
        validateTable(table, distinctOutputTables, false);
        if (labelNames.contains(table.getLabel())) {
          throwException(ErrorCode.OUTPUT_LABEL_NOT_UNIQUE, table.getLabel());
        }
        labelNames.add(table.getLabel());
      }
    }

    private void validateVolumes(VolumeInfo[] volumes) throws OdpsException {
      Set<String> distinctVolumes = new HashSet<String>();
      Set<String> labelNames = new HashSet<String>();
      for (VolumeInfo volume : volumes) {
        validateVolume(volume, distinctVolumes);
        if (labelNames.contains(volume.getLabel())) {
          throwException(ErrorCode.VOLUME_LABEL_NOT_UNIQUE, volume.getLabel());
        }
        labelNames.add(volume.getLabel());
      }
    }

    @Override
    public void validate() throws OdpsException {
      // Validate inputs
      TableInfo[] tables = InputUtils.getTables(job);
      if (tables != null && tables.length > 0) {
        if (tables.length > 1024) {
          throwException(ErrorCode.TOO_MANY_INPUT_TABLE,
                         "Expecting no more than 1024 partitions. ");
        }
        Map<String, Table> distinctInputTables = new HashMap<String, Table>();
        for (TableInfo table : tables) {
          validateTable(table, distinctInputTables, true);
          if (distinctInputTables.size() > 64) {
            throwException(ErrorCode.TOO_MANY_INPUT_TABLE,
                           "Expecting no more than 64 distinct tables. ");
          }
        }
      }

      // Validate outputs
      tables = OutputUtils.getTables(job);
      if (tables != null && tables.length > 0) {
        validateTables(tables);
      }

      // Validate input volumes
      VolumeInfo[] volumes = InputUtils.getVolumes(job);
      if (volumes != null && volumes.length > 0) {
        if (volumes.length > 1024) {
          throwException(ErrorCode.TOO_MANY_INPUT_VOLUME,
                         "Expecting no more than 1024 partitions. ");
        }
        validateVolumes(volumes);
      }

      // Validate output volumes
      volumes = OutputUtils.getVolumes(job);
      if (volumes != null && volumes.length > 0) {
        if (volumes.length > 1024) {
          throwException(ErrorCode.TOO_MANY_OUTPUT_VOLUME,
                         "Expecting no more than 1024 partitions. ");
        }
        validateVolumes(volumes);
      }
    }

  }
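
  /*
   * For context, a minimal sketch of the input/output wiring this validator inspects, using the
   * InputUtils/OutputUtils helpers imported above. The table names, partition value and label
   * below are placeholders, not values required by this class:
   *
   *   JobConf job = new JobConf();
   *   InputUtils.addTable(
   *       TableInfo.builder().tableName("src_table").partSpec("pt=20150101").build(), job);
   *   OutputUtils.addTable(
   *       TableInfo.builder().tableName("dst_table").label("out1").build(), job);
   */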

  /**
   * Config Validator.
   *
   * Validates the job configuration.
   */
  static class ConfigValidator implements Validator {

    private JobConf job;

    public ConfigValidator(JobConf job) {
      this.job = job;
    }

    @Override
    public void validate() throws OdpsException {
      if (job.get("odps.mapred.map.class") == null) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Mapper class not specified.");
      }

      if (InputUtils.getTables(job) == null && !between(job.getNumMapTasks(), 0, 99999)) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Map tasks " + job.getNumMapTasks()
                                                 + " out of bound, should be in [0, 99999] range.");
      }

      if (!between(job.getNumReduceTasks(), 0, 99999)) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Reduce tasks " + job.getNumReduceTasks()
                                                 + " out of bound, should be in [0, 99999] range.");
      }

      if (job.getNumReduceTasks() != 0 && job.get("odps.mapred.reduce.class") == null) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Reducer class not specified.");
      }

      if (job.getNumReduceTasks() != 0 && !validateSchema(job.getMapOutputKeySchema())) {
        throwException(ErrorCode.ILLEGAL_CONFIG,
                       "Malformed map output key schema:" + job.get(
                           "odps.mapred.mapoutput.key.schema"));
      }
      if (job.getNumReduceTasks() != 0 && !validateSchema(job.getMapOutputValueSchema())) {
        throwException(ErrorCode.ILLEGAL_CONFIG,
                       "Malformed map output value schema:" + job.get(
                           "odps.mapred.mapoutput.value.schema"));
      }

      if (!between(job.getMemoryForJVM(), 256, 12 * 1024)) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Memory for jvm " + job.getMemoryForJVM()
                                                 + " out of bound, should be in [256, 12288] range.");
      }
      if (job.getOutputKeySortColumns().length != job.getOutputKeySortOrder().length) {
        throwException(
            ErrorCode.ILLEGAL_CONFIG,
            "Key sort columns length should match key sort order length. Sort columns are "
            + Arrays.toString(job.getOutputKeySortColumns()) + " but sort order is "
            + Arrays.toString(job.getOutputKeySortOrder()));
      }
      StringBuilder errorMsg = new StringBuilder();
      if (!validatePartitionColumns(job, errorMsg)) {
        throwException(ErrorCode.ILLEGAL_CONFIG,
            "Key partition columns should be inside of output key columns. " + errorMsg);
      }
      if (!between(job.getFunctionTimeout(), 1, 3600)) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Function timeout " + job.getFunctionTimeout()
                                                 + " out of bound, should be in [1, 3600] range.");
      }
      if (!between(job.getInstancePriority(), 0, 9)) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Instance priority" + job.getInstancePriority()
                                                 + " out of bound, should be in [0, 9] range.");
      }
//      if (!between((long)(job.getCombinerCacheSpillPercent()*100), 0, 100)) {
//        throwException(ErrorCode.ILLEGAL_CONFIG, "Combiner Cache Spill Percent" + job.getCombinerCacheSpillPercent()
//                                                 + " out of bound, should be in [0, 1] range.");
//      }
    }

    private boolean validatePartitionColumns(JobConf job, StringBuilder errorMsg) {
      if (job.getNumReduceTasks() > 0 && job.getPartitionerClass() == null) {
          return validateColumns(job.getPartitionColumns(), job.getMapOutputKeySchema(), errorMsg);
      }

      return true;
    }

    private boolean validateSchema(Column[] schema) {
      if (schema == null || schema.length < 1) {
        return false;
      }
      return true;
    }
  }
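
  /*
   * For context, a minimal sketch of a configuration this validator accepts. MyMapper and
   * MyReducer are hypothetical user classes; the schema strings are only examples:
   *
   *   JobConf job = new JobConf();
   *   job.setMapperClass(MyMapper.class);     // required: odps.mapred.map.class
   *   job.setReducerClass(MyReducer.class);   // required whenever reduce tasks > 0
   *   job.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
   *   job.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
   *   job.setNumReduceTasks(1);               // must lie in [0, 99999]
   */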

  /**
   * Resource Validator.
   *
   * Validates the job resources.
   */
  static class ResourceValidator implements Validator {

    private JobConf job;
    private MetaExplorer explorer;

    public ResourceValidator(JobConf job, MetaExplorer explorer) {
      this.job = job;
      this.explorer = explorer;
    }

    @Override
    public void validate() throws OdpsException {
      // Check that the number of resources is within limits
      String[] res = job.getResources();
      if (res == null || res.length <= 0) {
        return;
      }
      if (res.length > 256) {
        throwException(ErrorCode.TOO_MANY_RESOURCE_ITEMS,
                       "Acturally " + String.valueOf(res.length));
      }
    }
  }

  static class PipelineValidtor implements Validator {

    private Pipeline pipeline;

    public PipelineValidtor(JobConf job) {
      this.pipeline = Pipeline.fromJobConf(job);
    }

    @Override
    public void validate() throws OdpsException {
      if (pipeline == null) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Pipeline not specified.");
      }
      if (pipeline.getNodeNum() == 0) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "Pipeline is empty.");
      }
      if (pipeline.getFirstNode().getType().equals("reduce")) {
        throwException(ErrorCode.ILLEGAL_CONFIG, "First operator of pipeline must be mapper");
      }
      for (int i = 0; i < pipeline.getNodeNum(); i++) {
        if (i > 0 && pipeline.getNode(i).getType().equals("map")) {
          throwException(ErrorCode.ILLEGAL_CONFIG, "Operators after first node must be reducer");
        }
        if (i < pipeline.getNodeNum() - 1) {
          if (pipeline.getNode(i).getOutputKeySchema() == null) {
            throwException(ErrorCode.ILLEGAL_CONFIG, "Operator ouput key schema not set");
          }
          if (pipeline.getNode(i).getOutputValueSchema() == null) {
            throwException(ErrorCode.ILLEGAL_CONFIG, "Operator ouput value schema not set");
          }
        }
      }
      StringBuilder errorMsg = new StringBuilder();
      if (!validatePartitionColumns(pipeline, errorMsg)) {
        throwException(ErrorCode.ILLEGAL_CONFIG,
            "Key partition columns should be inside of output key columns. " + errorMsg);
      }
    }

    private boolean validatePartitionColumns(Pipeline pipeline, StringBuilder errorMsg) {
      TransformNode node = null;
      for (int i = 0; i < pipeline.getNodeNum() - 1; ++i) {
        node = pipeline.getNode(i);
        if (node.getPartitionerClass() == null
            && !validateColumns(node.getPartitionColumns(), node.getOutputKeySchema(), errorMsg)) {
          return false;
        }
      }

      return true;
    }

  }
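
  /*
   * For context, a rough sketch of a pipeline this validator would accept, assuming the
   * Pipeline builder API of the extended (MR2) model; MyMapper, MyReducer, the schema strings
   * and the JobConf variable job are placeholders:
   *
   *   Pipeline pipeline = Pipeline.builder()
   *       .addMapper(MyMapper.class)
   *           .setOutputKeySchema(SchemaUtils.fromString("word:string"))
   *           .setOutputValueSchema(SchemaUtils.fromString("count:bigint"))
   *       .addReducer(MyReducer.class)
   *       .createPipeline();
   *   Pipeline.toJobConf(job, pipeline);
   */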

  public static Validator getValidator(JobConf job, MetaExplorer explorer) {
    CompositeValidator validator = new CompositeValidator();
    if (Pipeline.fromJobConf(job) == null) {
      validator.addValidator(new ConfigValidator(job));
    } else {
      validator.addValidator(new PipelineValidtor(job));
    }
    validator.addValidator(new InputOutputValidator(job, explorer));
    validator.addValidator(new ResourceValidator(job, explorer));
    return validator;
  }

}