All downloads are free. Search and download functionality uses the official Maven repository.

com.manganit.half.client.PigClient Maven / Gradle / Ivy

There is a newer version: 0.2.
Show the newest version.
/*
 * Copyright 2017 Manganit.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.manganit.half.client;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.tools.pigstats.InputStats;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.PigStats.JobGraph;

import org.apache.log4j.Logger;
import java.net.URL;
import java.net.URLClassLoader;

/**
 *
 * @author Damien Claveau
 *
 */

/**
 * Thin wrapper around {@link PigServer} that runs a single Pig Latin script
 * in MapReduce mode, registers the jars it depends on, and logs the full
 * {@link PigStats} tree (global, per-job, input/output) after execution.
 *
 * <p>Not thread-safe: one instance drives one script execution at a time.</p>
 */
public class PigClient {

  private final Logger logger;

  /** Separator used when several jar-name prefixes are packed into one string. */
  private final static String libSeparator = ";";

  private PigServer pigServer;

  /** Path of the Pig Latin script to execute. */
  private final String scriptName;

  /** Parameter-name -> value substitutions for the script (mutually exclusive with {@link #propertyFiles}). */
  private Map propertyFileParameters;

  /** Paths of parameter files for the script (mutually exclusive with {@link #propertyFileParameters}). */
  private List propertyFiles;

  /** One {@link PigStats} per executed job, populated by {@link #run}. */
  private List<PigStats> pigStatistics;

  /**
   * Builds a client whose script parameters come from an in-memory map.
   *
   * @param propertyFileParameters parameter substitutions passed to
   *        {@code PigServer.registerScript(String, Map)}
   * @param scriptName path of the Pig script to run
   * @param processId  caller-supplied identifier; currently unused
   * @param workflowId caller-supplied identifier; currently unused
   */
  public PigClient(Map propertyFileParameters,
                   String scriptName, String processId, String workflowId) {
      this.logger = Logger.getLogger(PigClient.class);
      this.scriptName = scriptName;
      this.propertyFileParameters = propertyFileParameters;
  }

  /**
   * Builds a client whose script parameters come from parameter files.
   *
   * @param propertyFiles paths passed to {@code PigServer.registerScript(String, List)}
   * @param scriptName path of the Pig script to run
   * @param processId  caller-supplied identifier; currently unused
   * @param workflowId caller-supplied identifier; currently unused
   */
  public PigClient(List propertyFiles,
                   String scriptName, String processId, String workflowId) {
      this.logger = Logger.getLogger(PigClient.class);
      this.scriptName = scriptName;
      this.propertyFiles = propertyFiles;
  }

  /** Splits a {@code ;}-separated list of jar-name prefixes into individual entries. */
  private List<String> getJars(String jarsNames) {
      return Arrays.asList(jarsNames.split(libSeparator));
  }

  // =========================================================================================================
  // Method that actually runs the script

  /*
  prop.setProperty("mapreduce.job.queuename", queue);
  prop.setProperty("hive.metastore.uris", hiveMetastore);
  */

  /**
   * Runs the configured script end-to-end: starts a PigServer, executes the
   * script in batch mode, logs statistics and total wall-clock time, then
   * shuts the server down.
   *
   * @param pigServerProps properties forwarded to the {@link PigServer}
   * @param jobName        name assigned to the MapReduce job(s)
   * @param jarPath        {@code ;}-separated jar-name prefixes to register
   * @throws Exception if any Pig job finishes in FAILED state, or on any I/O error
   */
  public void run(Properties pigServerProps, String jobName, String jarPath) throws IOException, Exception {
      logger.debug("ENTER>>run");
      // Start of execution
      Date dateBegin = new Date();

      // Remote Pig instance, Create the MapReduce job
      start(pigServerProps, jobName, jarPath);
      executeScript();
      stop();

      // Total process time
      Date dateEnd = new Date();
      long execution_time = dateEnd.getTime() - dateBegin.getTime();
      logger.debug("Pig Execution time (s): " + execution_time / 1000);
      logger.debug("EXIT>>run");
  }

  /** Creates the PigServer and switches it to batch mode; wraps checked failures as unchecked. */
  private void start(Properties prop, String jobName, String jarPath) {
      try {
          pigServer = createPig(prop, jobName, jarPath);
          pigServer.setBatchOn();
      } catch (IOException ex) {
          // ExecException extends IOException, so a single catch covers both.
          throw new RuntimeException("Cannot create pig server", ex);
      }
  }

  /**
   * Registers the script (with either the parameter map or the parameter
   * files, whichever was supplied), executes the batch, collects statistics
   * and fails fast on the first FAILED job.
   */
  private void executeScript() throws IOException, Exception {

      logger.debug("Loading the script " + scriptName);

      // Load the script.
      // BUGFIX: the original tested "== null" and therefore always registered
      // the script with a null parameter map; use the map only when present.
      if (this.propertyFileParameters != null) {
          pigServer.registerScript(scriptName, this.propertyFileParameters);
      } else {
          pigServer.registerScript(scriptName, this.propertyFiles);
      }

      logger.debug("Script loaded");

      try {
          logger.debug("Executing the script");
          List<ExecJob> jobs = pigServer.executeBatch();
          logger.debug("script executed");
          logger.debug("End of executeBatch");

          // Showing the stats
          pigStatistics = new ArrayList<PigStats>();
          logger.debug("Number of executed jobs : " + jobs.size());
          int count = 0;
          for (ExecJob execJob : jobs) {

              PigStats pigStats = execJob.getStatistics();
              pigStatistics.add(pigStats);

              logAllStats(pigStats, count++);
              logger.debug(System.getProperty("line.separator"));
              logger.debug("End of statistics");

              if (execJob.getStatus() == ExecJob.JOB_STATUS.FAILED) {
                  throw new Exception("Pig execution failed");
              } else {
                  logger.debug(execJob.getStatus().toString());
              }
          }
      } finally {
          // Always release batch resources, even when a job failed.
          pigServer.discardBatch();
      }
  }

  /** Shuts the PigServer down if it is running; safe to call repeatedly. */
  public void stop() {
      // close pig
      if (null != pigServer) {
          pigServer.shutdown();
          pigServer = null;
      }
  }

  /** Instantiates a MapReduce-mode PigServer and registers the required jars. */
  private PigServer createPig(Properties prop, String jobName, String jarPath
  ) throws ExecException, IOException {

      logger.debug("Executing Pig in cluster mode");

      prop.setProperty("stop.on.failure", "true");
      prop.setProperty("pig.noSplitCombination", "true");

      pigServer = new PigServer(ExecType.MAPREDUCE, prop);
      pigServer.setJobName(jobName);

      registerJar(jarPath);
      logger.debug("PigServer successfully instantiated");

      return pigServer;
  }

  /**
   * Registers every jar whose file name starts with one of the prefixes in
   * {@code jarPath}, looking first in the current working directory and then
   * on the system classpath.
   */
  private void registerJar(String jarPath) throws IOException {

      List<String> jarPrefixes = getJars(jarPath);

      // Hive jars, needed for the slave nodes.
      // listFiles() returns null when "." is not readable as a directory.
      File[] filesList = new File(".").listFiles();
      if (filesList != null) {
          for (File file : filesList) {
              if (file.isFile()) {
                  String filename = file.getName();
                  for (String jarPrefix : jarPrefixes) {
                      if (filename.startsWith(jarPrefix)) {
                          pigServer.registerJar(filename);
                          logger.debug("Registered Jar : " + filename);
                          System.out.println("Registered Jar : " + filename);
                      }
                  }
              }
          }
      }

      // From classpath. On Java 9+ the system class loader is no longer a
      // URLClassLoader, so guard the cast instead of risking a ClassCastException.
      ClassLoader sysClassLoader = ClassLoader.getSystemClassLoader();
      if (sysClassLoader instanceof URLClassLoader) {
          for (URL url : ((URLClassLoader) sysClassLoader).getURLs()) {
              String filename = new File(url.getFile()).getName();
              for (String jarPrefix : jarPrefixes) {
                  if (filename.startsWith(jarPrefix)) {
                      pigServer.registerJar(url.getFile());
                      logger.debug("Registered Jar : " + url.getFile());
                      System.out.println("Registered Jar : " + url.getFile());
                  }
              }
          }
      }
  }

  /** Logs the execution plan, global stats and per-job stats of one PigStats. */
  private void logAllStats(PigStats pigStats, int count) {
      logger.debug("Job number " + count);
      logger.debug("This job contains " + pigStats.getNumberJobs()
              + " smaller jobs");

      logger.debug("--------------- EXECUTION PLAN ---------------");
      JobGraph graph = pigStats.getJobGraph();
      logger.debug(graph.toString());

      logger.debug("");
      logger.debug("---------------- GLOBAL STATS ----------------");
      logAllPigStats(pigStats);

      logger.debug("");
      logger.debug("----------------- JOB STATS ------------------");
      for (JobStats jobStat : graph.getJobList()) {
          printSingleJobStats(jobStat);
      }
  }

  /** Logs the script-level counters plus one section per input and output. */
  private void logAllPigStats(PigStats pigStats) {
      logger.debug("BYTES_WRITTEN: "
              + Long.toString(pigStats.getBytesWritten()));
      logger.debug("DURATION (ms): " + Long.toString(pigStats.getDuration()));
      logger.debug("ERROR_CODE: " + Long.toString(pigStats.getErrorCode()));
      logger.debug("ERROR_MESSAGE: " + pigStats.getErrorMessage());
      logger.debug("FEATURES: " + pigStats.getFeatures());
      logger.debug("HADOOP_VERSION: " + pigStats.getHadoopVersion());
      logger.debug("NUMBER_JOBS: " + Long.toString(pigStats.getNumberJobs()));
      logger.debug("PIG_VERSION: " + pigStats.getPigVersion());
      logger.debug("PROACTIVE_SPILL_COUNT_OBJECTS: "
              + Long.toString(pigStats.getProactiveSpillCountObjects()));
      logger.debug("PROACTIVE_SPILL_COUNT_RECORDS: "
              + Long.toString(pigStats.getProactiveSpillCountRecords()));
      logger.debug("RECORD_WRITTEN: "
              + Long.toString(pigStats.getRecordWritten()));
      logger.debug("RETURN_CODE: " + Long.toString(pigStats.getReturnCode()));
      logger.debug("SCRIPT_ID: " + pigStats.getScriptId());
      logger.debug("SMM_SPILL_COUNT: "
              + Long.toString(pigStats.getSMMSpillCount()));
      logger.info("INPUT STATS: ");
      int number = 0;
      for (InputStats inputStats : pigStats.getInputStats()) {
          number++;
          logger.info("------------------");
          logger.info("INPUT NAME " + number + ": " + inputStats.getName());
          logger.debug("INPUT LOCATION " + number + ": " + inputStats.getLocation());
          logger.info("INPUT RECORDS " + number + ": " + inputStats.getNumberRecords());
          logger.info("INPUT BYTES " + number + ": " + Long.toString(inputStats.getBytes()));
      }
      number = 0;
      logger.info("OUTPUT STATS: ");
      for (OutputStats outputStats : pigStats.getOutputStats()) {
          number++;
          logger.info("------------------");
          logger.info("OUTPUT NAME " + number + ": " + outputStats.getName());
          logger.debug("OUTPUT LOCATION " + number + ": " + outputStats.getLocation());
          logger.info("OUTPUT RECORDS " + number + ": " + outputStats.getNumberRecords());
          logger.info("OUTPUT BYTES " + number + ": " + Long.toString(outputStats.getBytes()));
      }
  }

  // =========================================================================================================
  // Stats for a single job

  /**
   * Logs every counter exposed by one {@link JobStats}.
   * Note: getAvgREduceTime, getRecordWrittern and getProactiveSpillCountRecs
   * are the actual (misspelled) Apache Pig API method names.
   */
  private void printSingleJobStats(JobStats jobStats) {
      logger.debug(System.getProperty("line.separator"));
      logger.debug("Alias: " + jobStats.getAlias());

      Exception e = jobStats.getException();
      if (e != null) {
          logger.debug("Exception stack: " + e.getStackTrace());
      }

      String err = jobStats.getErrorMessage();
      if (err != null && !err.isEmpty()) {
          logger.debug("Error message: " + err);
      }

      logger.debug("HDFS_BYTES_WRITTEN: " + jobStats.getHdfsBytesWritten());
      logger.debug("MAP_INPUT_RECORDS: " + jobStats.getMapInputRecords());
      logger.debug("MAP_OUTPUT_RECORDS: " + jobStats.getMapOutputRecords());
      logger.debug("REDUCE_INPUT_RECORDS: "
              + jobStats.getReduceInputRecords());
      logger.debug("REDUCE_OUTPUT_RECORDS: "
              + jobStats.getReduceOutputRecords());
      logger.debug("AVG_MAP_TIME (ms): " + jobStats.getAvgMapTime());
      logger.debug("AVG_REDUCE_TIME (ms): " + jobStats.getAvgREduceTime());
      logger.debug("BYTES_WRITTEN: " + jobStats.getBytesWritten());
      logger.debug("ERROR_MESSAGE: " + jobStats.getErrorMessage());
      logger.debug("FEATURE: " + jobStats.getFeature());
      logger.debug("JOB_ID: " + jobStats.getJobId());
      logger.debug("MAX_MAP_TIME (ms): "
              + Long.toString(jobStats.getMaxMapTime()));
      logger.debug("MIN_MAP_TIME (ms): "
              + Long.toString(jobStats.getMinMapTime()));
      logger.debug("MAX_REDUCE_TIME (ms): "
              + Long.toString(jobStats.getMaxReduceTime()));
      logger.debug("MIN_REDUCE_TIME (ms): "
              + Long.toString(jobStats.getMinReduceTime()));
      logger.debug("NUMBER_MAPS: " + Long.toString(jobStats.getNumberMaps()));
      logger.debug("NUMBER_REDUCESS: "
              + Long.toString(jobStats.getNumberReduces()));
      logger.debug("PROACTIVE_SPILL_COUNT_OBJECTS: "
              + Long.toString(jobStats.getProactiveSpillCountObjects()));
      logger.debug("PROACTIVE_SPILL_COUNT_RECSS: "
              + Long.toString(jobStats.getProactiveSpillCountRecs()));
      logger.debug("RECORD_WRITTENS: "
              + Long.toString(jobStats.getRecordWrittern()));
      logger.debug("SMMS_SPILL_COUNTS: "
              + Long.toString(jobStats.getSMMSpillCount()));

      logger.debug(System.getProperty("line.separator"));
  }

  /**
   * @return the {@link PigStats} collected by the last {@link #run}, or
   *         {@code null} if no script has been executed yet
   */
  public List<PigStats> getStatistics() {
      return pigStatistics;
  }

  }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy