All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.pig.tools.pigstats.SimplePigStats Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.tools.pigstats;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.pig.ExecType;
import org.apache.pig.PigException;
import org.apache.pig.PigRunner.ReturnCode;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.NativeMapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROpPlanVisitor;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.util.PlanHelper;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.DependencyOrderWalker;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanVisitor;
import org.apache.pig.tools.pigstats.JobStats.JobState;

/**
 * SimplePigStats encapsulates the statistics collected from a running script. 
 * It includes status of the execution, the DAG of its MR jobs, as well as 
 * information about outputs and inputs of the script. 
 */
final class SimplePigStats extends PigStats {
    
    private static final Log LOG = LogFactory.getLog(SimplePigStats.class);
    
    private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";  
        
    private PigContext pigContext;
    
    private JobClient jobClient;
    
    private JobControlCompiler jcc;
    
    private JobGraph jobPlan;
    
    private Map jobMroMap;
     
    private Map mroJobMap;
    
    // successful jobs so far
    private Set jobSeen = new HashSet();
    
    private Map aliasOuputMap;
      
    private long startTime = -1;
    private long endTime = -1;
    
    private String userId;
                 
    /**
     * This class builds the job DAG from a MR plan
     */
    private class JobGraphBuilder extends MROpPlanVisitor {

        public JobGraphBuilder(MROperPlan plan) {
            super(plan, new DependencyOrderWalker(
                    plan));
            jobPlan = new JobGraph();
            mroJobMap = new HashMap();        
        }
        
        @Override
        public void visitMROp(MapReduceOper mr) throws VisitorException {
            JobStats js = new JobStats(
                    mr.getOperatorKey().toString(), jobPlan);            
            jobPlan.add(js);
            List preds = getPlan().getPredecessors(mr);
            if (preds != null) {
                for (MapReduceOper pred : preds) {
                    JobStats jpred = mroJobMap.get(pred);
                    if (!jobPlan.isConnected(jpred, js)) {
                        jobPlan.connect(jpred, js);
                    }
                }
            }
            mroJobMap.put(mr, js);            
        }        
    }
    
    /**
     * This class prints a JobGraph
     */
    static class JobGraphPrinter extends PlanVisitor {
        
        StringBuffer buf;

        protected JobGraphPrinter(OperatorPlan plan) {
            super(plan,
                    new org.apache.pig.newplan.DependencyOrderWalker(
                            plan));
            buf = new StringBuffer();
        }
        
        public void visit(JobStats op) throws FrontendException {
            buf.append(op.getJobId());
            List succs = plan.getSuccessors(op);
            if (succs != null) {
                buf.append("\t->\t");
                for (Operator p : succs) {                  
                    buf.append(((JobStats)p).getJobId()).append(",");
                }               
            }
            buf.append("\n");
        }
        
        @Override
        public String toString() {
            buf.append("\n");
            return buf.toString();
        }        
    }
    
    @Override
    public List getAllErrorMessages() {
        throw new UnsupportedOperationException();
    }

    @Override
    public Map> getAllStats() {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean isEmbedded() {
        return false;
    }
    
    @Override
    public boolean isSuccessful() {
        return (getNumberJobs()==0 && returnCode==ReturnCode.UNKNOWN
                || returnCode == ReturnCode.SUCCESS);
    }
 
    @Override
    public Properties getPigProperties() {
        if (pigContext == null) return null;
        return pigContext.getProperties();
    }

    @Override
    public JobGraph getJobGraph() {
        return jobPlan;
    }
 
    @Override
    public List getOutputLocations() {
        ArrayList locations = new ArrayList();
        for (OutputStats output : getOutputStats()) {
            locations.add(output.getLocation());
        }
        return Collections.unmodifiableList(locations);
    }
 
    @Override
    public List getOutputNames() {
        ArrayList names = new ArrayList();
        for (OutputStats output : getOutputStats()) {            
            names.add(output.getName());
        }
        return Collections.unmodifiableList(names);
    }
 
    @Override
    public long getNumberBytes(String location) {
        if (location == null) return -1;
        String name = new Path(location).getName();
        long count = -1;
        for (OutputStats output : getOutputStats()) {
            if (name.equals(output.getName())) {
                count = output.getBytes();
                break;
            }
        }
        return count;
    }

    @Override
    public long getNumberRecords(String location) {
        if (location == null) return -1;
        String name = new Path(location).getName();
        long count = -1;
        for (OutputStats output : getOutputStats()) {
            if (name.equals(output.getName())) {
                count = output.getNumberRecords();
                break;
            }
        }
        return count;
    }
 
    @Override
    public String getOutputAlias(String location) {
        if (location == null) return null;
        String name = new Path(location).getName();
        String alias = null;
        for (OutputStats output : getOutputStats()) {
            if (name.equals(output.getName())) {
                alias = output.getAlias();
                break;
            }
        }
        return alias;
    }

    @Override
    public long getSMMSpillCount() {
        Iterator it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            ret += it.next().getSMMSpillCount();
        }
        return ret;
    }

    @Override
    public long getProactiveSpillCountObjects() {
        Iterator it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {            
            ret += it.next().getProactiveSpillCountObjects();
        }
        return ret;
    }
    
    @Override
    public long getProactiveSpillCountRecords() {
        Iterator it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {            
            ret += it.next().getProactiveSpillCountRecs();
        }
        return ret;
    }
    
    @Override
    public long getBytesWritten() {
        Iterator it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            long n = it.next().getBytesWritten();
            if (n > 0) ret += n;
        }
        return ret;
    }
    
    @Override
    public long getRecordWritten() {
        Iterator it = jobPlan.iterator();
        long ret = 0;
        while (it.hasNext()) {
            long n = it.next().getRecordWrittern();
            if (n > 0) ret += n;
        }
        return ret;
    }
   
    @Override
    public String getScriptId() {
        return ScriptState.get().getId();
    }
    
    @Override
    public String getFeatures() {
        return ScriptState.get().getScriptFeatures();
    }
    
    @Override
    public long getDuration() {
        return (startTime > 0 && endTime > 0) ? (endTime - startTime) : -1;
    }
    
    @Override
    public int getNumberJobs() {
        return jobPlan.size();
    }
        
    @Override
    public List getOutputStats() {
        List outputs = new ArrayList();
        Iterator iter = jobPlan.iterator();
        while (iter.hasNext()) {
            for (OutputStats os : iter.next().getOutputs()) {
                outputs.add(os);
            }
        }        
        return Collections.unmodifiableList(outputs);       
    }
    
    @Override
    public OutputStats result(String alias) {
        if (aliasOuputMap == null) {
            aliasOuputMap = new HashMap();
            Iterator iter = jobPlan.iterator();
            while (iter.hasNext()) {
                for (OutputStats os : iter.next().getOutputs()) {
                    String a = os.getAlias();
                    if (a == null || a.length() == 0) {
                        LOG.warn("Output alias isn't avalable for " + os.getLocation());
                        continue;
                    }
                    aliasOuputMap.put(a, os);
                }
            }    
        }
        return aliasOuputMap.get(alias);
    }
    
    @Override
    public List getInputStats() {
        List inputs = new ArrayList();
        Iterator iter = jobPlan.iterator();
        while (iter.hasNext()) {
            for (InputStats is : iter.next().getInputs()) {
                inputs.add(is);
            }
        }        
        return Collections.unmodifiableList(inputs);       
    }
    
    SimplePigStats() {        
        jobMroMap = new HashMap(); 
        jobPlan = new JobGraph();
    }
    
    void start(PigContext pigContext, JobClient jobClient, 
            JobControlCompiler jcc, MROperPlan mrPlan) {
        
        if (pigContext == null || jobClient == null || jcc == null) {
            LOG.warn("invalid params: " + pigContext + jobClient + jcc);
            return;
        }
        
        this.pigContext = pigContext;
        this.jobClient = jobClient;
        this.jcc = jcc;         
        
        // build job DAG with job ids assigned to null 
        try {
            new JobGraphBuilder(mrPlan).visit();
        } catch (VisitorException e) {
            LOG.warn("unable to build job plan", e);
        }
        
        startTime = System.currentTimeMillis();
        userId = System.getProperty("user.name");
    }
    
    void stop() {
        endTime = System.currentTimeMillis();
        int m = getNumberSuccessfulJobs();
        int n = getNumberFailedJobs();
 
        if (n == 0 && m > 0 && m == jobPlan.size()) {
            returnCode = ReturnCode.SUCCESS;
        } else if (m > 0 && m < jobPlan.size()) {
            returnCode = ReturnCode.PARTIAL_FAILURE;
        } else {
            returnCode = ReturnCode.FAILURE;
        }
    }
    
    boolean isInitialized() {
        return startTime > 0;
    }

    @Override
    public JobClient getJobClient() {
        return jobClient;
    }
    
    JobControlCompiler getJobControlCompiler() {
        return jcc;
    }
        
    @SuppressWarnings("deprecation")
    JobStats addJobStats(Job job) {
        MapReduceOper mro = jobMroMap.get(job);
         
        if (mro == null) {
            LOG.warn("unable to get MR oper for job: " + job.toString());
            return null;
        }
        JobStats js = mroJobMap.get(mro);
        
        JobID jobId = job.getAssignedJobID();
        js.setId(jobId);
        js.setAlias(mro);
        js.setConf(job.getJobConf());
        return js;
    }
    
    @SuppressWarnings("deprecation")
    public JobStats addJobStatsForNative(NativeMapReduceOper mr) {
        JobStats js = mroJobMap.get(mr);
        js.setId(new JobID(mr.getJobId(), NativeMapReduceOper.getJobNumber())); 
        js.setAlias(mr);
        
        return js;
    }
            
    void display() {
        if (returnCode == ReturnCode.UNKNOWN) {
            LOG.warn("unknown return code, can't display the results");
            return;
        }
        if (pigContext == null) {
            LOG.warn("unknown exec type, don't display the results");
            return;
        }
 
        // currently counters are not working in local mode - see PIG-1286
        ExecType execType = pigContext.getExecType();
        if (execType == ExecType.LOCAL) {
            LOG.info("Detected Local mode. Stats reported below may be incomplete");
        }
        
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder();
        sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
        sb.append(getHadoopVersion()).append("\t").append(getPigVersion()).append("\t")
            .append(userId).append("\t")
            .append(sdf.format(new Date(startTime))).append("\t")
            .append(sdf.format(new Date(endTime))).append("\t")
            .append(getFeatures()).append("\n");
        sb.append("\n");
        if (returnCode == ReturnCode.SUCCESS) {
            sb.append("Success!\n");
        } else if (returnCode == ReturnCode.PARTIAL_FAILURE) {
            sb.append("Some jobs have failed! Stop running all dependent jobs\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");
                
        if (returnCode == ReturnCode.SUCCESS 
                || returnCode == ReturnCode.PARTIAL_FAILURE) {            
            sb.append("Job Stats (time in seconds):\n");
            if (execType == ExecType.LOCAL) {
                sb.append(JobStats.SUCCESS_HEADER_LOCAL).append("\n");
            } else {
                sb.append(JobStats.SUCCESS_HEADER).append("\n");
            }
            List arr = jobPlan.getSuccessfulJobs();
            for (JobStats js : arr) {                
                sb.append(js.getDisplayString(execType == ExecType.LOCAL));
            }
            sb.append("\n");
        }
        if (returnCode == ReturnCode.FAILURE
                || returnCode == ReturnCode.PARTIAL_FAILURE) {
            sb.append("Failed Jobs:\n");
            sb.append(JobStats.FAILURE_HEADER).append("\n");
            List arr = jobPlan.getFailedJobs();
            for (JobStats js : arr) {   
                sb.append(js.getDisplayString(execType == ExecType.LOCAL));
            }
            sb.append("\n");
        }
        sb.append("Input(s):\n");
        for (InputStats is : getInputStats()) {
            sb.append(is.getDisplayString(execType == ExecType.LOCAL));
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats ds : getOutputStats()) {
            sb.append(ds.getDisplayString(execType == ExecType.LOCAL));
        }
        
        if (execType != ExecType.LOCAL) {
            sb.append("\nCounters:\n");
            sb.append("Total records written : " + getRecordWritten()).append("\n");
            sb.append("Total bytes written : " + getBytesWritten()).append("\n");
            sb.append("Spillable Memory Manager spill count : "
                    + getSMMSpillCount()).append("\n");
            sb.append("Total bags proactively spilled: " 
                    + getProactiveSpillCountObjects()).append("\n");
            sb.append("Total records proactively spilled: " 
                    + getProactiveSpillCountRecords()).append("\n");
        }
        
        sb.append("\nJob DAG:\n").append(jobPlan.toString());
        
        LOG.info("Script Statistics: \n" + sb.toString());
    }
    
    void mapMROperToJob(MapReduceOper mro, Job job) {
        if (mro == null) {
            LOG.warn("null MR operator");
        } else {
            JobStats js = mroJobMap.get(mro);
            if (js == null) {
                LOG.warn("null job stats for mro: " + mro.getOperatorKey());
            } else {
                jobMroMap.put(job, mro);
            }
        }
    }   
    
    void setBackendException(Job job, Exception e) {
        if (e instanceof PigException) {
            LOG.error("ERROR " + ((PigException)e).getErrorCode() + ": " 
                    + e.getLocalizedMessage());
        } else if (e != null) {
            LOG.error("ERROR: " + e.getLocalizedMessage());
        }
        
        if (job.getAssignedJobID() == null || e == null) {
            LOG.debug("unable to set backend exception");
            return;
        }
        String id = job.getAssignedJobID().toString();
        Iterator iter = jobPlan.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
            if (id.equals(js.getJobId())) {
                js.setBackendException(e);
                break;
            }
        }
    }
    
    PigContext getPigContext() {
        return pigContext;
    }
    
    int getNumberSuccessfulJobs() {
        Iterator iter = jobPlan.iterator();
        int count = 0;
        while (iter.hasNext()) {
            if (iter.next().getState() == JobState.SUCCESS) count++; 
        }
        return count;
    }
    
    int getNumberFailedJobs() {
        Iterator iter = jobPlan.iterator();
        int count = 0;
        while (iter.hasNext()) {
            if (iter.next().getState() == JobState.FAILED) count++; 
        }
        return count;
    }
    
    boolean isJobSeen(Job job) {
        return !jobSeen.add(job);    
    }
    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy