All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pig.tools.pigstats.PigStatsUtil Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.tools.pigstats;

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
import org.apache.pig.tools.pigstats.mapreduce.SimplePigStats;

/**
 * A utility class for Pig Statistics
 */
public class PigStatsUtil {

    public static final String MAP_INPUT_RECORDS
            = "MAP_INPUT_RECORDS";
    public static final String MAP_OUTPUT_RECORDS
            = "MAP_OUTPUT_RECORDS";
    public static final String REDUCE_INPUT_RECORDS
            = "REDUCE_INPUT_RECORDS";
    public static final String REDUCE_OUTPUT_RECORDS
            = "REDUCE_OUTPUT_RECORDS";
    public static final String HDFS_BYTES_WRITTEN
            = "HDFS_BYTES_WRITTEN";
    public static final String HDFS_BYTES_READ
            = "HDFS_BYTES_READ";

    public static final String MULTI_INPUTS_RECORD_COUNTER
            = "Input records from ";
    public static final String MULTI_INPUTS_COUNTER_GROUP
            = "MultiInputCounters";
    public static final String MULTI_STORE_RECORD_COUNTER
            = "Output records in ";
    public static final String MULTI_STORE_COUNTER_GROUP
            = "MultiStoreCounters";

    /**
     * @deprecated use {@link org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil#TASK_COUNTER_GROUP} instead.
     */
    @Deprecated
    public static final String TASK_COUNTER_GROUP
            = "org.apache.hadoop.mapred.Task$Counter";

    /**
     * @deprecated use {@link org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil#FS_COUNTER_GROUP} instead.
     */
    @Deprecated
    public static final String FS_COUNTER_GROUP
            = HadoopShims.getFsCounterGroupName();

    /**
     * Returns an empty PigStats object Use of this method is not advised as it
     * will return the MR execution engine version of PigStats by default, and
     * is not necessarily empty depending on the timing.
     *
     * @return an empty PigStats object
     */
    @Deprecated
    public static PigStats getEmptyPigStats() {
        return PigStats.start(new SimplePigStats());
    }

    /**
     * Returns the PigStats with the given return code
     *
     * @param code the return code
     * @return the PigStats with the given return code
     */
    public static PigStats getPigStats(int code) {
        PigStats ps = PigStats.get();
        if (ps == null) {
            ps = PigStats.start(new EmptyPigStats());
        }
        ps.setReturnCode(code);
        return ps;
    }

    public static void setErrorMessage(String msg) {
        PigStats ps = PigStats.get();
        if (ps == null) {
            ps = PigStats.start(new EmptyPigStats());
        }
        ps.setErrorMessage(msg);
    }

    public static void setErrorCode(int code) {
        PigStats ps = PigStats.get();
        if (ps == null) {
            ps = PigStats.start(new EmptyPigStats());
        }
        ps.setErrorCode(code);
    }

    public static void setErrorThrowable(Throwable t) {
        PigStats ps = PigStats.get();
        if (ps == null) {
            ps = PigStats.start(new EmptyPigStats());
        }
        ps.setErrorThrowable(t);
    }

    private static Pattern pattern = Pattern.compile("tmp(-)?[\\d]{1,10}$");

    public static boolean isTempFile(String fileName) {
        Matcher result = pattern.matcher(fileName);
        return result.find();
    }

    public static void setStatsMap(Map> statsMap) {
        PigStats.start(new EmbeddedPigStats(statsMap));
    }

    /**
     * Returns the counter name for the given input file name
     *
     * @param fname the input file name
     * @return the counter name
     */
    public static String getMultiInputsCounterName(String fname, int index) {
        String shortName = getShortName(fname);
        return (shortName == null) ? null
                : MULTI_INPUTS_RECORD_COUNTER + "_" + index + "_" + shortName;
    }

    /**
     * Returns the counter name for the given {@link POStore}
     *
     * @param store the POStore
     * @return the counter name
     */
    public static String getMultiStoreCounterName(POStore store) {
        String shortName = getShortName(store.getSFile().getFileName());
        return (shortName == null) ? null
                : MULTI_STORE_RECORD_COUNTER + "_" + store.getIndex() + "_" + shortName;
    }

    // Restrict total string size of a counter name to 64 characters.
    // Leave 24 characters for prefix string.
    private static final int COUNTER_NAME_LIMIT = 40;
    private static final String SEPARATOR = "/";
    private static final String SEMICOLON = ";";

    private static String getShortName(String uri) {
        int scolon = uri.indexOf(SEMICOLON);
        int slash;
        if (scolon!=-1) {
            slash = uri.lastIndexOf(SEPARATOR, scolon);
        } else {
            slash = uri.lastIndexOf(SEPARATOR);
        }
        String shortName = null;
        if (scolon==-1) {
            shortName = uri.substring(slash+1);
        }
        if (slash < scolon) {
            shortName = uri.substring(slash+1, scolon);
        }
        if (shortName != null && shortName.length() > COUNTER_NAME_LIMIT) {
            shortName = shortName.substring(shortName.length()
                    - COUNTER_NAME_LIMIT);
        }
        return shortName;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy