All downloads are free. Search and download functionality uses the official Maven repository.

hivemall.utils.hadoop.HadoopUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.utils.hadoop;

import hivemall.utils.lang.RandomUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.MapredContextAccessor;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskID;

public final class HadoopUtils {

    private HadoopUtils() {}

    public static BufferedReader getBufferedReader(File file) throws IOException {
        MapredContext context = MapredContextAccessor.get();
        return getBufferedReader(file, context);
    }

    public static BufferedReader getBufferedReader(File file, MapredContext context)
            throws IOException {
        URI fileuri = file.toURI();
        Path path = new Path(fileuri);

        Configuration conf = context.getJobConf();
        CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
        CompressionCodec codec = ccf.getCodec(path);

        if (codec == null) {
            return new BufferedReader(new FileReader(file));
        } else {
            Decompressor decompressor = CodecPool.getDecompressor(codec);
            FileInputStream fis = new FileInputStream(file);
            CompressionInputStream cis = codec.createInputStream(fis, decompressor);
            BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis), decompressor);
            return br;
        }
    }

    private static final class BufferedReaderExt extends BufferedReader {

        private Decompressor decompressor;

        BufferedReaderExt(Reader in, Decompressor decompressor) {
            super(in);
            this.decompressor = decompressor;
        }

        @Override
        public void close() throws IOException {
            super.close();
            if (decompressor != null) {
                CodecPool.returnDecompressor(decompressor);
                this.decompressor = null;
            }
        }

    }

    @Nonnull
    public static String getJobId() {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx == null) {
            throw new IllegalStateException("MapredContext is not set");
        }
        JobConf conf = ctx.getJobConf();
        if (conf == null) {
            throw new IllegalStateException("JobConf is not set");
        }
        String jobId = conf.get("mapred.job.id");
        if (jobId == null) {
            jobId = conf.get("mapreduce.job.id");
            if (jobId == null) {
                String queryId = conf.get("hive.query.id");
                if (queryId != null) {
                    return queryId;
                }
                String taskidStr = conf.get("mapred.task.id");
                if (taskidStr == null) {
                    throw new IllegalStateException("Cannot resolve jobId: " + toString(conf));
                }
                jobId = getJobIdFromTaskId(taskidStr);
            }
        }
        return jobId;
    }

    @Nonnull
    public static String getJobIdFromTaskId(@Nonnull String taskidStr) {
        if (!taskidStr.startsWith("task_")) {// workaround for Tez
            taskidStr = taskidStr.replace("task", "task_");
            taskidStr = taskidStr.substring(0, taskidStr.lastIndexOf('_'));
        }
        TaskID taskId = TaskID.forName(taskidStr);
        JobID jobId = taskId.getJobID();
        return jobId.toString();
    }

    public static int getTaskId() {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx == null) {
            throw new IllegalStateException("MapredContext is not set");
        }
        JobConf jobconf = ctx.getJobConf();
        if (jobconf == null) {
            throw new IllegalStateException("JobConf is not set");
        }
        int taskid = jobconf.getInt("mapred.task.partition", -1);
        if (taskid == -1) {
            taskid = jobconf.getInt("mapreduce.task.partition", -1);
            if (taskid == -1) {
                throw new IllegalStateException(
                    "Both mapred.task.partition and mapreduce.task.partition are not set: "
                            + toString(jobconf));
            }
        }
        return taskid;
    }

    public static int getTaskId(final int defaultValue) {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx == null) {
            return defaultValue;
        }
        JobConf jobconf = ctx.getJobConf();
        if (jobconf == null) {
            return defaultValue;
        }
        int taskid = jobconf.getInt("mapred.task.partition", -1);
        if (taskid == -1) {
            taskid = jobconf.getInt("mapreduce.task.partition", -1);
            if (taskid == -1) {
                return defaultValue;
            }
        }
        return taskid;
    }

    public static String getUniqueTaskIdString() {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx != null) {
            JobConf jobconf = ctx.getJobConf();
            if (jobconf != null) {
                int taskid = jobconf.getInt("mapred.task.partition", -1);
                if (taskid == -1) {
                    taskid = jobconf.getInt("mapreduce.task.partition", -1);
                }
                if (taskid != -1) {
                    return String.valueOf(taskid);
                }
            }
        }
        return RandomUtils.getUUID();
    }

    @Nonnull
    public static String toString(@Nonnull JobConf jobconf) {
        return toString(jobconf, null);
    }

    @Nonnull
    public static String toString(@Nonnull JobConf jobconf, @Nullable String regexKey) {
        final Iterator> itor = jobconf.iterator();
        boolean hasNext = itor.hasNext();
        if (!hasNext) {
            return "";
        }
        final StringBuilder buf = new StringBuilder(1024);
        do {
            Entry e = itor.next();
            hasNext = itor.hasNext();
            String k = e.getKey();
            if (k == null) {
                continue;
            }
            if (regexKey == null || k.matches(regexKey)) {
                String v = e.getValue();
                buf.append(k).append('=').append(v);
                if (hasNext) {
                    buf.append(',');
                }
            }
        } while (hasNext);
        return buf.toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy