All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ml.shifu.guagua.hadoop.io.GuaguaInputSplit Maven / Gradle / Ivy

/*
 * Copyright [2013-2014] PayPal Software Foundation
 *  
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.guagua.hadoop.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * {@link InputSplit} implementation in guagua. If a mapper's split has {@link GuaguaInputSplit#isMaster} set to
 * {@code true}, that mapper is the master, and the master's FileSplit is {@code null}.
 */
public class GuaguaInputSplit extends InputSplit implements Writable {

    /**
     * Whether the input split is master split.
     */
    private boolean isMaster;

    /**
     * File splits used for that mapper task. For a master task it is normally {@code null}. An array is used so that
     * guagua can combine several small files into one split.
     */
    private FileSplit[] fileSplits;

    /**
     * Arbitrary objects to store extra information for each file split. This field is not serialized by
     * {@link #write(DataOutput)} and not restored by {@link #readFields(DataInput)}.
     */
    private Object[] extensions;

    /**
     * Default constructor without any setting. All fields keep their Java default values until they are set or
     * {@link #readFields(DataInput)} is called.
     */
    public GuaguaInputSplit() {
    }

    /**
     * Constructor with {@link #isMaster} and {@link #fileSplits} settings.
     * 
     * @param isMaster
     *            Whether the input split is master split.
     * @param fileSplits
     *            File splits used for mapper task. The array is stored as is, without copying.
     */
    public GuaguaInputSplit(boolean isMaster, FileSplit... fileSplits) {
        this.isMaster = isMaster;
        this.fileSplits = fileSplits;
    }

    /**
     * Constructor with {@link #isMaster} and one FileSplit settings. The split is wrapped into a one-element array,
     * even when it is {@code null}.
     * 
     * @param isMaster
     *            Whether the input split is master split.
     * @param fileSplit
     *            File split used for mapper task.
     */
    public GuaguaInputSplit(boolean isMaster, FileSplit fileSplit) {
        this(isMaster, new FileSplit[] { fileSplit });
    }

    /**
     * Serializes this split: the master flag first and, for non-master splits only, the number of file splits
     * followed by each file split. A non-master split whose file split array is {@code null} is written as zero
     * file splits. Elements of the array must not be {@code null}.
     * 
     * @param out
     *            Output to write to.
     * @throws IOException
     *             If writing to {@code out} fails.
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeBoolean(this.isMaster());
        if(this.isMaster()) {
            // Nothing but the flag is written for the master split.
            return;
        }
        FileSplit[] splits = this.getFileSplits();
        // A missing array is written as an empty list; the wire format (count + splits) stays the same.
        int length = splits == null ? 0 : splits.length;
        out.writeInt(length);
        for(int i = 0; i < length; i++) {
            splits[i].write(out);
        }
    }

    /**
     * Restores this split from the format produced by {@link #write(DataOutput)}. When a master split is read, the
     * file splits are reset to {@code null} so that a reused instance does not keep splits from a previous read.
     * 
     * @param in
     *            Input to read from.
     * @throws IOException
     *             If reading from {@code in} fails or the stored number of file splits is negative.
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.setMaster(in.readBoolean());
        if(isMaster()) {
            // The stream carries no file splits for a master; drop whatever this instance held before.
            this.setFileSplits(null);
            return;
        }
        int len = in.readInt();
        if(len < 0) {
            throw new IOException("Invalid number of file splits: " + len);
        }
        FileSplit[] splits = new FileSplit[len];
        for(int i = 0; i < len; i++) {
            // Placeholder instance; its fields are filled in by readFields below.
            splits[i] = new FileSplit(null, 0, 0, (String[]) null);
            splits[i].readFields(in);
        }
        this.setFileSplits(splits);
    }

    /**
     * For master split, use Long.MAX_VALUE as its length to make it the first task for Hadoop job. It is convenient
     * for users to check master in Hadoop UI. For other splits the result is the sum of the lengths of all non-null
     * file splits, or 0 if there are none.
     * 
     * @return {@code Long.MAX_VALUE} for the master split, otherwise the total length of the file splits.
     */
    @Override
    public long getLength() throws IOException, InterruptedException {
        if(isMaster()) {
            return Long.MAX_VALUE;
        }
        FileSplit[] splits = this.getFileSplits();
        if(splits == null) {
            return 0L;
        }
        long len = 0;
        for(FileSplit split: splits) {
            // Null entries are skipped, in the same way as in getLocations().
            if(split != null) {
                len += split.getLength();
            }
        }
        return len;
    }

    /**
     * Data locality functions, return all hosts for all file splits.
     * 
     * @return Hosts of all non-null file splits in array order, duplicates included; an empty array if there are no
     *         file splits.
     */
    @Override
    public String[] getLocations() throws IOException, InterruptedException {
        FileSplit[] splits = this.getFileSplits();
        if(splits == null || splits.length == 0) {
            return new String[0];
        }

        // Typed list: with a raw List, toArray(String[]) is typed Object[] and cannot be returned as String[].
        List<String> hosts = new ArrayList<String>();
        for(FileSplit fileSplit: splits) {
            if(fileSplit != null) {
                String[] locations = fileSplit.getLocations();
                if(locations != null) {
                    hosts.addAll(Arrays.asList(locations));
                }
            }
        }
        return hosts.toArray(new String[0]);
    }

    /**
     * @return Whether the input split is master split.
     */
    public boolean isMaster() {
        return isMaster;
    }

    /**
     * @param isMaster
     *            Whether the input split is master split.
     */
    public void setMaster(boolean isMaster) {
        this.isMaster = isMaster;
    }

    /**
     * @return The file splits of this input split; the internal array itself, may be {@code null}.
     */
    public FileSplit[] getFileSplits() {
        return fileSplits;
    }

    /**
     * @param fileSplits
     *            File splits used for mapper task. The array is stored as is, without copying.
     */
    public void setFileSplits(FileSplit[] fileSplits) {
        this.fileSplits = fileSplits;
    }

    /**
     * @return The extra objects attached to this input split; the internal array itself, may be {@code null}.
     */
    public Object[] getExtensions() {
        return extensions;
    }

    /**
     * @param extensions
     *            Extra objects to attach to this input split. The array is stored as is, without copying.
     */
    public void setExtensions(Object[] extensions) {
        this.extensions = extensions;
    }

    /**
     * @return A description containing the master flag and the file splits; extensions are not included.
     */
    @Override
    public String toString() {
        return String.format("GuaguaInputSplit [isMaster=%s, fileSplits=%s]", isMaster,
                Arrays.toString(this.fileSplits));
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy