org.apache.pig.impl.io.PigNullableWritable Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of pig Show documentation
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;

/**
 * A base class for all types that pig uses to move data between map and
 * reduce.  It implements WritableComparable so that compareTo etc. can be
 * called.  It also wraps a WritableComparable 'value'.  This is set by each
 * different type to be an object of its specific type.
 * It also provides getIndex() and setIndex() calls that are used to get
 * and set the index.  These can be used by LocalRearrange, the partitioner,
 * and Package to determine the index.
 *
 * The serialized form produced by {@link #write(DataOutput)} is: the null
 * indicator (one boolean), then the wrapped value (only when the null
 * indicator is false), then the index (one byte).  Within the index byte the
 * most significant bit ({@link #mqFlag}) marks a multi-query index and the
 * low seven bits ({@link #idxSpace}) hold the index itself.
 */

//Put in to make the compiler not complain about WritableComparable
//being a generic type.
@SuppressWarnings("unchecked")
public abstract class PigNullableWritable implements WritableComparable, Cloneable {

    /**
     * indices in multiquery optimized maps
     * will have the Most Significant Bit set
     * This is a bitmask used in those cases.
     */
    public static final byte mqFlag = (byte)0x80;

    /**
     *  regular indices used in group and cogroup
     *  can only go from 0x00 to 0x7F
     */
    public static final byte idxSpace = (byte)0x7F;

    /** True when this object represents a null; mValue is then ignored. */
    private boolean mNull;

    /** The wrapped value; subclasses assign an object of their specific type. */
    protected WritableComparable mValue;

    /** Index byte: {@link #mqFlag} bit plus the index in the low seven bits. */
    private byte mIndex;

    /**
     * Creates a new instance of this object's runtime class (through its
     * no-argument constructor) and copies the null indicator, the wrapped
     * value and the index into it.
     *
     * The copy is shallow: the clone refers to the same wrapped value object
     * as the original.
     *
     * @return the copy
     * @throws CloneNotSupportedException declared by the signature; this
     * implementation itself reports every failure as a RuntimeException
     * @throws RuntimeException if the new instance cannot be created
     */
    @Override
    public PigNullableWritable clone() throws CloneNotSupportedException {
        try {
            PigNullableWritable clone = this.getClass().newInstance();
            clone.mNull = this.mNull;
            clone.mValue = this.mValue;
            clone.mIndex = this.mIndex;
            return clone;
        } catch (Exception e) {
            throw new RuntimeException("Exception while cloning " + this, e);
        }
    }

    /**
     * Compare two nullable objects.
     * <ol>
     * <li>If this object's index has the multi-query flag set, the indices
     * (without the flag bit) are compared first; a difference decides the
     * result.</li>
     * <li>If neither is null the wrapped values are compared.  When they
     * compare equal, both are tuples and this tuple contains a null field,
     * the indices (without the flag bit) decide the result, so that such
     * keys from different inputs are not merged.</li>
     * <li>If both are null the indices (without the flag bit) are
     * compared.</li>
     * <li>If exactly one is null, the one that is null is declared to be
     * less.</li>
     * </ol>
     *
     * These comparators are used by hadoop as part of the post-map sort, when
     * the data is still in object format.
     *
     * @param o the object to compare with; must be a PigNullableWritable
     * @return a negative number, zero or a positive number as this object is
     * less than, equal to or greater than o
     * @throws ClassCastException if o is not a PigNullableWritable
     * @throws RuntimeException if a tuple field cannot be accessed
     */
    @Override
    public int compareTo(Object o) {
        PigNullableWritable w = (PigNullableWritable)o;

        if ((mIndex & mqFlag) != 0) { // this is a multi-query index
            int byIndex = compareIndexSpace(w);
            if (byIndex != 0) {
                return byIndex;
            }
        }

        if (!mNull && !w.mNull) {
            int result = mValue.compareTo(w.mValue);

            // If any of the field inside tuple is null, then we do not merge keys
            // See PIG-927
            if (result == 0 && mValue instanceof Tuple && w.mValue instanceof Tuple) {
                try {
                    Tuple tuple = (Tuple)mValue;
                    for (int i = 0; i < tuple.size(); i++) {
                        if (tuple.get(i) == null) {
                            // Compare only the index bits.  Subtracting the raw
                            // bytes would let the sign-extended multi-query flag
                            // dominate and make the ordering asymmetric.
                            return compareIndexSpace(w);
                        }
                    }
                } catch (ExecException e) {
                    throw new RuntimeException("Unable to access tuple field", e);
                }
            }
            return result;
        } else if (mNull && w.mNull) {
            // If they're both null, compare the indices
            return compareIndexSpace(w);
        } else if (mNull) {
            return -1;
        } else {
            return 1;
        }
    }

    /**
     * Compares the index of this object with the index of another one,
     * ignoring the multi-query flag bit of both.
     *
     * @param other the object whose index is compared with this one's
     * @return -1, 0 or 1 as this index is less than, equal to or greater
     * than the other index
     */
    private int compareIndexSpace(PigNullableWritable other) {
        int mine = mIndex & idxSpace;
        int theirs = other.mIndex & idxSpace;
        if (mine < theirs) {
            return -1;
        } else if (mine > theirs) {
            return 1;
        }
        return 0;
    }

    /**
     * Reads the null indicator, then the wrapped value (only when the null
     * indicator is false), then the index byte.  The wrapped value object is
     * left untouched when the null indicator is true.
     *
     * @param in the input to read from
     * @throws IOException if reading fails
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        mNull = in.readBoolean();
        if (!mNull) mValue.readFields(in);
        mIndex = in.readByte();
    }

    /**
     * Writes the null indicator, then the wrapped value (only when the null
     * indicator is false), then the index byte.
     *
     * @param out the output to write to
     * @throws IOException if writing fails
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeBoolean(mNull);
        if (!mNull) mValue.write(out);
        out.writeByte(mIndex);
    }

    /**
     * @return the isNull
     */
    public boolean isNull() {
        return mNull;
    }

    /**
     * @param isNull the isNull to set
     */
    public void setNull(boolean isNull) {
        mNull = isNull;
    }

    /**
     * @return the index for this value, including the multi-query flag bit
     * if it is set
     */
    public byte getIndex() {
        return mIndex;
    }

    /**
     * @param index for this value; stored as given, flag bit included.
     */
    public void setIndex(byte index) {
        mIndex = index;
    }

    /**
     * @return The wrapped value as a pig type, not as a WritableComparable.
     */
    abstract public Object getValueAsPigType();

    /**
     * @return 0 for a null, otherwise the hash code of the wrapped value;
     * the index does not take part in the hash
     */
    @Override
    public int hashCode() {
        // For now, always give a null a hash code of 0.  It isn't clear this
        // is what we'll always want.  If nulls make a significant but
        // not overwhelming amount of the data we may want them to get their
        // own partition.  If they make up a big enough percentage of the
        // data we may want to split them across partitions (though that
        // would obviously limit how they could be dealt with afterwards).
        if (mNull) return 0;
        else return mValue.hashCode();
    }



    /**
     * Two objects are equal when {@link #compareTo(Object)} returns 0.
     *
     * @param arg0 the object to compare with
     * @return false when arg0 is null or not a PigNullableWritable,
     * otherwise whether compareTo(arg0) is 0
     */
    @Override
    public boolean equals(Object arg0) {
        // instanceof is false for null, so this also covers equals(null),
        // which must return false rather than throw.
        if (!(arg0 instanceof PigNullableWritable)) {
            return false;
        }
        return compareTo(arg0)==0;
    }

    /**
     * @return the null indicator, the index and, when not null, the wrapped
     * value rendered as text
     */
    @Override
    public String toString() {
        return "Null: " + mNull + " index: " + mIndex + (mNull ? "" : " " + mValue.toString());
    }
}