// org.apache.hadoop.hbase.mapreduce.TableSplit Maven / Gradle / Ivy

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * A table split corresponds to a key range (low, high) and an optional scanner.
 * All references to row below refer to the key of the row.
 *
 * <p>A split is serialized with {@link #write(DataOutput)} and restored with
 * {@link #readFields(DataInput)}. Equality and hashing are based on the table
 * name, start row, end row and region location; ordering
 * ({@link #compareTo(TableSplit)}) is based on the table name and start row only.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSplit extends InputSplit
implements Writable, Comparable<TableSplit> {
  /** @deprecated LOG variable would be made private. */
  @Deprecated
  public static final Log LOG = LogFactory.getLog(TableSplit.class);

  /**
   * Serialization format versions.
   *
   * <p>Every code except {@link #UNVERSIONED} must be negative: the first vint of
   * a serialized split is either a version code (negative) or, for unversioned
   * data, the non-negative length of the table name (see
   * {@link TableSplit#readFields(DataInput)}). Codes descend by one per constant
   * so that the negated code is the index of the constant in {@link #byCode}.
   */
  enum Version {
    /** Data written before versioning existed; carries no version marker. */
    UNVERSIONED(0),
    // Initial number we put on TableSplit when we introduced versioning.
    // From this version on the serialized Scan is part of the data.
    INITIAL(-1),
    // Added an encoded region name field for easier identification of split -> region
    WITH_ENCODED_REGION_NAME(-2);

    final int code;
    static final Version[] byCode;
    static {
      byCode = Version.values();
      // Guard the "index == -code" invariant that fromCode() relies on.
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    /**
     * @param other The version to compare against.
     * @return {@code true} if this version is the same as or newer than
     *         {@code other}. Newer versions have smaller (more negative) codes.
     */
    boolean atLeast(Version other) {
      return code <= other.code;
    }

    /**
     * Looks up the version for a code read from a serialized split.
     *
     * @param code The version code, expected to be between
     *             {@code -(byCode.length - 1)} and {@code 0} inclusive.
     * @return The matching version.
     * @throws IllegalArgumentException If no version with that code exists.
     */
    static Version fromCode(int code) {
      // Range check instead of negating first: also rejects Integer.MIN_VALUE,
      // whose negation overflows.
      if (code > 0 || code <= -byCode.length) {
        throw new IllegalArgumentException("Unknown TableSplit version code: " + code);
      }
      return byCode[-code];
    }
  }

  /** Version written by {@link #write(DataOutput)}. */
  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
  private TableName tableName;
  private byte [] startRow;
  private byte [] endRow;
  private String regionLocation;
  private String encodedRegionName = "";
  private String scan = ""; // stores the serialized form of the Scan
  private long length; // Contains estimation of region size in bytes

  /**
   * Default constructor. Creates a split with no table name, no scan, empty
   * start and end rows and an empty location; the fields are expected to be
   * filled in by {@link #readFields(DataInput)}.
   */
  public TableSplit() {
    this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
      HConstants.EMPTY_BYTE_ARRAY, "");
  }

  /**
   * @deprecated As of release 0.96
   *             (HBASE-9508).
   *             This will be removed in HBase 2.0.0.
   *             Use {@link TableSplit#TableSplit(TableName, Scan, byte[], byte[], String)}.
   */
  @Deprecated
  public TableSplit(final byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location) {
    this(TableName.valueOf(tableName), scan, startRow, endRow, location);
  }

  /**
   * Creates a new instance while assigning all variables.
   * Length of region is set to 0
   * Encoded name of the region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
                    final String location) {
    this(tableName, scan, startRow, endRow, location, 0L);
  }

  /**
   * Creates a new instance while assigning all variables.
   * Encoded name of region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param length  Size of region in bytes.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, long length) {
    this(tableName, scan, startRow, endRow, location, "", length);
  }

  /**
   * Creates a new instance while assigning all variables.
   *
   * <p>If the scan cannot be converted to its string form, the failure is
   * logged and the split keeps an empty scan string.
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split; may be {@code null}.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param encodedRegionName The region ID.
   * @param length  Size of region in bytes.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, final String encodedRegionName, long length) {
    this.tableName = tableName;
    try {
      this.scan =
        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
    } catch (IOException e) {
      // Best effort: the split is still usable without its scan.
      LOG.warn("Failed to convert Scan to String", e);
    }
    this.startRow = startRow;
    this.endRow = endRow;
    this.regionLocation = location;
    this.encodedRegionName = encodedRegionName;
    this.length = length;
  }

  /**
   * @deprecated As of release 0.96
   *             (HBASE-9508).
   *             This will be removed in HBase 2.0.0.
   *             Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
   */
  @Deprecated
  public TableSplit(final byte [] tableName, byte[] startRow, byte[] endRow,
      final String location) {
    this(TableName.valueOf(tableName), startRow, endRow, location);
  }

  /**
   * Creates a new instance without a scanner.
   * Length of region is set to 0
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location) {
    this(tableName, null, startRow, endRow, location);
  }

  /**
   * Creates a new instance without a scanner.
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param length Size of region in bytes
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
                    final String location, long length) {
    this(tableName, null, startRow, endRow, location, length);
  }

  /**
   * Returns a Scan object from the stored string representation.
   *
   * @return Returns a Scan object based on the stored scanner.
   * @throws IOException When the stored string cannot be converted to a Scan.
   */
  public Scan getScan() throws IOException {
    return TableMapReduceUtil.convertStringToScan(this.scan);
  }

  /**
   * Returns the table name converted to a byte array.
   * @see #getTable()
   * @return The table name.
   */
  public byte [] getTableName() {
    return tableName.getName();
  }

  /**
   * Returns the table name.
   *
   * @return The table name.
   */
  public TableName getTable() {
    // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
    // that in here though because there was an existing getTableName in place already since
    // deprecated.
    return tableName;
  }

  /**
   * Returns the start row.
   *
   * @return The start row.
   */
  public byte [] getStartRow() {
    return startRow;
  }

  /**
   * Returns the end row.
   *
   * @return The end row.
   */
  public byte [] getEndRow() {
    return endRow;
  }

  /**
   * Returns the region location.
   *
   * @return The region's location.
   */
  public String getRegionLocation() {
    return regionLocation;
  }

  /**
   * Returns the region's location as an array.
   *
   * @return A new single-element array containing the region location.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
   */
  @Override
  public String[] getLocations() {
    return new String[] {regionLocation};
  }

  /**
   * Returns the region's encoded name.
   *
   * @return The region's encoded name.
   */
  public String getEncodedRegionName() {
    return encodedRegionName;
  }

  /**
   * Returns the length of the split.
   *
   * @return The length of the split.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
   */
  @Override
  public long getLength() {
    return length;
  }

  /**
   * Reads the values of each field.
   *
   * <p>Accepts every format in {@link Version}. Fields that the serialized
   * version does not carry (scan, encoded region name) are reset to the empty
   * string so that a reused instance does not keep values from a previous read.
   *
   * @param in  The input to read from.
   * @throws IOException When reading the input fails, or when the data carries
   *         an unknown version code or a negative table name length.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // TableSplit was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that tableName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of tableName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
    int len = WritableUtils.readVInt(in);
    if (len < 0) {
      // what we just read was the version
      try {
        version = Version.fromCode(len);
      } catch (IllegalArgumentException e) {
        // e.g. data written by a newer release, or a corrupt stream
        throw new IOException("Unsupported TableSplit version code " + len
            + "; newest supported code is " + VERSION.code, e);
      }
      len = WritableUtils.readVInt(in);
      if (len < 0) {
        throw new IOException("Negative table name length " + len
            + " in serialized TableSplit");
      }
    }
    byte[] tableNameBytes = new byte[len];
    in.readFully(tableNameBytes);
    tableName = TableName.valueOf(tableNameBytes);
    startRow = Bytes.readByteArray(in);
    endRow = Bytes.readByteArray(in);
    regionLocation = Bytes.toString(Bytes.readByteArray(in));
    // Clear version-dependent fields first; older formats do not contain them.
    scan = "";
    encodedRegionName = "";
    if (version.atLeast(Version.INITIAL)) {
      scan = Bytes.toString(Bytes.readByteArray(in));
    }
    length = WritableUtils.readVLong(in);
    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
    }
  }

  /**
   * Writes the field values to the output, always in the newest format
   * ({@link #VERSION}). The field order here must mirror
   * {@link #readFields(DataInput)}.
   *
   * @param out  The output to write to.
   * @throws IOException When writing the values to the output fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, VERSION.code);
    Bytes.writeByteArray(out, tableName.getName());
    Bytes.writeByteArray(out, startRow);
    Bytes.writeByteArray(out, endRow);
    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(scan));
    WritableUtils.writeVLong(out, length);
    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
  }

  /**
   * Returns the details about this instance as a string.
   *
   * @return The values of this instance as a string.
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("HBase table split(");
    sb.append("table name: ").append(tableName);
    // null scan input is represented by ""
    String printScan = "";
    if (!"".equals(scan)) {
      try {
        // get the real scan here in toString, not the Base64 string
        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
      } catch (IOException e) {
        // toString() must not fail; fall back to printing no scan at all.
        printScan = "";
      }
    }
    sb.append(", scan: ").append(printScan);
    sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
    sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
    sb.append(", region location: ").append(regionLocation);
    sb.append(", encoded region name: ").append(encodedRegionName);
    sb.append(")");
    return sb.toString();
  }

  /**
   * Compares this split against the given one.
   *
   * <p>Only the table name and the start row take part in the ordering, so this
   * method can return 0 for two splits that {@link #equals(Object)} treats as
   * different (for example when only their end rows differ).
   *
   * @param split  The split to compare to.
   * @return The result of the comparison.
   * @see java.lang.Comparable#compareTo(java.lang.Object)
   */
  @Override
  public int compareTo(TableSplit split) {
    // If The table name of the two splits is the same then compare start row
    // otherwise compare based on table names
    int tableNameComparison =
        getTable().compareTo(split.getTable());
    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
        getStartRow(), split.getStartRow());
  }

  /**
   * Two splits are equal when their table name, start row, end row and region
   * location are equal. The scan, the encoded region name and the length are
   * not part of the comparison.
   *
   * @param o  The object to compare to.
   * @return {@code true} if {@code o} is a TableSplit with equal key fields.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof TableSplit)) {
      return false;
    }
    TableSplit other = (TableSplit) o;
    // tableName is null on a default-constructed split, so compare null-safely.
    return nullSafeEquals(tableName, other.tableName) &&
      Bytes.equals(startRow, other.startRow) &&
      Bytes.equals(endRow, other.endRow) &&
      nullSafeEquals(regionLocation, other.regionLocation);
  }

  /**
   * Hashes exactly the fields that {@link #equals(Object)} compares, so that
   * equal splits always produce the same hash code.
   *
   * @return The hash code of this split.
   */
  @Override
  public int hashCode() {
    int result = tableName != null ? tableName.hashCode() : 0;
    // Arrays.hashCode(byte[]) returns 0 for a null array.
    result = 31 * result + Arrays.hashCode(startRow);
    result = 31 * result + Arrays.hashCode(endRow);
    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
    return result;
  }

  /** Returns whether both references are null or {@code a.equals(b)}. */
  private static boolean nullSafeEquals(Object a, Object b) {
    return a == null ? b == null : a.equals(b);
  }
}