org.apache.hadoop.hbase.mapreduce.TableSplit Maven / Gradle / Ivy
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;
/**
* A table split corresponds to a key range (low, high) and an optional scanner.
* All references to row below refer to the key of the row.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSplit extends InputSplit
implements Writable, Comparable {
/** @deprecated LOG variable would be made private. */
@Deprecated
public static final Log LOG = LogFactory.getLog(TableSplit.class);
// should be < 0 (@see #readFields(DataInput))
// version 1 supports Scan data member
enum Version {
UNVERSIONED(0),
// Initial number we put on TableSplit when we introduced versioning.
INITIAL(-1),
// Added an encoded region name field for easier identification of split -> region
WITH_ENCODED_REGION_NAME(-2);
final int code;
static final Version[] byCode;
static {
byCode = Version.values();
for (int i = 0; i < byCode.length; i++) {
if (byCode[i].code != -1 * i) {
throw new AssertionError("Values in this enum should be descending by one");
}
}
}
Version(int code) {
this.code = code;
}
boolean atLeast(Version other) {
return code <= other.code;
}
static Version fromCode(int code) {
return byCode[code * -1];
}
}
private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
private TableName tableName;
private byte [] startRow;
private byte [] endRow;
private String regionLocation;
private String encodedRegionName = "";
private String scan = ""; // stores the serialized form of the Scan
private long length; // Contains estimation of region size in bytes
/** Default constructor. */
public TableSplit() {
this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
HConstants.EMPTY_BYTE_ARRAY, "");
}
/**
* @deprecated As of release 0.96
* (HBASE-9508).
* This will be removed in HBase 2.0.0.
* Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
*/
@Deprecated
public TableSplit(final byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
final String location) {
this(TableName.valueOf(tableName), scan, startRow, endRow, location);
}
/**
* Creates a new instance while assigning all variables.
* Length of region is set to 0
* Encoded name of the region is set to blank
*
* @param tableName The name of the current table.
* @param scan The scan associated with this split.
* @param startRow The start row of the split.
* @param endRow The end row of the split.
* @param location The location of the region.
*/
public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
final String location) {
this(tableName, scan, startRow, endRow, location, 0L);
}
/**
* Creates a new instance while assigning all variables.
* Encoded name of region is set to blank
*
* @param tableName The name of the current table.
* @param scan The scan associated with this split.
* @param startRow The start row of the split.
* @param endRow The end row of the split.
* @param location The location of the region.
*/
public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
final String location, long length) {
this(tableName, scan, startRow, endRow, location, "", length);
}
/**
* Creates a new instance while assigning all variables.
*
* @param tableName The name of the current table.
* @param scan The scan associated with this split.
* @param startRow The start row of the split.
* @param endRow The end row of the split.
* @param encodedRegionName The region ID.
* @param location The location of the region.
*/
public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
final String location, final String encodedRegionName, long length) {
this.tableName = tableName;
try {
this.scan =
(null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
} catch (IOException e) {
LOG.warn("Failed to convert Scan to String", e);
}
this.startRow = startRow;
this.endRow = endRow;
this.regionLocation = location;
this.encodedRegionName = encodedRegionName;
this.length = length;
}
/**
* @deprecated As of release 0.96
* (HBASE-9508).
* This will be removed in HBase 2.0.0.
* Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
*/
@Deprecated
public TableSplit(final byte [] tableName, byte[] startRow, byte[] endRow,
final String location) {
this(TableName.valueOf(tableName), startRow, endRow, location);
}
/**
* Creates a new instance without a scanner.
* Length of region is set to 0
*
* @param tableName The name of the current table.
* @param startRow The start row of the split.
* @param endRow The end row of the split.
* @param location The location of the region.
*/
public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
final String location) {
this(tableName, null, startRow, endRow, location);
}
/**
* Creates a new instance without a scanner.
*
* @param tableName The name of the current table.
* @param startRow The start row of the split.
* @param endRow The end row of the split.
* @param location The location of the region.
* @param length Size of region in bytes
*/
public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
final String location, long length) {
this(tableName, null, startRow, endRow, location, length);
}
/**
* Returns a Scan object from the stored string representation.
*
* @return Returns a Scan object based on the stored scanner.
* @throws IOException
*/
public Scan getScan() throws IOException {
return TableMapReduceUtil.convertStringToScan(this.scan);
}
/**
* Returns the table name converted to a byte array.
* @see #getTable()
* @return The table name.
*/
public byte [] getTableName() {
return tableName.getName();
}
/**
* Returns the table name.
*
* @return The table name.
*/
public TableName getTable() {
// It is ugly that usually to get a TableName, the method is called getTableName. We can't do
// that in here though because there was an existing getTableName in place already since
// deprecated.
return tableName;
}
/**
* Returns the start row.
*
* @return The start row.
*/
public byte [] getStartRow() {
return startRow;
}
/**
* Returns the end row.
*
* @return The end row.
*/
public byte [] getEndRow() {
return endRow;
}
/**
* Returns the region location.
*
* @return The region's location.
*/
public String getRegionLocation() {
return regionLocation;
}
/**
* Returns the region's location as an array.
*
* @return The array containing the region location.
* @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
*/
@Override
public String[] getLocations() {
return new String[] {regionLocation};
}
/**
* Returns the region's encoded name.
*
* @return The region's encoded name.
*/
public String getEncodedRegionName() {
return encodedRegionName;
}
/**
* Returns the length of the split.
*
* @return The length of the split.
* @see org.apache.hadoop.mapreduce.InputSplit#getLength()
*/
@Override
public long getLength() {
return length;
}
/**
* Reads the values of each field.
*
* @param in The input to read from.
* @throws IOException When reading the input fails.
*/
@Override
public void readFields(DataInput in) throws IOException {
Version version = Version.UNVERSIONED;
// TableSplit was not versioned in the beginning.
// In order to introduce it now, we make use of the fact
// that tableName was written with Bytes.writeByteArray,
// which encodes the array length as a vint which is >= 0.
// Hence if the vint is >= 0 we have an old version and the vint
// encodes the length of tableName.
// If < 0 we just read the version and the next vint is the length.
// @see Bytes#readByteArray(DataInput)
int len = WritableUtils.readVInt(in);
if (len < 0) {
// what we just read was the version
version = Version.fromCode(len);
len = WritableUtils.readVInt(in);
}
byte[] tableNameBytes = new byte[len];
in.readFully(tableNameBytes);
tableName = TableName.valueOf(tableNameBytes);
startRow = Bytes.readByteArray(in);
endRow = Bytes.readByteArray(in);
regionLocation = Bytes.toString(Bytes.readByteArray(in));
if (version.atLeast(Version.INITIAL)) {
scan = Bytes.toString(Bytes.readByteArray(in));
}
length = WritableUtils.readVLong(in);
if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
}
}
/**
* Writes the field values to the output.
*
* @param out The output to write to.
* @throws IOException When writing the values to the output fails.
*/
@Override
public void write(DataOutput out) throws IOException {
WritableUtils.writeVInt(out, VERSION.code);
Bytes.writeByteArray(out, tableName.getName());
Bytes.writeByteArray(out, startRow);
Bytes.writeByteArray(out, endRow);
Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
Bytes.writeByteArray(out, Bytes.toBytes(scan));
WritableUtils.writeVLong(out, length);
Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
}
/**
* Returns the details about this instance as a string.
*
* @return The values of this instance as a string.
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("HBase table split(");
sb.append("table name: ").append(tableName);
// null scan input is represented by ""
String printScan = "";
if (!scan.equals("")) {
try {
// get the real scan here in toString, not the Base64 string
printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
}
catch (IOException e) {
printScan = "";
}
}
sb.append(", scan: ").append(printScan);
sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
sb.append(", region location: ").append(regionLocation);
sb.append(", encoded region name: ").append(encodedRegionName);
sb.append(")");
return sb.toString();
}
/**
* Compares this split against the given one.
*
* @param split The split to compare to.
* @return The result of the comparison.
* @see java.lang.Comparable#compareTo(java.lang.Object)
*/
@Override
public int compareTo(TableSplit split) {
// If The table name of the two splits is the same then compare start row
// otherwise compare based on table names
int tableNameComparison =
getTable().compareTo(split.getTable());
return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
getStartRow(), split.getStartRow());
}
@Override
public boolean equals(Object o) {
if (o == null || !(o instanceof TableSplit)) {
return false;
}
return tableName.equals(((TableSplit)o).tableName) &&
Bytes.equals(startRow, ((TableSplit)o).startRow) &&
Bytes.equals(endRow, ((TableSplit)o).endRow) &&
regionLocation.equals(((TableSplit)o).regionLocation);
}
@Override
public int hashCode() {
int result = tableName != null ? tableName.hashCode() : 0;
result = 31 * result + (scan != null ? scan.hashCode() : 0);
result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
return result;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy