// org.hbase.async.RegionInfo

/*
 * Copyright (C) 2010-2012  The Async HBase Authors.  All rights reserved.
 * This file is part of Async HBase.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *   - Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *   - Neither the name of the StumbleUpon nor the names of its contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.hbase.async;

import java.util.Comparator;
import java.util.Arrays;

import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.ByteString;

import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.buffer.ChannelBuffers;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.hbase.async.generated.HBasePB;
import static org.hbase.async.HBaseClient.EMPTY_ARRAY;

/**
 * Stores basic information about a region.
 */
final class RegionInfo implements Comparable {

  private static final Logger LOG = LoggerFactory.getLogger(RegionInfo.class);

  private final byte[] table;
  // The region name is of the form:
  //   table_name,start_key,timestamp[.MD5.]
  // So it contains the start_key.
  private final byte[] region_name;
  private final byte[] stop_key;

  /**
   * Constructor.
   */
  public RegionInfo(final byte[] table,
                    final byte[] region_name,
                    final byte[] stop_key) {
    this.table = table;
    this.region_name = region_name;
    if (stop_key.length == 0) {
      this.stop_key = EMPTY_ARRAY;
    } else {
      this.stop_key = stop_key;
    }
  }

  /** Returns the name of the table this region belongs to.  */
  public byte[] table() {
    return table;
  }

  /** Returns the name of the region.  */
  public byte[] name() {
    return region_name;
  }

  /** Returns the stop key (exclusive) of this region.  */
  public byte[] stopKey() {
    return stop_key;
  }

  /**
   * Returns the protobuf representation of this region.
   */
  HBasePB.RegionSpecifier toProtobuf() {
    return HBasePB.RegionSpecifier.newBuilder()
      .setType(HBasePB.RegionSpecifier.RegionSpecifierType.REGION_NAME)
      .setValue(ByteString.copyFrom(region_name))
      .build();
  }

  /**
   * Creates a new {@link RegionInfo} from a META {@link KeyValue}.
   * @param kv The {@link KeyValue} to use, which is assumed to be from
   * the cell {@code info:regioninfo} of a {@code .META.} region.
   * @param out_start_key A (@code {new byte[1][]}).
   * The start row of the region will be stored in {@code out_start_key[0]}.
   * Think "pointer-to-pointer" in Java (yeah!).
   * @return A newly created {@link RegionInfo}.
   * If calling {@link #table} on the object returned gives a reference to
   * {@link HBaseClient#EMPTY_ARRAY}, then the META entry indicates that the
   * region has been split (and thus this entry shouldn't be used).
   * @throws RegionOfflineException if the META entry indicates that the
   * region is offline.
   * @throws BrokenMetaException if the {@link KeyValue} seems invalid.
   */
  static RegionInfo fromKeyValue(final KeyValue kv,
                                 final byte[][] out_start_key) {
    switch (kv.value()[0]) {
      case 0:  // pre 0.92 -- fall through.
      case 1:  // 0.92 to 0.94
        return deserializeOldRegionInfo(kv, out_start_key);
      case 80: // 0.95+
        return deserializeProtobufRegionInfo(kv, out_start_key);
      default:
        throw new IllegalStateException("Unsupported region info version: "
                                        + kv.value()[0] + " in .META.  entry: "
                                        + kv);
    }
  }

  /**
   * Creates a new {@link RegionInfo} from a pre-0.95 META {@link KeyValue}.
   */
  private static RegionInfo
  deserializeOldRegionInfo(final KeyValue kv, final byte[][] out_start_key) {
    final ChannelBuffer buf = ChannelBuffers.wrappedBuffer(kv.value());
    buf.readByte(); // Skip the version.
    // version 1 was introduced in HBase 0.92 (see HBASE-451).
    // The differences between v0 and v1 are irrelevant to us,
    // as we only look at the first few fields, and they didn't
    // change across these 2 versions.
    final byte[] stop_key = HBaseRpc.readByteArray(buf);
    final boolean offline = buf.readByte() != 0;
    final long region_id = buf.readLong();
    final byte[] region_name = HBaseRpc.readByteArray(buf);
    // TODO(tsuna): Can we easily de-dup this array with another RegionInfo?
    byte[] table;
    try {
      table = tableFromRegionName(region_name);
    } catch (IllegalArgumentException e) {
      throw BrokenMetaException.badKV(null, "an `info:regioninfo' cell"
                                      + " has a " + e.getMessage(), kv);
    }
    final boolean split = buf.readByte() != 0;
    final byte[] start_key = HBaseRpc.readByteArray(buf);
    // Table description and hash code are left, but we don't care.

    if (LOG.isDebugEnabled()) {
      LOG.debug("Got " + Bytes.pretty(table) + "'s region ["
                + Bytes.pretty(start_key) + '-'
                + Bytes.pretty(stop_key) + ") offline=" + offline
                + ", region_id=" + region_id + ", region_name="
                + Bytes.pretty(region_name) + ", split=" + split);
    }
    // RegionServers set both `offline' and `split' to `false' on the parent
    // region after it's been split.  We normally don't expect to ever observe
    // such regions as any META lookup should find the new daughter regions.
    // But just in case, we make sure to not throw an exception in this case.
    if (offline && !split) {
      throw new RegionOfflineException(region_name);
    }
    // If the region has been split, we put a special marker instead of
    // the table name to indicate that this region has been split.
    final RegionInfo region = new RegionInfo(split ? EMPTY_ARRAY : table,
                                             region_name, stop_key);
    out_start_key[0] = start_key;
    return region;
  }

  /**
   * Creates a new {@link RegionInfo} from a 0.95+ META {@link KeyValue}.
   */
  private static RegionInfo
  deserializeProtobufRegionInfo(final KeyValue kv, final byte[][] out_start_key) {
    final byte[] value = kv.value();
    final int magic = Bytes.getInt(value);
    if (magic != HBaseClient.PBUF_MAGIC) {
      throw BrokenMetaException.badKV(null, "the magic number is invalid", kv);
    }
    final HBasePB.RegionInfo pb;
    try {
      pb = HBasePB.RegionInfo.PARSER.parseFrom(value, 4, value.length - 4);
    } catch (InvalidProtocolBufferException e) {
      throw new BrokenMetaException("Failed to decode " + Bytes.pretty(value),
                                    e);
    }
    final byte[] region_id = Long.toString(pb.getRegionId()).getBytes();
    final byte[] table = Bytes.get(pb.getTableName().getQualifier());
    final byte[] start_key = Bytes.get(pb.getStartKey());
    final byte[] stop_key = Bytes.get(pb.getEndKey());
    final byte[] region_name = kv.key();

    final boolean offline = pb.getOffline();
    final boolean split = pb.getSplit();
    // XXX what to do with the `recovering' field?
    if (offline && !split) {
      throw new RegionOfflineException(region_name);
    }
    out_start_key[0] = start_key;
    return new RegionInfo(split ? EMPTY_ARRAY : table, region_name, stop_key);
  }

  /**
   * Given the name of a region, returns the name of the table it belongs to.
   * @throws IllegalArgumentException if the name of the region is malformed.
   */
  static byte[] tableFromRegionName(final byte[] region_name) {
    int comma = 1;  // Can't be at the beginning.
    for (/**/; comma < region_name.length; comma++) {
      if (region_name[comma] == ',') {
        break;
      }
    }
    if (comma == region_name.length) {
      throw new IllegalArgumentException("Malformed region name, contains no"
        + " comma: " + Bytes.pretty(region_name));
    }
    return Arrays.copyOf(region_name, comma);
  }

  /**
   * Given name of a region, returns its start key
   * @throws IllegalArgumentException if the name of the region is malformed
   * @param region_name Full region_name created in the constructor
   * @return byte Array of the start key
   */
  static byte[] startKeyFromRegionName(final byte[] region_name){
    int key_begin = 0;
    int key_end= 1;
    int comma = 0;
    for (/**/; key_end < region_name.length; key_end++) {
      if (region_name[key_end] == ',') {
        comma++;
        if (comma == 1){
          key_begin = key_end+1;
        }
        if (comma == 2){
          break;
        }
      }
    }

    // If reached the end and the string being returned is not empty
    if (key_end == region_name.length &&  (comma == 2)) {
      throw new IllegalArgumentException("Malformed region name, not enough"
              + " commas: " + Bytes.pretty(region_name));
    }

    // Only return the string if region length is greater than 0
    if (key_end - key_begin > 0 && region_name.length > 0){
      return Arrays.copyOfRange(region_name, key_begin, key_end);
    }
    // Otherwise, return an empty string as start key aka this is the 
    // start key for the first region in the table.
    else {
      return EMPTY_ARRAY;
    }

  }

  @Override
  public int compareTo(final RegionInfo other) {
    return Bytes.memcmp(region_name, other.region_name);
  }

  public boolean equals(final Object other) {
    if (other == null || !(other instanceof RegionInfo)) {
      return false;
    }
    return compareTo((RegionInfo) other) == 0;
  }

  public int hashCode() {
    return Arrays.hashCode(table)
      ^ Arrays.hashCode(region_name)
      ^ Arrays.hashCode(stop_key);
  }

  /** Returns a hint as to how many bytes are needed for {@link #toString}.  */
  int stringSizeHint() {
    return 48  // boilerplate
      + table.length + 2
      // region_name and stop_key are likely to contain non-ascii characters,
      // so let's multiply its length by 2 to avoid re-allocations.
      + region_name.length * 2
      + stop_key.length * 2;
  }

  public String toString() {
    final StringBuilder buf = new StringBuilder(stringSizeHint());
    toStringbuf(buf);
    return buf.toString();
  }

  /** Like {@link #toString} but puts the output in the given buffer.  */
  void toStringbuf(final StringBuilder buf) {
    buf.append("RegionInfo(table=");
    if (table == EMPTY_ARRAY) {
      buf.append("");
    } else {
      Bytes.pretty(buf, table);
    }
    buf.append(", region_name=");
    Bytes.pretty(buf, region_name);
    buf.append(", stop_key=");
    Bytes.pretty(buf, stop_key);
    buf.append(')');
  }

  /** Singleton to compare region names.  */
  static final RegionNameCmp REGION_NAME_CMP = new RegionNameCmp();

  /**
   * Comparator for region names.
   * We can't just use {@link Bytes.MEMCMP} because it doesn't play nicely
   * with the way META keys are built as the first region has an empty start
   * key.  Let's assume we know about those 2 regions in our cache:
   *    *   .META.,,1
   *   tableA,,1273018455182
   * 
   * We're given an RPC to execute on {@code tableA}, row {@code \000} (1 byte
   * row key containing a 0).  If we use {@code memcmp} to sort the entries in
   * the cache, when we search for the entry right before {@code tableA,\000,:}
   * we'll erroneously find {@code .META.,,1} instead of the entry for first
   * region of {@code tableA}.
   * 
   * Since this scheme breaks natural ordering, we need this comparator to
   * implement a special version of {@code memcmp} to handle this scenario.
   */
  private static final class RegionNameCmp implements Comparator {

    private RegionNameCmp() {  // Can't instantiate outside of this class.
    }

    @Override
    public int compare(final byte[] a, final byte[] b) {
      final int length = Math.min(a.length, b.length);
      if (a == b) {  // Do this after accessing a.length and b.length
        return 0;    // in order to NPE if either a or b is null.
      }
      // Reminder: region names are of the form:
      //   table_name,start_key,timestamp[.MD5.]
      // First compare the table names.
      int i;
      for (i = 0; i < length; i++) {
        final byte ai = a[i];  // Saves one pointer deference every iteration.
        final byte bi = b[i];  // Saves one pointer deference every iteration.
        if (ai != bi) {  // The name of the tables differ.
          if (ai == ',') {
            return -1001;  // `a' has a smaller table name.  a < b
          } else if (bi == ',') {
            return 1001;  // `b' has a smaller table name.  a > b
          }
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
        if (ai == ',') {  // Remember: at this point ai == bi.
          break;  // We're done comparing the table names.  They're equal.
        }
      }

      // Now find the last comma in both `a' and `b'.  We need to start the
      // search from the end as the row key could have an arbitrary number of
      // commas and we don't know its length.
      final int a_comma = findCommaFromEnd(a, i);
      final int b_comma = findCommaFromEnd(b, i);
      // If either `a' or `b' is followed immediately by another comma, then
      // they are the first region (it's the empty start key).
      i++;   // No need to check against `length', there MUST be more bytes.

      // Compare keys.
      final int first_comma = Math.min(a_comma, b_comma);
      for (/*nothing*/; i < first_comma; i++) {
        final byte ai = a[i];
        final byte bi = b[i];
        if (ai != bi) {  // The keys differ.
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
      }
      if (a_comma < b_comma) {
        return -1002;  // `a' has a shorter key.  a < b
      } else if (b_comma < a_comma) {
        return 1002;  // `b' has a shorter key.  a > b
      }

      // Keys have the same length and have compared identical.  Compare the
      // rest, which essentially means: use start code as a tie breaker.
      for (/*nothing*/; i < length; i++) {
        final byte ai = a[i];
        final byte bi = b[i];
        if (ai != bi) {  // The start codes differ.
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
      }

      return a.length - b.length;
    }

    private static int findCommaFromEnd(final byte[] b, final int offset) {
      for (int i = b.length - 1; i > offset; i--) {
        if (b[i] == ',') {
          return i;
        }
      }
      throw new IllegalArgumentException("No comma found in " + Bytes.pretty(b)
                                         + " after offset " + offset);
    }

  }

}