org.kitesdk.data.hbase.impl.HBaseUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kite-data-hbase Show documentation
Show all versions of kite-data-hbase Show documentation
The Kite Data HBase module provides integration with HBase.
/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.hbase.impl;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.kitesdk.compat.DynMethods;
/**
* Static utility functions for working with the HBase API.
*/
public class HBaseUtils {
/**
* Interface for Internal HBase Operations
*/
private static interface Operation {
// Add a Column to an Operation
void addColumn(byte[] family, byte[] column);
// Add a Family to an Operation
void addFamily(byte[] family);
}
// CDK-506 Add compat for hbase-0.98
private static final DynMethods.UnboundMethod GET_FAMILY_MAP_METHOD =
new DynMethods.Builder("getFamilyMap").impl(Put.class).build();
/**
* Given a list of puts, create a new put with the values in each put merged
* together. It is expected that no puts have a value for the same fully
* qualified column. Return the new put.
*
* @param key
* The key of the new put.
* @param putList
* The list of puts to merge
* @return the new Put instance
*/
public static Put mergePuts(byte[] keyBytes, List putList) {
Put put = new Put(keyBytes);
for (Put putToMerge : putList) {
Map> familyMap =
(Map>) GET_FAMILY_MAP_METHOD.invoke(putToMerge);
for (List keyValueList : familyMap.values()) {
for (KeyValue keyValue : keyValueList) {
// don't use put.add(KeyValue) since it doesn't work with HBase 0.96 onwards
put.add(keyValue.getFamily(), keyValue.getQualifier(),
keyValue.getTimestamp(), keyValue.getValue());
}
}
}
return put;
}
/**
* Given a list of PutActions, create a new PutAction with the values in each
* put merged together. It is expected that no puts have a value for the same
* fully qualified column. Return the new PutAction.
*
* @param key
* The key of the new put.
* @param putActionList
* The list of PutActions to merge
* @return the new PutAction instance
*/
public static PutAction mergePutActions(byte[] keyBytes,
List putActionList) {
VersionCheckAction checkAction = null;
List putsToMerge = new ArrayList();
for (PutAction putActionToMerge : putActionList) {
putsToMerge.add(putActionToMerge.getPut());
VersionCheckAction checkActionToMerge = putActionToMerge.getVersionCheckAction();
if (checkActionToMerge != null) {
checkAction = checkActionToMerge;
}
}
Put put = mergePuts(keyBytes, putsToMerge);
return new PutAction(put, checkAction);
}
/**
* Add a Collection of Columns to an Operation, Only Add Single Columns
* If Their Family Isn't Already Being Added.
* @param columns
* Collection of columns to add to the operation
* @param operation
* The HBase operation to add the columns to
*/
private static void addColumnsToOperation(Collection columns, Operation operation) {
// Keep track of whole family additions
Set familySet = new HashSet();
// Iterate through each of the required columns
for (String column : columns) {
// Split the column by : (family : column)
String[] familyAndColumn = column.split(":");
// Check if this is a family only
if (familyAndColumn.length == 1) {
// Add family to whole family additions, and add to scanner
familySet.add(familyAndColumn[0]);
operation.addFamily(Bytes.toBytes(familyAndColumn[0]));
} else {
// Add this column, as long as it's entire family wasn't added.
if (!familySet.contains(familyAndColumn[0])) {
operation.addColumn(Bytes.toBytes(familyAndColumn[0]), Bytes.toBytes(familyAndColumn[1]));
}
}
}
}
/**
* Add a Collection of Columns to a Scanner, Only Add Single Columns
* If Their Family Isn't Already Being Added.
* @param columns
* Collection of columns to add to the scan
* @param scan
* The scanner object to add the columns to
*/
public static void addColumnsToScan(Collection columns, final Scan scan) {
addColumnsToOperation(columns, new Operation() {
@Override
public void addColumn(byte[] family, byte[] column) {
scan.addColumn(family, column);
}
@Override
public void addFamily(byte[] family) {
scan.addFamily(family);
}
});
}
/**
* Add a Collection of Columns to a Get, Only Add Single Columns
* If Their Family Isn't Already Being Added.
* @param columns
* Collection of columns to add to the Get
* @param get
* The Get object to add the columns to
*/
public static void addColumnsToGet(Collection columns, final Get get) {
addColumnsToOperation(columns, new Operation() {
@Override
public void addColumn(byte[] family, byte[] column) {
get.addColumn(family, column);
}
@Override
public void addFamily(byte[] family) {
get.addFamily(family);
}
});
}
}