com.flipkart.hbaseobjectmapper.AbstractHBDAO Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-object-mapper Show documentation
Show all versions of hbase-object-mapper Show documentation
HBase ORM is a light-weight, thread-safe and performant library that enables:
[1] object-oriented access of HBase rows (Data Access Object) with minimal code and good testability
[2] reading from and/or writing to HBase tables in Hadoop MapReduce jobs
package com.flipkart.hbaseobjectmapper;
import com.flipkart.hbaseobjectmapper.codec.DeserializationException;
import com.flipkart.hbaseobjectmapper.exceptions.FieldNotMappedToHBaseColumnException;
import com.google.common.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Type;
import java.util.*;
/**
* A Data Access Object class that enables simple random access (read/write) of HBase rows.
*
* Please note: Unliked the {@link HBObjectMapper} class, this class is not thread-safe
*
*
* @param Data type of row key (must be '{@link Comparable} with itself' and must be {@link Serializable})
* @param Entity type that maps to an HBase row (this type must have implemented {@link HBRecord} interface)
*/
public abstract class AbstractHBDAO, T extends HBRecord> {
/**
* Default number of versions to fetch
*/
private static final int DEFAULT_NUM_VERSIONS = 1;
protected static final HBObjectMapper hbObjectMapper = new HBObjectMapper();
protected final HTable hTable;
protected final Class rowKeyClass;
protected final Class hbRecordClass;
private final Map fields;
/**
* Constructs a data access object. Classes extending this class must call this constructor using super
*
* @param conf Hadoop configuration
* @throws IOException Exceptions thrown by HBase
* @throws IllegalStateException Annotation(s) on base entity may be incorrect
*/
@SuppressWarnings("unchecked")
protected AbstractHBDAO(Configuration conf) throws IOException {
hbRecordClass = (Class) new TypeToken(getClass()) {
}.getRawType();
rowKeyClass = (Class) new TypeToken(getClass()) {
}.getRawType();
if (hbRecordClass == null || rowKeyClass == null)
throw new IllegalStateException(String.format("Unable to resolve HBase record/rowkey type (record class is resolving to %s and rowkey class is resolving to %s)", hbRecordClass, rowKeyClass));
HBTable hbTable = hbRecordClass.getAnnotation(HBTable.class);
if (hbTable == null)
throw new IllegalStateException(String.format("Type %s should be annotated with %s for use in class %s", hbRecordClass.getName(), HBTable.class.getName(), AbstractHBDAO.class.getName()));
this.hTable = new HTable(conf, hbTable.value());
this.fields = hbObjectMapper.getHBFields(hbRecordClass);
}
/**
* Get specified number of versions of a row from HBase table by it's row key
*
* @param rowKey Row key
* @param versions Number of versions to be retrieved
* @return HBase row, deserialized as object of your bean-like class (that implements {@link HBRecord})
* @throws IOException When HBase call fails
*/
public T get(R rowKey, int versions) throws IOException {
Result result = this.hTable.get(new Get(hbObjectMapper.rowKeyToBytes(rowKey)).setMaxVersions(versions));
return hbObjectMapper.readValue(rowKey, result, hbRecordClass);
}
/**
* Get a row from HBase table by it's row key
*
* @param rowKey Row key
* @return HBase row, deserialized as object of your bean-like class (that implements {@link HBRecord})
* @throws IOException When HBase call fails
*/
public T get(R rowKey) throws IOException {
return get(rowKey, DEFAULT_NUM_VERSIONS);
}
/**
* Get specified number of versions of rows from HBase table by array of row keys (This method is a bulk variant of {@link #get(Serializable, int)} method)
*
* @param rowKeys Row keys to fetch
* @param versions Number of versions of columns to fetch
* @return Array of HBase rows, deserialized as object of your bean-like class (that implements {@link HBRecord})
* @throws IOException When HBase call fails
*/
public T[] get(R[] rowKeys, int versions) throws IOException {
List gets = new ArrayList<>(rowKeys.length);
for (R rowKey : rowKeys) {
gets.add(new Get(hbObjectMapper.rowKeyToBytes(rowKey)).setMaxVersions(versions));
}
Result[] results = this.hTable.get(gets);
@SuppressWarnings("unchecked") T[] records = (T[]) Array.newInstance(hbRecordClass, rowKeys.length);
for (int i = 0; i < records.length; i++) {
records[i] = hbObjectMapper.readValue(rowKeys[i], results[i], hbRecordClass);
}
return records;
}
/**
* Get rows from HBase table by array of row keys (This method is a bulk variant of {@link #get(Serializable)} method)
*
* @param rowKeys Row keys to fetch
* @return Array of HBase rows, deserialized as object of your bean-like class (that implements {@link HBRecord})
* @throws IOException When HBase call fails
*/
public T[] get(R[] rowKeys) throws IOException {
return get(rowKeys, DEFAULT_NUM_VERSIONS);
}
/**
* Get specified number of versions of rows from HBase table by list of row keys (This method is a multi-version variant of {@link #get(List)} method)
*
* @param rowKeys Row keys to fetch
* @param versions Number of versions of columns to fetch
* @return Array of rows corresponding to row keys passed, deserialized as objects of your bean-like class
* @throws IOException When HBase call fails
*/
public List get(List rowKeys, int versions) throws IOException {
List gets = new ArrayList<>(rowKeys.size());
for (R rowKey : rowKeys) {
gets.add(new Get(hbObjectMapper.rowKeyToBytes(rowKey)).setMaxVersions(versions));
}
Result[] results = this.hTable.get(gets);
List records = new ArrayList<>(rowKeys.size());
for (Result result : results) {
records.add(hbObjectMapper.readValue(result, hbRecordClass));
}
return records;
}
/**
* Get rows from HBase table by list of row keys (This method is a bulk variant of {@link #get(Serializable)} method)
*
* @param rowKeys Row keys to fetch
* @return List of rows corresponding to row keys passed, deserialized as objects of your bean-like class
* @throws IOException When HBase call fails
*/
public List get(List rowKeys) throws IOException {
return get(rowKeys, DEFAULT_NUM_VERSIONS);
}
/**
* Get specified number of versions of rows from HBase table by a range of row keys (start and end) - this is a multi-version variant of {@link #get(Serializable, Serializable)}
*
* @param startRowKey Row start
* @param endRowKey Row end
* @param versions Number of versions to fetch
* @return List of rows corresponding to row keys passed, deserialized as objects of your bean-like class
* @throws IOException When HBase call fails
*/
public List get(R startRowKey, R endRowKey, int versions) throws IOException {
Scan scan = new Scan(hbObjectMapper.rowKeyToBytes(startRowKey), hbObjectMapper.rowKeyToBytes(endRowKey)).setMaxVersions(versions);
ResultScanner scanner = hTable.getScanner(scan);
List records = new ArrayList<>();
for (Result result : scanner) {
records.add(hbObjectMapper.readValue(result, hbRecordClass));
}
return records;
}
/**
* Get specified number of versions of rows from HBase table by a range of row keys (start to end)
*
* @param startRowKey Row start
* @param endRowKey Row end
* @return List of rows corresponding to row keys passed, deserialized as objects of your bean-like class
* @throws IOException When HBase call fails
*/
public List get(R startRowKey, R endRowKey) throws IOException {
return get(startRowKey, endRowKey, DEFAULT_NUM_VERSIONS);
}
/**
* Persist your bean-like object (of a class that implements {@link HBRecord}) to HBase table
*
* @param object Object that needs to be persisted
* @return Row key of the persisted object, represented as a {@link String}
* @throws IOException When HBase call fails
*/
public R persist(HBRecord object) throws IOException {
Put put = hbObjectMapper.writeValueAsPut(object);
hTable.put(put);
return object.composeRowKey();
}
/**
* Persist a list of your bean-like objects (of a class that implements {@link HBRecord}) to HBase table (this is a bulk variant of {@link #persist(HBRecord)} method)
*
* @param objects List of objects that needs to be persisted
* @return Row keys of the persisted objects, represented as a {@link String}
* @throws IOException When HBase call fails
*/
public List persist(List extends HBRecord> objects) throws IOException {
List puts = new ArrayList<>(objects.size());
List rowKeys = new ArrayList<>(objects.size());
for (HBRecord object : objects) {
puts.add(hbObjectMapper.writeValueAsPut(object));
rowKeys.add(object.composeRowKey());
}
hTable.put(puts);
return rowKeys;
}
/**
* Delete a row from an HBase table for a given row key
*
* @param rowKey row key to delete
* @throws IOException When HBase call fails
*/
public void delete(R rowKey) throws IOException {
Delete delete = new Delete(hbObjectMapper.rowKeyToBytes(rowKey));
this.hTable.delete(delete);
}
/**
* Delete HBase row by object (of class that implements {@link HBRecord}
*
* @param object Object to delete
* @throws IOException When HBase call fails
*/
public void delete(HBRecord object) throws IOException {
this.delete(object.composeRowKey());
}
/**
* Delete HBase rows for an array of row keys
*
* @param rowKeys row keys to delete
* @throws IOException When HBase call fails
*/
public void delete(R[] rowKeys) throws IOException {
List deletes = new ArrayList<>(rowKeys.length);
for (R rowKey : rowKeys) {
deletes.add(new Delete(hbObjectMapper.rowKeyToBytes(rowKey)));
}
this.hTable.delete(deletes);
}
/**
* Delete HBase rows by object references
*
* @param records Records to delete
* @throws IOException When HBase call fails
*/
public void delete(List extends HBRecord> records) throws IOException {
List deletes = new ArrayList<>(records.size());
for (HBRecord record : records) {
deletes.add(new Delete(hbObjectMapper.rowKeyToBytes(record.composeRowKey())));
}
this.hTable.delete(deletes);
}
/**
* Get HBase table name
*
* @return Name of table read as String
*/
public String getTableName() {
HBTable hbTable = hbRecordClass.getAnnotation(HBTable.class);
return hbTable.value();
}
/**
* Get list of mapped column families
*
* @return A {@link Set} containing names of column families as mapped in the entity class
*/
public Set getColumnFamilies() {
return hbObjectMapper.getColumnFamilies(hbRecordClass);
}
/**
* Get list of fields (private variables of your bean-like class)
*
* @return A {@link Set} containing names of fields
*/
public Set getFields() {
return fields.keySet();
}
/**
* Get reference to HBase table
*
* @return {@link HTable} object
*/
public HTable getHBaseTable() {
return hTable;
}
private Field getField(String fieldName) {
Field field = fields.get(fieldName);
if (field == null) {
throw new IllegalArgumentException(String.format("Unrecognized field: '%s'. Choose one of %s", fieldName, fields.values().toString()));
}
return field;
}
private static void populateFieldValuesToMap(Field field, Result result, Map> map) throws DeserializationException {
if (result.isEmpty())
return;
WrappedHBColumn hbColumn = new WrappedHBColumn(field);
List cells = result.getColumnCells(Bytes.toBytes(hbColumn.family()), Bytes.toBytes(hbColumn.column()));
for (Cell cell : cells) {
Type fieldType = hbObjectMapper.getFieldType(field, hbColumn.isMultiVersioned());
final String rowKey = Bytes.toString(CellUtil.cloneRow(cell));
if (!map.containsKey(rowKey))
map.put(rowKey, new TreeMap());
map.get(rowKey).put(cell.getTimestamp(), hbObjectMapper.byteArrayToValue(CellUtil.cloneValue(cell), fieldType, hbColumn.codecFlags()));
}
}
/**
* Fetch value of column for a given row key and field
*
* @param rowKey Row key to reference HBase row
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @return Value of the column (boxed), null if row with given rowKey doesn't exist or such field doesn't exist for the row
* @throws IOException When HBase call fails
*/
public Object fetchFieldValue(R rowKey, String fieldName) throws IOException {
final NavigableMap fieldValues = fetchFieldValue(rowKey, fieldName, 1);
if (fieldValues == null || fieldValues.isEmpty()) return null;
else return fieldValues.lastEntry().getValue();
}
/**
* Fetch multiple versions of column values by row key and field name
*
* @param rowKey Row key to reference HBase row
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @param versions Number of versions of column to fetch
* @return {@link NavigableMap} of timestamps and values of the column (boxed), null if row with given rowKey doesn't exist or such field doesn't exist for the row
* @throws IOException When HBase call fails
*/
public NavigableMap fetchFieldValue(R rowKey, String fieldName, int versions) throws IOException {
R[] array = (R[]) Array.newInstance(rowKeyClass, 1);
array[0] = rowKey;
return fetchFieldValues(array, fieldName, versions).get(rowKey);
}
/**
* Fetch values of an HBase column for a range of row keys (start and end) and field name
*
* @param startRowKey Start row key (scan start)
* @param endRowKey End row key (scan end)
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @return Map of row key and column value
* @throws IOException When HBase call fails
*/
public Map fetchFieldValues(R startRowKey, R endRowKey, String fieldName) throws IOException {
final Map> multiVersionedMap = fetchFieldValues(startRowKey, endRowKey, fieldName, 1);
return toSingleVersioned(multiVersionedMap, 10);
}
private static Map toSingleVersioned(Map> multiVersionedMap, int mapInitialCapacity) {
Map map = new HashMap<>(mapInitialCapacity);
for (Map.Entry> e : multiVersionedMap.entrySet()) {
map.put(e.getKey(), e.getValue().lastEntry().getValue());
}
return map;
}
/**
* Fetch specified number of versions of values of an HBase column for a range of row keys (start and end) and field name
*
* @param startRowKey Start row key (scan start)
* @param endRowKey End row key (scan end)
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @param versions Number of versions of column to fetch
* @return Map of row key and column values (versioned)
* @throws IOException When HBase call fails
*/
public NavigableMap> fetchFieldValues(R startRowKey, R endRowKey, String fieldName, int versions) throws IOException {
Field field = getField(fieldName);
WrappedHBColumn hbColumn = new WrappedHBColumn(field);
Scan scan = new Scan(hbObjectMapper.rowKeyToBytes(startRowKey), hbObjectMapper.rowKeyToBytes(endRowKey));
scan.addColumn(Bytes.toBytes(hbColumn.family()), Bytes.toBytes(hbColumn.column()));
scan.setMaxVersions(versions);
ResultScanner scanner = hTable.getScanner(scan);
NavigableMap> map = new TreeMap<>();
for (Result result : scanner) {
populateFieldValuesToMap(field, result, map);
}
return map;
}
/**
* Fetch column values for a given array of row keys (bulk variant of method {@link #fetchFieldValue(Serializable, String)})
*
* @param rowKeys Array of row keys to fetch
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @return Map of row key and column values
* @throws IOException Exception from HBase
*/
public Map fetchFieldValues(R[] rowKeys, String fieldName) throws IOException {
final Map> multiVersionedMap = fetchFieldValues(rowKeys, fieldName, 1);
return toSingleVersioned(multiVersionedMap, rowKeys.length);
}
/**
* Fetch specified number of versions of values of an HBase column for an array of row keys
*
* @param rowKeys Array of row keys to fetch
* @param fieldName Name of the private variable of your bean-like object (of a class that implements {@link HBRecord}) whose corresponding column needs to be fetched
* @param versions Number of versions of column to fetch
* @return Map of row key and column values (versioned)
* @throws IOException When HBase call fails
*/
public Map> fetchFieldValues(R[] rowKeys, String fieldName, int versions) throws IOException {
Field field = getField(fieldName);
WrappedHBColumn hbColumn = new WrappedHBColumn(field);
if (!hbColumn.isPresent()) {
throw new FieldNotMappedToHBaseColumnException(hbRecordClass, fieldName);
}
List gets = new ArrayList<>(rowKeys.length);
for (R rowKey : rowKeys) {
Get get = new Get(hbObjectMapper.rowKeyToBytes(rowKey));
get.setMaxVersions(versions);
get.addColumn(Bytes.toBytes(hbColumn.family()), Bytes.toBytes(hbColumn.column()));
gets.add(get);
}
Result[] results = this.hTable.get(gets);
Map> map = new HashMap<>(rowKeys.length);
for (Result result : results) {
populateFieldValuesToMap(field, result, map);
}
return map;
}
}
|
© 2015 - 2025 Weber Informatics LLC | Privacy Policy