org.kitesdk.data.hbase.avro.SpecificAvroDao Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kite-data-hbase Show documentation
The Kite Data HBase module provides integration with HBase.
/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.hbase.avro;
import org.kitesdk.data.DatasetException;
import org.kitesdk.data.SchemaNotFoundException;
import org.kitesdk.data.ValidationException;
import org.kitesdk.data.hbase.impl.BaseDao;
import org.kitesdk.data.hbase.impl.BaseEntityMapper;
import org.kitesdk.data.hbase.impl.CompositeBaseDao;
import org.kitesdk.data.hbase.impl.Dao;
import org.kitesdk.data.hbase.impl.EntityMapper;
import org.kitesdk.data.hbase.impl.SchemaManager;
import com.google.common.collect.Lists;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificRecord;
import org.apache.hadoop.hbase.client.HTablePool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A Dao for Avro's SpecificRecords. In this Dao implementation, both the
 * underlying key record type, and the entity type are SpecificRecords. This Dao
 * allows us to persist and fetch these SpecificRecords to and from HBase.
 *
 * @param <E>
 *          The entity type.
 */
public class SpecificAvroDao<E extends SpecificRecord> extends BaseDao<E> {

  private static final Logger LOG = LoggerFactory.getLogger(SpecificAvroDao.class);

  // Shared by the static factory methods below to parse entity schema strings
  // into AvroEntitySchema instances.
  private static final AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
/**
 * Construct the SpecificAvroDao.
 *
 * <p>Note: the entity schema string is also used as the key schema when
 * building the entity mapper — TODO(review): confirm this is intentional
 * (keys derived from the entity schema's key-mapped fields).
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase.
 * @param tableName
 *          The name of the table this Dao will read from and write to.
 * @param entitySchemaString
 *          The json string representing the special avro record schema, that
 *          contains metadata in annotations of the Avro record fields. See
 *          {@link AvroEntityMapper} for details.
 * @param entityClass
 *          The class of the SpecificRecord this DAO will persist and fetch.
 */
public SpecificAvroDao(HTablePool tablePool, String tableName,
    String entitySchemaString, Class entityClass) {
  super(tablePool, tableName, buildEntityMapper(entitySchemaString,
      entitySchemaString, entityClass));
}
/**
 * Construct the SpecificAvroDao.
 *
 * <p>Convenience overload: reads the schema from the stream and delegates to
 * the String-based constructor.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase.
 * @param tableName
 *          The name of the table this Dao will read from and write to.
 * @param entitySchemaStream
 *          The json stream representing the special avro record schema, that
 *          contains metadata in annotations of the Avro record fields. See
 *          {@link AvroEntityMapper} for details.
 * @param entityClass
 *          The class of the SpecificRecord this DAO will persist and fetch.
 */
public SpecificAvroDao(HTablePool tablePool, String tableName,
    InputStream entitySchemaStream, Class entityClass) {
  this(tablePool, tableName, AvroUtils
      .inputStreamToString(entitySchemaStream), entityClass);
}
/**
 * Construct the SpecificAvroDao with an EntityManager, which will provide the
 * entity mapper to this Dao that knows how to map the different entity schema
 * versions defined by the managed schema.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase.
 * @param tableName
 *          The table name of the managed schema.
 * @param entityName
 *          The entity name of the managed schema.
 * @param schemaManager
 *          The SchemaManager which will be used to query schema information
 *          from the meta store.
 */
public SpecificAvroDao(HTablePool tablePool, String tableName,
    String entityName, SchemaManager schemaManager) {
  super(tablePool, tableName,
      new VersionedAvroEntityMapper.Builder()
          .setSchemaManager(schemaManager)
          .setTableName(tableName)
          .setEntityName(entityName)
          .setSpecific(true)
          .build());
}
/**
* Create a CompositeDao, which will return SpecificRecord instances
* represented by the entitySchemaString avro schema. This avro schema must be
* a composition of the schemas in the subEntitySchemaStrings list.
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase
* @param tableName
* The table name this dao will read from and write to
* @param keySchemaString
* The Avro schema string that represents the StorageKey structure for row
* keys in this table.
* @param subEntitySchemaStrings
* The list of entities that make up the composite. This list must be
* in the same order as the fields defined in the entitySchemaString.
* @param keyClass
* The class of the SpecificRecord representing the StorageKey of rows this
* dao will fetch.
* @param entityClass
* The class of the SpecificRecord this DAO will persist and fetch.
* @return The CompositeDao instance.
* @throws SchemaNotFoundException
* @throws ValidationException
*/
@SuppressWarnings("unchecked")
public static Dao buildCompositeDao(
HTablePool tablePool, String tableName,
List subEntitySchemaStrings, Class entityClass) {
List> entityMappers = new ArrayList>();
for (String subEntitySchemaString : subEntitySchemaStrings) {
AvroEntitySchema subEntitySchema = parser
.parseEntitySchema(subEntitySchemaString);
Class subEntityClass;
try {
subEntityClass = (Class) Class.forName(subEntitySchema
.getAvroSchema().getFullName());
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
entityMappers.add(SpecificAvroDao. buildEntityMapper(
subEntitySchemaString, subEntitySchemaString, subEntityClass));
}
return new SpecificCompositeAvroDao(tablePool, tableName,
entityMappers, entityClass);
}
/**
* Create a CompositeDao, which will return SpecificRecord instances
* in a Map container.
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase
* @param tableName
* The table name this dao will read from and write to
* @param keySchemaString
* The Avro schema string that represents the StorageKey structure for row
* keys in this table.
* @param subEntitySchemaStrings
* The list of entities that make up the composite.
* @param keyClass
* The class of the SpecificRecord representing the StorageKey of rows this
* dao will fetch.
* @return The CompositeDao instance.
* @throws SchemaNotFoundException
* @throws ValidationException
*/
@SuppressWarnings("unchecked")
public static Dao<
Map> buildCompositeDao(
HTablePool tablePool, String tableName,
List subEntitySchemaStrings) {
List> entityMappers = new ArrayList>();
for (String subEntitySchemaString : subEntitySchemaStrings) {
AvroEntitySchema subEntitySchema = parser
.parseEntitySchema(subEntitySchemaString);
Class subEntityClass;
try {
subEntityClass = (Class) Class.forName(subEntitySchema
.getAvroSchema().getFullName());
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
entityMappers.add(SpecificAvroDao. buildEntityMapper(
subEntitySchemaString, subEntitySchemaString,
subEntityClass));
}
return new SpecificMapCompositeAvroDao(tablePool, tableName, entityMappers);
}
/**
 * Create a CompositeDao, which will return SpecificRecord instances
 * represented by the entityClass's avro schema. That schema must be a
 * composition of the schemas read from the subEntitySchemaStreams list.
 *
 * @param tablePool
 *          An HTablePool instance to use for connecting to HBase
 * @param tableName
 *          The table name this dao will read from and write to
 * @param subEntitySchemaStreams
 *          The list of entities that make up the composite. This list must be
 *          in the same order as the fields defined in the composite schema.
 * @param entityClass
 *          The class of the SpecificRecord this DAO will persist and fetch.
 * @return The CompositeDao instance.
 * @throws SchemaNotFoundException
 * @throws ValidationException
 */
public static <E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDaoWithInputStream(
    HTablePool tablePool, String tableName,
    List<InputStream> subEntitySchemaStreams, Class<E> entityClass) {
  // Materialize each schema stream to a string, then delegate to the
  // String-based factory.
  List<String> subEntitySchemaStrings = new ArrayList<String>();
  for (InputStream subEntitySchemaStream : subEntitySchemaStreams) {
    subEntitySchemaStrings.add(AvroUtils
        .inputStreamToString(subEntitySchemaStream));
  }
  return buildCompositeDao(tablePool, tableName, subEntitySchemaStrings,
      entityClass);
}
/**
* Create a CompositeDao, which will return SpecificRecord instances
* represented by the entitySchemaString avro schema. This avro schema must be
* a composition of the schemas in the subEntitySchemaStrings list.
*
* @param tablePool
* An HTabePool instance to use for connecting to HBase.
* @param tableName
* The table name of the managed schema.
* @param entityClass
* The class that is the composite record, which is made up of fields
* referencing the sub records.
* @param schemaManager
* The SchemaManager which will use to create the entity mapper that
* will power this dao.
* @return The CompositeDao instance.
* @throws SchemaNotFoundException
*/
public static Dao buildCompositeDaoWithEntityManager(
HTablePool tablePool, String tableName, Class entityClass,
SchemaManager schemaManager) {
Schema entitySchema = getSchemaFromEntityClass(entityClass);
List> entityMappers = new ArrayList>();
for (Schema.Field field : entitySchema.getFields()) {
entityMappers.add(new VersionedAvroEntityMapper.Builder()
.setSchemaManager(schemaManager).setTableName(tableName)
.setEntityName(getSchemaName(field.schema())).setSpecific(true)
. build());
}
return new SpecificCompositeAvroDao(tablePool, tableName,
entityMappers, entityClass);
}
/**
 * Extract the schema name for a (possibly nullable) field schema. Supports
 * plain schemas and the common nullable-union forms [null, T] and [T, null];
 * any other union shape is rejected.
 *
 * @param schema
 *          The field's avro schema.
 * @return The name of the non-null schema.
 * @throws IllegalArgumentException
 *           if the schema is a union that is not a simple two-branch
 *           nullable union.
 */
private static String getSchemaName(Schema schema) {
  if (schema.getType() == Schema.Type.UNION) {
    List<Schema> types = schema.getTypes();
    if (types.size() == 2) {
      if (types.get(0).getType() == Schema.Type.NULL) {
        return types.get(1).getName();
      } else if (types.get(1).getType() == Schema.Type.NULL) {
        return types.get(0).getName();
      }
    }
    throw new IllegalArgumentException("Unsupported union schema: " + schema);
  }
  return schema.getName();
}
/**
* Create a CompositeDao, which will return SpecificRecord instances
* in a Map container.
*
* @param tablePool
* An HTablePool instance to use for connecting to HBase.
* @param tableName
* The table name of the managed schema.
* @param subEntityClasses
* The classes that make up the subentities.
* @param schemaManager
* The SchemaManager which will be used to create the entity mapper that
* will power this dao.
* @return The CompositeDao instance.
* @throws SchemaNotFoundException
*/
public static Dao