/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.index;
import com.google.common.base.Optional;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.config.HoodieIndexConfig;
import com.uber.hoodie.exception.HoodieDependentSystemUnavailableException;
import com.uber.hoodie.exception.HoodieIndexException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* Hoodie Index implementation backed by HBase
*/
public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
private final static byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s");
private final static byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts");
private final static byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name");
private final static byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path");
private static Logger logger = LogManager.getLogger(HBaseIndex.class);
private final String tableName;
public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
super(config, jsc);
this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
}
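  /**
   * Key-only location lookups are not supported by this index yet.
   */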
@Override
  public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
      JavaRDD<HoodieKey> hoodieKeys, HoodieTableMetadata metadata) {
throw new UnsupportedOperationException("HBase index does not implement check exist yet");
}
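  // Single HBase connection per executor JVM, lazily created by the tasks below under a
  // class-level lock and shared across partitions.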
private static Connection hbaseConnection = null;
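  /**
   * Builds an HBase connection using the ZooKeeper quorum and client port from the write config.
   */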
private Connection getHBaseConnection() {
Configuration hbaseConfig = HBaseConfiguration.create();
String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
hbaseConfig.set("hbase.zookeeper.quorum", quorum);
String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP);
hbaseConfig.set("hbase.zookeeper.property.clientPort", port);
try {
return ConnectionFactory.createConnection(hbaseConfig);
} catch (IOException e) {
throw new HoodieDependentSystemUnavailableException(
HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
}
}
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
  class LocationTagFunction
      implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
private final HoodieTableMetadata metadata;
LocationTagFunction(HoodieTableMetadata metadata) {
this.metadata = metadata;
}
@Override
    public Iterator<HoodieRecord<T>> call(Integer partitionNum,
        Iterator<HoodieRecord<T>> hoodieRecordIterator) {
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
}
      List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
// Do the tagging.
while (hoodieRecordIterator.hasNext()) {
          HoodieRecord<T> rec = hoodieRecordIterator.next();
// TODO(vc): This may need to be a multi get.
Result result = hTable.get(
new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1)
.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
// first, attempt to grab location from HBase
if (result.getRow() != null) {
String commitTs =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
String fileId =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
// if the last commit ts for this row is less than the system commit ts
if (!metadata.isCommitsEmpty() && metadata.isCommitTsSafe(commitTs)) {
rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
}
}
taggedRecords.add(rec);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Tag indexed locations because of exception with HBase Client", e);
}
finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
}
return taggedRecords.iterator();
}
}
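  /**
   * Looks up the existing location (commit time and file id) of each record in HBase and tags
   * the record with it, one Spark partition at a time.
   */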
@Override
public JavaRDD> tagLocation(JavaRDD> recordRDD,
HoodieTableMetadata metadata) {
return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(metadata), true);
}
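  /**
   * Writes the new location (commit time, file id, partition path) of every successfully
   * written record back to HBase; HBase failures are recorded as a global error on the
   * corresponding WriteStatus instead of failing the task.
   */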
  class UpdateLocationTask implements Function2<Integer, Iterator<WriteStatus>, Iterator<WriteStatus>> {
@Override
    public Iterator<WriteStatus> call(Integer partition, Iterator<WriteStatus> statusIterator) {
      List<WriteStatus> writeStatusList = new ArrayList<>();
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
}
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
while (statusIterator.hasNext()) {
WriteStatus writeStatus = statusIterator.next();
          List<Put> puts = new ArrayList<>();
try {
for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
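              // only index records that were written successfully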
if (!writeStatus.isErrored(rec.getKey())) {
Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
HoodieRecordLocation loc = rec.getNewLocation();
put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
Bytes.toBytes(loc.getCommitTime()));
put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
Bytes.toBytes(loc.getFileId()));
put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
Bytes.toBytes(rec.getPartitionPath()));
puts.add(put);
}
}
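            // send the batched puts for this WriteStatus to HBase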
hTable.put(puts);
hTable.flushCommits();
} catch (Exception e) {
Exception we = new Exception("Error updating index for " + writeStatus, e);
logger.error(we);
writeStatus.setGlobalError(we);
}
writeStatusList.add(writeStatus);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Update Index locations because of exception with HBase Client", e);
} finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
}
return writeStatusList.iterator();
}
}
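  /**
   * Updates the HBase index with the locations of all records written in this commit,
   * one Spark partition at a time.
   */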
@Override
  public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
      HoodieTableMetadata metadata) {
return writeStatusRDD.mapPartitionsWithIndex(new UpdateLocationTask(), true);
}
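  /**
   * Rollback of index entries is not implemented yet; entries written by the given commit are
   * left in HBase and this call simply reports success.
   */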
@Override
public boolean rollbackCommit(String commitTime) {
// TODO (weiy)
return true;
}
}