
com.uber.hoodie.index.HBaseIndex

/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.index;

import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import com.uber.hoodie.config.HoodieIndexConfig;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieDependentSystemUnavailableException;
import com.uber.hoodie.exception.HoodieIndexException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Hoodie Index implementation backed by HBase
 */
public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
    private static final byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s");
    private static final byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts");
    private static final byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name");
    private static final byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path");
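
    // Row layout: the HBase row key is the Hoodie record key, and the "_s" column
    // family stores the commit time, file name, and partition path of the last write.
    // tagLocation() reads these cells back; updateLocation() writes them out.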

    private static final Logger logger = LogManager.getLogger(HBaseIndex.class);

    private final String tableName;

    public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
        super(config, jsc);
        this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
    }

    @Override
    public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
        JavaRDD<HoodieKey> hoodieKeys, HoodieTableMetadata metadata) {
        throw new UnsupportedOperationException("HBase index does not implement fetchRecordLocation yet");
    }

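    // A single HBase connection is shared by all tasks in the executor JVM; it is
    // created lazily under the HBaseIndex.class lock in the tag/update tasks below.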
    private static Connection hbaseConnection = null;

    private Connection getHBaseConnection() {
        Configuration hbaseConfig = HBaseConfiguration.create();
        String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
        hbaseConfig.set("hbase.zookeeper.quorum", quorum);
        String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP);
        hbaseConfig.set("hbase.zookeeper.property.clientPort", port);
        try {
            return ConnectionFactory.createConnection(hbaseConfig);
        } catch (IOException e) {
            throw new HoodieDependentSystemUnavailableException(
                HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
        }
    }

    /**
     * Function that tags each HoodieRecord with an existing location, if known.
     */
    class LocationTagFunction
            implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {

        private final HoodieTableMetadata metadata;

        LocationTagFunction(HoodieTableMetadata metadata) {
            this.metadata = metadata;
        }

        @Override
        public Iterator<HoodieRecord<T>> call(Integer partitionNum,
                Iterator<HoodieRecord<T>> hoodieRecordIterator) {
            // Grab the global HBase connection
            synchronized (HBaseIndex.class) {
                if (hbaseConnection == null) {
                    hbaseConnection = getHBaseConnection();
                }
            }
            List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
            HTable hTable = null;
            try {
                hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
                // Do the tagging.
                while (hoodieRecordIterator.hasNext()) {
                    HoodieRecord<T> rec = hoodieRecordIterator.next();
                    // TODO(vc): This may need to be a multi get.
                    Result result = hTable.get(
                            new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));

                    // first, attempt to grab location from HBase
                    if (result.getRow() != null) {
                        String commitTs =
                                Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
                        String fileId =
                                Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));

                        // only tag the record if the commit that wrote this row is a
                        // completed (safe) commit on the timeline
                        if (!metadata.isCommitsEmpty() && metadata.isCommitTsSafe(commitTs)) {
                            rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
                        }
                    }
                    taggedRecords.add(rec);
                }
            } catch (IOException e) {
                throw new HoodieIndexException(
                    "Failed to tag indexed locations because of exception with HBase client", e);
            } finally {
                if (hTable != null) {
                    try {
                        hTable.close();
                    } catch (IOException e) {
                        // Ignore
                    }
                }
            }
            return taggedRecords.iterator();
        }
    }

    @Override
    public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
            HoodieTableMetadata metadata) {
        return recordRDD.mapPartitionsWithIndex(new LocationTagFunction(metadata), true);
    }

    /**
     * Function that writes the new location (commit time, file id, partition path)
     * of each successfully written record back to HBase.
     */
    class UpdateLocationTask
            implements Function2<Integer, Iterator<WriteStatus>, Iterator<WriteStatus>> {
        @Override
        public Iterator<WriteStatus> call(Integer partition, Iterator<WriteStatus> statusIterator) {

            List<WriteStatus> writeStatusList = new ArrayList<>();
            // Grab the global HBase connection
            synchronized (HBaseIndex.class) {
                if (hbaseConnection == null) {
                    hbaseConnection = getHBaseConnection();
                }
            }
            HTable hTable = null;
            try {
                hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
                while (statusIterator.hasNext()) {
                    WriteStatus writeStatus = statusIterator.next();
                    List<Put> puts = new ArrayList<>();
                    try {
                        for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
                            if (!writeStatus.isErrored(rec.getKey())) {
                                Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
                                HoodieRecordLocation loc = rec.getNewLocation();
                                put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
                                    Bytes.toBytes(loc.getCommitTime()));
                                put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
                                    Bytes.toBytes(loc.getFileId()));
                                put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
                                    Bytes.toBytes(rec.getPartitionPath()));
                                puts.add(put);
                            }
                        }
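                        // Send the whole batch for this WriteStatus in one put() call,
                        // then flush the client-side write buffer immediately.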
                        hTable.put(puts);
                        hTable.flushCommits();
                    } catch (Exception e) {
                        Exception we = new Exception("Error updating index for " + writeStatus, e);
                        logger.error(we);
                        writeStatus.setGlobalError(we);
                    }
                    writeStatusList.add(writeStatus);
                }
            } catch (IOException e) {
                throw new HoodieIndexException(
                    "Failed to Update Index locations because of exception with HBase Client", e);
            } finally {
                if (hTable != null) {
                    try {
                        hTable.close();
                    } catch (IOException e) {
                        // Ignore
                    }
                }
            }
            return writeStatusList.iterator();
        }
    }

    @Override
    public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
            HoodieTableMetadata metadata) {
        return writeStatusRDD.mapPartitionsWithIndex(new UpdateLocationTask(), true);
    }

    @Override
    public boolean rollbackCommit(String commitTime) {
        // TODO (weiy)
        return true;
    }
}
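
A minimal usage sketch (not part of the original file), showing how this index is driven. It assumes `props` already carries the three properties this class reads (HoodieIndexConfig.HBASE_TABLENAME_PROP, HBASE_ZKQUORUM_PROP, HBASE_ZKPORT_PROP); the payload type MyPayload and the HoodieWriteConfig builder calls are illustrative placeholders, not confirmed API of this Hoodie version.

    // Hypothetical wiring; builder method names are assumptions.
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withProps(props)  // must include the HBase table name, ZK quorum, and ZK port
        .build();
    HBaseIndex<MyPayload> index = new HBaseIndex<>(config, jsc);

    // Tag incoming records with their existing locations (if any)...
    JavaRDD<HoodieRecord<MyPayload>> tagged = index.tagLocation(recordRDD, metadata);
    // ...and, after writing, persist the new locations back to HBase.
    JavaRDD<WriteStatus> updated = index.updateLocation(writeStatusRDD, metadata);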