All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.uber.hoodie.index.hbase.HBaseIndex Maven / Gradle / Ivy

There is a newer version: 0.4.7
Show newest version
/*
 *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *
 */

package com.uber.hoodie.index.hbase;

import com.google.common.base.Optional;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieRecord;

import com.uber.hoodie.config.HoodieIndexConfig;
import com.uber.hoodie.exception.HoodieDependentSystemUnavailableException;
import com.uber.hoodie.exception.HoodieIndexException;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.table.HoodieTable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Hoodie Index implementation backed by HBase
 */
public class HBaseIndex extends HoodieIndex {
    private final static byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s");
    private final static byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts");
    private final static byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name");
    private final static byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path");

    private static Logger logger = LogManager.getLogger(HBaseIndex.class);

    private final String tableName;

    public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
        super(config, jsc);
        this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
    }

    @Override
    public JavaPairRDD> fetchRecordLocation(
        JavaRDD hoodieKeys, HoodieTable table) {
        throw new UnsupportedOperationException("HBase index does not implement check exist yet");
    }

    private static Connection hbaseConnection = null;

    private Connection getHBaseConnection() {
        Configuration hbaseConfig = HBaseConfiguration.create();
        String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
        hbaseConfig.set("hbase.zookeeper.quorum", quorum);
        String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP);
        hbaseConfig.set("hbase.zookeeper.property.clientPort", port);
        try {
            return ConnectionFactory.createConnection(hbaseConfig);
        } catch (IOException e) {
            throw new HoodieDependentSystemUnavailableException(
                HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
        }
    }

    /**
     * Function that tags each HoodieRecord with an existing location, if known.
     */
    class LocationTagFunction
            implements Function2>, Iterator>> {

        private final HoodieTable hoodieTable;

        LocationTagFunction(HoodieTable hoodieTable) {
            this.hoodieTable = hoodieTable;
        }

        @Override
        public Iterator> call(Integer partitionNum,
                                           Iterator> hoodieRecordIterator) {
            // Grab the global HBase connection
            synchronized (HBaseIndex.class) {
                if (hbaseConnection == null) {
                    hbaseConnection = getHBaseConnection();
                }
            }
            List> taggedRecords = new ArrayList<>();
            HTable hTable = null;
            try {
                hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
                // Do the tagging.
                while (hoodieRecordIterator.hasNext()) {
                    HoodieRecord rec = hoodieRecordIterator.next();
                    // TODO(vc): This may need to be a multi get.
                    Result result = hTable.get(
                            new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
                                    .addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));

                    // first, attempt to grab location from HBase
                    if (result.getRow() != null) {
                        String commitTs =
                                Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
                        String fileId =
                                Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));

                        HoodieTimeline commitTimeline = hoodieTable.getCompletedCommitTimeline();
                        // if the last commit ts for this row is less than the system commit ts
                        if (!commitTimeline.empty() && commitTimeline.containsInstant(
                            new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) {
                            rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
                        }
                    }
                    taggedRecords.add(rec);
                }
            } catch (IOException e) {
                throw new HoodieIndexException(
                    "Failed to Tag indexed locations because of exception with HBase Client", e);
            }

            finally {
                if (hTable != null) {
                    try {
                        hTable.close();
                    } catch (IOException e) {
                        // Ignore
                    }
                }

            }
            return taggedRecords.iterator();
        }
    }

    @Override
    public JavaRDD> tagLocation(JavaRDD> recordRDD, HoodieTable hoodieTable) {
        return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(hoodieTable), true);
    }

    class UpdateLocationTask implements Function2, Iterator> {
        @Override
        public Iterator call(Integer partition, Iterator statusIterator) {

            List writeStatusList = new ArrayList<>();
            // Grab the global HBase connection
            synchronized (HBaseIndex.class) {
                if (hbaseConnection == null) {
                    hbaseConnection = getHBaseConnection();
                }
            }
            HTable hTable = null;
            try {
                hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
                while (statusIterator.hasNext()) {
                    WriteStatus writeStatus = statusIterator.next();
                    List puts = new ArrayList<>();
                    List deletes = new ArrayList<>();
                    try {
                        for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
                            if (!writeStatus.isErrored(rec.getKey())) {
                                java.util.Optional loc = rec.getNewLocation();
                                if(loc.isPresent()) {
                                    Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
                                    put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
                                            Bytes.toBytes(loc.get().getCommitTime()));
                                    put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
                                            Bytes.toBytes(loc.get().getFileId()));
                                    put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
                                            Bytes.toBytes(rec.getPartitionPath()));
                                    puts.add(put);
                                } else {
                                    //Delete existing index for a deleted record
                                    Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey()));
                                    deletes.add(delete);
                                }
                            }
                        }
                        hTable.put(puts);
                        hTable.delete(deletes);
                        hTable.flushCommits();
                    } catch (Exception e) {
                        Exception we = new Exception("Error updating index for " + writeStatus, e);
                        logger.error(we);
                        writeStatus.setGlobalError(we);
                    }
                    writeStatusList.add(writeStatus);
                }
            } catch (IOException e) {
                throw new HoodieIndexException(
                    "Failed to Update Index locations because of exception with HBase Client", e);
            } finally {
                if (hTable != null) {
                    try {
                        hTable.close();
                    } catch (IOException e) {
                        // Ignore
                    }
                }
            }
            return writeStatusList.iterator();
        }
    }

    @Override
    public JavaRDD updateLocation(JavaRDD writeStatusRDD,
        HoodieTable hoodieTable) {
        return writeStatusRDD.mapPartitionsWithIndex(new UpdateLocationTask(), true);
    }

    @Override
    public boolean rollbackCommit(String commitTime) {
        // Can't really rollback here. HBase only can let you go from recordKey to fileID,
        // not the other way around
        return true;
    }

    /**
     * Only looks up by recordKey
     *
     * @return
     */
    @Override
    public boolean isGlobal() {
        return true;
    }

    /**
     * Mapping is available in HBase already.
     *
     * @return
     */
    @Override
    public boolean canIndexLogFiles() {
        return true;
    }

    /**
     * Index needs to be explicitly updated after storage write.
     *
     * @return
     */
    @Override
    public boolean isImplicitWithStorage() {
        return false;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy