All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.omid.transaction.CompactorScanner Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import org.apache.phoenix.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.phoenix.thirdparty.com.google.common.base.Optional;
import org.apache.phoenix.thirdparty.com.google.common.collect.Iterators;
import org.apache.phoenix.thirdparty.com.google.common.collect.PeekingIterator;
import org.apache.commons.collections4.map.LRUMap;
import org.apache.omid.HBaseShims;
import org.apache.omid.committable.CommitTable;
import org.apache.omid.committable.CommitTable.Client;
import org.apache.omid.committable.CommitTable.CommitTimestamp;
import org.apache.omid.transaction.CellUtils;
import org.apache.omid.transaction.CellInfo;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import java.util.SortedMap;
import java.util.concurrent.ExecutionException;

import static org.apache.omid.committable.CommitTable.CommitTimestamp.Location.SHADOW_CELL;

public class CompactorScanner implements InternalScanner {
    private static final Logger LOG = LoggerFactory.getLogger(CompactorScanner.class);
    private final InternalScanner internalScanner;
    private final CommitTable.Client commitTableClient;

    private final boolean isMajorCompaction;
    private final boolean retainNonTransactionallyDeletedCells;
    private final long lowWatermark;

    private final Region hRegion;

    private boolean hasMoreRows = false;
    private List currentRowWorthValues = new ArrayList();
    private final LRUMap> commitCache;

    public CompactorScanner(ObserverContext e,
                            InternalScanner internalScanner,
                            Client commitTableClient,
                            boolean isMajorCompaction,
                            boolean preserveNonTransactionallyDeletedCells) throws IOException {
        this.internalScanner = internalScanner;
        this.commitTableClient = commitTableClient;
        this.isMajorCompaction = isMajorCompaction;
        this.retainNonTransactionallyDeletedCells = preserveNonTransactionallyDeletedCells;
        this.lowWatermark = getLowWatermarkFromCommitTable();
        // Obtain the table in which the scanner is going to operate
        this.hRegion = HBaseShims.getRegionCoprocessorRegion(e.getEnvironment());
        commitCache = new LRUMap<>(1000);
        LOG.info("Scanner cleaning up uncommitted txs older than LW [{}] in region [{}]",
                lowWatermark, hRegion.getRegionInfo());
    }

    @Override
    public boolean next(List results) throws IOException {
        return next(results, -1);
    }

    public boolean next(List result, ScannerContext scannerContext) throws IOException {
        int limit = scannerContext.getBatchLimit();
        return next(result, limit);
    }

    public boolean next(List result, int limit) throws IOException {

        if (currentRowWorthValues.isEmpty()) {
            // 1) Read next row
            List scanResult = new ArrayList();
            hasMoreRows = internalScanner.next(scanResult);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Row: Result {} limit {} more rows? {}", scanResult, limit, hasMoreRows);
            }
            // 2) Traverse result list separating normal cells from shadow
            // cells and building a map to access easily the shadow cells.
            SortedMap> cellToSc = CellUtils.mapCellsToShadowCells(scanResult);

            // 3) traverse the list of row key values isolated before and
            // check which ones should be discarded
            Map lastTimestampedCellsInRow = new HashMap<>();
            PeekingIterator>> iter
                    = Iterators.peekingIterator(cellToSc.entrySet().iterator());
            while (iter.hasNext()) {
                Map.Entry> entry = iter.next();
                Cell cell = entry.getKey();
                Optional shadowCellOp = entry.getValue();

                if (cell.getTimestamp() > lowWatermark) {
                    retain(currentRowWorthValues, cell, shadowCellOp);
                    continue;
                }

                if (shouldRetainNonTransactionallyDeletedCell(cell)) {
                    retain(currentRowWorthValues, cell, shadowCellOp);
                    continue;
                }

                // During a minor compaction the coprocessor may only see a
                // subset of store files and may not have the all the versions
                // of a cell available for consideration. Therefore, if it
                // deletes a cell with a tombstone during a minor compaction,
                // an older version of the cell may become visible again. So,
                // we have to remove tombstones only in major compactions.
                if (isMajorCompaction) {
                    // Strong assumption that family delete cells arrive first before any other column
                    if (CellUtils.isTombstone(cell)) {
                        if (shadowCellOp.isPresent()) {
                            skipToNextColumn(cell, iter);
                        } else {
                            Optional commitTimestamp = queryCommitTimestamp(cell);
                            // Clean the cell only if it is valid
                            if (commitTimestamp.isPresent() && commitTimestamp.get().isValid()) {
                                skipToNextColumn(cell, iter);
                            }
                        }
                        continue;
                    }
                }

                if (shadowCellOp.isPresent()) {
                    saveLastTimestampedCell(lastTimestampedCellsInRow, cell, shadowCellOp.get());
                } else {
                    Optional commitTimestamp = queryCommitTimestamp(cell);
                    if (commitTimestamp.isPresent() && commitTimestamp.get().isValid()) {
                        // Build the missing shadow cell...
                        byte[] shadowCellValue = Bytes.toBytes(commitTimestamp.get().getValue());
                        Cell shadowCell = CellUtils.buildShadowCellFromCell(cell, shadowCellValue);
                        saveLastTimestampedCell(lastTimestampedCellsInRow, cell, shadowCell);
                    } else {
                        LOG.trace("Discarding cell {}", cell);
                    }
                }
            }
            retainLastTimestampedCellsSaved(currentRowWorthValues, lastTimestampedCellsInRow);

            // 4) Sort the list
            Collections.sort(currentRowWorthValues, KeyValue.COMPARATOR);
        }

        // Chomp current row worth values up to the limit
        if (currentRowWorthValues.size() <= limit || limit == -1) {
            result.addAll(currentRowWorthValues);
            currentRowWorthValues.clear();
        } else {
            result.addAll(currentRowWorthValues.subList(0, limit));
            currentRowWorthValues.subList(0, limit).clear();
        }
        LOG.trace("Results to preserve {}", result);

        return hasMoreRows;
    }

    @Override
    public void close() throws IOException {
        internalScanner.close();
    }

    // ----------------------------------------------------------------------------------------------------------------
    // Helper methods
    // ----------------------------------------------------------------------------------------------------------------

    @VisibleForTesting
    public boolean shouldRetainNonTransactionallyDeletedCell(Cell cell) {
        return (CellUtil.isDelete(cell) || CellUtil.isDeleteFamily(cell))
                &&
                retainNonTransactionallyDeletedCells;
    }

    private void saveLastTimestampedCell(Map lastCells, Cell cell, Cell shadowCell) {
        String cellKey = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength())
                + ":"
                + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
        LOG.trace("Cell Key: {}", cellKey);

        if (!lastCells.containsKey(cellKey)) {
            lastCells.put(cellKey, new CellInfo(cell, shadowCell));
        } else {
            if (lastCells.get(cellKey).getTimestamp() < cell.getTimestamp()) {
                lastCells.put(cellKey, new CellInfo(cell, shadowCell));
            } else {
                LOG.trace("Forgetting old cell {}", cell);
            }
        }
    }

    private long getLowWatermarkFromCommitTable() throws IOException {
        try {
            LOG.trace("About to read log watermark from commit table");
            return commitTableClient.readLowWatermark().get();
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted getting low watermark from commit table", ie);
            throw new IOException("Interrupted getting low watermark from commit table");
        } catch (ExecutionException ee) {
            LOG.warn("Problem getting low watermark from commit table");
            throw new IOException("Problem getting low watermark from commit table", ee.getCause());
        }
    }


    private Result getShadowCell(byte[] row, byte[] family, byte[] qualifier, long timestamp) throws IOException {
        Get g = new Get(row);
        g.addColumn(family, qualifier);
        g.setTimeStamp(timestamp);
        Result r = hRegion.get(g);
        return r;
    }


    private Optional getCommitTimestampWithRaces(Cell cell) throws IOException {
        try {
            byte[] family = CellUtil.cloneFamily(cell);
            byte[] qualifier = CellUtils.addShadowCellSuffixPrefix(cell.getQualifierArray(),
                    cell.getQualifierOffset(),
                    cell.getQualifierLength());
            // 2) Then check the commit table
            Optional ct = commitTableClient.getCommitTimestamp(cell.getTimestamp()).get();
            if (ct.isPresent()) {
                if (ct.get().isValid()) {
                    return Optional.of(ct.get());
                }
                // If invalid still should check sc because maybe we got falsely invalidated by another compaction or ll client
            }

            // 3) Read from shadow cell
            Result r = getShadowCell(CellUtil.cloneRow(cell), family, qualifier, cell.getTimestamp());
            if (r.containsColumn(CellUtil.cloneFamily(cell), qualifier)) {
                Optional retval = Optional.of(new CommitTimestamp(SHADOW_CELL,
                        Bytes.toLong(r.getValue(family, qualifier)), true));
                return retval;
            }

            // [OMID-146] - we have to invalidate a transaction if it hasn't reached the commit table
            // 4) invalidate the entry
            Boolean invalidated = commitTableClient.tryInvalidateTransaction(cell.getTimestamp()).get();
            if (invalidated) {
                // If we are running lowLatency Omid, we could have managed to invalidate a ct entry,
                // but the committing client already wrote to shadow cells:
                Result r2 = getShadowCell(CellUtil.cloneRow(cell), family, qualifier, cell.getTimestamp());
                if (r2.containsColumn(CellUtil.cloneFamily(cell), qualifier)) {
                    Optional retval = Optional.of(new CommitTimestamp(SHADOW_CELL,
                            Bytes.toLong(r2.getValue(family, qualifier)), true));
                    commitTableClient.deleteCommitEntry(cell.getTimestamp());
                    return retval;
                }
                return Optional.absent();
            }

            // 5) We did not manage to invalidate the transactions then check the commit table
            Optional ct2 = commitTableClient.getCommitTimestamp(cell.getTimestamp()).get();
            if (ct2.isPresent()) {
                return Optional.of(ct2.get());
            }

            // 6) Read from shadow cell
            Result r2 = getShadowCell(CellUtil.cloneRow(cell), family, qualifier, cell.getTimestamp());
            if (r2.containsColumn(CellUtil.cloneFamily(cell), qualifier)) {
                Optional retval = Optional.of(new CommitTimestamp(SHADOW_CELL,
                        Bytes.toLong(r2.getValue(family, qualifier)), true));
                return retval;
            }

        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while getting commit timestamp from commit table");
        } catch (ExecutionException e) {
            throw new IOException("Error getting commit timestamp from commit table", e);
        }

        return Optional.absent();
    }

    private Optional queryCommitTimestamp(Cell cell) throws IOException {

        // 1) First check the cache
        Optional cachedValue = commitCache.get(cell.getTimestamp());
        if (cachedValue != null) {
            return cachedValue;
        }
        Optional value = getCommitTimestampWithRaces(cell);
        commitCache.put(cell.getTimestamp(), value);
        return value;
    }

    private void retain(List result, Cell cell, Optional shadowCell) {
        LOG.trace("Retaining cell {}", cell);
        result.add(cell);
        if (shadowCell.isPresent()) {
            LOG.trace("...with shadow cell {}", cell, shadowCell.get());
            result.add(shadowCell.get());
        } else {
            LOG.trace("...without shadow cell! (TS is above Low Watermark)");
        }
    }

    private void retainLastTimestampedCellsSaved(List result, Map lastTimestampedCellsInRow) {
        for (CellInfo cellInfo : lastTimestampedCellsInRow.values()) {
            LOG.trace("Retaining last cell {} with shadow cell {}", cellInfo.getCell(), cellInfo.getShadowCell());
            result.add(cellInfo.getCell());
            result.add(cellInfo.getShadowCell());
        }
    }

    private void skipToNextColumn(Cell cell, PeekingIterator>> iter) {
        boolean isFamilyDelete = CellUtils.isFamilyDeleteCell(cell);
        while (iter.hasNext()
                && CellUtil.matchingFamily(iter.peek().getKey(), cell)
                && (CellUtil.matchingQualifier(iter.peek().getKey(), cell) || isFamilyDelete)) {
            iter.next();
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy