// NOTE(review): web-page banner text from the code-hosting site was captured here
// during extraction; it is not part of the original source file.
/*
 * Copyright 2014 Fluo authors (see AUTHORS)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.fluo.core.impl;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import io.fluo.accumulo.util.ColumnConstants;
import io.fluo.accumulo.values.WriteValue;
import io.fluo.api.data.Bytes;
import io.fluo.api.data.Column;
import io.fluo.core.exceptions.StaleScanException;
import io.fluo.core.util.ByteUtil;
import io.fluo.core.util.UtilWaitThread;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;

public class ParallelSnapshotScanner {

  private Environment env;
  private long startTs;
  private HashSet unscannedRows;
  private Set columns;
  private TxStats stats;

  ParallelSnapshotScanner(Collection rows, Set columns, Environment env, long startTs, TxStats stats) {
    this.unscannedRows = new HashSet<>(rows);
    this.columns = columns;
    this.env = env;
    this.startTs = startTs;
    this.stats = stats;
  }

  private BatchScanner setupBatchScanner(Collection rows, Set columns) {
    BatchScanner scanner;
    try {
      // TODO hardcoded number of threads!
      // one thread is probably good.. going for throughput
      scanner = env.getConnector().createBatchScanner(env.getTable(), env.getAuthorizations(), 1);
    } catch (TableNotFoundException e) {
      throw new RuntimeException(e);
    }

    scanner.clearColumns();
    scanner.clearScanIterators();

    List ranges = new ArrayList<>(rows.size());

    for (Bytes row : rows) {
      ranges.add(Range.exact(ByteUtil.toText(row)));
    }

    scanner.setRanges(ranges);

    SnapshotScanner.setupScanner(scanner, new ArrayList<>(columns), startTs);

    return scanner;
  }

  Map> scan() {

    long waitTime = SnapshotScanner.INITIAL_WAIT_TIME;
    long startTime = System.currentTimeMillis();

    Map> ret = new HashMap<>();

    while (true) {
      List> locks = new ArrayList<>();

      scan(ret, locks);

      if (locks.size() > 0) {

        boolean resolvedAll = LockResolver.resolveLocks(env, startTs, stats, locks, startTime);

        if (!resolvedAll) {
          UtilWaitThread.sleep(waitTime);
          stats.incrementLockWaitTime(waitTime);
          waitTime = Math.min(SnapshotScanner.MAX_WAIT_TIME, waitTime * 2);
        }
        // TODO, could only rescan the row/cols that were locked instead of just the entire row

        // retain the rows that were locked for future scans
        HashSet lockedRows = new HashSet<>();
        for (Entry entry : locks) {
          lockedRows.add(ByteUtil.toBytes(entry.getKey().getRowData()));
        }

        unscannedRows.retainAll(lockedRows);

        continue;
      }

      for (Map cols : ret.values())
        stats.incrementEntriesReturned(cols.size());

      return ret;
    }
  }

  void scan(Map> ret, List> locks) {

    BatchScanner bs = setupBatchScanner(unscannedRows, columns);
    try {
      for (Entry entry : bs) {
        Bytes row = ByteUtil.toBytes(entry.getKey().getRowData());
        Bytes cf = ByteUtil.toBytes(entry.getKey().getColumnFamilyData());
        Bytes cq = ByteUtil.toBytes(entry.getKey().getColumnQualifierData());

        Column col = new Column(cf, cq).setVisibility(ByteUtil.toBytes(entry.getKey().getColumnVisibilityData()));

        long colType = entry.getKey().getTimestamp() & ColumnConstants.PREFIX_MASK;

        if (colType == ColumnConstants.LOCK_PREFIX) {
          locks.add(entry);
        } else if (colType == ColumnConstants.DATA_PREFIX) {
          Map cols = ret.get(row);
          if (cols == null) {
            cols = new HashMap<>();
            ret.put(row, cols);
          }

          cols.put(col, Bytes.wrap(entry.getValue().get()));
        } else if (colType == ColumnConstants.WRITE_PREFIX) {
          if (WriteValue.isTruncated(entry.getValue().get())) {
            throw new StaleScanException();
          } else {
            throw new IllegalArgumentException();
          }
        } else {
          throw new IllegalArgumentException("Unexpected column type " + colType);
        }
      }
    } finally {
      bs.close();
    }
  }

}




// NOTE(review): web-page footer text from the code-hosting site was captured here
// during extraction; it is not part of the original source file.