All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.coprocessor.AggregateImplementation Maven / Gradle / Ivy

There is a newer version: 4.15.0-HBase-1.5
Show newest version
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.coprocessor;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateRequest;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse;
import org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateService;
import org.apache.hadoop.hbase.regionserver.InternalScanner;

import com.google.protobuf.ByteString;
import com.google.protobuf.Message;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;

/**
 * A concrete AggregateProtocol implementation. Its system level coprocessor
 * that computes the aggregate function at a region level.
 * {@link ColumnInterpreter} is used to interpret column value. This class is
 * parameterized with the following (these are the types with which the {@link ColumnInterpreter}
 * is parameterized, and for more description on these, refer to {@link ColumnInterpreter}):
 * @param T Cell value data type
 * @param S Promoted data type
 * @param P PB message that is used to transport initializer specific bytes
 * @param Q PB message that is used to transport Cell (<T>) instance
 * @param R PB message that is used to transport Promoted (<S>) instance
 */
@InterfaceAudience.Private
public class AggregateImplementation 
extends AggregateService implements CoprocessorService, Coprocessor {
  protected static final Log log = LogFactory.getLog(AggregateImplementation.class);
  private RegionCoprocessorEnvironment env;

  /**
   * Gives the maximum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, maximum value for the
   * entire column family will be returned.
   */
  @Override
  public void getMax(RpcController controller, AggregateRequest request,
      RpcCallback done) {
    InternalScanner scanner = null;
    AggregateResponse response = null;
    T max = null;
    try {
      ColumnInterpreter ci = constructColumnInterpreterFromRequest(request);
      T temp;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      List results = new ArrayList();
      byte[] colFamily = scan.getFamilies()[0];
      NavigableSet qualifiers = scan.getFamilyMap().get(colFamily);
      byte[] qualifier = null;
      if (qualifiers != null && !qualifiers.isEmpty()) {
        qualifier = qualifiers.pollFirst();
      }
      // qualifier can be null.
      boolean hasMoreRows = false;
      do {
        hasMoreRows = scanner.next(results);
        int listSize = results.size();
        for (int i = 0; i < listSize; i++) {
          temp = ci.getValue(colFamily, qualifier, results.get(i));
          max = (max == null || (temp != null && ci.compare(temp, max) > 0)) ? temp : max;
        }
        results.clear();
      } while (hasMoreRows);
      if (max != null) {
        AggregateResponse.Builder builder = AggregateResponse.newBuilder();
        builder.addFirstPart(ci.getProtoForCellType(max).toByteString());
        response = builder.build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      if (scanner != null) {
        try {
          scanner.close();
        } catch (IOException ignored) {}
      }
    }
    log.info("Maximum from this region is "
        + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + max);
    done.run(response);
  }

  /**
   * Gives the minimum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, minimum value for the
   * entire column family will be returned.
   */
  @Override
  public void getMin(RpcController controller, AggregateRequest request,
      RpcCallback done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    T min = null;
    try {
      ColumnInterpreter ci = constructColumnInterpreterFromRequest(request);
      T temp;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      List results = new ArrayList();
      byte[] colFamily = scan.getFamilies()[0];
      NavigableSet qualifiers = scan.getFamilyMap().get(colFamily);
      byte[] qualifier = null;
      if (qualifiers != null && !qualifiers.isEmpty()) {
        qualifier = qualifiers.pollFirst();
      }
      boolean hasMoreRows = false;
      do {
        hasMoreRows = scanner.next(results);
        int listSize = results.size();
        for (int i = 0; i < listSize; i++) {
          temp = ci.getValue(colFamily, qualifier, results.get(i));
          min = (min == null || (temp != null && ci.compare(temp, min) < 0)) ? temp : min;
        }
        results.clear();
      } while (hasMoreRows);
      if (min != null) {
        response = AggregateResponse.newBuilder().addFirstPart( 
          ci.getProtoForCellType(min).toByteString()).build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      if (scanner != null) {
        try {
          scanner.close();
        } catch (IOException ignored) {}
      }
    }
    log.info("Minimum from this region is "
        + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + min);
    done.run(response);
  }

  /**
   * Gives the sum for a given combination of column qualifier and column
   * family, in the given row range as defined in the Scan object. In its
   * current implementation, it takes one column family and one column qualifier
   * (if provided). In case of null column qualifier, sum for the entire column
   * family will be returned.
   */
  @Override
  public void getSum(RpcController controller, AggregateRequest request,
      RpcCallback done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    long sum = 0l;
    try {
      ColumnInterpreter ci = constructColumnInterpreterFromRequest(request);
      S sumVal = null;
      T temp;
      Scan scan = ProtobufUtil.toScan(request.getScan());
      scanner = env.getRegion().getScanner(scan);
      byte[] colFamily = scan.getFamilies()[0];
      NavigableSet qualifiers = scan.getFamilyMap().get(colFamily);
      byte[] qualifier = null;
      if (qualifiers != null && !qualifiers.isEmpty()) {
        qualifier = qualifiers.pollFirst();
      }
      List results = new ArrayList();
      boolean hasMoreRows = false;
      do {
        hasMoreRows = scanner.next(results);
        int listSize = results.size();
        for (int i = 0; i < listSize; i++) {
          temp = ci.getValue(colFamily, qualifier, results.get(i));
          if (temp != null)
            sumVal = ci.add(sumVal, ci.castToReturnType(temp));
        }
        results.clear();
      } while (hasMoreRows);
      if (sumVal != null) {
        response = AggregateResponse.newBuilder().addFirstPart( 
          ci.getProtoForPromotedType(sumVal).toByteString()).build();
      }
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      if (scanner != null) {
        try {
          scanner.close();
        } catch (IOException ignored) {}
      }
    }
    log.debug("Sum from this region is "
        + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + sum);
    done.run(response);
  }

  /**
   * Gives the row count for the given column family and column qualifier, in
   * the given row range as defined in the Scan object.
   */
  @Override
  public void getRowNum(RpcController controller, AggregateRequest request,
      RpcCallback done) {
    AggregateResponse response = null;
    long counter = 0l;
    List results = new ArrayList();
    InternalScanner scanner = null;
    try {
      Scan scan = ProtobufUtil.toScan(request.getScan());
      byte[][] colFamilies = scan.getFamilies();
      byte[] colFamily = colFamilies != null ? colFamilies[0] : null;
      NavigableSet qualifiers = colFamilies != null ?
          scan.getFamilyMap().get(colFamily) : null;
      byte[] qualifier = null;
      if (qualifiers != null && !qualifiers.isEmpty()) {
        qualifier = qualifiers.pollFirst();
      }
      if (scan.getFilter() == null && qualifier == null)
        scan.setFilter(new FirstKeyOnlyFilter());
      scanner = env.getRegion().getScanner(scan);
      boolean hasMoreRows = false;
      do {
        hasMoreRows = scanner.next(results);
        if (results.size() > 0) {
          counter++;
        }
        results.clear();
      } while (hasMoreRows);
      ByteBuffer bb = ByteBuffer.allocate(8).putLong(counter);
      bb.rewind();
      response = AggregateResponse.newBuilder().addFirstPart( 
          ByteString.copyFrom(bb)).build();
    } catch (IOException e) {
      ResponseConverter.setControllerException(controller, e);
    } finally {
      if (scanner != null) {
        try {
          scanner.close();
        } catch (IOException ignored) {}
      }
    }
    log.info("Row counter from this region is "
        + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + counter);
    done.run(response);
  }

  /**
   * Gives a Pair with first object as Sum and second object as row count,
   * computed for a given combination of column qualifier and column family in
   * the given row range as defined in the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). In case of null column qualifier, an aggregate sum over all the
   * entire column family will be returned.
   * 

* The average is computed in * AggregationClient#avg(byte[], ColumnInterpreter, Scan) by * processing results from all regions, so its "ok" to pass sum and a Long * type. */ @Override public void getAvg(RpcController controller, AggregateRequest request, RpcCallback done) { AggregateResponse response = null; InternalScanner scanner = null; try { ColumnInterpreter ci = constructColumnInterpreterFromRequest(request); S sumVal = null; Long rowCountVal = 0l; Scan scan = ProtobufUtil.toScan(request.getScan()); scanner = env.getRegion().getScanner(scan); byte[] colFamily = scan.getFamilies()[0]; NavigableSet qualifiers = scan.getFamilyMap().get(colFamily); byte[] qualifier = null; if (qualifiers != null && !qualifiers.isEmpty()) { qualifier = qualifiers.pollFirst(); } List results = new ArrayList(); boolean hasMoreRows = false; do { results.clear(); hasMoreRows = scanner.next(results); int listSize = results.size(); for (int i = 0; i < listSize; i++) { sumVal = ci.add(sumVal, ci.castToReturnType(ci.getValue(colFamily, qualifier, results.get(i)))); } rowCountVal++; } while (hasMoreRows); if (sumVal != null) { ByteString first = ci.getProtoForPromotedType(sumVal).toByteString(); AggregateResponse.Builder pair = AggregateResponse.newBuilder(); pair.addFirstPart(first); ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal); bb.rewind(); pair.setSecondPart(ByteString.copyFrom(bb)); response = pair.build(); } } catch (IOException e) { ResponseConverter.setControllerException(controller, e); } finally { if (scanner != null) { try { scanner.close(); } catch (IOException ignored) {} } } done.run(response); } /** * Gives a Pair with first object a List containing Sum and sum of squares, * and the second object as row count. It is computed for a given combination of * column qualifier and column family in the given row range as defined in the * Scan object. In its current implementation, it takes one column family and * one column qualifier (if provided). The idea is get the value of variance first: * the average of the squares less the square of the average a standard * deviation is square root of variance. */ @Override public void getStd(RpcController controller, AggregateRequest request, RpcCallback done) { InternalScanner scanner = null; AggregateResponse response = null; try { ColumnInterpreter ci = constructColumnInterpreterFromRequest(request); S sumVal = null, sumSqVal = null, tempVal = null; long rowCountVal = 0l; Scan scan = ProtobufUtil.toScan(request.getScan()); scanner = env.getRegion().getScanner(scan); byte[] colFamily = scan.getFamilies()[0]; NavigableSet qualifiers = scan.getFamilyMap().get(colFamily); byte[] qualifier = null; if (qualifiers != null && !qualifiers.isEmpty()) { qualifier = qualifiers.pollFirst(); } List results = new ArrayList(); boolean hasMoreRows = false; do { tempVal = null; hasMoreRows = scanner.next(results); int listSize = results.size(); for (int i = 0; i < listSize; i++) { tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily, qualifier, results.get(i)))); } results.clear(); sumVal = ci.add(sumVal, tempVal); sumSqVal = ci.add(sumSqVal, ci.multiply(tempVal, tempVal)); rowCountVal++; } while (hasMoreRows); if (sumVal != null) { ByteString first_sumVal = ci.getProtoForPromotedType(sumVal).toByteString(); ByteString first_sumSqVal = ci.getProtoForPromotedType(sumSqVal).toByteString(); AggregateResponse.Builder pair = AggregateResponse.newBuilder(); pair.addFirstPart(first_sumVal); pair.addFirstPart(first_sumSqVal); ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal); bb.rewind(); pair.setSecondPart(ByteString.copyFrom(bb)); response = pair.build(); } } catch (IOException e) { ResponseConverter.setControllerException(controller, e); } finally { if (scanner != null) { try { scanner.close(); } catch (IOException ignored) {} } } done.run(response); } /** * Gives a List containing sum of values and sum of weights. * It is computed for the combination of column * family and column qualifier(s) in the given row range as defined in the * Scan object. In its current implementation, it takes one column family and * two column qualifiers. The first qualifier is for values column and * the second qualifier (optional) is for weight column. */ @Override public void getMedian(RpcController controller, AggregateRequest request, RpcCallback done) { AggregateResponse response = null; InternalScanner scanner = null; try { ColumnInterpreter ci = constructColumnInterpreterFromRequest(request); S sumVal = null, sumWeights = null, tempVal = null, tempWeight = null; Scan scan = ProtobufUtil.toScan(request.getScan()); scanner = env.getRegion().getScanner(scan); byte[] colFamily = scan.getFamilies()[0]; NavigableSet qualifiers = scan.getFamilyMap().get(colFamily); byte[] valQualifier = null, weightQualifier = null; if (qualifiers != null && !qualifiers.isEmpty()) { valQualifier = qualifiers.pollFirst(); // if weighted median is requested, get qualifier for the weight column weightQualifier = qualifiers.pollLast(); } List results = new ArrayList(); boolean hasMoreRows = false; do { tempVal = null; tempWeight = null; hasMoreRows = scanner.next(results); int listSize = results.size(); for (int i = 0; i < listSize; i++) { Cell kv = results.get(i); tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily, valQualifier, kv))); if (weightQualifier != null) { tempWeight = ci.add(tempWeight, ci.castToReturnType(ci.getValue(colFamily, weightQualifier, kv))); } } results.clear(); sumVal = ci.add(sumVal, tempVal); sumWeights = ci.add(sumWeights, tempWeight); } while (hasMoreRows); ByteString first_sumVal = ci.getProtoForPromotedType(sumVal).toByteString(); S s = sumWeights == null ? ci.castToReturnType(ci.getMinValue()) : sumWeights; ByteString first_sumWeights = ci.getProtoForPromotedType(s).toByteString(); AggregateResponse.Builder pair = AggregateResponse.newBuilder(); pair.addFirstPart(first_sumVal); pair.addFirstPart(first_sumWeights); response = pair.build(); } catch (IOException e) { ResponseConverter.setControllerException(controller, e); } finally { if (scanner != null) { try { scanner.close(); } catch (IOException ignored) {} } } done.run(response); } @SuppressWarnings("unchecked") ColumnInterpreter constructColumnInterpreterFromRequest( AggregateRequest request) throws IOException { String className = request.getInterpreterClassName(); Class cls; try { cls = Class.forName(className); ColumnInterpreter ci = (ColumnInterpreter) cls.newInstance(); if (request.hasInterpreterSpecificBytes()) { ByteString b = request.getInterpreterSpecificBytes(); P initMsg = ProtobufUtil.getParsedGenericInstance(ci.getClass(), 2, b); ci.initialize(initMsg); } return ci; } catch (ClassNotFoundException e) { throw new IOException(e); } catch (InstantiationException e) { throw new IOException(e); } catch (IllegalAccessException e) { throw new IOException(e); } } @Override public Service getService() { return this; } /** * Stores a reference to the coprocessor environment provided by the * {@link org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost} from the region where this * coprocessor is loaded. Since this is a coprocessor endpoint, it always expects to be loaded * on a table region, so always expects this to be an instance of * {@link RegionCoprocessorEnvironment}. * @param env the environment provided by the coprocessor host * @throws IOException if the provided environment is not an instance of * {@code RegionCoprocessorEnvironment} */ @Override public void start(CoprocessorEnvironment env) throws IOException { if (env instanceof RegionCoprocessorEnvironment) { this.env = (RegionCoprocessorEnvironment)env; } else { throw new CoprocessorException("Must be loaded on a table region!"); } } @Override public void stop(CoprocessorEnvironment env) throws IOException { // nothing to do } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy