All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.mapreduce.PutCombiner Maven / Gradle / Ivy

The newest version!
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Combine Puts. Merges Put instances grouped by K into a single
 * instance.
 * @see TableMapReduceUtil
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class PutCombiner extends Reducer {
  private static final Log LOG = LogFactory.getLog(PutCombiner.class);

  @Override
  protected void reduce(K row, Iterable vals, Context context)
      throws IOException, InterruptedException {
    // Using HeapSize to create an upper bound on the memory size of
    // the puts and flush some portion of the content while looping. This
    // flush could result in multiple Puts for a single rowkey. That is
    // acceptable because Combiner is run as an optimization and it's not
    // critical that all Puts are grouped perfectly.
    long threshold = context.getConfiguration().getLong(
        "putcombiner.row.threshold", 1L * (1<<30));
    int cnt = 0;
    long curSize = 0;
    Put put = null;
    Map> familyMap = null;
    for (Put p : vals) {
      cnt++;
      if (put == null) {
        put = p;
        familyMap = put.getFamilyCellMap();
      } else {
        for (Entry> entry : p.getFamilyCellMap()
            .entrySet()) {
          List cells = familyMap.get(entry.getKey());
          List kvs = (cells != null) ? (List) cells : null;
          for (Cell cell : entry.getValue()) {
            KeyValue kv = KeyValueUtil.ensureKeyValueTypeForMR(cell);
            curSize += kv.heapSize();
            if (kvs != null) {
              kvs.add(kv);
            }
          }
          if (cells == null) {
            familyMap.put(entry.getKey(), entry.getValue());
          }
        }
        if (cnt % 10 == 0) context.setStatus("Combine " + cnt);
        if (curSize > threshold) {
          if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
          }
          context.write(row, put);
          put = null;
          curSize = 0;
          cnt = 0;
        }
      }
    }
    if (put != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
      }
      context.write(row, put);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy