// org.apache.kylin.source.hive.cardinality.ColumnCardinalityReducer (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.source.hive.cardinality;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.kylin.engine.mr.KylinReducer;
import org.apache.kylin.measure.BufferedMeasureCodec;
import org.apache.kylin.measure.hllc.HLLCounter;
/**
 * Reducer that merges the serialized {@link HLLCounter} registers received for
 * each integer key into a single counter per key and, once all input has been
 * consumed, emits every key together with the count estimate of its merged
 * counter.
 *
 * <p>Counters are accumulated in memory across {@code doReduce} calls; nothing
 * is written to the output until {@code doCleanup} runs.
 *
 * @author Jack
 */
public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWritable, IntWritable, LongWritable> {

    public static final int ONE = 1;

    /** Merged counter per key; filled by {@code doReduce}, emitted by {@code doCleanup}. */
    private final Map<Integer, HLLCounter> hllcMap = new HashMap<Integer, HLLCounter>();

    /**
     * Binds the job configuration carried by the task context before any
     * records are processed.
     *
     * @param context the reducer task context
     * @throws IOException if binding the configuration fails
     */
    @Override
    protected void doSetup(Context context) throws IOException {
        super.bindCurrentConfiguration(context.getConfiguration());
    }

    /**
     * Deserializes every value of {@code key} into an {@link HLLCounter} and
     * merges it into the counter accumulated for that key.
     *
     * @param key     the grouping key (presumably a column index -- confirm against the mapper)
     * @param values  serialized counter registers emitted for this key
     * @param context the reducer task context (not written to here)
     * @throws IOException          if a value cannot be read as counter registers
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void doReduce(IntWritable key, Iterable<BytesWritable> values, Context context)
            throws IOException, InterruptedException {
        int skey = key.get();
        for (BytesWritable v : values) {
            // The backing array of a BytesWritable may be longer than its payload;
            // only the first getLength() bytes are valid, so bound the buffer to them.
            ByteBuffer buffer = ByteBuffer.wrap(v.getBytes(), 0, v.getLength());
            HLLCounter hll = new HLLCounter();
            hll.readRegisters(buffer);
            getHllc(skey).merge(hll);
        }
    }

    /**
     * Returns the counter accumulated for {@code key}, creating and registering
     * an empty one on first use.
     *
     * @param key the grouping key
     * @return the counter associated with {@code key}; never {@code null}
     */
    private HLLCounter getHllc(Integer key) {
        HLLCounter hllc = hllcMap.get(key);
        if (hllc == null) {
            hllc = new HLLCounter();
            hllcMap.put(key, hllc);
        }
        return hllc;
    }

    /**
     * Emits one record per accumulated key, in ascending key order: the key and
     * the count estimate of its merged counter.
     *
     * @param context the reducer task context that receives the output records
     * @throws IOException          if writing an output record fails
     * @throws InterruptedException if writing an output record is interrupted
     */
    @Override
    protected void doCleanup(Context context) throws IOException, InterruptedException {
        // HashMap iteration order is unspecified; sort so output is ordered by key.
        List<Integer> keys = new ArrayList<Integer>(hllcMap.keySet());
        Collections.sort(keys);
        for (Integer key : keys) {
            HLLCounter hllc = hllcMap.get(key);
            context.write(new IntWritable(key), new LongWritable(hllc.getCountEstimate()));
        }
    }
}