org.apache.phoenix.mapreduce.FormatToKeyValueReducer
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.phoenix.hbase.index.util.KeyValueBuilder;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;
import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.schema.PColumn;
import org.apache.phoenix.schema.PColumnFamily;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.PTable.ImmutableStorageScheme;
import org.apache.phoenix.util.Closeables;
import org.apache.phoenix.util.EncodedColumnsUtil;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.QueryUtil;
import org.apache.phoenix.util.SchemaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reducer class for the Phoenix bulk-load jobs.
 * Performs functionality similar to {@link KeyValueSortReducer}.
 */
public class FormatToKeyValueReducer
        extends Reducer<TableRowkeyPair, ImmutableBytesWritable, TableRowkeyPair, KeyValue> {

    protected static final Logger LOGGER = LoggerFactory.getLogger(FormatToKeyValueReducer.class);


    protected List<String> tableNames;
    protected List<String> logicalNames;
    protected KeyValueBuilder builder;
    private Map<Integer, Pair<byte[], byte[]>> columnIndexes;


    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        // pass client configuration into driver
        Properties clientInfos = new Properties();
        for (Map.Entry<String, String> entry : conf) {
            clientInfos.setProperty(entry.getKey(), entry.getValue());
        }
        try (PhoenixConnection conn = (PhoenixConnection) QueryUtil
                .getConnectionOnServer(clientInfos, conf)) {
            builder = conn.getKeyValueBuilder();
            final String tableNamesConf = conf.get(FormatToBytesWritableMapper.TABLE_NAMES_CONFKEY);
            final String logicalNamesConf = conf.get(FormatToBytesWritableMapper.LOGICAL_NAMES_CONFKEY);
            tableNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(tableNamesConf);
            logicalNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(logicalNamesConf);
            initColumnsMap(conn);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

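    /**
     * Rebuilds the column-index lookup table: each numeric index maps to a
     * (column family, column qualifier) pair. The iteration order here has to
     * mirror the order used on the mapper side ({@link FormatToBytesWritableMapper}),
     * because the serialized cells carry only the numeric index.
     */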
    private void initColumnsMap(PhoenixConnection conn) throws SQLException {
        Map<byte[], Integer> indexMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        columnIndexes = new HashMap<>();
        int columnIndex = 0;
        for (int index = 0; index < logicalNames.size(); index++) {
            PTable table = PhoenixRuntime.getTable(conn, logicalNames.get(index));
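            // Storage schemes other than ONE_CELL_PER_COLUMN pack all the columns of
            // a family into a single cell, so one entry per column family suffices.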
            if (!table.getImmutableStorageScheme().equals(ImmutableStorageScheme.ONE_CELL_PER_COLUMN)) {
                List<PColumnFamily> cfs = table.getColumnFamilies();
                for (int i = 0; i < cfs.size(); i++) {
                    byte[] family = cfs.get(i).getName().getBytes();
                    Pair<byte[], byte[]> pair = new Pair<>(family,
                            QueryConstants.SINGLE_KEYVALUE_COLUMN_QUALIFIER_BYTES);
                    columnIndexes.put(columnIndex, pair);
                    columnIndex++;
                }
            } else {
                List<PColumn> cls = table.getColumns();
                for (int i = 0; i < cls.size(); i++) {
                    PColumn c = cls.get(i);
                    byte[] family = new byte[0];
                    byte[] cq;
                    if (!SchemaUtil.isPKColumn(c)) {
                        family = c.getFamilyName().getBytes();
                        cq = c.getColumnQualifierBytes();
                    } else {
                        cq = c.getName().getBytes();
                    }
                    byte[] cfn = Bytes.add(family, QueryConstants.NAMESPACE_SEPARATOR_BYTES, cq);
                    Pair<byte[], byte[]> pair = new Pair<>(family, cq);
                    if (!indexMap.containsKey(cfn)) {
                        indexMap.put(cfn, columnIndex);
                        columnIndexes.put(columnIndex, pair);
                        columnIndex++;
                    }
                }
            }
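            // Every table additionally gets an entry for the empty key value that
            // Phoenix maintains in each row.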
            byte[] emptyColumnFamily = SchemaUtil.getEmptyColumnFamily(table);
            byte[] emptyKeyValue = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
            Pair<byte[], byte[]> pair = new Pair<>(emptyColumnFamily, emptyKeyValue);
            columnIndexes.put(columnIndex, pair);
            columnIndex++;
        }
    }

    @Override
    protected void reduce(TableRowkeyPair key, Iterable<ImmutableBytesWritable> values,
                          Reducer<TableRowkeyPair, ImmutableBytesWritable, TableRowkeyPair, KeyValue>.Context context)
            throws IOException, InterruptedException {
        TreeSet<KeyValue> map = new TreeSet<>(KeyValue.COMPARATOR);
        for (ImmutableBytesWritable aggregatedArray : values) {
            DataInputStream input = new DataInputStream(new ByteArrayInputStream(aggregatedArray.get()));
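            // Each aggregated byte array is a sequence of cells serialized by the
            // mapper: a KeyValue type byte, a vlong timestamp, a vint column index,
            // and a vint-length-prefixed value (length 0 encodes an empty value).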
            while (input.available() != 0) {
                byte type = input.readByte();
                long timestamp = WritableUtils.readVLong(input);
                int index = WritableUtils.readVInt(input);
                ImmutableBytesWritable family;
                ImmutableBytesWritable cq;
                ImmutableBytesWritable value = QueryConstants.EMPTY_COLUMN_VALUE_BYTES_PTR;
                Pair<byte[], byte[]> pair = columnIndexes.get(index);
                family = new ImmutableBytesWritable(pair.getFirst());
                cq = new ImmutableBytesWritable(pair.getSecond());
                int len = WritableUtils.readVInt(input);
                if (len > 0) {
                    byte[] array = new byte[len];
                    // read() may stop short of len bytes; readFully guarantees the
                    // whole value is consumed from the stream
                    input.readFully(array);
                    value = new ImmutableBytesWritable(array);
                }
                KeyValue kv;
                KeyValue.Type kvType = KeyValue.Type.codeToType(type);
                switch (kvType) {
                    case Put: // not null value
                        kv = builder.buildPut(key.getRowkey(), family, cq, timestamp, value);
                        break;
                    case DeleteColumn: // null value
                        kv = builder.buildDeleteColumns(key.getRowkey(), family, cq, timestamp);
                        break;
                    default:
                        throw new IOException("Unsupported KeyValue type " + kvType);
                }
                map.add(kv);
            }
            Closeables.closeQuietly(input);
        }
        context.setStatus("Read " + map.getClass());
        int index = 0;
        for (KeyValue kv : map) {
            context.write(key, kv);
            if (++index % 100 == 0) context.setStatus("Wrote " + index);
        }
    }
}