/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.mapreduce;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.util.*;
import org.apache.phoenix.util.csv.CsvUpsertExecutor;
import java.io.*;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.*;
import static org.apache.phoenix.mapreduce.FormatToBytesWritableMapper.*;
import static org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil.initColumnIndexes;
import static org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil.writeAggregatedRow;
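/**
 * Mapper that takes rows delivered as {@link MapWritable} values (e.g. records read from an
 * ODPS table by the configured input format), upserts them through Phoenix without committing,
 * and emits the resulting uncommitted KeyValues per target table for bulk load. Rows that fail
 * to upsert are counted and appended to an error file under {@code odps.error.data.path}.
 */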
public class ODPSMapper extends Mapper<NullWritable, MapWritable, TableRowkeyPair, ImmutableBytesWritable> {
public static final String ODPS_COLUMN_INFO_CONFKEY = "odps.mapreduce.import.columninfos";
public static final String PHOENIX_MAPPED_COLUMN_INFO_CONFKEY = "phoenix.mapped.import.columninfos";
public static final String ACCESS_KEY_ID_CONFKEY = "odps.access.key.id";
public static final String ACCESS_KEY_SECRET_CONFKEY = "odps.access.key.secret";
public static final String ODPS_URL_CONFKEY = "odps.url";
public static final String ODPS_TUNNEL_URL_CONFKEY = "odps.tunnel.url";
public static final String ODPS_PROJECT_CONFKEY = "odps.project.name";
public static final String ODPS_TABLE_NAME_CONFKEY = "odps.table.name";
public static final String ODPS_TABLE_PARTITION_SPEC_CONFKEY = "odps.table.partition.spec";
public static final String ODPS_PARTITION_NUMBER_CONFKEY = "odps.table.split.number";
public static final String ODPS_INPUT_CLASS = "odps.input.class";
public static final String ODPS_ERROR_DATA_PATH = "odps.error.data.path";
protected PhoenixConnection conn;
protected UpsertExecutor<Row, String> upsertExecutor;
protected ImportPreUpsertKeyValueProcessor preUpdateProcessor;
protected List<String> tableNames;
protected List<String> logicalNames;
protected UpsertExecutor.UpsertListener<Row> upsertListener;
protected Map<byte[], Integer> columnIndexes;
private List<ColumnInfo> mappedColumnInfoList;
private FSDataOutputStream outputStream;
private FileSystem fs;
private Path filePath;
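/**
 * Opens a server-side Phoenix connection with auto-commit disabled, resolves the target table
 * and column metadata from the job configuration, and prepares the upsert executor, the upsert
 * listener, and the per-task-attempt error file path.
 */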
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
// pass client configuration into driver
Properties clientInfos = new Properties();
for (Map.Entry<String, String> entry : conf) {
clientInfos.setProperty(entry.getKey(), entry.getValue());
}
try {
conn = (PhoenixConnection) QueryUtil.getConnectionOnServer(clientInfos, conf);
// We are dependent on rolling back before performing commits, so we need to be sure
// that auto-commit is not turned on
conn.setAutoCommit(false);
final String tableNamesConf = conf.get(TABLE_NAMES_CONFKEY);
final String logicalNamesConf = conf.get(LOGICAL_NAMES_CONFKEY);
tableNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(tableNamesConf);
logicalNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(logicalNamesConf);
columnIndexes = initColumnIndexes(conn, logicalNames);
} catch (SQLException | ClassNotFoundException e) {
throw new RuntimeException(e);
}
String targetTableName = conf.get(TABLE_NAME_CONFKEY);
mappedColumnInfoList = PhoenixMapReduceUtil.buildTargetTableColumns(conf);
upsertListener = initUpsertListener(context, conf.getBoolean(IGNORE_INVALID_ROW_CONFKEY, true));
preUpdateProcessor = PhoenixConfigurationUtil.loadPreUpsertProcessor(conf);
upsertExecutor = initUpsertExecutor(conn, targetTableName, mappedColumnInfoList);
filePath = new Path(conf.get(ODPS_ERROR_DATA_PATH) + Path.SEPARATOR + context.getTaskAttemptID().toString());
fs = FileSystem.get(conf);
}
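/**
 * Builds an {@code UpsertListener} that counts successful upserts and, on a failed record,
 * increments an error counter, appends the offending row to the error file, and re-throws the
 * failure unless invalid rows are configured to be ignored.
 */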
private UpsertExecutor.UpsertListener<Row> initUpsertListener(
final Context context, final boolean ignoreRecordErrors) {
return new UpsertExecutor.UpsertListener<Row>() {
@Override
public void upsertDone(long upsertCount) {
context.getCounter(COUNTER_GROUP_NAME, "Upserts Done").increment(1L);
}
@Override
public void errorOnRecord(Row record, Throwable throwable) {
LOG.error("Error on record " + record, throwable);
context.getCounter(COUNTER_GROUP_NAME, "Errors on records").increment(1L);
try {
if (outputStream == null) {
outputStream = fs.create(filePath);
}
outputStream.write(record.toString().getBytes("UTF-8"));
} catch (IOException e) {
LOG.error("write row:" + record.toString() + " into " + filePath.toString() + " failed", e.getCause());
throw new IllegalStateException(e.getMessage());
}
if (!ignoreRecordErrors) {
Throwables.propagate(throwable);
}
}
};
}
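/** Lightweight wrapper around the string values of one input row, ordered by the mapped target columns. */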
private class Row {
private List<String> values;
public Row(List<String> values) {
this.values = values;
}
public String get(int idx) {
return values.get(idx);
}
public int size() {
return values.size();
}
@Override
public String toString() {
return Joiner.on(",").useForNull("").join(values).concat("\t\n");
}
}
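/**
 * Creates an {@code UpsertExecutor} that binds each string field to the prepared upsert
 * statement via the CSV simple-datatype conversion functions (array columns are not supported)
 * and reports success or failure to the upsert listener.
 */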
private UpsertExecutor<Row, String> initUpsertExecutor(Connection conn, String tableName,
List<ColumnInfo> columnInfoList) {
return new UpsertExecutor<Row, String>(conn, tableName, columnInfoList, upsertListener) {
@Override protected void execute(Row row) {
try {
if (row.size() < conversionFunctions.size()) {
String message = String.format("record does not have enough values (has %d, but needs %d)",
row.size(), conversionFunctions.size());
throw new IllegalArgumentException(message);
}
for (int fieldIndex = 0; fieldIndex < conversionFunctions.size(); fieldIndex++) {
Object sqlValue = conversionFunctions.get(fieldIndex).apply(row.get(fieldIndex));
if (sqlValue != null) {
preparedStatement.setObject(fieldIndex + 1, sqlValue);
} else {
preparedStatement.setNull(fieldIndex + 1, dataTypes.get(fieldIndex).getSqlType());
}
}
preparedStatement.execute();
upsertListener.upsertDone(++upsertCount);
} catch (Exception e) {
LOG.warn("Error on record " + row, e);
upsertListener.errorOnRecord(row, e);
}
}
@Override
protected Function<String, Object> createConversionFunction(PDataType dataType) {
if (dataType.isArrayType()) {
throw new IllegalStateException("Unsupported array type!");
} else {
return new CsvUpsertExecutor.SimpleDatatypeConversionFunction(dataType, conn);
}
}
};
}
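/**
 * Re-orders the incoming {@link MapWritable} by the mapped target columns, upserts the row
 * without committing, then drains the connection's uncommitted KeyValues, groups them by
 * target table, and writes each group as one aggregated row before rolling back.
 */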
@Override
protected void map(NullWritable key, MapWritable value, Context context)
throws IOException, InterruptedException {
if (conn == null) {
throw new RuntimeException("Connection not initialized.");
}
try {
HashMap<String, String> columnNamesToStrValues = new HashMap<>(value.size());
for (Map.Entry<Writable, Writable> entry : value.entrySet()) {
columnNamesToStrValues.put(entry.getKey().toString(), entry.getValue().toString());
}
List<String> values = new ArrayList<>(value.size());
for (ColumnInfo mcol : mappedColumnInfoList) {
values.add(columnNamesToStrValues.get(mcol.getColumnName()));
}
upsertExecutor.execute(ImmutableList.of(new Row(values)));
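// Drain the uncommitted mutations from the Phoenix connection and group the KeyValues
// by target table so each table's mutations can be emitted as one aggregated row.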
Map<Integer, List<KeyValue>> map = new HashMap<>();
Iterator<Pair<byte[], List<KeyValue>>> uncommittedDataIterator
= PhoenixRuntime.getUncommittedDataIterator(conn, true);
while (uncommittedDataIterator.hasNext()) {
Pair<byte[], List<KeyValue>> kvPair = uncommittedDataIterator.next();
List<KeyValue> keyValueList = kvPair.getSecond();
keyValueList = preUpdateProcessor.preUpsert(kvPair.getFirst(), keyValueList);
byte[] first = kvPair.getFirst();
// Create a list of KeyValues for each table
for (int i = 0; i < tableNames.size(); i++) {
if (Bytes.compareTo(Bytes.toBytes(tableNames.get(i)), first) == 0) {
if (!map.containsKey(i)) {
map.put(i, new ArrayList<KeyValue>());
}
List<KeyValue> list = map.get(i);
for (KeyValue kv : keyValueList) {
list.add(kv);
}
break;
}
}
}
for (Map.Entry<Integer, List<KeyValue>> rowEntry : map.entrySet()) {
int tableIndex = rowEntry.getKey();
List<KeyValue> lkv = rowEntry.getValue();
// All KeyValues for a table are combined into a single byte array
writeAggregatedRow(context, tableNames.get(tableIndex), lkv, columnIndexes);
}
conn.rollback();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
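/** Closes the error-output stream if any failed rows were written. */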
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
super.cleanup(context);
if (outputStream != null) {
outputStream.close();
}
}
}