/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.mapreduce;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.util.*;
import org.apache.phoenix.util.csv.CsvUpsertExecutor;
import java.io.*;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.*;
import static org.apache.phoenix.mapreduce.FormatToBytesWritableMapper.*;
import static org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil.initColumnIndexes;
import static org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil.writeAggregatedRow;
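/**
 * Mapper that takes rows delivered as {@link MapWritable} values (e.g. records read from an
 * ODPS table by the configured input format), upserts them through Phoenix without committing,
 * and emits the resulting uncommitted KeyValues per target table for bulk load. Rows that fail
 * to upsert are counted and appended to an error file under {@code odps.error.data.path}.
 */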
public class ODPSMapper extends Mapper<NullWritable, MapWritable, TableRowkeyPair, ImmutableBytesWritable> {
public static final String ODPS_COLUMN_INFO_CONFKEY = "odps.mapreduce.import.columninfos";
public static final String PHOENIX_MAPPED_COLUMN_INFO_CONFKEY = "phoenix.mapped.import.columninfos";
public static final String ACCESS_KEY_ID_CONFKEY = "odps.access.key.id";
public static final String ACCESS_KEY_SECRET_CONFKEY = "odps.access.key.secret";
public static final String ODPS_URL_CONFKEY = "odps.url";
public static final String ODPS_TUNNEL_URL_CONFKEY = "odps.tunnel.url";
public static final String ODPS_PROJECT_CONFKEY = "odps.project.name";
public static final String ODPS_TABLE_NAME_CONFKEY = "odps.table.name";
public static final String ODPS_TABLE_PARTITION_SPEC_CONFKEY = "odps.table.partition.spec";
public static final String ODPS_PARTITION_NUMBER_CONFKEY = "odps.table.split.number";
public static final String ODPS_INPUT_CLASS = "odps.input.class";
public static final String ODPS_ERROR_DATA_PATH = "odps.error.data.path";
protected PhoenixConnection conn;
protected UpsertExecutor<Row, String> upsertExecutor;
protected ImportPreUpsertKeyValueProcessor preUpdateProcessor;
protected List<String> tableNames;
protected List<String> logicalNames;
protected UpsertExecutor.UpsertListener<Row> upsertListener;
protected Map<byte[], Integer> columnIndexes;
private List<ColumnInfo> mappedColumnInfoList;
private FSDataOutputStream outputStream;
private FileSystem fs;
private Path filePath;
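/**
 * Opens a server-side Phoenix connection with auto-commit disabled, resolves the target table
 * and column metadata from the job configuration, and prepares the upsert executor, the upsert
 * listener, and the per-task-attempt error file path.
 */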
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
// pass client configuration into driver
Properties clientInfos = new Properties();
for (Map.Entry<String, String> entry : conf) {
clientInfos.setProperty(entry.getKey(), entry.getValue());
}
try {
conn = (PhoenixConnection) QueryUtil.getConnectionOnServer(clientInfos, conf);
// We are dependent on rolling back before performing commits, so we need to be sure
// that auto-commit is not turned on
conn.setAutoCommit(false);
final String tableNamesConf = conf.get(TABLE_NAMES_CONFKEY);
final String logicalNamesConf = conf.get(LOGICAL_NAMES_CONFKEY);
tableNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(tableNamesConf);
logicalNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(logicalNamesConf);
columnIndexes = initColumnIndexes(conn, logicalNames);
} catch (SQLException | ClassNotFoundException e) {
throw new RuntimeException(e);
}
String targetTableName = conf.get(TABLE_NAME_CONFKEY);
mappedColumnInfoList = PhoenixMapReduceUtil.buildTargetTableColumns(conf);
upsertListener = initUpsertListener(context, conf.getBoolean(IGNORE_INVALID_ROW_CONFKEY, true));
preUpdateProcessor = PhoenixConfigurationUtil.loadPreUpsertProcessor(conf);
upsertExecutor = initUpsertExecutor(conn, targetTableName, mappedColumnInfoList);
filePath = new Path(conf.get(ODPS_ERROR_DATA_PATH) + Path.SEPARATOR + context.getTaskAttemptID().toString());
fs = FileSystem.get(conf);
}
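/**
 * Builds an {@code UpsertListener} that counts successful upserts and, on a failed record,
 * increments an error counter, appends the offending row to the error file, and re-throws the
 * failure unless invalid rows are configured to be ignored.
 */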
private UpsertExecutor.UpsertListener<Row> initUpsertListener(
final Context context, final boolean ignoreRecordErrors) {
return new UpsertExecutor.UpsertListener<Row>() {
@Override
public void upsertDone(long upsertCount) {
context.getCounter(COUNTER_GROUP_NAME, "Upserts Done").increment(1L);
}
@Override
public void errorOnRecord(Row record, Throwable throwable) {
LOG.error("Error on record " + record, throwable);
context.getCounter(COUNTER_GROUP_NAME, "Errors on records").increment(1L);
try {
if (outputStream == null) {
outputStream = fs.create(filePath);
}
outputStream.write(record.toString().getBytes("UTF-8"));
} catch (IOException e) {
LOG.error("write row:" + record.toString() + " into " + filePath.toString() + " failed", e.getCause());
throw new IllegalStateException(e.getMessage());
}
if (!ignoreRecordErrors) {
Throwables.propagate(throwable);
}
}
};
}
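/** Lightweight wrapper around the string values of one input row, ordered by the mapped target columns. */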
private class Row {
private List<String> values;
public Row(List<String> values) {
this.values = values;
}
public String get(int idx) {
return values.get(idx);
}
public int size() {
return values.size();
}
@Override
public String toString() {
return Joiner.on(",").useForNull("").join(values).concat("\t\n");
}
}
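/**
 * Creates an {@code UpsertExecutor} that binds each string field to the prepared upsert
 * statement via the CSV simple-datatype conversion functions (array columns are not supported)
 * and reports success or failure to the upsert listener.
 */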
private UpsertExecutor<Row, String> initUpsertExecutor(Connection conn, String tableName,
List<ColumnInfo> columnInfoList) {
return new UpsertExecutor<Row, String>(conn, tableName, columnInfoList, upsertListener) {
@Override protected void execute(Row row) {
try {
if (row.size() < conversionFunctions.size()) {
String message = String.format("record does not have enough values (has %d, but needs %d)",
row.size(), conversionFunctions.size());
throw new IllegalArgumentException(message);
}
for (int fieldIndex = 0; fieldIndex < conversionFunctions.size(); fieldIndex++) {
Object sqlValue = conversionFunctions.get(fieldIndex).apply(row.get(fieldIndex));
if (sqlValue != null) {
preparedStatement.setObject(fieldIndex + 1, sqlValue);
} else {
preparedStatement.setNull(fieldIndex + 1, dataTypes.get(fieldIndex).getSqlType());
}
}
preparedStatement.execute();
upsertListener.upsertDone(++upsertCount);
} catch (Exception e) {
LOG.warn("Error on record " + row, e);
upsertListener.errorOnRecord(row, e);
}
}
@Override
protected Function<String, Object> createConversionFunction(PDataType dataType) {
if (dataType.isArrayType()) {
throw new IllegalStateException("Unsupported array type!");
} else {
return new CsvUpsertExecutor.SimpleDatatypeConversionFunction(dataType, conn);
}
}
};
}
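/**
 * Re-orders the incoming {@link MapWritable} by the mapped target columns, upserts the row
 * without committing, then drains the connection's uncommitted KeyValues, groups them by
 * target table, and writes each group as one aggregated row before rolling back.
 */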
@Override
protected void map(NullWritable key, MapWritable value, Context context)
throws IOException, InterruptedException {
if (conn == null) {
throw new RuntimeException("Connection not initialized.");
}
try {
HashMap<String, String> columnNamesToStrValues = new HashMap<>(value.size());
for (Map.Entry<Writable, Writable> entry : value.entrySet()) {
columnNamesToStrValues.put(entry.getKey().toString(), entry.getValue().toString());
}
List<String> values = new ArrayList<>(value.size());
for (ColumnInfo mcol : mappedColumnInfoList) {
values.add(columnNamesToStrValues.get(mcol.getColumnName()));
}
upsertExecutor.execute(ImmutableList.of(new Row(values)));
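// Drain the uncommitted mutations from the Phoenix connection and group the KeyValues
// by target table so each table's mutations can be emitted as one aggregated row.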
Map<Integer, List<KeyValue>> map = new HashMap<>();
Iterator<Pair<byte[], List<KeyValue>>> uncommittedDataIterator
= PhoenixRuntime.getUncommittedDataIterator(conn, true);
while (uncommittedDataIterator.hasNext()) {
Pair<byte[], List<KeyValue>> kvPair = uncommittedDataIterator.next();
List<KeyValue> keyValueList = kvPair.getSecond();
keyValueList = preUpdateProcessor.preUpsert(kvPair.getFirst(), keyValueList);
byte[] first = kvPair.getFirst();
// Create a list of KeyValues for each table
for (int i = 0; i < tableNames.size(); i++) {
if (Bytes.compareTo(Bytes.toBytes(tableNames.get(i)), first) == 0) {
if (!map.containsKey(i)) {
map.put(i, new ArrayList<KeyValue>());
}
List<KeyValue> list = map.get(i);
for (KeyValue kv : keyValueList) {
list.add(kv);
}
break;
}
}
}
for (Map.Entry<Integer, List<KeyValue>> rowEntry : map.entrySet()) {
int tableIndex = rowEntry.getKey();
List<KeyValue> lkv = rowEntry.getValue();
// All KeyValues for a table are combined into a single byte array
writeAggregatedRow(context, tableNames.get(tableIndex), lkv, columnIndexes);
}
conn.rollback();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
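/** Closes the error-output stream if any failed rows were written. */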
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
super.cleanup(context);
if (outputStream != null) {
outputStream.close();
}
}
}