// com.aliyun.odps.datahub.DatahubWriter Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.datahub;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.commons.proto.XstreamPack.BytesPairPB;
import com.aliyun.odps.commons.proto.XstreamPack.KVMapPB;
import com.aliyun.odps.commons.proto.XstreamPack.XStreamPack;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.rest.RestClient;
import com.google.protobuf.ByteString;
public class DatahubWriter {
private RestClient datahubServiceClient;
private String path;
private MessageDigest messageDigest;
private Map params;
private Map headers;
private WritePackResult lastPackResult;
public DatahubWriter(RestClient datahubServiceClient, String path, HashMap params,
Map headers) {
this.datahubServiceClient = datahubServiceClient;
this.path = path;
this.params = params;
this.headers = headers;
try {
this.messageDigest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e.getMessage());
}
}
/**
* 向ODPS hub服务的非分区表写入一个pack
*
* @param recordPack
* {@link DatahubRecordPack} 对象
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
public WritePackResult write(DatahubRecordPack recordPack) throws OdpsException, IOException {
return write(null, recordPack, null, null);
}
/**
* 向ODPS hub服务的非分区表写入一个pack
*
* @param recordPack
* {@link DatahubRecordPack} 对象
* @param meta
* pack 属性
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
public WritePackResult write(DatahubRecordPack recordPack, byte [] meta) throws OdpsException, IOException {
return write(null, recordPack, meta, null);
}
/**
* 向ODPS hub服务的非分区表写入一个pack
*
* @param recordPack
* {@link DatahubRecordPack} 对象
* @param meta
* pack 属性
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
public WritePackResult write(DatahubRecordPack recordPack, Map meta) throws OdpsException, IOException {
return write(null, recordPack, null, meta);
}
/**
* 向ODPS hub服务的非分区表写入一个pack
*
* @param partitionSpec
* {@link com.aliyun.odps.PartitionSpec} 对象
* @param recordPack
* {@link DatahubRecordPack} 对象
* @param meta
* pack 属性
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
public WritePackResult write(PartitionSpec partitionSpec, DatahubRecordPack recordPack, Map meta) throws OdpsException, IOException {
return write(partitionSpec, recordPack, null, meta);
}
/**
* 向ODPS hub服务的分区表写入一个pack
*
* @param partitionSpec
* {@link com.aliyun.odps.PartitionSpec} 对象
* @param recordPack
* {@link DatahubRecordPack} 对象
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
public WritePackResult write(PartitionSpec partitionSpec, DatahubRecordPack recordPack)
throws OdpsException, IOException {
return write(partitionSpec, recordPack, null, null);
}
/**
* 向ODPS hub服务的分区表写入一个pack
*
* @param partitionSpec
* {@link com.aliyun.odps.PartitionSpec} 对象
* @param recordPack
* {@link DatahubRecordPack} 对象
* @param meta
* pack 属性
* @param kvMeta
* pack 属性
* @throws OdpsException, IOexception
*
* @return {@link WritePackResult}
*
*/
private WritePackResult write(PartitionSpec partitionSpec, DatahubRecordPack recordPack, byte [] meta, Map kvMeta)
throws OdpsException, IOException {
HashMap params = new HashMap(this.params);
HashMap headers = new HashMap(this.headers);
headers.put(DatahubHttpHeaders.CONTENT_ENCODING, "deflate");
try {
byte[] bytes = recordPack.getByteArray();
if (null == bytes || 0 == bytes.length) {
throw new DatahubException("record pack is empty.");
}
XStreamPack.Builder pack = XStreamPack.newBuilder();
pack.setPackData(ByteString.copyFrom(bytes));
if (null != meta) {
pack.setPackMeta(ByteString.copyFrom(meta));
}
KVMapPB.Builder kvMap = KVMapPB.newBuilder();
if (null != kvMeta) {
for (Map.Entry entry: kvMeta.entrySet())
{
BytesPairPB.Builder kv = BytesPairPB.newBuilder();
String key = entry.getKey();
if (DatahubConstants.RESERVED_META_PARTITION.equals(key)) {
throw new DatahubException("Invalid PackMeta: \"__partition__\"!");
}
kv.setKey(ByteString.copyFrom(key.getBytes("UTF-8")));
kv.setValue(ByteString.copyFrom(entry.getValue().getBytes("UTF-8")));
kvMap.addItems(kv);
}
}
pack.setKvMeta(kvMap);
bytes = pack.build().toByteArray();
if (partitionSpec != null && partitionSpec.toString().length() > 0) {
params.put(DatahubConstants.RES_PARTITION, partitionSpec.toString().replaceAll("'", ""));
}
params.put(DatahubConstants.RECORD_COUNT, String.valueOf(recordPack.getRecordCount()));
headers.put(Headers.CONTENT_MD5, generatorMD5(bytes));
Response resp = datahubServiceClient.requestForRawResponse(path, "PUT", params, headers,
new ByteArrayInputStream(bytes),
bytes.length);
if (!resp.isOK()) {
//TODO exception
DatahubException ex = new DatahubException(new ByteArrayInputStream(resp.getBody()));
ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
throw ex;
} else {
ByteArrayInputStream in = new ByteArrayInputStream(resp.getBody());
loadFromJson(in);
}
} catch (DatahubException e) {
throw e;
} catch (Exception e) {
throw new DatahubException(e.getMessage(), e);
}
return this.lastPackResult;
}
private void loadFromJson(InputStream is) throws OdpsException {
try {
String json = IOUtils.readStreamAsString(is);
JSONObject tree = JSON.parseObject(json);
String node = tree.getString("PackId");
if (node != null) {
this.lastPackResult = new WritePackResult(node);
} else {
throw new DatahubException("get pack id fail");
}
} catch (Exception e) {
throw new DatahubException("Invalid json content.", e);
}
}
private String generatorMD5(byte[] bytes) {
byte[] digest = messageDigest.digest(bytes);
StringBuilder sb = new StringBuilder();
for (byte b : digest) {
sb.append(String.format("%02X", b));
}
return sb.toString();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy