/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.tunnel;
import static com.aliyun.odps.tunnel.HttpHeaders.HEADER_ODPS_REQUEST_ID;
import static com.aliyun.odps.tunnel.TunnelConstants.TUNNEL_DATE_TRANSFORM_VERSION;
import static java.lang.Math.max;
import static java.lang.Math.min;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.compression.CompressionCodec;
import org.apache.arrow.vector.compression.NoCompressionCodec;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import org.apache.arrow.vector.types.pojo.Schema;
import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.ArrowUtils;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.data.ArrayRecord;
import com.aliyun.odps.data.ArrowRecordReader;
import com.aliyun.odps.data.ArrowRecordWriter;
import com.aliyun.odps.data.ArrowStreamRecordReader;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordPack;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.data.RecordWriter;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.tunnel.impl.StreamUploadSessionImpl;
import com.aliyun.odps.tunnel.impl.UpsertSessionImpl;
import com.aliyun.odps.tunnel.io.ArrowTunnelRecordReader;
import com.aliyun.odps.tunnel.io.ArrowTunnelRecordWriter;
import com.aliyun.odps.tunnel.io.Checksum;
import com.aliyun.odps.tunnel.io.CompressOption;
import com.aliyun.odps.tunnel.io.ProtobufRecordPack;
import com.aliyun.odps.tunnel.io.TunnelBufferedWriter;
import com.aliyun.odps.tunnel.io.TunnelRecordReader;
import com.aliyun.odps.tunnel.io.TunnelRecordWriter;
import com.aliyun.odps.tunnel.io.TunnelRetryHandler;
import com.aliyun.odps.tunnel.streams.UpsertStream;
import com.aliyun.odps.utils.ColumnUtils;
import com.aliyun.odps.utils.ConnectionWatcher;
import com.aliyun.odps.utils.StringUtils;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
/**
* Tunnel is the data transfer channel of ODPS: users upload data to and download data from
* ODPS through Tunnel. TableTunnel is the entry class for accessing the ODPS Tunnel service;
* it only supports uploading and downloading table data (views are not supported).
*
* The process of uploading to or downloading from a table or partition is called a session.
* A session consists of one or more HTTP requests to the Tunnel RESTful API.
* A session is identified by its session ID and times out after 24 hours; if a bulk transfer
* would take longer than 24 hours, split it into multiple sessions.
* Uploads and downloads are handled by the {@link UploadSession} and {@link DownloadSession}
* sessions respectively.
* TableTunnel provides the methods that create UploadSession and DownloadSession objects.
*
* - Typical upload flow:
* 1) Create a TableTunnel
* 2) Create an UploadSession
* 3) Create a RecordWriter and write Records
* 4) Commit the upload
*
* - Typical download flow:
* 1) Create a TableTunnel
* 2) Create a DownloadSession
* 3) Create a RecordReader and read Records
*
* Sample code (copies the data of one table into another):
*
* public class Sample {
*   private static String accessID = "";
*   private static String accessKey = "";
*   private static String odpsURL = "";
*   private static String tunnelURL = "";
*   private static String project = "";
*   private static String table1 = "";
*   private static String table2 = "";
*
*   public static void main(String[] args) {
*     Account account = new AliyunAccount(accessID, accessKey);
*     Odps odps = new Odps(account);
*     odps.setEndpoint(odpsURL);
*     odps.setDefaultProject(project);
*     TableTunnel tunnel = new TableTunnel(odps);
*     tunnel.setEndpoint(tunnelURL);
*     try {
*       DownloadSession downloadSession = tunnel.createDownloadSession(project, table1);
*       long count = downloadSession.getRecordCount();
*       RecordReader recordReader = downloadSession.openRecordReader(0, count);
*       Record record;
*       UploadSession uploadSession = tunnel.createUploadSession(project, table2);
*       RecordWriter recordWriter = uploadSession.openRecordWriter(0);
*       while ((record = recordReader.read()) != null) {
*         recordWriter.write(record);
*       }
*       recordReader.close();
*       recordWriter.close();
*       uploadSession.commit(new Long[]{0L});
*     } catch (TunnelException e) {
*       e.printStackTrace();
*     } catch (IOException e1) {
*       e1.printStackTrace();
*     }
*   }
* }
*
*
*
*
* @see UploadSession
* @see DownloadSession
*/
public class TableTunnel {
public interface BlockVersionProvider {
long generateVersion(long blockId);
}
private final Configuration config;
private final Random random = new Random();
private final Odps odps;
/**
* Constructs a TableTunnel object.
*
* @param odps
*     {@link Odps}
*/
public TableTunnel(Odps odps) {
this.odps = odps;
this.config = new Configuration(odps);
}
public TableTunnel(Odps odps, Configuration config) {
this.odps = odps;
this.config = config;
}
public Odps getOdps() {
return this.odps;
}
public Configuration getConfig() {
return this.config;
}
/**
* Creates an upload session on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession createUploadSession(String projectName, String tableName)
throws TunnelException {
return createUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, false);
}
/**
* Creates an upload session on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param overwrite
*     Overwrite mode.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession createUploadSession(String projectName, String tableName,
boolean overwrite) throws TunnelException {
return createUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, overwrite);
}
/**
* Create an upload session of a non-partitioned table.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param overwrite Overwrite.
* @return {@link TableTunnel.UploadSession}
*/
public TableTunnel.UploadSession createUploadSession(
String projectName,
String schemaName,
String tableName,
boolean overwrite) throws TunnelException {
return new TableTunnel.UploadSession(projectName, schemaName, tableName, null, null, overwrite);
}
/**
* Creates an upload session on a partitioned table.
*
* Note: the partition spec must identify a leaf-level partition. If the table has two
* partition levels, pt and ds, values for both must be given; specifying only one of them is
* not supported.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession createUploadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec) throws TunnelException {
return createUploadSession(projectName, tableName, partitionSpec, false);
}
/**
* Creates an upload session on a partitioned table.
*
* Note: the partition spec must identify a leaf-level partition. If the table has two
* partition levels, pt and ds, values for both must be given; specifying only one of them is
* not supported.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @param overwrite
*     Overwrite mode.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession createUploadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec,
boolean overwrite) throws TunnelException {
return createUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, partitionSpec, overwrite);
}
/**
* Create an upload session of a partitioned table.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param overwrite Overwrite.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession createUploadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
boolean overwrite) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
return new TableTunnel.UploadSession(
projectName,
schemaName,
tableName,
partitionSpec.toString().replaceAll("'", ""),
null,
overwrite);
}
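/*
* A minimal usage sketch (the project, table, and partition values below are placeholders):
* on a table partitioned by pt and ds, the PartitionSpec must name both levels, since only
* leaf-level partitions are accepted.
*
*   TableTunnel tunnel = new TableTunnel(odps);
*   PartitionSpec spec = new PartitionSpec("pt=20240101,ds=1"); // both levels specified
*   TableTunnel.UploadSession up =
*       tunnel.createUploadSession("my_project", "my_table", spec, false);
*/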
/**
* Gets an upload session on a non-partitioned table; the session is expected to upload data
* with a {@link TunnelBufferedWriter}.
* When several such session instances (across processes or threads) share one session ID,
* each instance must declare its own unique identifier (shareId) and the total number of
* sharing instances (shares).
*
* Example:
*
* Two session instances (each in its own thread) sharing the same session ID:
*
* final String sid = "";
*
* Thread t1 = new Thread() {
* @Override
* public void run() {
* try {
* TableTunnel.UploadSession up = tunnel.getUploadSession(projectName, tableName, sid, 2, 0);
* Record r = up.newRecord();
* RecordWriter w = up.openBufferedWriter();
* r.setBigint(0, 1L);
* w.write(r);
* w.close();
* up.commit();
* } catch (TunnelException e) {
* throw new RuntimeException(e.getMessage(), e);
* }
* }
* };
*
* Thread t2 = new Thread() {
* @Override
* public void run() {
* try {
* TableTunnel.UploadSession up = tunnel.getUploadSession(projectName, tableName, sid, 2, 1);
* Record r = up.newRecord();
* RecordWriter w = up.openBufferedWriter();
* r.setBigint(0, 2L);
* w.write(r);
* w.close();
* up.commit();
* } catch (TunnelException e) {
* throw new RuntimeException(e.getMessage(), e);
* }
* }
* };
*
* t1.start();
* t2.start();
*
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param id
*     Upload session ID {@link TableTunnel.UploadSession#getId()}
* @param shares
*     Number of UploadSession instances sharing this session ID.
* @param shareId
*     Unique identifier of this UploadSession; integers starting from 0 are recommended.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String tableName,
String id,
long shares,
long shareId) throws TunnelException {
return getUploadSession(projectName, tableName, null, id, shares, shareId);
}
/**
* Gets an upload session on a partitioned table; the session is expected to upload data
* with a {@link TunnelBufferedWriter}.
* When several such session instances (across processes or threads) share one session ID,
* each instance must declare its own unique identifier (shareId) and the total number of
* sharing instances (shares).
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @param id
*     Upload session ID {@link TableTunnel.UploadSession#getId()}
* @param shares
*     Number of UploadSession instances sharing this session ID.
* @param shareId
*     Unique identifier of this UploadSession; integers starting from 0 are recommended.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec,
String id,
long shares,
long shareId) throws TunnelException {
return getUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, partitionSpec, id, shares, shareId);
}
/**
* Get the upload session specified by the upload session ID. Make sure the upload session uses
* {@link TunnelBufferedWriter} to upload data.
*
* When the upload session is shared by multiple threads or processes, the total number of threads
* or processes should be passed as {@code shares}, and a unique {@code shareId} should be passed
* for each thread or process.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param id Upload session ID.
* @param shares Number of clients that shares this upload session.
* @param shareId Unique ID of this client.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
String id,
long shares,
long shareId) throws TunnelException {
if (!(shares >= 1)) {
throw new IllegalArgumentException("Invalid arguments, shares must be >= 1");
}
if (!(shareId >= 0)) {
throw new IllegalArgumentException("Invalid arguments, shareId must be >= 0");
}
if (!(shares > shareId)) {
throw new IllegalArgumentException("Invalid arguments, shares must be > shareId");
}
TableTunnel.UploadSession session;
if (partitionSpec != null) {
session = getUploadSession(projectName, schemaName, tableName, partitionSpec, id);
} else {
session = getUploadSession(projectName, schemaName, tableName, id);
}
session.shares = shares;
session.curBlockId = shareId;
return session;
}
/**
* Gets an upload session previously created on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param id
*     Upload session ID {@link TableTunnel.UploadSession#getId()}
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String tableName,
String id) throws TunnelException {
return getUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, id);
}
/**
* Get the upload session specified by the upload session ID.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param id Upload session ID.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String schemaName,
String tableName,
String id) throws TunnelException {
return new TableTunnel.UploadSession(projectName, schemaName, tableName, null, id, false);
}
/**
* Gets an upload session previously created on a partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     Partition spec of the target table {@link PartitionSpec}.
* @param id
*     Upload session ID {@link TableTunnel.UploadSession#getId()}
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec,
String id) throws TunnelException {
return getUploadSession(projectName, config.getOdps().getCurrentSchema(), tableName, partitionSpec, id);
}
/**
* Get the upload session specified by the upload session ID.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param id Upload session ID.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
String id) throws TunnelException {
return getUploadSession(projectName, schemaName, tableName, partitionSpec, id, true);
}
/**
* Get the upload session specified by the upload session ID.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param id Upload session ID.
* @param getBlockId Indicates whether to retrieve a list of block IDs. If set to false, the blockList will be empty,
* resulting in reduced latency; however, you will not be able to use commit(Long[] blocks) and must
* use commit() instead.
* @return {@link TableTunnel.UploadSession}
* @throws TunnelException
*/
public TableTunnel.UploadSession getUploadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
String id,
boolean getBlockId) throws TunnelException {
return new TableTunnel.UploadSession(
projectName,
schemaName,
tableName,
partitionSpec == null ? null : partitionSpec.toString().replace("'", ""),
id,
false,
getBlockId);
}
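/*
* A minimal sketch of reattaching to an existing session (the identifiers below are
* placeholders): with getBlockId set to false the uploaded-block list is not fetched, which
* reduces latency, but only the argument-less commit() may then be used.
*
*   TableTunnel.UploadSession up = tunnel.getUploadSession(
*       "my_project", "my_schema", "my_table", null, sessionId, false);
*   RecordWriter w = up.openBufferedWriter();
*   // ... write records, then close the writer ...
*   up.commit(); // commit(Long[]) would need the block list
*/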
public DownloadSessionBuilder buildDownloadSession(
String projectName,
String tableName) {
return new DownloadSessionBuilder().setProjectName(projectName).setTableName(tableName);
}
/**
* Creates a download session on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String tableName) throws TunnelException {
return createDownloadSession(projectName, tableName, false);
}
/**
* Creates a download session on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param async
*     Create the session asynchronously; in scenarios with many small files this avoids
*     connection timeouts.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String tableName,
boolean async) throws TunnelException {
return createDownloadSession(projectName, config.getOdps().getCurrentSchema(), tableName, async);
}
/**
* Create a download session of a non-partitioned table.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param async Create the session asynchronously. Enable this option to avoid the connection
* timeout error caused by the small file issue.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String schemaName,
String tableName,
boolean async) throws TunnelException {
return buildDownloadSession(projectName, tableName)
.setSchemaName(schemaName)
.setAsyncMode(async)
.build();
}
/**
* Creates a download session on a partitioned table.
*
* Note: the partition spec must identify a leaf-level partition. If the table has two
* partition levels, pt and ds, values for both must be given; specifying only one of them is
* not supported.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec) throws TunnelException {
return createDownloadSession(projectName, tableName, partitionSpec, false);
}
/**
* Creates a download session on a partitioned table.
*
* Note: the partition spec must identify a leaf-level partition. If the table has two
* partition levels, pt and ds, values for both must be given; specifying only one of them is
* not supported.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @param async
*     Create the session asynchronously; in scenarios with many small files this avoids
*     connection timeouts.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec,
boolean async) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
return createDownloadSession(projectName, config.getOdps().getCurrentSchema(), tableName, partitionSpec, async);
}
/**
* Create a download session of a partitioned table.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param async Create the session asynchronously. Enable this option to avoid the connection
* timeout error caused by the small file issue.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
boolean async) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
return buildDownloadSession(projectName, tableName)
.setSchemaName(schemaName)
.setPartitionSpec(partitionSpec)
.setAsyncMode(async)
.build();
}
/**
* This method is deprecated. Shard table is no longer supported.
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(String projectName, String tableName,
long shardId) throws TunnelException {
if (shardId < 0) {
throw new IllegalArgumentException("Invalid arguments, shard id required.");
}
return buildDownloadSession(projectName, tableName)
.setShardId(shardId)
.build();
}
/**
* This method is deprecated. Shard table is no longer supported.
*/
@Deprecated
public TableTunnel.DownloadSession createDownloadSession(String projectName, String tableName,
PartitionSpec partitionSpec,
long shardId) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
if (shardId < 0) {
throw new IllegalArgumentException("Invalid arguments, shard id required.");
}
return buildDownloadSession(projectName, tableName)
.setPartitionSpec(partitionSpec)
.setShardId(shardId)
.build();
}
/**
* Gets a download session previously created on a non-partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param id
*     Download session ID {@link TableTunnel.DownloadSession#getId()}
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
public TableTunnel.DownloadSession getDownloadSession(
String projectName,
String tableName,
String id) throws TunnelException {
return getDownloadSession(projectName, config.getOdps().getCurrentSchema(), tableName, id);
}
/**
* Get the download session specified by the download session ID.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param id Download session ID.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
public TableTunnel.DownloadSession getDownloadSession(
String projectName,
String schemaName,
String tableName,
String id) throws TunnelException {
return buildDownloadSession(projectName, tableName)
.setSchemaName(schemaName)
.setDownloadId(id)
.build();
}
/**
* This method is deprecated. Shard table is no longer supported.
*/
@Deprecated
public TableTunnel.DownloadSession getDownloadSession(
String projectName,
String tableName,
long shardId,
String id) throws TunnelException {
return buildDownloadSession(projectName, tableName)
.setShardId(shardId)
.setDownloadId(id)
.build();
}
/**
* Gets a download session previously created on a partitioned table.
*
* @param projectName
*     Project name.
* @param tableName
*     Table name; views are not supported.
* @param partitionSpec
*     The target partition {@link PartitionSpec}.
* @param id
*     Download session ID {@link TableTunnel.DownloadSession#getId()}
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
public TableTunnel.DownloadSession getDownloadSession(
String projectName,
String tableName,
PartitionSpec partitionSpec,
String id) throws TunnelException {
return getDownloadSession(projectName, config.getOdps().getCurrentSchema(), tableName, partitionSpec, id);
}
/**
* Get the download session specified by the download session ID.
*
* @param projectName Project name.
* @param schemaName Schema name.
* @param tableName Table name.
* @param partitionSpec Partition spec.
* @param id Download session ID.
* @return {@link TableTunnel.DownloadSession}
* @throws TunnelException
*/
public TableTunnel.DownloadSession getDownloadSession(
String projectName,
String schemaName,
String tableName,
PartitionSpec partitionSpec,
String id) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
return buildDownloadSession(projectName, tableName)
.setSchemaName(schemaName)
.setPartitionSpec(partitionSpec)
.setDownloadId(id)
.build();
}
/**
* This method is deprecated. Shard table is no longer supported.
*/
@Deprecated
public TableTunnel.DownloadSession getDownloadSession(String projectName, String tableName,
PartitionSpec partitionSpec, long shardId,
String id) throws TunnelException {
if (partitionSpec == null || partitionSpec.keys().size() == 0) {
throw new IllegalArgumentException("Invalid arguments, partition spec required.");
}
if (shardId < 0) {
throw new IllegalArgumentException("Invalid arguments, shard id required.");
}
return buildDownloadSession(projectName, tableName)
.setPartitionSpec(partitionSpec)
.setShardId(shardId)
.setDownloadId(id)
.build();
}
private String getResource(String projectName, String tableName) {
return config.getResource(projectName, tableName);
}
/**
* Sets the TunnelServer endpoint.
*
* If no endpoint is set, one is chosen automatically.
*
* @param endpoint
*/
public void setEndpoint(String endpoint) {
try {
URI u = new URI(endpoint);
config.setEndpoint(u);
} catch (URISyntaxException e) {
throw new IllegalArgumentException("Invalid endpoint.");
}
}
public TableTunnel.StreamUploadSession.Builder buildStreamUploadSession(
String projectName, String tableName) {
return new StreamUploadSessionImpl.Builder().setConfig(this.config)
.setProjectName(projectName)
.setTableName(tableName).setSchemaName(config.getOdps().getCurrentSchema());
}
public TableTunnel.UpsertSession.Builder buildUpsertSession(
String projectName, String tableName) {
return new UpsertSessionImpl.Builder().setConfig(this.config)
.setProjectName(projectName)
.setTableName(tableName);
}
/**
* Data preview interface: downloads a limited number of rows through the tunnel. At most
* 5k rows are returned; anything beyond that is truncated.
* Returns the data as an Arrow stream.
* @param projectName ODPS project name
* @param schemaName ODPS schema name; may be null if not specified.
* @param tableName ODPS table name
* @return an Arrow-format data stream {@link ArrowStreamReader};
* it can be wrapped into a RecordReader with {@link ArrowStreamRecordReader}
*/
public ArrowStreamReader preview(String projectName, String schemaName, String tableName) throws TunnelException {
return preview(projectName, schemaName, tableName, null);
}
/**
* Data preview interface: downloads a limited number of rows through the tunnel. At most
* 5k rows are returned; anything beyond that is truncated.
* Returns the data as an Arrow stream.
* @param projectName ODPS project name
* @param schemaName ODPS schema name; may be null if not specified.
* @param tableName ODPS table name
* @param partitionSpec The partition {@link PartitionSpec} of the table; may be null if no
*                      partition is specified.
* @return an Arrow-format data stream {@link ArrowStreamReader};
* it can be wrapped into a RecordReader with {@link ArrowStreamRecordReader}
*/
public ArrowStreamReader preview(String projectName, String schemaName, String tableName,
String partitionSpec) throws TunnelException {
return preview(projectName, schemaName, tableName, partitionSpec, -1L);
}
/**
* Data preview interface: downloads a limited number of rows through the tunnel. At most
* 5k rows are returned; anything beyond that is truncated.
* @param projectName ODPS project name
* @param schemaName ODPS schema name; may be null if not specified.
* @param tableName ODPS table name
* @param partitionSpec The partition {@link PartitionSpec} of the table; may be null if no
*                      partition is specified.
* @param limit Maximum number of rows to read; capped at 5000.
* @return an Arrow-format data stream {@link ArrowStreamReader};
* it can be wrapped into a RecordReader with {@link ArrowStreamRecordReader}
*/
public ArrowStreamReader preview(String projectName, String schemaName, String tableName,
String partitionSpec, Long limit) throws TunnelException {
return preview(projectName, schemaName, tableName, partitionSpec, limit, null);
}
public ArrowStreamReader preview(String projectName, String schemaName, String tableName,
String partitionSpec, Long limit, List<String> requiredColumns)
throws TunnelException {
return preview(projectName, schemaName, tableName, partitionSpec, limit, requiredColumns, null);
}
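/*
* A minimal usage sketch (project and table names are placeholders): consuming the preview
* result with the standard Arrow stream API; VectorSchemaRoot comes from
* org.apache.arrow.vector.
*
*   try (ArrowStreamReader reader = tunnel.preview("my_project", null, "my_table", null, 100L)) {
*     VectorSchemaRoot root = reader.getVectorSchemaRoot();
*     while (reader.loadNextBatch()) {
*       System.out.println(root.contentToTSVString());
*     }
*   }
*/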
/**
* No compression by default for now, due to dependency-version issues with
* arrow-memory-compression.
*/
private ArrowStreamReader preview(String projectName, String schemaName, String tableName,
String partitionSpec, Long limit, List<String> requiredColumns,
String acceptEncoding) throws TunnelException {
if (limit == null || limit < 0) {
limit = -1L;
}
HashMap<String, String> headers = getCommonHeader();
if (acceptEncoding != null) {
headers.put(Headers.ACCEPT_ENCODING, acceptEncoding);
}
Map<String, String> params = new HashMap<>();
params.put("limit", String.valueOf(limit));
if (partitionSpec != null) {
params.put("partition", partitionSpec);
}
if (requiredColumns != null && !requiredColumns.isEmpty()) {
TableSchema tableSchema = odps.tables().get(projectName, schemaName, tableName).getSchema();
List<String> orderedColumns = ColumnUtils.orderColumns(tableSchema, requiredColumns);
params.put("columns", String.join(",", orderedColumns));
}
String resource = ResourceBuilder.buildTableResource(projectName, schemaName, tableName);
resource += "/preview";
RestClient client = config.newRestClient(projectName);
Connection conn;
try {
conn = client.connect(resource, "GET", params, headers);
Response resp = conn.getResponse();
if (!resp.isOK()) {
throw new TunnelException(conn.getInputStream());
}
String contentEncoding = resp.getHeader(Headers.CONTENT_ENCODING);
CompressionCodec.Factory compressionFactory;
switch (Optional.ofNullable(contentEncoding).orElse("").toUpperCase()) {
// case "ZSTD":
// case "LZ4_FRAME":
// compressionFactory = CommonsCompressionFactory.INSTANCE;
// break;
default:
compressionFactory = NoCompressionCodec.Factory.INSTANCE;
}
RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
return new ArrowStreamReader(conn.getInputStream(), allocator, compressionFactory);
} catch (IOException e) {
throw new TunnelException("Failed to connect to the tunnel endpoint " + client.getEndpoint(),
e);
} catch (TunnelException e) {
// Do not delete here! TunnelException extends from OdpsException.
throw e;
} catch (Exception e) {
throw new TunnelException(e.getMessage(), e);
}
}
public interface FlushResult {
public String getTraceId();
public long getFlushSize();
public long getRecordCount();
}
public interface StreamRecordPack {
/**
* Appends a record.
* @param record
*/
public void append(Record record) throws IOException;
/**
* @return the number of records currently stored in this pack
*/
public long getRecordCount();
/**
* Note: data passes through several layers of buffering before it reaches the in-memory
* buffer, so this value does not grow continuously; getDataSize may stay unchanged right
* after an append.
* @return the size of the data currently stored in this pack
*/
public long getDataSize();
/**
* Sends the data to the server.
* The pack object can be reused after a successful flush.
* @return traceId
* @throws IOException
*/
public String flush() throws IOException;
/**
* Sends the data to the server.
* The pack object can be reused after a successful flush.
* @param flushOption write options {@link FlushOption}
* @return flush result
* @throws IOException
*/
public FlushResult flush(FlushOption flushOption) throws IOException;
/**
* Resets the buffer so the pack object can be reused.
* reset is invoked once automatically after a successful flush.
*/
public void reset() throws IOException;
}
/**
* FlushOption configures how data is written to the network stream.
*/
public static class FlushOption {
private long timeout = 0;
/**
* Sets the timeout of the write operation.
* @param tm timeout in milliseconds; <= 0 means no timeout
*/
public FlushOption timeout(long tm) {
this.timeout = tm;
return this;
}
public long getTimeout() {
return timeout;
}
public void setTimeout(long timeout) {
this.timeout = timeout;
}
}
public interface StreamUploadSession {
/**
* Sets p2p mode.
* @param mode
*/
public void setP2pMode(boolean mode);
/**
* Gets the session ID.
* @return Session ID
*/
public String getId();
/**
* Gets the table schema.
*/
public TableSchema getSchema();
/**
* Gets the schema version.
*/
public String getSchemaVersion();
/**
* Gets the current quota name.
*/
public String getQuotaName();
/**
* Creates an uncompressed {@link StreamRecordPack} object.
* @return a StreamRecordPack object
*/
public StreamRecordPack newRecordPack() throws IOException;
/**
* Creates a {@link StreamRecordPack} object.
* @param compressOption compression option for data transfer
* @return a StreamRecordPack object
*/
public StreamRecordPack newRecordPack(CompressOption compressOption) throws IOException, TunnelException;
/**
* Creates a {@link Record} object.
* @return a Record object
*/
public Record newRecord();
abstract class Builder {
private String schemaName;
private PartitionSpec partitionSpec;
private long slotNum = 0;
private boolean createPartition = false;
private String schemaVersion;
protected boolean allowSchemaMismatch = true;
public String getSchemaName() {
return schemaName;
}
public Builder setSchemaName(String schemaName) {
this.schemaName = schemaName;
return this;
}
public String getPartitionSpec() {
return this.partitionSpec == null ? null : partitionSpec.toString().replaceAll("'", "");
}
public Builder setPartitionSpec(PartitionSpec spec) {
this.partitionSpec = spec;
return this;
}
public Builder setPartitionSpec(String spec) {
this.partitionSpec = spec == null ? null : new PartitionSpec(spec);
return this;
}
public long getSlotNum() {
return slotNum;
}
public Builder setSlotNum(long slotNum) {
this.slotNum = slotNum;
return this;
}
public Builder setSchemaVersion(String schemaVersion) {
this.schemaVersion = schemaVersion;
return this;
}
public String getSchemaVersion() {
return schemaVersion;
}
public boolean isCreatePartition() {
return createPartition;
}
public Builder setCreatePartition(boolean createPartition) {
this.createPartition = createPartition;
return this;
}
public Builder allowSchemaMismatch(boolean allowSchemaMismatch) {
this.allowSchemaMismatch = allowSchemaMismatch;
return this;
}
abstract public StreamUploadSession build() throws TunnelException;
}
}
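/*
* A minimal usage sketch of the stream upload path (project and table names are
* placeholders; the first column is assumed to be BIGINT): records are buffered in a
* StreamRecordPack and flushed with a timeout.
*
*   TableTunnel.StreamUploadSession session =
*       tunnel.buildStreamUploadSession("my_project", "my_table").build();
*   StreamRecordPack pack = session.newRecordPack();
*   Record r = session.newRecord();
*   r.setBigint(0, 1L);
*   pack.append(r);
*   FlushResult result = pack.flush(new FlushOption().timeout(10_000));
*   System.out.println("traceId: " + result.getTraceId());
*/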
public interface UpsertSession extends Closeable {
/**
* Gets the session ID.
* @return Session ID
*/
String getId();
/**
* Gets the current quota name.
*/
public String getQuotaName();
/**
* Gets the session status.
* @return one of the status codes:
* normal
* committing
* committed
* expired
* critical
* aborted
*/
String getStatus() throws TunnelException;
/**
* Gets the table schema.
*/
TableSchema getSchema();
/**
* Commits the UpsertSession.
*/
void commit(boolean async) throws TunnelException;
/**
* Aborts the UpsertSession.
*/
void abort() throws TunnelException;
/**
* Releases local client resources.
*/
void close();
/**
* Creates a {@link Record} object.
* @return a Record object
*/
Record newRecord();
UpsertStream.Builder buildUpsertStream();
interface Builder {
String getUpsertId();
UpsertSession.Builder setUpsertId(String upsertId);
String getSchemaName();
UpsertSession.Builder setSchemaName(String schemaName);
String getPartitionSpec();
UpsertSession.Builder setPartitionSpec(PartitionSpec spec);
UpsertSession.Builder setPartitionSpec(String spec);
long getSlotNum();
UpsertSession.Builder setSlotNum(long slotNum);
long getCommitTimeout();
UpsertSession.Builder setCommitTimeout(long commitTimeoutMs);
/**
* Number of threads in the Netty event loop used for network I/O; defaults to 1.
*/
UpsertSession.Builder setNetworkThreadNum(int threadNum);
/**
* Maximum concurrency (number of channels allowed to exist at the same time); defaults to
* 20, and <= 0 means unlimited.
*/
UpsertSession.Builder setConcurrentNum(int concurrentNum);
/**
* Connection timeout in milliseconds; defaults to 180 * 1000.
*/
UpsertSession.Builder setConnectTimeout(long timeout);
/**
* Request/response timeout in milliseconds; defaults to 300 * 1000.
*/
UpsertSession.Builder setReadTimeout(long timeout);
long getLifecycle();
/**
* Session lifecycle in hours; the valid range is 1 - 24. Values outside the range are
* ignored and the server-side default is used.
*/
UpsertSession.Builder setLifecycle(long lifecycle);
UpsertSession build() throws TunnelException, IOException;
}
}
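/*
* A minimal usage sketch of the upsert path (project, table, and column names are
* placeholders; the table is assumed to have a primary key, which upsert requires). The
* upsert/flush/close calls follow the com.aliyun.odps.tunnel.streams.UpsertStream
* interface.
*
*   try (TableTunnel.UpsertSession session =
*            tunnel.buildUpsertSession("my_project", "my_table").build()) {
*     UpsertStream stream = session.buildUpsertStream().build();
*     Record r = session.newRecord();
*     r.set("id", 1L);
*     r.set("value", "v1");
*     stream.upsert(r);
*     stream.flush();
*     stream.close();
*     session.commit(false);
*   }
*/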
/**
* UploadStatus represents the state of the current upload:
* UNKNOWN    unknown
* NORMAL     normal
* CLOSING    closing
* CLOSED     closed
* CANCELED   canceled
* EXPIRED    expired
* CRITICAL   fatal error
* COMMITTING committing
*/
public static enum UploadStatus {
UNKNOWN, NORMAL, CLOSING, CLOSED, CANCELED, EXPIRED, CRITICAL, COMMITTING
}
/**
*
* UploadSession represents a session that uploads data to an ODPS table; it is normally
* created through {@link TableTunnel}.
* An upload session has INSERT INTO semantics: multiple upload sessions on the same table or
* partition do not interfere with one another.
* The session ID uniquely identifies a session and can be obtained via {@link #getId()}.
*
* An UploadSession writes data by creating {@link RecordWriter}s.
* Each RecordWriter corresponds to one HTTP request; a single UploadSession may create
* multiple RecordWriters.
*
* Creating a RecordWriter requires a block ID, which uniquely identifies the writer within
* the session. Its valid range is [0, 20000), and a single block may upload at most 100G of
* data.
* Within one UploadSession, opening RecordWriters with the same block ID more than once
* overwrites the data: only the data uploaded by the last RecordWriter to call close() is
* kept. close() must not be called twice on the same RecordWriter instance.
* The HTTP request behind a RecordWriter times out after 120s; if no data is transferred
* within 120s, the service closes the connection. Note that the HTTP protocol itself has an
* 8K buffer.
*
* Finally, call {@link #commit(Long[])} to commit all uploaded data blocks.
* The commit operation can be retried, unless one of the following exceptions occurs:
*
* - CloseUploadSessionStatusConflictException
* - DataCollisionException
* - InternalServerError
* - LOCAL_ERROR_CODE
*
* Tips:
*
* - Do not open a RecordWriter per record; it hurts subsequent performance.
* - Buffer at least 64M of data in your code, then write it in one batch with a single
*   RecordWriter.
*/
public class UploadSession {
private String id;
private TableSchema schema = new TableSchema();
private String projectName;
private String schemaName;
private String tableName;
private String partitionSpec;
private Long fieldMaxSize;
private List<Long> blocks = new ArrayList<>();
private UploadStatus status = UploadStatus.UNKNOWN;
private String quotaName;
private Configuration conf;
private RestClient tunnelServiceClient;
// TunnelBufferedWriter manages block IDs automatically, which adds the following state to
// UploadSession:
// shares is the number of TunnelBufferedWriters writing to this table.
// Sessions created via createUploadSession have shares == 1; sessions obtained via
// getUploadSession have shares greater than 1.
private final Long totalBLocks = 20000L;
private Long shares = 1L;
private Long curBlockId = 0L;
private static final int RETRY_SLEEP_SECONDS = 5;
private boolean shouldTransform = false;
private boolean overwrite = false;
private boolean fetchBlockId = true;
/**
* Constructs an {@link UploadSession} object.
*
* In multi-threaded or multi-process scenarios, sharing the same uploadId across processes
* or threads and creating the Upload object with this method improves performance.
*
* @param projectName
*     Name of the project that contains the target table.
* @param schemaName
*     Name of the schema that contains the target table.
* @param tableName
*     Name of the target table.
* @param partitionSpec
*     Partition spec of the target table, in the form: pt=xxx,dt=xxx.
* @param uploadId
*     Unique identifier of the upload.
* @param overwrite
*     Whether to overwrite existing data.
*/
UploadSession(
String projectName,
String schemaName,
String tableName,
String partitionSpec,
String uploadId,
boolean overwrite) throws TunnelException {
this(projectName, schemaName, tableName, partitionSpec, uploadId, overwrite, true);
}
UploadSession(
String projectName,
String schemaName,
String tableName,
String partitionSpec,
String uploadId,
boolean overwrite,
boolean fetchBlockId) throws TunnelException {
this.conf = TableTunnel.this.config;
this.projectName = projectName;
this.schemaName = schemaName;
this.tableName = tableName;
this.partitionSpec = partitionSpec;
this.id = uploadId;
this.overwrite = overwrite;
this.fetchBlockId = fetchBlockId;
tunnelServiceClient = conf.newRestClient(projectName);
initiateOrReload();
}
private void initiateOrReload() throws TunnelException {
TunnelRetryHandler retryHandler = new TunnelRetryHandler(conf);
try {
retryHandler.executeWithRetry(
() -> {
if (this.id == null) {
initiate();
} else {
reload();
}
return null;
}
);
} catch (TunnelException | RuntimeException e) {
throw e;
} catch (Exception e) {
throw new TunnelException(e.getMessage(), e);
}
}
/* Initiate upload session */
private void initiate() throws TunnelException {
HashMap<String, String> headers = getCommonHeader();
List<String> tags = this.conf.getTags();
if (tags != null) {
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
}
HashMap<String, String> params = new HashMap<>();
params.put(TunnelConstants.UPLOADS, null);
if (this.partitionSpec != null && this.partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
if (this.overwrite) {
params.put(TunnelConstants.OVERWRITE, "true");
}
if (this.conf.availableQuotaName()) {
params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
}
Connection conn = null;
try {
conn = tunnelServiceClient.connect(getResource(), "POST", params, headers);
Response resp = conn.getResponse();
if (resp.isOK()) {
loadFromJson(conn.getInputStream());
shouldTransform =
StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
} else {
throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID), conn.getInputStream(), resp.getStatus());
}
} catch (IOException e) {
throw new TunnelException("Failed to create upload session with tunnel endpoint "
+ tunnelServiceClient.getEndpoint(), e);
} catch (TunnelException e) {
// Do not delete here! TunnelException extends from OdpsException.
throw e;
} catch (OdpsException e) {
throw new TunnelException(e.getMessage(), e);
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException e) {
}
}
}
}
public boolean isShouldTransform() {
return shouldTransform;
}
/**
* {@link TunnelBufferedWriter}s running in multiple threads obtain the block IDs they
* write with through this method.
*
* Access to curBlockId is synchronized to prevent a block ID from being handed out twice.
*
* @return the assigned blockId
*/
synchronized public Long getAvailBlockId() {
if (curBlockId >= totalBLocks) {
throw new RuntimeException("No more available blockId, already " + curBlockId);
}
Long old = curBlockId;
curBlockId += shares;
return old;
}
/**
* Commits the session without block verification.
*
* @throws TunnelException
* @throws IOException
*/
public void commit() throws TunnelException, IOException {
completeUpload();
}
/**
* Opens an HTTP connection, writes the pack data, then closes the connection. Writing to
* the same block more than once overwrites the previously written data.
*
* @param blockId
*     Block ID.
* @param pack
*     The pack data.
*/
public void writeBlock(long blockId, RecordPack pack)
throws IOException {
writeBlock(blockId, pack, 0);
}
/**
* Opens an HTTP connection, writes the pack data, then closes the connection. Writing to
* the same block more than once overwrites the previously written data.
*
* @param blockId
*     Block ID.
* @param pack
*     The pack data.
* @param timeout
*     Timeout in ms; only effective for ProtobufRecordPack; <= 0 means no timeout.
*/
public void writeBlock(long blockId, RecordPack pack, long timeout)
throws IOException {
writeBlockInternal(blockId, pack, timeout, 0);
}
public void writeBlock(long blockId, RecordPack pack, long timeout, long blockVersion)
throws IOException, TunnelException {
checkBlockVersion(blockVersion);
writeBlockInternal(blockId, pack, timeout, blockVersion);
}
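/*
* A minimal sketch of the pack-based upload path ("up" stands for an UploadSession created
* as above; the first column is assumed to be BIGINT): records are buffered in a
* ProtobufRecordPack obtained from newRecordPack(), sent in one shot, and committed.
*
*   RecordPack pack = up.newRecordPack();
*   Record r = up.newRecord();
*   r.setBigint(0, 42L);
*   pack.append(r);
*   up.writeBlock(0L, pack, 30_000);
*   up.commit(new Long[]{0L});
*/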
private void writeBlockInternal(long blockId, RecordPack pack, long timeout, long blockVersion)
throws IOException {
TunnelRetryHandler retryHandler = new TunnelRetryHandler(conf);
try {
retryHandler.executeWithRetry(() -> {
Connection conn = null;
try {
if (pack instanceof ProtobufRecordPack) {
ProtobufRecordPack protoPack = (ProtobufRecordPack) pack;
conn = getConnection(blockId, protoPack.getCompressOption(), blockVersion);
sendBlock(protoPack, conn, timeout);
} else {
RecordWriter writer = openRecordWriter(blockId);
RecordReader reader = pack.getRecordReader();
Record record;
while ((record = reader.read()) != null) {
writer.write(record);
}
writer.close();
}
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException e) {
}
}
}
return null;
});
} catch (RuntimeException re) {
throw re;
} catch (Exception e) {
throw new IOException(e.getMessage(), e);
}
}
private void sendBlock(ProtobufRecordPack pack, Connection conn) throws IOException {
sendBlock(pack, conn, 0);
}
private void sendBlock(ProtobufRecordPack pack, Connection conn, long timeout) throws IOException {
if (null == conn) {
throw new IOException("Invalid connection");
}
pack.checkTransConsistency(shouldTransform);
pack.complete();
ByteArrayOutputStream baos = pack.getProtobufStream();
if (timeout > 0) {
ConnectionWatcher.getInstance().mark(conn, timeout);
}
Response response = null;
try {
baos.writeTo(conn.getOutputStream());
conn.getOutputStream().close();
baos.close();
response = conn.getResponse();
} catch (Throwable tr) {
if (timeout > 0 && ConnectionWatcher.getInstance().checkTimedOut(conn)) {
throw new SocketTimeoutException("Flush time exceeded timeout user set: " + timeout + "ms");
}
throw tr;
} finally {
if (timeout > 0) {
ConnectionWatcher.getInstance().release(conn);
}
}
if (!response.isOK()) {
TunnelException exception =
new TunnelException(response.getHeader(HEADER_ODPS_REQUEST_ID), conn.getInputStream(),
response.getStatus());
throw new IOException(exception.getMessage(), exception);
}
}
/**
* Opens a {@link RecordWriter} for writing data.
*
* The block ID is a user-chosen value in the range 0~19999 that identifies the data block
* uploaded by this writer.
*
* @param blockId
*     Block ID.
*/
public RecordWriter openRecordWriter(long blockId) throws TunnelException, IOException {
return openRecordWriter(blockId, false);
}
/**
* Opens a {@link RecordWriter} for writing data.
*
* @param blockId
*     Block ID.
* @param compress
*     Whether to compress the transferred data.
*/
public RecordWriter openRecordWriter(long blockId, boolean compress) throws TunnelException,
IOException {
CompressOption option = compress ? new CompressOption() :
new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
return openRecordWriter(blockId, option);
}
/**
* Opens a {@link RecordWriter} for writing data.
*
* @param blockId
*     Block ID.
* @param compress
*     Compression option for the transferred data.
*/
public RecordWriter openRecordWriter(long blockId, CompressOption compress)
throws TunnelException,
IOException {
return openRecordWriterInternal(blockId, compress, 0);
}
public RecordWriter openRecordWriter(long blockId, CompressOption compress, long blockVersion)
throws TunnelException,
IOException {
checkBlockVersion(blockVersion);
return openRecordWriterInternal(blockId, compress, blockVersion);
}
private RecordWriter openRecordWriterInternal(long blockId, CompressOption compress, long blockVersion)
throws TunnelException {
TunnelRetryHandler retryHandler = new TunnelRetryHandler(conf);
try {
return retryHandler.executeWithRetry(() -> {
Connection conn = null;
try {
TunnelRecordWriter writer = null;
conn = getConnection(blockId, compress, blockVersion);
writer =
new TunnelRecordWriter(schema, conn, compress);
writer.setTransform(shouldTransform);
return writer;
} catch (IOException e) {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException ignored) {
}
}
throw e;
}
});
} catch (RuntimeException re) {
throw re;
} catch (Exception e) {
throw new TunnelException(e.getMessage(), e);
}
}
/**
* Opens an uncompressed {@link TunnelBufferedWriter} for writing data.
*/
public RecordWriter openBufferedWriter() throws TunnelException {
return openBufferedWriter(false);
}
/**
* Opens a {@link TunnelBufferedWriter} for writing data.
*
* @param compress
*     Whether to compress the transferred data.
*/
public RecordWriter openBufferedWriter(boolean compress) throws TunnelException {
CompressOption compressOption = compress ? conf.getCompressOption() :
new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
return openBufferedWriter(compressOption);
}
/**
* Opens a {@link TunnelBufferedWriter} for writing data.
*
* @param compressOption
*     Compression option for the transferred data.
*/
public RecordWriter openBufferedWriter(CompressOption compressOption) throws TunnelException {
return openBufferedWriter(compressOption, 0);
}
/**
* Opens a {@link TunnelBufferedWriter} for writing data.
*
* @param compressOption
*     Compression option for the transferred data.
* @param timeout
*     Timeout in ms; <= 0 means no timeout. Recommended value:
*     (BufferSizeInMB / UploadBandwidthInMB) * 1000 * 120%
*/
public RecordWriter openBufferedWriter(CompressOption compressOption, long timeout) throws TunnelException {
return openBufferedWriter(compressOption, timeout, null);
}
/**
* Opens a {@link TunnelBufferedWriter} for writing data.
*
* @param compressOption
*     Compression option for the transferred data.
* @param timeout
*     Timeout in ms; <= 0 means no timeout. Recommended value:
*     (BufferSizeInMB / UploadBandwidthInMB) * 1000 * 120%
* @param versionProvider
*     Provides a block version for each internally generated blockId; null disables this
*     feature.
*/
public RecordWriter openBufferedWriter(CompressOption compressOption, long timeout, BlockVersionProvider versionProvider) throws TunnelException {
try {
return new TunnelBufferedWriter(this, compressOption, timeout, versionProvider);
} catch (IOException e) {
throw new TunnelException(e.getMessage(), e.getCause());
}
}
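/*
* A minimal sketch of the recommended timeout formula above (the 64 MB buffer size and the
* 10 MB/s bandwidth estimate are illustrative assumptions):
*
*   long bufferSizeInMB = 64;       // client-side buffer
*   long uploadBandwidthInMB = 10;  // estimated upload bandwidth, MB/s
*   long timeoutMs = bufferSizeInMB * 1000 / uploadBandwidthInMB * 120 / 100; // +20% margin
*   RecordWriter w = up.openBufferedWriter(new CompressOption(), timeoutMs);
*/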
private Schema arrowSchema;
public Schema getArrowSchema() {
if (this.arrowSchema == null){
this.arrowSchema = ArrowUtils.tableSchemaToArrowSchema(this.schema);
}
return this.arrowSchema;
}
public ArrowRecordWriter openArrowRecordWriter(long blockId)
throws TunnelException,
IOException{
return openArrowRecordWriter(blockId, new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0));
}
public ArrowRecordWriter openArrowRecordWriter(long blockId, CompressOption option)
throws TunnelException,
IOException{
return openArrowRecordWriterInternal(blockId, option, 0);
}
public ArrowRecordWriter openArrowRecordWriter(long blockId, CompressOption option, long blockVersion)
throws TunnelException,
IOException{
checkBlockVersion(blockVersion);
return openArrowRecordWriterInternal(blockId, option, blockVersion);
}
private ArrowRecordWriter openArrowRecordWriterInternal(long blockId, CompressOption option, long blockVersion)
throws TunnelException,
IOException{
ArrowTunnelRecordWriter arrowTunnelRecordWriter = null;
Connection conn = null;
try {
conn = getConnection(blockId,true, option, blockVersion);
arrowTunnelRecordWriter = new ArrowTunnelRecordWriter(this, conn, option);
} catch (IOException e) {
if (conn != null) {
conn.disconnect();
}
throw new TunnelException(e.getMessage(), e.getCause());
} catch (TunnelException e) {
throw e;
} catch (OdpsException e) {
throw new TunnelException(e.getMessage(), e);
}
return arrowTunnelRecordWriter;
}
private Connection getConnection(long blockId, CompressOption compress, long blockVersion)
throws OdpsException, IOException {
return getConnection(blockId, false, compress, blockVersion);
}
private Connection getConnection(long blockId, boolean isArrow, CompressOption compress, long blockVersion)
throws OdpsException, IOException {
HashMap<String, String> headers = new HashMap<>();
headers.put(Headers.TRANSFER_ENCODING, Headers.CHUNKED);
headers.put(Headers.CONTENT_TYPE, "application/octet-stream");
// req.setHeader("Expect", "100-continue");
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(TunnelConstants.VERSION));
switch (compress.algorithm) {
case ODPS_RAW: {
break;
}
case ODPS_ZLIB: {
headers.put(Headers.CONTENT_ENCODING, "deflate");
break;
}
case ODPS_SNAPPY: {
headers.put(Headers.CONTENT_ENCODING, "x-snappy-framed");
break;
}
case ODPS_LZ4_FRAME: {
headers.put(Headers.CONTENT_ENCODING, "x-lz4-frame");
break;
}
case ODPS_ARROW_LZ4_FRAME: {
headers.put(Headers.CONTENT_ENCODING, "x-odps-lz4-frame");
break;
}
default: {
throw new TunnelException("invalid compression option.");
}
}
List<String> tags = this.conf.getTags();
if (tags != null) {
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
}
HashMap<String, String> params = new HashMap<>();
if (blockVersion > 0) {
params.put(TunnelConstants.PARAM_BLOCK_VERSION, Long.toString(blockVersion));
}
params.put(TunnelConstants.UPLOADID, id);
params.put(TunnelConstants.BLOCKID, Long.toString(blockId));
if (isArrow) {
params.put(TunnelConstants.PARAM_ARROW,"");
}
if (partitionSpec != null && partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
if (conf.availableQuotaName()) {
params.put(TunnelConstants.PARAM_QUOTA_NAME, conf.getQuotaName());
}
return tunnelServiceClient.connect(getResource(), "PUT", params, headers);
}
private void reload() throws TunnelException {
HashMap<String, String> headers = getCommonHeader();
List<String> tags = this.conf.getTags();
if (tags != null) {
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
}
HashMap<String, String> params = new HashMap<>();
params.put(TunnelConstants.UPLOADID, id);
if (partitionSpec != null && partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
params.put(TunnelConstants.GET_BLOCK_ID, String.valueOf(fetchBlockId));
if (this.conf.availableQuotaName()) {
params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
}
Connection conn = null;
try {
conn = tunnelServiceClient.connect(getResource(), "GET", params, headers);
Response resp = conn.getResponse();
if (resp.isOK()) {
loadFromJson(conn.getInputStream());
shouldTransform =
StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
} else {
TunnelException e = new TunnelException(conn.getInputStream());
throw e;
}
} catch (IOException e) {
throw new TunnelException(e.getMessage(), e);
} catch (TunnelException e) {
// Do not delete here! TunnelException extends from OdpsException.
throw e;
} catch (OdpsException e) {
throw new TunnelException(e.getMessage(), e);
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException e) {
//
}
}
}
}
/**
* Commits all data blocks uploaded in this session.
*
* blocks is the caller's list of blocks that were uploaded successfully; it is checked
* against the server side for completeness.
*
* @param blocks
*     The list of blocks the caller expects to have been uploaded successfully.
* @throws TunnelException
*     Thrown if the supplied block list does not match the blocks on the server.
* @throws IOException
*/
public void commit(Long[] blocks) throws TunnelException, IOException {
if (blocks == null) {
throw new IllegalArgumentException("Invalid argument: blocks.");
}
HashMap<Long, Boolean> clientBlockMap = new HashMap<>();
for (Long blockId : blocks) {
clientBlockMap.put(blockId, true);
}
Long[] serverBlocks = getBlockList();
HashMap<Long, Boolean> serverBlockMap = new HashMap<>();
for (Long blockId : serverBlocks) {
serverBlockMap.put(blockId, true);
}
if (serverBlockMap.size() != clientBlockMap.size()) {
throw new TunnelException("Blocks not match, server: " + serverBlockMap.size()
+ ", tunnelServiceClient: " + clientBlockMap.size());
}
for (Long blockId : blocks) {
if (!serverBlockMap.containsKey(blockId)) {
throw new TunnelException("Block not exsits on server, block id is " + blockId);
}
}
completeUpload();
}
private void completeUpload() throws TunnelException, IOException {
HashMap<String, String> headers = getCommonHeader();
List<String> tags = this.conf.getTags();
if (tags != null) {
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
}
HashMap<String, String> params = new HashMap<>();
params.put(TunnelConstants.UPLOADID, id);
if (partitionSpec != null && partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
if (this.conf.availableQuotaName()) {
params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
}
TunnelRetryHandler retryHandler = new TunnelRetryHandler(conf);
try {
retryHandler.executeWithRetry(() -> {
Connection conn = null;
try {
conn = tunnelServiceClient.connect(getResource(), "POST", params, headers);
Response resp = conn.getResponse();
if (resp.isOK()) {
loadFromJson(conn.getInputStream());
return null;
} else {
throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID),
conn.getInputStream(),
resp.getStatus());
}
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException ignored) {
}
}
}
});
} catch (TunnelException | RuntimeException e) {
throw e;
} catch (Exception e) {
throw new TunnelException(e.getMessage(), e);
}
}
/**
* Gets the session ID.
*/
public String getId() {
return this.id;
}
/**
* Gets the table schema.
*/
public TableSchema getSchema() {
return this.schema;
}
public String getQuotaName() { return quotaName; }
/**
* Gets the session status.
*/
public UploadStatus getStatus() throws TunnelException, IOException {
reload();
return this.status;
}
public Configuration getConfig() {
return conf;
}
/**
* Creates a temporary {@link Record} object to be used for uploading data.
*
* @return a new Record
*/
public Record newRecord() {
return new ArrayRecord(
getSchema().getColumns().toArray(new Column[0]),
true,
fieldMaxSize);
}
public RecordPack newRecordPack() throws IOException {
return newRecordPack(null);
}
/**
* Creates a ProtobufRecordPack that compresses data according to option.
*
* @param option
* @throws IOException
*/
public RecordPack newRecordPack(CompressOption option) throws IOException {
return newRecordPack(0, option);
}
/**
* Creates a ProtobufRecordPack with an initial stream buffer of capacity bytes,
* compressing data according to option.
*
* @param capacity
* @param option
* @throws IOException
*/
public RecordPack newRecordPack(int capacity, CompressOption option) throws IOException {
ProtobufRecordPack pack = new ProtobufRecordPack(schema, new Checksum(), capacity, option);
pack.setTransform(shouldTransform);
return pack;
}
/**
* Gets the list of blocks that have been uploaded successfully in this session.
*/
public Long[] getBlockList() throws TunnelException, IOException {
reload();
return blocks.toArray(new Long[0]);
}
public String getResource() {
return conf.getResource(projectName, schemaName, tableName);
}
/*
* Parse session properties from json stream.
*/
private void loadFromJson(InputStream is) throws TunnelException {
try {
String json = IOUtils.readStreamAsString(is);
JsonObject tree = new JsonParser().parse(json).getAsJsonObject();
// session id
if (tree.has("UploadID")) {
id = tree.get("UploadID").getAsString();
}
// status
if (tree.has("Status")) {
String uploadStatus = tree.get("Status").getAsString().toUpperCase();
status = UploadStatus.valueOf(uploadStatus);
}
// blocks
blocks.clear();
if (tree.has("UploadedBlockList")) {
JsonArray blockList = tree.get("UploadedBlockList").getAsJsonArray();
for (int i = 0; i < blockList.size(); ++i) {
if (blockList.get(i).getAsJsonObject().has("BlockID")) {
blocks.add(blockList.get(i).getAsJsonObject().get("BlockID").getAsLong());
}
}
}
// schema
if (tree.has("Schema")) {
JsonObject tunnelTableSchema = tree.get("Schema").getAsJsonObject();
schema = new TunnelTableSchema(tunnelTableSchema);
}
// field max size
if (tree.has("MaxFieldSize")) {
fieldMaxSize = tree.get("MaxFieldSize").getAsLong();
}
if (tree.has("QuotaName")) {
quotaName = tree.get("QuotaName").getAsString();
}
} catch (Exception e) {
throw new TunnelException("Invalid json content.", e);
}
}
}
/**
* Status of a download session:
* UNKNOWN    unknown
* NORMAL     normal
* CLOSED     closed
* EXPIRED    expired
* INITIATING initiating
*/
public static enum DownloadStatus {
UNKNOWN, NORMAL, CLOSED, EXPIRED, INITIATING
}
/**
*
* DownloadSession represents a session that downloads data from an ODPS table; it is
* normally created through {@link TableTunnel}.
* The session ID uniquely identifies a session and can be obtained via {@link #getId()}.
*
* The total number of records in the table is available via {@link #getRecordCount()}; the
* total can be used to drive concurrent downloads.
*
* A DownloadSession reads data by creating {@link RecordReader}s; each reader must be given
* the start position and the number of records to read.
* The HTTP request behind a RecordReader times out after 300s, after which the service
* closes the connection.
*
*/
public class DownloadSession {
private String id;
private String projectName;
private String schemaName;
private String tableName;
private String partitionSpec;
private Long shardId;
private long count;
private TableSchema schema = new TableSchema();
private DownloadStatus status = DownloadStatus.UNKNOWN;
private String quotaName;
private Configuration conf;
/**
* When downloading a row-level-permission (RAP) table, the tunnel runs a SQL job that
* filters rows according to the row-level permission rules; this is that SQL job's
* InstanceId, or null for non-RAP tables.
*/
private String RAPInstanceId;
private RestClient tunnelServiceClient;
private boolean shouldTransform = false;
/**
* Constructs a {@link DownloadSession} object from an existing downloadId.
*
* @param projectName
*     Name of the project that contains the table.
* @param schemaName
*     Name of the schema that contains the table.
* @param tableName
*     Name of the table to download from.
* @param partitionSpec
*     Partition spec of the table, in the form: pt=xxx,dt=xxx.
* @param shardId
*     Shard ID of the table.
* @param downloadId
*     Unique identifier of the download.
* @param async
*     Create the session asynchronously; in scenarios with many small files this avoids
*     connection timeouts.
*/
DownloadSession(
String projectName,
String schemaName,
String tableName,
String partitionSpec,
Long shardId,
String downloadId,
boolean async,
boolean waitAsyncBuild) throws TunnelException {
this.conf = TableTunnel.this.config;
this.projectName = projectName;
this.schemaName = schemaName;
this.tableName = tableName;
this.partitionSpec = partitionSpec;
this.shardId = shardId;
this.id = downloadId;
tunnelServiceClient = conf.newRestClient(projectName);
if (id == null) {
initiate(async, waitAsyncBuild);
} else {
reload();
}
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @throws TunnelException
* @throws IOException
*/
public TunnelRecordReader openRecordReader(long start, long count) throws TunnelException,
IOException {
return openRecordReader(start, count, false);
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @param compress
* whether to compress the data in transit; even when compression is
* requested, data is transferred uncompressed if the server does not
* support compression
* @throws TunnelException
* @throws IOException
*/
public TunnelRecordReader openRecordReader(long start, long count, boolean compress)
throws TunnelException, IOException {
return openRecordReader(start, count, compress, null);
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @param compress
* compression option for the data in transit; even when compression is
* requested, data is transferred uncompressed if the server does not
* support compression
* @throws TunnelException
* @throws IOException
*/
public TunnelRecordReader openRecordReader(long start, long count, CompressOption compress)
throws TunnelException, IOException {
return openRecordReader(start, count, compress, null);
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @param compress
* whether to compress the data in transit; even when compression is
* requested, data is transferred uncompressed if the server does not
* support compression
* @param columns
* columns to download in this request
* @throws TunnelException
* @throws IOException
*/
public TunnelRecordReader openRecordReader(long start, long count, boolean compress,
List<Column> columns)
throws TunnelException, IOException {
CompressOption option = compress ? new CompressOption() :
new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
return openRecordReader(start, count, option, columns);
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @param compress
* compression option for the data in transit; even when compression is
* requested, data is transferred uncompressed if the server does not
* support compression
* @param columns
* columns to download in this request
* @throws TunnelException
* @throws IOException
*/
public TunnelRecordReader openRecordReader(long start, long count, CompressOption compress,
List<Column> columns)
throws TunnelException, IOException {
return openRecordReader(start, count, compress, columns, false);
}
/**
* Opens a {@link RecordReader} for reading records.
*
* @param start
* position of the first record to read
* @param count
* number of records to read
* @param compress
* compression option for the data in transit; even when compression is
* requested, data is transferred uncompressed if the server does not
* support compression
* @param columns
* columns to download in this request
* @param disableModifiedCheck
* skip checking whether the downloaded data is the latest in the table
* @throws TunnelException
* @throws IOException
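*
* <p>For example, a compressed, column-projected read of the first 1000
* records might look like this (illustrative column name):</p>
* <pre>{@code
* List<Column> columns = new ArrayList<>();
* columns.add(session.getSchema().getColumn("col_name"));
* TunnelRecordReader reader =
*     session.openRecordReader(0, 1000, new CompressOption(), columns, false);
* }</pre>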
*/
public TunnelRecordReader openRecordReader(long start, long count, CompressOption compress,
List<Column> columns, boolean disableModifiedCheck)
throws TunnelException, IOException {
if (columns != null && columns.isEmpty()) {
throw new TunnelException("Specified column list is empty.");
}
TunnelRecordReader reader = new TunnelRecordReader(start, count, columns, compress, tunnelServiceClient, this, disableModifiedCheck);
reader.setTransform(shouldTransform);
return reader;
}
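// The Arrow schema is derived lazily from the table schema on first access.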
private Schema arrowSchema;
public Schema getArrowSchema() {
if (this.arrowSchema == null){
this.arrowSchema = ArrowUtils.tableSchemaToArrowSchema(this.schema);
}
return this.arrowSchema;
}
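/**
* Opens an {@link ArrowRecordReader} that reads the same record range in the
* Arrow columnar format; the overloads below additionally accept a column
* projection, a caller-supplied {@code BufferAllocator}, and a
* {@link CompressOption}.
*
* <p>A minimal sketch (illustrative; assumes an existing session):</p>
* <pre>{@code
* ArrowRecordReader reader =
*     session.openArrowRecordReader(0, session.getRecordCount());
* try {
*   // consume Arrow batches from the reader here
* } finally {
*   reader.close();
* }
* }</pre>
*/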
public ArrowRecordReader openArrowRecordReader(long start, long count)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, null, null);
}
public ArrowRecordReader openArrowRecordReader(long start, long count, CompressOption compress)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, null, null, compress);
}
public ArrowRecordReader openArrowRecordReader(long start, long count, BufferAllocator allocator)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, null, allocator);
}
public ArrowRecordReader openArrowRecordReader(long start, long count, List<Column> columns)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, columns, null);
}
public ArrowRecordReader openArrowRecordReader(long start, long count, List<Column> columns, BufferAllocator allocator)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, columns, allocator, new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0));
}
public ArrowRecordReader openArrowRecordReader(long start, long count, List<Column> columns, BufferAllocator allocator, CompressOption compress)
throws TunnelException, IOException {
return openArrowRecordReader(start, count, columns, allocator, compress, false);
}
public ArrowRecordReader openArrowRecordReader(long start, long count, List<Column> columns, BufferAllocator allocator, CompressOption compress, boolean disableModifiedCheck)
throws TunnelException, IOException {
return new ArrowTunnelRecordReader(start, count, columns, this.tunnelServiceClient, this, allocator, compress, disableModifiedCheck);
}
// initiate a new download session
private void initiate(boolean async, boolean wait) throws TunnelException {
HashMap<String, String> headers = getCommonHeader();
List<String> tags = this.conf.getTags();
if (tags != null) {
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
}
HashMap<String, String> params = new HashMap<>();
params.put(TunnelConstants.DOWNLOADS, null);
if (async) {
params.put(TunnelConstants.ASYNC_MODE, "true");
}
if (partitionSpec != null && partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
if (this.shardId != null) {
params.put(TunnelConstants.RES_SHARD, String.valueOf(this.shardId));
}
if (this.conf.availableQuotaName()) {
params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
}
Connection conn = null;
try {
conn = tunnelServiceClient.connect(getResource(), "POST", params, headers);
Response resp = conn.getResponse();
if (resp.isOK()) {
loadFromJson(conn.getInputStream());
shouldTransform =
StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
} else {
throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID), conn.getInputStream(), resp.getStatus());
}
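// In async mode the server may still be building the session; poll with a
// randomized 5-35 second backoff until it leaves the INITIATING state.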
while (status == DownloadStatus.INITIATING && wait) {
Thread.sleep(random.nextInt(30 * 1000) + 5 * 1000);
reload();
}
} catch (IOException e) {
throw new TunnelException("Failed to create download session with tunnel endpoint "
+ tunnelServiceClient.getEndpoint(), e);
} catch (TunnelException e) {
// Do not delete here! TunnelException extends from OdpsException.
throw e;
} catch (OdpsException e) {
throw new TunnelException(e.getMessage(), e);
} catch (InterruptedException e) {
throw new TunnelException(e.getMessage(), e);
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException e) {
// nothing
}
}
}
}
// reload download session properties
private void reload() throws TunnelException {
HashMap<String, String> headers = getCommonHeader();
HashMap<String, String> params = new HashMap<>();
params.put(TunnelConstants.DOWNLOADID, id);
if (partitionSpec != null && partitionSpec.length() > 0) {
params.put(TunnelConstants.RES_PARTITION, partitionSpec);
}
if (shardId != null) {
params.put(TunnelConstants.RES_SHARD, String.valueOf(shardId));
}
Connection conn = null;
try {
conn = tunnelServiceClient.connect(getResource(), "GET", params, headers);
Response resp = conn.getResponse();
if (resp.isOK()) {
loadFromJson(conn.getInputStream());
shouldTransform =
StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
} else {
TunnelException e = new TunnelException(conn.getInputStream());
e.setRequestId(resp.getHeader(HEADER_ODPS_REQUEST_ID));
throw e;
}
} catch (IOException e) {
throw new TunnelException(e.getMessage(), e);
} catch (TunnelException e) {
// Do not delete here! TunnelException extends from OdpsException.
throw e;
} catch (OdpsException e) {
throw new TunnelException(e.getMessage(), e);
} finally {
if (conn != null) {
try {
conn.disconnect();
} catch (IOException e) {
// nothing
}
}
}
}
/**
* Returns the table schema of the downloaded data.
*
* @return {@link TableSchema}
*/
public TableSchema getSchema() {
return this.schema;
}
/**
* Returns the total number of records available for download.
*/
public long getRecordCount() {
return this.count;
}
/**
* Returns the session ID.
*/
public String getId() {
return this.id;
}
/**
* Returns the session status.
*/
public DownloadStatus getStatus() throws TunnelException, IOException {
reload();
return status;
}
public Configuration getConfig() {
return conf;
}
/**
* Returns the partition spec.
*/
public String getPartitionSpec() {
return partitionSpec;
}
public String getProjectName() {
return projectName;
}
public String getSchemaName() {
return schemaName;
}
public String getTableName() {
return tableName;
}
public String getQuotaName() { return quotaName; }
public String getRAPInstanceId() { return RAPInstanceId; }
private String getResource() {
return conf.getResource(projectName, schemaName, tableName);
}
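/*
* Parse download session properties from a JSON stream.
*/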
private void loadFromJson(InputStream is) throws TunnelException {
try {
String json = IOUtils.readStreamAsString(is);
JsonObject tree = new JsonParser().parse(json).getAsJsonObject();
// session id
if (tree.has("DownloadID")) {
id = tree.get("DownloadID").getAsString();
}
// status
if (tree.has("Status")) {
String downloadStatus = tree.get("Status").getAsString().toUpperCase();
status = DownloadStatus.valueOf(downloadStatus);
}
// record count
if (tree.has("RecordCount")) {
count = tree.get("RecordCount").getAsLong();
}
// schema
if (tree.has("Schema")) {
JsonObject tunnelTableSchema = tree.get("Schema").getAsJsonObject();
schema = new TunnelTableSchema(tunnelTableSchema);
}
if (tree.has("QuotaName")) {
quotaName = tree.get("QuotaName").getAsString();
}
if (tree.has("RAPInstanceId")) {
RAPInstanceId = tree.get("RAPInstanceId").getAsString();
}
} catch (Exception e) {
throw new TunnelException("Invalid json content.", e);
}
}
}
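/**
* Fluent builder for {@link DownloadSession}. A typical chain might look like
* this (illustrative values; assumes the builder is obtained from a
* {@code TableTunnel} factory method such as {@code buildDownloadSession}):
* <pre>{@code
* TableTunnel.DownloadSession session =
*     tunnel.buildDownloadSession("my_project", "my_table")
*         .setPartitionSpec(new PartitionSpec("pt='20240101'"))
*         .setAsyncMode(true)
*         .build();
* }</pre>
*/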
public class DownloadSessionBuilder {
private String projectName;
private String schemaName;
private String tableName;
private PartitionSpec partitionSpec;
private Long shardId;
private String downloadId;
private boolean asyncMode = false;
private boolean waitAsyncBuild = true;
public DownloadSessionBuilder setProjectName(String projectName) {
this.projectName = projectName;
return this;
}
public DownloadSessionBuilder setSchemaName(String schemaName) {
this.schemaName = schemaName;
return this;
}
public DownloadSessionBuilder setTableName(String tableName) {
this.tableName = tableName;
return this;
}
public DownloadSessionBuilder setPartitionSpec(PartitionSpec partitionSpec) {
this.partitionSpec = partitionSpec;
return this;
}
public DownloadSessionBuilder setDownloadId(String downloadId) {
this.downloadId = downloadId;
return this;
}
public DownloadSessionBuilder setShardId(Long shardId) {
this.shardId = shardId;
return this;
}
public DownloadSessionBuilder setAsyncMode(boolean asyncMode) {
this.asyncMode = asyncMode;
return this;
}
public DownloadSessionBuilder setWaitAsyncBuild(boolean waitAsyncBuild) {
this.waitAsyncBuild = waitAsyncBuild;
return this;
}
public DownloadSession build() throws TunnelException {
return new TableTunnel.DownloadSession(projectName,
schemaName,
tableName,
partitionSpec == null ? null : partitionSpec.toString().replaceAll("'", ""),
shardId,
downloadId,
asyncMode,
waitAsyncBuild);
}
/**
* Waits for the server to finish asynchronously creating a
* {@link DownloadSession}.
*
* @param download
* the {@link DownloadSession} to wait for
* @param interval
* maximum client polling interval, in seconds; clamped to between 1 and 30
* @param timeout
* wait timeout in seconds, at least 0
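*
* <p>For example (illustrative): poll for up to five minutes, sleeping at
* most 10 seconds between polls:</p>
* <pre>{@code
* boolean ready = builder.wait(session, 10, 300);
* }</pre>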
*/
public boolean wait(DownloadSession download, int interval, long timeout) throws TunnelException {
if (download == null) {
return false;
}
interval = max(interval, 1);
interval = min(interval, 30);
timeout = max(timeout, 0);
int maxSleepIntervalMs = interval * 1000 + 1;
long timeoutValueMs = timeout * 1000;
while (download.status == DownloadStatus.INITIATING && timeoutValueMs > 0) {
long sleepInterval = random.nextInt(maxSleepIntervalMs - 500) + 500;
try {
Thread.sleep(sleepInterval);
download.reload();
timeoutValueMs -= sleepInterval;
} catch (InterruptedException e) {
throw new TunnelException("Wait Async Create Download Session interrupted", e);
}
}
if (download.status != DownloadStatus.INITIATING && download.status != DownloadStatus.NORMAL) {
throw new TunnelException("Download session is " + download.status.toString());
}
return download.status == DownloadStatus.NORMAL;
}
}
static HashMap<String, String> getCommonHeader() {
HashMap<String, String> headers = new HashMap<>();
headers.put(Headers.CONTENT_LENGTH, String.valueOf(0));
headers.put(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM, TUNNEL_DATE_TRANSFORM_VERSION);
headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(TunnelConstants.VERSION));
return headers;
}
public static void checkBlockVersion(long blockVersion) throws TunnelException {
if (blockVersion <= 0) {
throw new TunnelException("Block version should be a positive integer.");
}
}
}