// com.aliyun.odps.tunnel.InstanceTunnel Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.tunnel;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.GeneralConfiguration;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.tunnel.io.CompressOption;
import com.aliyun.odps.tunnel.io.TunnelRecordReader;
import com.aliyun.odps.utils.StringUtils;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
public class InstanceTunnel {
// Tunnel configuration created from the Odps handle in the constructor; every DownloadSession reads it.
private Configuration config;
/**
* 构造此类对象
*
* @param odps
* {@link Odps}
*/
public InstanceTunnel(Odps odps) {
this.config = new Configuration(odps);
}
/**
 * Returns the configuration held by this tunnel.
 *
 * @return the tunnel configuration, typed as {@link GeneralConfiguration}
 */
public GeneralConfiguration getConfig() {
  return this.config;
}
/**
 * Creates a download session on an instance.
 *
 * <p>Illegal cases:
 * <ol>
 *   <li>the task is not a SQLTask</li>
 *   <li>the SQL is not a SELECT statement</li>
 *   <li>the task is not in Success state</li>
 * </ol>
 *
 * @param projectName
 *     project name
 * @param instanceID
 *     instance ID
 * @return {@link InstanceTunnel.DownloadSession}
 * @throws TunnelException
 *     if constructing the session fails
 */
public InstanceTunnel.DownloadSession createDownloadSession(String projectName, String instanceID)
    throws TunnelException {
  // No existing download ID is supplied, so the session constructor receives null.
  final String downloadId = null;
  return new DownloadSession(projectName, instanceID, downloadId);
}
/**
 * Creates a download session on an instance, with an explicit result-limit flag.
 *
 * <p>Illegal cases:
 * <ol>
 *   <li>the task is not a SQLTask</li>
 *   <li>the SQL is not a SELECT statement</li>
 *   <li>the task is not in Success state</li>
 * </ol>
 *
 * @param projectName
 *     project name
 * @param instanceID
 *     instance ID
 * @param limitEnabled
 *     passed through to the session; when {@code true} the session is created with the
 *     result limit enabled
 * @return {@link InstanceTunnel.DownloadSession}
 * @throws TunnelException
 *     if constructing the session fails
 */
public InstanceTunnel.DownloadSession createDownloadSession(String projectName, String instanceID, boolean limitEnabled)
    throws TunnelException {
  // No existing download ID is supplied, so the session constructor receives null.
  final String downloadId = null;
  return new DownloadSession(projectName, instanceID, downloadId, limitEnabled);
}
/**
 * Creates a long-polling download session on an instance.
 *
 * <p>Illegal cases:
 * <ol>
 *   <li>the task is not a SQLTask</li>
 *   <li>the SQL is not a SELECT statement</li>
 *   <li>the task is not in Success state</li>
 * </ol>
 *
 * @param projectName
 *     project name
 * @param instanceID
 *     instance ID
 * @param taskName
 *     SqlRtTask task name
 * @param queryId
 *     SqlRtTask sub-query ID
 * @param limitEnabled
 *     whether to apply the project's READ_TABLE_MAX_ROW setting; when enabled, at most
 *     10000 records are returned from this session; when disabled there is no limit, but
 *     SELECT permission is checked
 * @return {@link InstanceTunnel.DownloadSession}
 * @throws TunnelException
 *     if constructing the session fails
 */
public InstanceTunnel.DownloadSession createDirectDownloadSession(String projectName, String instanceID, String taskName, int queryId, boolean limitEnabled)
    throws TunnelException {
  // The flag is forwarded as-is; branching on it only to re-pass true/false was redundant.
  return new InstanceTunnel.DownloadSession(projectName, instanceID, limitEnabled, taskName, queryId);
}
/**
 * Creates a long-polling download session on an instance.
 * This overload always enables the project's READ_TABLE_MAX_ROW limit, so at most
 * 10000 records are returned.
 *
 * <p>Illegal cases:
 * <ol>
 *   <li>the task is not a SQLTask</li>
 *   <li>the SQL is not a SELECT statement</li>
 *   <li>the task is not in Success state</li>
 * </ol>
 *
 * @param projectName
 *     project name
 * @param instanceID
 *     instance ID
 * @param taskName
 *     SqlRtTask task name
 * @param queryId
 *     SqlRtTask sub-query ID
 * @return {@link InstanceTunnel.DownloadSession}
 * @throws TunnelException
 *     if constructing the session fails
 * @deprecated prefer
 *     {@link #createDirectDownloadSession(String, String, String, int, boolean)}, which makes
 *     the limit flag explicit
 */
@Deprecated
public InstanceTunnel.DownloadSession createDirectDownloadSession(String projectName, String instanceID, String taskName, int queryId)
    throws TunnelException {
  final boolean limitEnabled = true;
  return new DownloadSession(projectName, instanceID, limitEnabled, taskName, queryId);
}
/**
 * Builds the REST resource string for an instance via {@link ResourceBuilder}.
 *
 * @param projectName
 *     project name
 * @param instanceID
 *     instance ID
 * @return the instance resource string
 */
private String getResource(String projectName, String instanceID) {
  final String resource = ResourceBuilder.buildInstanceResource(projectName, instanceID);
  return resource;
}
/**
 * Sets the TunnelServer address.
 *
 * <p>When no TunnelServer address is set, one is selected automatically.
 *
 * @param endpoint
 *     the TunnelServer address; must be parseable as a {@link URI}
 * @throws IllegalArgumentException
 *     if {@code endpoint} is not a syntactically valid URI; the original
 *     {@link URISyntaxException} is attached as the cause
 */
public void setEndpoint(String endpoint) {
  try {
    URI u = new URI(endpoint);
    config.setEndpoint(u);
  } catch (URISyntaxException e) {
    // Keep the parse failure as the cause so callers can see what was wrong with the input.
    throw new IllegalArgumentException("Invalid endpoint.", e);
  }
}
/**
 * Status of a download session.
 * <ul>
 *   <li>UNKNOWN - unknown</li>
 *   <li>NORMAL - normal</li>
 *   <li>CLOSED - closed</li>
 *   <li>EXPIRED - expired</li>
 *   <li>INITIATING, FAILED - not described by the original documentation; presumably
 *       "being initiated" and "failed" (TODO confirm against the service)</li>
 * </ul>
 */
public static enum DownloadStatus {
  UNKNOWN,
  NORMAL,
  CLOSED,
  EXPIRED,
  INITIATING,
  FAILED
}
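/**
 * A DownloadSession represents a session for downloading data from an ODPS instance; it is
 * normally created through {@link InstanceTunnel}.
 * The session ID is the unique identifier of the session and is available via {@link #getId()}.
 *
 * <p>The total number of records in the instance is available via {@link #getRecordCount()};
 * callers can use that total to start concurrent downloads.
 *
 * <p>A DownloadSession reads data by creating a {@link RecordReader}, which requires the start
 * position and the number of records to read. The HTTP request behind a RecordReader times out
 * after 300 seconds; after the timeout the service side closes it.
 */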
public class DownloadSession {
// ID of the instance this session downloads from; fixed at construction.
private final String instanceID;
// Download session ID: the constructor argument, or "DownloadID" from the server's JSON response.
private String id;
// Project name; also used to create the REST client.
private String projectName;
// Record count, read from "RecordCount" in the server's JSON response (or set via setRecordCount).
private long count;
// When true, initiate() adds the INSTANCE_TUNNEL_LIMIT_ENABLED request parameter.
private boolean limitEnabled;
// Result schema; replaced when the server's JSON response contains "Schema".
private TableSchema schema = new TableSchema();
// Session status, read from "Status" in the server's JSON response; UNKNOWN until then.
private DownloadStatus status = DownloadStatus.UNKNOWN;
// Configuration taken from the enclosing InstanceTunnel.
private Configuration conf;
// Set from the HEADER_ODPS_DATE_TRANSFORM response header; passed to readers via setTransform.
private boolean shouldTransform = false;
// REST client obtained from conf.newRestClient(projectName).
private RestClient tunnelServiceClient;
// Task name; only assigned by the long-polling constructor, null otherwise.
private String taskName;
// Sub-query ID; -1 means none is sent with the initiate request.
private int queryId = -1;
// True only for sessions built through the long-polling constructor.
private boolean isLongPolling = false;
// Quota name, read from "QuotaName" in the server's JSON response.
private String quotaName = "";
/**
 * Constructs a {@link DownloadSession} for the given download ID, with the result limit
 * disabled.
 *
 * <p>Delegates to the four-argument constructor with {@code limitEnabled = false}; that
 * constructor initiates a new session when {@code downloadId} is {@code null} and reloads
 * the existing one otherwise.
 *
 * @param projectName
 *     name of the project that owns the data to download
 * @param instanceID
 *     ID of the instance to download from
 * @param downloadId
 *     unique identifier of the download, or {@code null} to start a new session
 * @throws TunnelException
 *     if initiating or reloading the session fails
 */
public DownloadSession(String projectName, String instanceID, String downloadId) throws TunnelException {
this(projectName, instanceID, downloadId, false);
}
/**
 * Constructs a {@link DownloadSession} and immediately contacts the tunnel service:
 * a {@code null} download ID initiates a new session, a non-null one reloads it.
 *
 * @param projectName
 *     name of the project that owns the data to download
 * @param instanceID
 *     ID of the instance to download from
 * @param downloadId
 *     unique identifier of the download, or {@code null} to start a new session
 * @param limitEnabled
 *     limited to 1w (10,000) results
 * @throws TunnelException
 *     if initiating or reloading the session fails
 */
private DownloadSession(String projectName, String instanceID, String downloadId, boolean limitEnabled)
    throws TunnelException {
  this.projectName = projectName;
  this.instanceID = instanceID;
  this.id = downloadId;
  this.limitEnabled = limitEnabled;
  this.conf = InstanceTunnel.this.config;
  this.tunnelServiceClient = this.conf.newRestClient(projectName);
  if (this.id != null) {
    reload();
  } else {
    initiate();
  }
}
/**
 * Creates a session in long-polling mode. Unlike the other constructors, this one only
 * stores its arguments and creates the REST client; it issues no request itself.
 *
 * @param projectName
 *     name of the project that owns the data to download
 * @param instanceID
 *     ID of the instance to download from
 * @param limitEnabled
 *     limited to 1w (10,000) results
 * @param taskName
 *     task name in SqlRtTask mode
 * @param queryId
 *     sub-query ID of the SqlRtTask; -1 means the current query
 * @throws TunnelException
 *     declared by the signature; nothing in this body throws it directly
 */
private DownloadSession(String projectName, String instanceID, boolean limitEnabled, String taskName, int queryId)
    throws TunnelException {
  this.projectName = projectName;
  this.instanceID = instanceID;
  this.taskName = taskName;
  this.queryId = queryId;
  this.limitEnabled = limitEnabled;
  this.isLongPolling = true;
  this.conf = InstanceTunnel.this.config;
  this.tunnelServiceClient = this.conf.newRestClient(projectName);
}
/**
 * Opens a {@link RecordReader} for reading records, with {@code compress} set to false.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count) throws TunnelException,
    IOException {
  final boolean compress = false;
  return openRecordReader(start, count, compress);
}
/**
 * Opens a {@link RecordReader} for reading records with a size limit, with
 * {@code compress} set to false.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param sizeLimit
 *     size in bytes of the records to read;
 *     data beyond {@code count} is truncated;
 *     exceeding {@code sizeLimit} raises an exception directly
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, long sizeLimit) throws TunnelException,
    IOException {
  final boolean compress = false;
  return openRecordReader(start, count, sizeLimit, compress);
}
/**
 * Opens a {@link RecordReader} for reading records, passing no column list.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param compress
 *     whether to compress the transferred data; even when set, data is not compressed if
 *     the server does not support compression
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, boolean compress)
    throws TunnelException, IOException {
  // No column list is supplied by this overload.
  final List columns = null;
  return openRecordReader(start, count, compress, columns);
}
/**
 * Opens a {@link RecordReader} for reading records with a size limit, passing no column list.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param sizeLimit
 *     size in bytes of the records to read;
 *     data beyond {@code count} is truncated;
 *     exceeding {@code sizeLimit} raises an exception directly
 * @param compress
 *     whether to compress the transferred data; even when set, data is not compressed if
 *     the server does not support compression
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, long sizeLimit, boolean compress)
    throws TunnelException, IOException {
  // No column list is supplied by this overload.
  final List columns = null;
  return openRecordReader(start, count, sizeLimit, compress, columns);
}
/**
 * Opens a {@link RecordReader} for reading records with an explicit compression option,
 * passing no column list.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param compress
 *     compression option for the transfer; even when set, data is not compressed if the
 *     server does not support compression
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, CompressOption compress)
    throws TunnelException, IOException {
  // No column list is supplied by this overload.
  final List columns = null;
  return openRecordReader(start, count, compress, columns);
}
/**
 * Opens a {@link RecordReader} for reading the given columns.
 *
 * <p>When {@code compress} is true the configured compress option is used; otherwise an
 * {@code ODPS_RAW} option is built.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param compress
 *     whether to compress the transferred data; even when set, data is not compressed if
 *     the server does not support compression
 * @param columns
 *     columns to download (raw {@code List} in this source; presumably a list of
 *     {@code Column} - TODO confirm)
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, boolean compress,
    List columns) throws TunnelException, IOException {
  final CompressOption option;
  if (compress) {
    option = conf.getCompressOption();
  } else {
    option = new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
  }
  return openRecordReader(start, count, option, columns);
}
/**
 * Opens a {@link RecordReader} for reading the given columns with a size limit.
 *
 * <p>When {@code compress} is true the configured compress option is used; otherwise an
 * {@code ODPS_RAW} option is built.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param sizeLimit
 *     size in bytes of the records to read;
 *     data beyond {@code count} is truncated;
 *     exceeding {@code sizeLimit} raises an exception directly
 * @param compress
 *     whether to compress the transferred data; even when set, data is not compressed if
 *     the server does not support compression
 * @param columns
 *     columns to download (raw {@code List} in this source; presumably a list of
 *     {@code Column} - TODO confirm)
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, long sizeLimit, boolean compress,
    List columns) throws TunnelException, IOException {
  final CompressOption option;
  if (compress) {
    option = conf.getCompressOption();
  } else {
    option = new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
  }
  return openRecordReader(start, count, sizeLimit, option, columns);
}
/**
 * Opens a {@link RecordReader} for reading the given columns with an explicit compression
 * option. The reader is created against this session's REST client and inherits the
 * session's date-transform flag.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param compress
 *     compression option for the transfer; even when set, data is not compressed if the
 *     server does not support compression
 * @param columns
 *     columns to download
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, CompressOption compress,
    List columns)
    throws TunnelException, IOException {
  final TunnelRecordReader recordReader = new TunnelRecordReader(
      start, count, columns, compress, this.tunnelServiceClient, this);
  recordReader.setTransform(this.shouldTransform);
  return recordReader;
}
/**
 * Opens a {@link RecordReader} for reading the given columns with a size limit and an
 * explicit compression option. The reader is created against this session's REST client
 * and inherits the session's date-transform flag.
 *
 * @param start
 *     start position of the records to read
 * @param count
 *     number of records to read
 * @param sizeLimit
 *     size in bytes of the records to read;
 *     data beyond {@code count} is truncated;
 *     exceeding {@code sizeLimit} raises an exception directly
 * @param compress
 *     compression option for the transfer; even when set, data is not compressed if the
 *     server does not support compression
 * @param columns
 *     columns to download
 * @throws TunnelException
 * @throws IOException
 */
public TunnelRecordReader openRecordReader(long start, long count, long sizeLimit, CompressOption compress,
    List columns)
    throws TunnelException, IOException {
  final TunnelRecordReader recordReader = new TunnelRecordReader(
      start, count, sizeLimit, columns, compress, this.tunnelServiceClient, this);
  recordReader.setTransform(this.shouldTransform);
  return recordReader;
}
/**
 * Initiates a new download session by POSTing to the instance resource.
 *
 * <p>Request parameters: the DOWNLOADS marker, the quota name when the configuration has
 * one, the limit marker when {@code limitEnabled} is set, and - when a task name is
 * present - the CACHED marker, the task name and (unless it is -1) the query ID.
 *
 * <p>On an OK response the session fields are loaded from the JSON body and the
 * date-transform flag is read from the response header. On any other response a
 * {@link TunnelException} built from the response body is thrown, carrying the request ID.
 *
 * @throws TunnelException
 *     if the server rejects the request, the connection fails, or the response cannot be
 *     parsed
 */
private void initiate() throws TunnelException {
  // Parameterized types instead of raw HashMap/List: keys and values are all strings.
  HashMap<String, String> params = new HashMap<>();
  HashMap<String, String> headers = TableTunnel.getCommonHeader();

  List<String> tags = this.conf.getTags();
  if (tags != null) {
    headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
  }

  // Marker parameters are sent with a null value.
  params.put(TunnelConstants.DOWNLOADS, null);

  if (this.conf.availableQuotaName()) {
    params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
  }

  if (limitEnabled) {
    params.put(TunnelConstants.INSTANCE_TUNNEL_LIMIT_ENABLED, null);
  }

  if (taskName != null) {
    params.put(TunnelConstants.CACHED, null);
    params.put(TunnelConstants.TASK_NAME, taskName);
    if (queryId != -1) {
      params.put(TunnelConstants.QUERY_ID, String.valueOf(queryId));
    }
  }

  Connection conn = null;
  try {
    conn = tunnelServiceClient.connect(getResource(), "POST", params, headers);
    Response resp = conn.getResponse();

    if (resp.isOK()) {
      loadFromJson(conn.getInputStream());
      shouldTransform =
          StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
    } else {
      TunnelException e = new TunnelException(conn.getInputStream());
      e.setRequestId(resp.getHeader(HttpHeaders.HEADER_ODPS_REQUEST_ID));
      throw e;
    }
  } catch (IOException e) {
    throw new TunnelException("Failed to create download session with tunnel endpoint "
                              + tunnelServiceClient.getEndpoint(), e);
  } catch (TunnelException e) {
    // Rethrown unchanged so the OdpsException handler below does not re-wrap it.
    throw e;
  } catch (OdpsException e) {
    throw new TunnelException(e.getMessage(), e);
  } finally {
    if (conn != null) {
      try {
        conn.disconnect();
      } catch (IOException ignored) {
        // Best-effort cleanup: a failed disconnect must not mask the primary outcome.
      }
    }
  }
}
/**
 * Reloads the download session properties by GETting the instance resource with the
 * current download ID (and the quota name when the configuration has one).
 *
 * <p>On an OK response the session fields are loaded from the JSON body and the
 * date-transform flag is read from the response header. On any other response a
 * {@link TunnelException} built from the response body is thrown, carrying the request ID.
 *
 * @throws TunnelException
 *     if the server rejects the request, the connection fails, or the response cannot be
 *     parsed
 */
private void reload() throws TunnelException {
  // Parameterized types instead of raw HashMap/List: keys and values are all strings.
  HashMap<String, String> params = new HashMap<>();
  HashMap<String, String> headers = TableTunnel.getCommonHeader();

  List<String> tags = this.conf.getTags();
  if (tags != null) {
    headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS, String.join(",", tags));
  }

  params.put(TunnelConstants.DOWNLOADID, id);

  if (this.conf.availableQuotaName()) {
    params.put(TunnelConstants.PARAM_QUOTA_NAME, this.conf.getQuotaName());
  }

  Connection conn = null;
  try {
    conn = tunnelServiceClient.connect(getResource(), "GET", params, headers);
    Response resp = conn.getResponse();

    if (resp.isOK()) {
      loadFromJson(conn.getInputStream());
      shouldTransform =
          StringUtils.equals(resp.getHeader(HttpHeaders.HEADER_ODPS_DATE_TRANSFORM), "true");
    } else {
      TunnelException e = new TunnelException(conn.getInputStream());
      e.setRequestId(resp.getHeader(HttpHeaders.HEADER_ODPS_REQUEST_ID));
      throw e;
    }
  } catch (IOException e) {
    throw new TunnelException(e.getMessage(), e);
  } catch (TunnelException e) {
    // Rethrown unchanged so the OdpsException handler below does not re-wrap it.
    throw e;
  } catch (OdpsException e) {
    throw new TunnelException(e.getMessage(), e);
  } finally {
    if (conn != null) {
      try {
        conn.disconnect();
      } catch (IOException ignored) {
        // Best-effort cleanup: a failed disconnect must not mask the primary outcome.
      }
    }
  }
}
/**
 * Returns the table schema corresponding to the data.
 *
 * @return {@link TableSchema}
 */
public TableSchema getSchema() {
  return schema;
}
/**
 * Replaces the schema stored on this session object.
 *
 * @param schema
 *     the schema to store; it is what {@link #getSchema()} returns afterwards
 */
public void setSchema(TableSchema schema) {
this.schema = schema;
}
/**
 * Returns the total number of records available for download.
 *
 * @return the record count held by this session
 */
public long getRecordCount() {
  return count;
}
/**
 * Replaces the record count stored on this session object.
 *
 * @param count
 *     the record count to store; it is what {@link #getRecordCount()} returns afterwards
 */
public void setRecordCount(long count) {
this.count = count;
}
/**
 * Returns the project name.
 *
 * @return the project name this session was created with
 */
public String getProjectName() {
  return projectName;
}
/**
 * Returns the instance ID.
 *
 * @return the instance ID this session was created with
 */
public String getInstanceID() {
  return instanceID;
}
/**
 * Returns the SQL_RT_TASK name.
 *
 * @return the task name, or {@code null} when the session was not created in long-polling
 *     mode
 */
public String getTaskName() {
  return taskName;
}
/**
 * Returns the sub-query ID of the SQL_RT_TASK.
 *
 * @return the query ID; -1 unless one was supplied at construction
 */
public int getQueryId() {
  return queryId;
}
/**
 * Tells whether this session was created in long-polling mode.
 *
 * @return {@code true} only for sessions built by the long-polling constructor
 */
public boolean getIsLongPolling() {
  return isLongPolling;
}
/**
 * Returns the configuration this session took from its enclosing {@link InstanceTunnel}.
 *
 * @return the session's {@link Configuration}
 */
public Configuration getConfig() {
  return this.conf;
}
/**
 * Returns the session ID.
 *
 * @return the download session ID
 */
public String getId() {
  return id;
}
/**
 * Returns the session status after reloading the session from the tunnel service.
 *
 * @return the status as of the reload
 * @throws TunnelException
 *     if the reload fails
 * @throws IOException
 *     declared by the signature; nothing in this body throws it directly
 */
public DownloadStatus getStatus() throws TunnelException, IOException {
  // Every call refreshes the session state before answering.
  reload();
  return this.status;
}
/**
 * Tells whether the result limit was enabled for this session.
 *
 * @return the {@code limitEnabled} flag given at construction
 */
public boolean getEnableLimit() {
  return this.limitEnabled;
}
/**
 * Builds the REST resource string for this session's project and instance by delegating to
 * the enclosing tunnel.
 *
 * @return the instance resource string
 */
private String getResource() {
  final String resource = InstanceTunnel.this.getResource(projectName, instanceID);
  return resource;
}
/**
 * Reads a JSON object from the stream and copies the fields it contains onto this session:
 * "DownloadID", "Status", "RecordCount", "Schema" and "QuotaName". Fields absent from the
 * JSON leave the corresponding session field untouched.
 *
 * @param is
 *     stream holding the JSON response body
 * @throws TunnelException
 *     if the stream cannot be read, the content is not a JSON object, or a field has an
 *     unexpected value (for example a status name that is not a {@link DownloadStatus})
 */
private void loadFromJson(InputStream is) throws TunnelException {
  try {
    String json = IOUtils.readStreamAsString(is);
    JsonObject tree = new JsonParser().parse(json).getAsJsonObject();

    // session id
    if (tree.has("DownloadID")) {
      id = tree.get("DownloadID").getAsString();
    }

    // status
    if (tree.has("Status")) {
      // Locale.ROOT keeps the upper-casing locale-independent; with the default locale
      // (e.g. Turkish, where 'i' maps to dotted 'İ') valueOf would reject valid names.
      String downloadStatus = tree.get("Status").getAsString().toUpperCase(Locale.ROOT);
      status = DownloadStatus.valueOf(downloadStatus);
    }

    // record count
    if (tree.has("RecordCount")) {
      count = tree.get("RecordCount").getAsLong();
    }

    // schema
    if (tree.has("Schema")) {
      JsonObject tunnelTableSchema = tree.get("Schema").getAsJsonObject();
      schema = new TunnelTableSchema(tunnelTableSchema);
    }

    // quota name
    if (tree.has("QuotaName")) {
      quotaName = tree.get("QuotaName").getAsString();
    }
  } catch (Exception e) {
    // Any read or parse failure is reported uniformly, with the original exception as cause.
    throw new TunnelException("Invalid json content.", e);
  }
}
/**
 * Returns the quota name reported for this session.
 *
 * @return the quota name; an empty string until a server response supplies "QuotaName"
 */
public String getQuotaName() {
  return this.quotaName;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy