Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.aliyun.odps.Table Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner;
import java.util.stream.Collectors;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import com.aliyun.odps.Partition.PartitionModel;
import com.aliyun.odps.Partition.PartitionSpecModel;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.data.ArrowStreamRecordReader;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.rest.SimpleXmlUtils;
import com.aliyun.odps.simpleframework.xml.Attribute;
import com.aliyun.odps.simpleframework.xml.Element;
import com.aliyun.odps.simpleframework.xml.ElementList;
import com.aliyun.odps.simpleframework.xml.Root;
import com.aliyun.odps.simpleframework.xml.Text;
import com.aliyun.odps.simpleframework.xml.convert.Convert;
import com.aliyun.odps.simpleframework.xml.convert.Converter;
import com.aliyun.odps.simpleframework.xml.stream.InputNode;
import com.aliyun.odps.simpleframework.xml.stream.OutputNode;
import com.aliyun.odps.table.StreamIdentifier;
import com.aliyun.odps.table.TableIdentifier;
import com.aliyun.odps.task.SQLTask;
import com.aliyun.odps.tunnel.Configuration;
import com.aliyun.odps.tunnel.TableTunnel;
import com.aliyun.odps.type.TypeInfo;
import com.aliyun.odps.utils.ColumnUtils;
import com.aliyun.odps.utils.NameSpaceSchemaUtils;
import com.aliyun.odps.utils.OdpsCommonUtils;
import com.aliyun.odps.utils.StringUtils;
import com.aliyun.odps.utils.TagUtils;
import com.aliyun.odps.utils.TagUtils.OBJECT_TYPE;
import com.aliyun.odps.utils.TagUtils.OPERATION_TYPE;
import com.aliyun.odps.utils.TagUtils.ObjectRef;
import com.aliyun.odps.utils.TagUtils.ObjectTagInfo;
import com.aliyun.odps.utils.TagUtils.SetObjectTagInput;
import com.aliyun.odps.utils.TagUtils.SimpleTag;
import com.aliyun.odps.utils.TagUtils.TagRef;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.reflect.TypeToken;
/**
* Table表示ODPS中的表
*/
public class Table extends LazyLoad {
public enum TableType {
/**
* Regular table managed by ODPS
*/
MANAGED_TABLE,
/**
* Virtual view
*/
VIRTUAL_VIEW,
/**
* External table
*/
EXTERNAL_TABLE,
/**
* Materialized view
*/
MATERIALIZED_VIEW
}
/**
* Convert {@link TableType} to/from {@link String}
*/
public static class TableTypeConverter implements Converter {
@Override
public TableType read(InputNode node) throws Exception {
String value = node.getValue();
if (value == null) {
return null;
} else {
try {
return TableType.valueOf(value);
} catch (IllegalArgumentException e) {
// If there is a new table type which cannot be recognized
return null;
}
}
}
@Override
public void write(OutputNode node, TableType value) throws Exception {
// The server side does not accept this field
node.remove();
}
}
@Root(name = "Table", strict = false)
static class TableModel {
@Root(name = "Schema", strict = false)
static class Schema {
@Text(required = false)
String content;
}
@Element(name = "Name", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String name;
@Element(name = "TableId", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String ID;
@Attribute(name = "format", required = false)
private String format;
@Element(name = "Schema", required = false)
private Schema schema;
@Element(name = "Comment", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String comment;
@Element(name = "Owner", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String owner;
@Element(name = "Project", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String projectName;
@Element(name = "SchemaName", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String schemaName;
@Element(name = "TableLabel", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String tableLabel;
@Element(name = "CryptoAlgo", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String cryptoAlgoName;
@Element(name = "TableMaskInfo", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
String tableMaskInfo;
@Element(name = "CreationTime", required = false)
@Convert(SimpleXmlUtils.DateConverter.class)
Date createdTime;
@Element(name = "LastModifiedTime", required = false)
@Convert(SimpleXmlUtils.DateConverter.class)
Date lastModifiedTime;
@Element(name = "LastAccessTime", required = false)
@Convert(SimpleXmlUtils.DateConverter.class)
Date lastAccessTime;
@Element(name = "Type", required = false)
@Convert(TableTypeConverter.class)
TableType type;
Date lastMetaModifiedTime;
Date lastMajorCompactTime;
boolean isVirtualView;
boolean isMaterializedViewRewriteEnabled;
boolean isMaterializedViewOutdated;
boolean isExternalTable;
long life = -1L;
long hubLifecycle = -1L;
String viewText;
String viewExpandedText;
long size;
long recordNum = -1L;
boolean isArchived;
long physicalSize;
long fileNum;
boolean isTransactional;
// reserved json string in extended info
String reserved;
Shard shard;
// for external table extended info
String schemaVersion;
String storageHandler;
String location;
String resources;
Map serDeProperties;
// for clustered info
ClusterInfo clusterInfo;
// for table extended labels
List tableExtendedLabels;
Map mvProperties;
List> refreshHistory;
boolean hasRowAccessPolicy;
List primaryKey;
int acidDataRetainHours;
StorageTierInfo storageTierInfo;
TableLifecycleConfig tableLifecycleConfig;
List columnMaskInfoList;
long cdcSize = -1;
long cdcRecordNum = -1;
long cdcLatestVersion = -1;
Date cdcLatestTimestamp;
}
public static class ColumnMaskInfo {
private final String name;
private final List policyNameList;
public String getName() {
return name;
}
public List getPolicyNameList() {
return policyNameList;
}
ColumnMaskInfo(String name, List policyNameList) {
this.name = name;
this.policyNameList = policyNameList;
}
}
/**
* ClusterInfo is used to express the Shuffle and Sort properties of the table when creating a clustered table.
*/
public static class ClusterInfo {
enum ClusterType {
HASH,
RANGE
}
long bucketNum = -1;
ClusterType clusterType;
List clusterCols;
List sortCols;
ClusterInfo() {}
/**
* @param clusterType Clustering tables are divided into two types: Hash clustering tables and Range clustering tables.
* @param clusterCols Specify cluster by. MaxCompute will perform Hash/Range operations on the specified columns and distribute them to various Buckets.
* @param sortCols Specify the sorting method of fields in the Bucket. It is recommended that sorted by and clustered by be consistent to achieve better performance.
* @param bucketNum Specify the number of hash buckets. Required when using Hash Cluster.
*/
public ClusterInfo(ClusterType clusterType, List clusterCols, List sortCols, long bucketNum) {
this.clusterType = clusterType;
this.clusterCols = clusterCols;
this.sortCols = sortCols;
this.bucketNum = bucketNum;
}
public String getClusterType() {
return clusterType.name();
}
public long getBucketNum() {
return bucketNum;
}
public List getClusterCols() {
return clusterCols;
}
public List getSortCols() {
return sortCols;
}
@Override
public String toString() {
StringBuilder stringBuilder = new StringBuilder();
if (clusterType == ClusterType.HASH) {
stringBuilder.append(" CLUSTERED BY ");
} else {
stringBuilder.append(" RANGE CLUSTERED BY ");
}
stringBuilder.append("(").append(
clusterCols.stream().map(OdpsCommonUtils::quoteRef).collect(Collectors.joining(", ")))
.append(")");
if (sortCols != null && sortCols.size() > 0) {
stringBuilder.append(" SORTED BY ").append("(")
.append(sortCols.stream().map(Object::toString).collect(Collectors.joining(", ")))
.append(")");
}
if (bucketNum > 0) {
stringBuilder.append(" INTO ").append(bucketNum).append(" BUCKETS");
}
return stringBuilder.toString();
}
}
/**
* Used in ClusterInfo to specify the sorting method of fields in the Bucket.
*/
public static class SortColumn {
enum Order {
ASC,
DESC
}
private String name;
private Order order;
SortColumn(String name, String order) {
this.name = name;
this.order = Order.valueOf(order.toUpperCase());
}
public SortColumn(String name, Order order) {
this.name = name;
this.order = order;
}
public String getName() {
return name;
}
public String getOrder() {
return order.name();
}
@Override
public String toString() {
return String.format("%s %s", OdpsCommonUtils.quoteRef(name), order);
}
}
private TableModel model;
private TableSchema tableSchema;
private ObjectTagInfo tableTagInfo;
private RestClient client;
private boolean isExtendInfoLoaded;
private boolean isShardInfoLoaded;
private Odps odps;
Table(TableModel model, String project, String schemaName, Odps odps) {
this.model = model;
this.model.projectName = project;
this.model.schemaName = schemaName;
this.odps = odps;
this.client = odps.getRestClient();
this.isExtendInfoLoaded = false;
this.isShardInfoLoaded = false;
}
@Override
public void reload() throws OdpsException {
String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
Map params = initParamsWithSchema();
reload(client.request(TableModel.class, resource, "GET", params));
}
public void reload(TableModel model) throws OdpsException {
this.model = model;
if (model.schema != null) {
tableSchema = loadSchemaFromJson(model.schema.content);
}
setLoaded(true);
}
private void reloadTagInfo() {
String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
// Convert the OdpsException to a ReloadException the keep the consistency of the getter's
// method signature.
try {
tableTagInfo = TagUtils.getObjectTagInfo(resource, null, client);
} catch (OdpsException e) {
throw new ReloadException(e);
}
}
public void reloadExtendInfo() {
TableModel response;
try {
Map params = initParamsWithSchema();
params.put("extended", null);
String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
response = client.request(TableModel.class, resource, "GET", params);
} catch (OdpsException e) {
throw new ReloadException(e.getMessage(), e);
}
loadSchemaFromJson(response.schema.content);
}
private void lazyLoadExtendInfo() {
if (!this.isExtendInfoLoaded) {
reloadExtendInfo();
this.isExtendInfoLoaded = true;
}
}
/**
* 获取表名
*
* @return 表名称
*/
public String getName() {
return model.name;
}
/**
* 获取注释
*
* @return 表的相关注释信息
*/
public String getComment() {
if (model.comment == null) {
lazyLoad();
}
return model.comment;
}
/**
* 获取表所属用户
*
* @return 所属用户
*/
public String getOwner() {
if (model.owner == null) {
lazyLoad();
}
return model.owner;
}
/**
* @return 表类型
*/
public TableType getType() {
if (model.type == null) {
lazyLoad();
}
return model.type;
}
/**
* 获取创建时间
*
* @return 创建时间
*/
public Date getCreatedTime() {
if (model.createdTime == null) {
lazyLoad();
}
return model.createdTime;
}
public String getTableLabel() {
if (model.tableLabel == null) {
lazyLoad();
}
return model.tableLabel;
}
public List getTableExtendedLabels() {
if (model.tableExtendedLabels == null) {
lazyLoad();
}
return model.tableExtendedLabels;
}
public String getSchemaVersion() {
// no cache
reloadExtendInfo();
return model.schemaVersion;
}
/**
* 获取分层存储的相关信息,包括类型,大小,修改时间等等
*
* @return StorageTierInfo 分层存储信息
*/
public StorageTierInfo getStorageTierInfo() {
if (isPartitioned()) {
throw new UnsupportedOperationException(
"Partitioned table does not support get storage tier info, use Partition.getStorageTierInfo() instead.");
}
if (model.storageTierInfo == null) {
reloadExtendInfo();
isExtendInfoLoaded = true;
}
return model.storageTierInfo;
}
/**
* 获取分层存储的lifecycle配置
*
* @return TableLifecycleConfig
* */
public TableLifecycleConfig getTableLifecycleConfig() {
if (model.tableLifecycleConfig == null) {
reloadExtendInfo();
isExtendInfoLoaded = true;
}
return model.tableLifecycleConfig;
}
/**
* Get {@link Tag}(s) attached to this table.
*
* @return list of {@link Tag}
*/
public List getTags() {
reloadTagInfo();
return TagUtils.getTags(tableTagInfo, odps);
}
/**
* Get {@link Tag}(s) attached to a column of this table.
*
* @return list of {@link Tag}
*/
public List getTags(String columnName) {
reloadTagInfo();
// Make sure specified column exists
Objects.requireNonNull(columnName);
TagUtils.validateTaggingColumn(getSchema(), Collections.singletonList(columnName));
return TagUtils.getTags(tableTagInfo, columnName, odps);
}
/**
* Get simple tags attached to this table.
*
* @return a map from category to key value pairs
*/
public Map> getSimpleTags() {
reloadTagInfo();
return TagUtils.getSimpleTags(tableTagInfo);
}
/**
* Get simple tags attached to a column of this table.
*
* @param columnName column name.
* @return a map from category to key value pairs.
*/
public Map> getSimpleTags(String columnName) {
reloadTagInfo();
// Make sure specified column exists
Objects.requireNonNull(columnName);
TagUtils.validateTaggingColumn(getSchema(), Collections.singletonList(columnName));
return TagUtils.getSimpleTags(tableTagInfo, columnName);
}
/**
* Attach a {@link Tag} to this table. The table and tag should be in a same project.
*
* @param tag tag to attach
*/
public void addTag(Tag tag) throws OdpsException {
addTag(tag, null);
}
/**
* Attach a {@link Tag} to this table. The table and tag should be in a same project.
*
* @param tag tag to attach
* @param columnNames column names, could be null.
*/
public void addTag(Tag tag, List columnNames) throws OdpsException {
ObjectRef objectRef = new ObjectRef(
OBJECT_TYPE.TABLE,
model.projectName,
model.name,
columnNames);
TagRef tagRef = new TagRef(tag.getClassification(), tag.getName());
SetObjectTagInput setObjectTagInput =
new SetObjectTagInput(OPERATION_TYPE.SET, objectRef, tagRef, null);
TagUtils.updateTagInternal(setObjectTagInput, null, client);
}
/**
* Attach a simple tag to this table. A simple tag is a triad consisted of category, tag
* key, and tag value.
*
* @param category simple tag category, could be nul.
* @param key simple tag key, cannot be null.
* @param value simple tag value, cannot be null.
*/
public void addSimpleTag(String category, String key, String value) throws OdpsException {
addSimpleTag(category, key, value, null);
}
/**
* Attach a simple tag to this table or some of its columns. A simple tag is a triad consisted of
* category, tag key, and tag value.
*
* @param category simple tag category, could be nul.
* @param key simple tag key, cannot be null.
* @param value simple tag value, cannot be null.
* @param columnNames column names, should not include any partition column, could be null.
*/
public void addSimpleTag(
String category,
String key,
String value,
List columnNames) throws OdpsException {
ObjectRef objectRef = new ObjectRef(
OBJECT_TYPE.TABLE,
model.projectName,
model.name,
columnNames);
SimpleTag simpleTag = new SimpleTag(category, Collections.singletonMap(key, value));
SetObjectTagInput setObjectTagInput =
new SetObjectTagInput(OPERATION_TYPE.SET, objectRef, null, simpleTag);
TagUtils.updateTagInternal(setObjectTagInput, null, client);
}
/**
* Remove a {@link Tag}.
*
* @param tag tag to remove.
*/
public void removeTag(Tag tag) throws OdpsException {
removeTag(tag, null);
}
/**
* Remove a {@link Tag} from columns.
*
* @param tag tag to remove.
* @param columnNames column names, should not include any partition column, could be null.
*/
public void removeTag(Tag tag, List columnNames) throws OdpsException {
Objects.requireNonNull(tag);
// Make sure column names are valid
TagUtils.validateTaggingColumn(getSchema(), columnNames);
ObjectRef objectRef = new ObjectRef(
OBJECT_TYPE.TABLE,
model.projectName,
model.name,
columnNames);
TagRef tagRef = new TagRef(tag.getClassification(), tag.getName());
SetObjectTagInput setObjectTagInput =
new SetObjectTagInput(OPERATION_TYPE.UNSET, objectRef, tagRef, null);
TagUtils.updateTagInternal(setObjectTagInput, null, client);
}
/**
* Remove a simple tag. A simple tag is a triad consisted of category, tag key, and tag value.
*
* @param category category.
* @param key key.
* @param value value.
* @throws OdpsException
*/
public void removeSimpleTag(String category, String key, String value) throws OdpsException {
removeSimpleTag(category, key, value, null);
}
/**
* Remove a simple tag from columns. A simple tag is a triad consisted of category, tag key, and
* tag value.
*
* @param category category.
* @param key key.
* @param value value.
* @param columnNames column names, should not include any partition column, could be null.
* @throws OdpsException
*/
public void removeSimpleTag(
String category,
String key,
String value,
List columnNames) throws OdpsException {
Objects.requireNonNull(category);
Objects.requireNonNull(key);
Objects.requireNonNull(value);
// Make sure column names are valid
TagUtils.validateTaggingColumn(getSchema(), columnNames);
ObjectRef objectRef = new ObjectRef(
OBJECT_TYPE.TABLE,
model.projectName,
model.name,
columnNames);
SimpleTag simpleTag = new SimpleTag(category, Collections.singletonMap(key, value));
SetObjectTagInput setObjectTagInput =
new SetObjectTagInput(OPERATION_TYPE.UNSET, objectRef, null, simpleTag);
TagUtils.updateTagInternal(setObjectTagInput, null, client);
}
/**
* 获取表 ID
*
* @return tableId
*/
public String getTableID() {
if (model.ID == null) {
lazyLoad();
}
return model.ID;
}
/**
* 获取表加密算法名称
*
* @return 算法名称
*/
public String getCryptoAlgoName() {
if (model.cryptoAlgoName == null) {
lazyLoad();
}
return model.cryptoAlgoName;
}
public String getMaxExtendedLabel() {
List extendedLabels = new ArrayList();
if (getTableExtendedLabels() != null) {
extendedLabels.addAll(getTableExtendedLabels());
}
for (Column column : tableSchema.getColumns()) {
if (column.getExtendedlabels() != null) {
extendedLabels.addAll(column.getExtendedlabels());
}
}
return calculateMaxLabel(extendedLabels);
}
/**
* 获取最高的label级别
* Label的定义分两部分:
* 1. 业务分类:C,S,B
* 2. 数据等级:1,2,3,4
*
* 二者是正交关系,即C1,C2,C3,C4,S1,S2,S3,S4,B1,B2,B3,B4。
*
* MaxLabel的语意:
* 1. MaxLabel=max(TableLabel, ColumnLabel), max(...)函数的语意由Label中的数据等级决定:4>3>2>1
* 2. MaxLabel显示:
* 当最高等级Label只出现一次时,MaxLabel=业务分类+数据等级,例如:B4, C3,S2
* 当最高等级Labe出现多次,但业务分类也唯一,MaxLabel=业务分类+数据等级,例如:B4, C3,S2
* 当最高等级Labe出现多次,且业务不唯一,MaxLabel=L+数据等级,例如:L4, L3
*
* @return 表示最高label,如果没有任何label的设置,返回空字符串
*/
public String getMaxLabel() {
List labels = new ArrayList();
labels.add(getTableLabel());
for (Column column : tableSchema.getColumns()) {
labels.add(column.getCategoryLabel());
}
return calculateMaxLabel(labels);
}
static String calculateMaxLabel(List labels) {
int maxLevel = 0;
char category = '-';
for (String label : labels) {
if (!StringUtils.isNullOrEmpty(label)) {
char num = label.charAt(label.length() - 1);
if (Character.isDigit(num) && num - '0' >= maxLevel) {
if (num - '0' > maxLevel) {
maxLevel = num - '0';
category = '-';
}
// label is only one num
if (label.length() == 1) {
category = 'L';
continue;
}
// handle one or more letter before the level number
for (int i = label.length() - 2; i >= 0; i--) {
char c = label.charAt(i);
if (Character.isLetter(c)) {
c = Character.toUpperCase(c);
if (category == '-') {
category = c;
} else if (category != c) {
category = 'L';
}
}
}
}
}
}
if (category == '-' && maxLevel == 0) {
return "";
}
if (category == '-') {
category = 'L';
}
return category + "" + maxLevel;
}
/**
* 获取最后修改时间
*
* @return 最后修改时间
*/
public Date getLastMetaModifiedTime() {
if (model.lastMetaModifiedTime == null) {
lazyLoad();
}
return model.lastMetaModifiedTime;
}
/**
* 获取表所属{@link Project}名称
*
* @return Project名称
*/
public String getProject() {
return model.projectName;
}
/**
* Get the schema name.
*
* @return Schema name.
*/
public String getSchemaName() {
return model.schemaName;
}
/**
* 判断表是否为虚拟视图
*
* @return 如果是虚拟视图返回true, 否则返回false
*/
public boolean isVirtualView() {
// Since reload should always work, if this object is loaded, return the value in the model.
// If this object is not loaded, but the table type is available, return it. And if this object
// is not loaded and its table type is unavailable, trigger a reloading.
// TODO: isVirtualView can be determined by both table type and schema
if (isLoaded()) {
return model.isVirtualView;
} else if (model.type != null) {
return TableType.VIRTUAL_VIEW.equals(model.type);
} else {
lazyLoad();
return model.isVirtualView;
}
}
/**
* Return if this table is a materialized view.
*/
public boolean isMaterializedView() {
lazyLoad();
return TableType.MATERIALIZED_VIEW.equals(model.type);
}
/**
* Return if this materialized view could be used by query rewrite.
*
* @throws IllegalStateException If this table is not a materialized view.
*/
public boolean isMaterializedViewRewriteEnabled() {
lazyLoad();
if (!isMaterializedView()) {
throw new IllegalStateException("Not a materialized view");
}
return model.isMaterializedViewRewriteEnabled;
}
/**
* Return if this materialized view is outdated.
*
* @throws IllegalStateException If this table is not a materialized view.
*/
public boolean isMaterializedViewOutdated() {
lazyLoadExtendInfo();
if (!isMaterializedView()) {
throw new IllegalStateException("Not a materialized view");
}
return model.isMaterializedViewOutdated;
}
/**
* 判断表是否为外部表
*
* @return 如果是外部表返回true, 否则返回false
*/
public boolean isExternalTable() {
// Since reload should always work, if this object is loaded, return the value in the model.
// If this object is not loaded, but the table type is available, return it. And if this object
// is not loaded and its table type is unavailable, trigger a reloading.
// TODO: isVirtualView can be determined by both table type and schema
if (isLoaded()) {
return model.isExternalTable;
} else if (model.type != null) {
return TableType.EXTERNAL_TABLE.equals(model.type);
} else {
lazyLoad();
return model.isExternalTable;
}
}
/**
* 获取视图的文本内容
*
* @return 文本内容
*/
public String getViewText() {
if (model.viewText == null) {
lazyLoad();
}
return model.viewText;
}
public String getViewExpandedText() {
if (model.viewExpandedText == null) {
lazyLoad();
}
return model.viewExpandedText;
}
// {
// "columnMaskInfoList": [
// {
// "name": "s1",
// "policyNameList": ["wuyue_mask_ba"]
// }
// ]
// }
public List getColumnMaskInfo() {
if (model.columnMaskInfoList != null) {
return model.columnMaskInfoList;
}
if (model.tableMaskInfo == null) {
lazyLoad();
}
if (StringUtils.isNullOrEmpty(model.tableMaskInfo)) {
return null;
}
Gson gson = new GsonBuilder().disableHtmlEscaping().create();
JsonArray jsonArray = JsonParser.parseString(model.tableMaskInfo)
.getAsJsonObject()
.getAsJsonArray("columnMaskInfoList");
model.columnMaskInfoList = new ArrayList<>(jsonArray.size());
for (int i = 0; i < jsonArray.size(); i++) {
model.columnMaskInfoList.add(gson.fromJson(jsonArray.get(i), ColumnMaskInfo.class));
}
return model.columnMaskInfoList;
}
/**
* 获取数据最后修改时间
*
* @return 最后修改时间
*/
public Date getLastDataModifiedTime() {
if (model.lastModifiedTime == null) {
lazyLoad();
}
return model.lastModifiedTime;
}
/**
* 获取数据最后访问时间
*
* @return 最后访问时间
*/
public Date getLastDataAccessTime() {
if (model.lastAccessTime == null) {
lazyLoad();
}
return model.lastAccessTime;
}
/**
* 获取内部存储大小,单位:Byte
*
* @return 存储大小
*/
public long getSize() {
lazyLoad();
return model.size;
}
/**
* 获取表的Record数, 若无准确数据,则返回-1
*
* @return 表的record数
*/
public long getRecordNum() {
lazyLoad();
return model.recordNum;
}
/**
* 获取表的生命周期值,单位:天
*
* @return 生命周期值
*/
public long getLife() {
lazyLoad();
return model.life;
}
/**
* 获取表的datahub生命周期值,单位:天
*
* @return datahub生命周期值
*/
public long getHubLifecycle() {
lazyLoad();
return model.hubLifecycle;
}
/**
* 获取表结构定义
*
* @return 表示表结构的{@link TableSchema}对象
*/
public TableSchema getSchema() {
if (tableSchema == null) {
lazyLoad();
}
return tableSchema;
}
public String getJsonSchema() {
if (model.schema == null || model.schema.content == null) {
lazyLoad();
}
return model.schema == null ? null : model.schema.content;
}
/**
* 查看表是否进行过归档操作
*
* @return 返回true表示进行过archive操作,false表示未进行过
*/
public boolean isArchived() {
lazyLoadExtendInfo();
return model.isArchived;
}
/**
* 查看表是否事务化
*
* @return 返回true表示进行过 transactional 操作,false表示未进行过
*/
public boolean isTransactional() {
lazyLoadExtendInfo();
return model.isTransactional;
}
/**
* 查看表所占磁盘的物理大小
*
* @return 物理大小
*/
public long getPhysicalSize() {
lazyLoadExtendInfo();
return model.physicalSize;
}
/**
* 返回表数据所占的盘古文件数
*
* @return 文件数
*/
public long getFileNum() {
lazyLoadExtendInfo();
return model.fileNum;
}
/**
* 返回外部表数据存储位置
*
* @return 外部表数据存储位置
*/
public String getLocation() {
if (model.location == null) {
lazyLoadExtendInfo();
}
return model.location;
}
/**
* 返回外部表数据处理句柄
*
* @return 外部表数据处理句柄
*/
public String getStorageHandler() {
if (model.storageHandler == null) {
lazyLoadExtendInfo();
}
return model.storageHandler;
}
/**
* 返回外部表使用的资源
*
* @return 外部表使用的资源
*/
public String getResources() {
if (model.resources == null) {
lazyLoadExtendInfo();
}
return model.resources;
}
/**
* 返回外部表序列化和反序列化属性
*
* @return 外部表序列化和反序列化属性
*/
public Map getSerDeProperties() {
if (model.serDeProperties == null) {
lazyLoadExtendInfo();
}
return model.serDeProperties;
}
/**
* 返回扩展信息的保留字段
* json 字符串
*
* @return 保留字段
*/
public String getReserved() {
if (model.reserved == null) {
lazyLoadExtendInfo();
}
return model.reserved;
}
/**
* 返回 cluster range 表的 cluster 信息
*
* @return cluster info
*/
public ClusterInfo getClusterInfo() {
if (model.clusterInfo == null) {
lazyLoadExtendInfo();
}
return model.clusterInfo;
}
/**
* 返回Shard
*
* @return shard 如果没有shard返回null
*/
public Shard getShard() {
if (model.shard == null) {
lazyLoad();
}
return model.shard;
}
/**
* @return 最后一次major compact的时间
*/
public Date getLastMajorCompactTime() {
if (model.lastMajorCompactTime == null) {
lazyLoadExtendInfo();
}
return model.lastMajorCompactTime;
}
/**
* 读取表内的数据
*
* @param limit 最多读取的记录行数
* @return {@link RecordReader}对象
* @throws OdpsException
*/
public RecordReader read(int limit) throws OdpsException {
return read(null, null, limit);
}
/**
* 读取表内的数据
* 读取数据时,最多返回 1w 条记录 (project read 默认值),若超过,数据将被截断。
*
* @param partition 表的分区{@link PartitionSpec}。如不指定分区可传入null。
* @param columns 所要读取的列名的列表。如果读取全表可传入null
* @param limit 最多读取的记录行数。
* @return {@link RecordReader}对象
* @throws OdpsException
*/
public RecordReader read(PartitionSpec partition, List columns, int limit)
throws OdpsException {
return read(partition, columns, limit, null);
}
/**
* 读取表内的数据
* 读取数据时,最多返回 1w 条记录 (project read 默认值),若超过,数据将被截断。
*
* @param partition 表的分区{@link PartitionSpec}。如不指定分区可传入null。
* @param columns 所要读取的列名的列表。如果读取全表可传入null
* @param limit 最多读取的记录行数。
* @param timezone 设置 datetime 类型数据的时区,新接口使用Java8无时区类型,指定Timezone无效。
* @return {@link RecordReader}对象
* @throws OdpsException
*/
public RecordReader read(PartitionSpec partition, List columns, int limit,
String timezone) throws OdpsException {
return read(partition, columns, limit, timezone, false);
}
/**
* 读取表内的数据
* 读取数据时,最多返回 1w 条记录 (project read 默认值),若超过,数据将被截断。
*
* @param partition 表的分区{@link PartitionSpec}。如不指定分区可传入null。
* @param columns 所要读取的列名的列表。如果读取全表可传入null
* @param limit 最多读取的记录行数。
* @param timezone 设置 datetime 类型数据的时区,新接口使用Java8无时区类型,故指定Timezone无效
* @param useLegacyMode 是否使用兼容旧Read接口模式,默认为false。老接口性能较差,不推荐。
* @see 《新老read接口区别》
* @return {@link ArrowStreamRecordReader}
* @throws OdpsException
*/
public RecordReader read(PartitionSpec partition, List columns, int limit,
String timezone, boolean useLegacyMode)
throws OdpsException {
return read(partition, columns, limit, timezone, useLegacyMode, null);
}
/**
* 读取表内的数据
* 读取数据时,最多返回 1w 条记录 (project read 默认值),若超过,数据将被截断。
*
* @param partition 表的分区{@link PartitionSpec}。如不指定分区可传入null。
* @param columns 所要读取的列名的列表。如果读取全表可传入null
* @param limit 最多读取的记录行数。
* @param timezone 设置 datetime 类型数据的时区,新接口使用Java8无时区类型,故指定Timezone无效
* @param useLegacyMode 是否使用兼容旧Read接口模式,默认为false。老接口性能较差,不推荐。
* @see 《新老read接口区别》
* @return {@link ArrowStreamRecordReader}
* @throws OdpsException
*/
public RecordReader read(PartitionSpec partition, List columns, int limit,
String timezone, boolean useLegacyMode, String tunnelEndpoint)
throws OdpsException {
if (limit <= 0) {
throw new OdpsException("ODPS-0420061: Invalid parameter in HTTP request - 'linenum' must be bigger than zero!");
}
TableSchema schema = getSchema();
// apply odps network settings to table tunnel
int readTimeout = odps.getRestClient().getReadTimeout();
int connectTimeout = odps.getRestClient().getConnectTimeout();
int socketRetryTimes = odps.getRestClient().getRetryTimes();
Configuration tunnelConfig = new Configuration(odps);
tunnelConfig.setSocketTimeout(readTimeout);
tunnelConfig.setSocketConnectTimeout(connectTimeout);
tunnelConfig.setSocketRetryTimes(socketRetryTimes);
TableTunnel tableTunnel = new TableTunnel(odps, tunnelConfig);
if (!StringUtils.isNullOrEmpty(tunnelEndpoint)) {
tableTunnel.setEndpoint(tunnelEndpoint);
}
String partitionName = null;
if (partition != null && !partition.keys().isEmpty()) {
partitionName = partition.toString().replace("'", "");
}
ArrowStreamReader arrowReader;
arrowReader = tableTunnel.preview(getProject(), getSchemaName(), model.name, partitionName,
(long) limit);
ArrowStreamRecordReader
recordReader = new ArrowStreamRecordReader(arrowReader, schema, columns);
if (!StringUtils.isNullOrEmpty(timezone)) {
try {
recordReader.setTimeZone(ZoneId.of(timezone));
} catch (Exception e) {
throw new OdpsException("invalid timezone name: " + timezone, e);
}
} else {
try {
String defaultTimezone = odps.projects().get(getProject()).getProperty("odps.sql.timezone");
recordReader.setTimeZone(ZoneId.of(defaultTimezone));
} catch (Exception ignored) {}
}
recordReader.setUseLegacyOutputFormat(useLegacyMode);
return recordReader;
}
private TableSchema loadSchemaFromJson(String json) {
TableSchema s = new TableSchema();
try {
JsonObject tree = new JsonParser().parse(json).getAsJsonObject();
if (tree.has("comment")) {
model.comment = tree.get("comment").getAsString();
}
if (tree.has("owner")) {
model.owner = tree.get("owner").getAsString();
}
if (tree.has("createTime")) {
model.createdTime = new Date(tree.get("createTime").getAsLong() * 1000);
}
if (tree.has("lastModifiedTime")) {
model.lastModifiedTime = new Date(tree.get("lastModifiedTime").getAsLong() * 1000);
}
if (tree.has("lastDDLTime")) {
model.lastMetaModifiedTime = new Date(tree.get("lastDDLTime").getAsLong() * 1000);
}
if (tree.has("lastAccessTime")) {
long timestamp = tree.get("lastAccessTime").getAsLong() * 1000;
model.lastAccessTime = timestamp == 0 ? null : new Date(timestamp);
}
if (tree.has("isVirtualView")) {
model.isVirtualView = tree.get("isVirtualView").getAsBoolean();
}
if (tree.has("isMaterializedView") && tree.get("isMaterializedView").getAsBoolean()) {
model.type = TableType.MATERIALIZED_VIEW;
}
if (tree.has("isMaterializedViewRewriteEnabled")) {
model.isMaterializedViewRewriteEnabled =
tree.get("isMaterializedViewRewriteEnabled").getAsBoolean();
}
if (tree.has("IsMaterializedViewOutdated")) {
model.isMaterializedViewOutdated = tree.get("IsMaterializedViewOutdated").getAsBoolean();
}
if (tree.has("isExternal")) {
model.isExternalTable = tree.get("isExternal").getAsBoolean();
}
if (tree.has("lifecycle")) {
model.life = tree.get("lifecycle").getAsLong();
}
if (tree.has("hubLifecycle")) {
model.hubLifecycle = tree.get("hubLifecycle").getAsLong();
}
if (tree.has("viewText")) {
model.viewText = tree.get("viewText").getAsString();
}
if (tree.has("viewExpandedText")) {
model.viewExpandedText = tree.get("viewExpandedText").getAsString();
}
if (tree.has("size")) {
model.size = tree.get("size").getAsLong();
}
if (tree.has("IsArchived")) {
model.isArchived = tree.get("IsArchived").getAsBoolean();
}
if (tree.has("PhysicalSize")) {
model.physicalSize = tree.get("PhysicalSize").getAsLong();
}
if (tree.has("FileNum")) {
model.fileNum = tree.get("FileNum").getAsLong();
}
if (tree.has("recordNum")) {
model.recordNum = tree.get("recordNum").getAsLong();
}
if (tree.has("storageHandler")) {
model.storageHandler = tree.get("storageHandler").getAsString();
}
if (tree.has("location")) {
model.location = tree.get("location").getAsString();
}
if (tree.has("resources")) {
model.resources = tree.get("resources").getAsString();
}
if (tree.has("serDeProperties")) {
model.serDeProperties = new GsonBuilder().disableHtmlEscaping().create()
.fromJson(tree.get("serDeProperties").getAsString(),
new TypeToken>() {
}.getType());
}
if (tree.has("shardExist")) {
boolean shardExist = tree.get("shardExist").getAsBoolean();
if (shardExist && tree.has("shardInfo")) {
model.shard = Shard.parseShard(tree.get("shardInfo").getAsJsonObject());
} else {
model.shard = null;
}
}
if (tree.has("tableLabel")) {
model.tableLabel = tree.get("tableLabel").getAsString();
// Service will return 0 if nothing set
if (model.tableLabel.equals("0")) {
model.tableLabel = "";
}
}
if (tree.has("columns") && tree.get("columns") != null) {
JsonArray columnsNode = tree.get("columns").getAsJsonArray();
for (int i = 0; i < columnsNode.size(); ++i) {
JsonObject n = columnsNode.get(i).getAsJsonObject();
s.addColumn(ColumnUtils.fromJson(n.toString()));
}
}
if (tree.has("extendedLabel")) {
JsonArray tableExtendedLabels = tree.get("extendedLabel").getAsJsonArray();
if (tableExtendedLabels.size() != 0) {
List labelList = new LinkedList();
for (JsonElement label : tableExtendedLabels) {
labelList.add(label.getAsString());
}
model.tableExtendedLabels = labelList;
}
}
if (tree.has("partitionKeys") && tree.get("partitionKeys") != null) {
JsonArray columnsNode = tree.get("partitionKeys").getAsJsonArray();
for (int i = 0; i < columnsNode.size(); ++i) {
JsonObject n = columnsNode.get(i).getAsJsonObject();
s.addPartitionColumn(ColumnUtils.fromJson(n.toString()));
}
}
if (tree.has("Reserved")) {
model.reserved = tree.get("Reserved").getAsString();
loadReservedJson(model.reserved);
}
if (tree.has("props") && tree.get("props") != null) {
JsonObject props = tree.get("props").getAsJsonObject();
model.mvProperties = new HashMap<>();
model.mvProperties.put("enable_auto_refresh",
props.has("enable_auto_refresh") ? props.get("enable_auto_refresh")
.getAsString() : "false");
if (props.has("refresh_interval_minutes")) {
model.mvProperties.put("refresh_interval_minutes",
props.get("refresh_interval_minutes").getAsString());
}
if (props.has("refresh_cron")) {
model.mvProperties.put("refresh_cron", props.get("refresh_cron").getAsString());
}
if (props.has("enable_auto_substitute")) {
model.mvProperties.put("enable_auto_substitute",
props.get("enable_auto_substitute").getAsString());
}
}
if (tree.has("RefreshHistory")) {
String refreshHistoryStr = tree.get("RefreshHistory").getAsString();
JsonArray refreshHistoryList = new JsonParser().parse(refreshHistoryStr).getAsJsonArray();
model.refreshHistory = new LinkedList<>();
for (int i = 0; i < refreshHistoryList.size(); i++) {
JsonObject info = refreshHistoryList.get(i).getAsJsonObject();
Map infoMap = new HashMap<>();
infoMap.put("InstanceId",
info.has("InstanceId") ? info.get("InstanceId").getAsString() : null);
infoMap.put("Status", info.has("Status") ? info.get("Status").getAsString() : null);
infoMap.put("StartTime",
info.has("StartTime") ? info.get("StartTime").getAsString() : null);
infoMap.put("EndTime", info.has("EndTime") ? info.get("EndTime").getAsString() : null);
model.refreshHistory.add(infoMap);
if (model.refreshHistory.size() >= 10) {
break;
}
}
}
} catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
}
return s;
}
private void loadReservedJson(String reserved) {
JsonObject reservedJson = new JsonParser().parse(reserved).getAsJsonObject();
// load cluster info
model.clusterInfo = parseClusterInfo(reservedJson);
model.isTransactional = parseTransactionalInfo(reservedJson);
model.hasRowAccessPolicy =
reservedJson.has("HasRowAccessPolicy") ? reservedJson.get("HasRowAccessPolicy")
.getAsBoolean() : false;
if (reservedJson.has("PrimaryKey")) {
model.primaryKey = new ArrayList<>();
JsonArray element = reservedJson.get("PrimaryKey").getAsJsonArray();
for (JsonElement e : element) {
model.primaryKey.add(e.getAsString());
}
}
model.acidDataRetainHours =
reservedJson.has("acid.data.retain.hours") ? Integer.parseInt(
reservedJson.get("acid.data.retain.hours").getAsString()) : -1;
model.cdcSize = reservedJson.has("cdc_size") ? Long.parseLong(
reservedJson.get("cdc_size").getAsString()) : -1;
model.cdcRecordNum = reservedJson.has("cdc_record_num") ? Long.parseLong(
reservedJson.get("cdc_record_num").getAsString()) : -1;
model.cdcLatestVersion = reservedJson.has("cdc_latest_version") ? Long.parseLong(
reservedJson.get("cdc_latest_version").getAsString()) : -1;
if (reservedJson.has("cdc_latest_timestamp")) {
long ts = Long.parseLong(reservedJson.get("cdc_latest_timestamp").getAsString()) * 1000;
model.cdcLatestTimestamp = new Date(ts);
}
// load storageTier info
model.storageTierInfo = StorageTierInfo.getStorageTierInfo(reservedJson);
// load table lifecycle configuration
model.tableLifecycleConfig = TableLifecycleConfig.parse(reservedJson);
model.lastMajorCompactTime =
reservedJson.has("LastMajorCompactionTime") ? Date.from(Instant.ofEpochMilli(Long.parseLong(
reservedJson.get("LastMajorCompactionTime").getAsString()) * 1000)) : null;
model.schemaVersion =
reservedJson.has("schema_version") ? reservedJson.get("schema_version").getAsString() : null;
}
private static boolean parseTransactionalInfo(JsonObject jsonObject) {
if (!jsonObject.has("Transactional")) {
return false;
}
return Boolean.parseBoolean(jsonObject.get("Transactional").getAsString());
}
public static ClusterInfo parseClusterInfo(JsonObject jsonObject) {
if (!jsonObject.has("ClusterType")) {
return null;
}
ClusterInfo clusterInfo = new ClusterInfo();
clusterInfo.clusterType =
jsonObject.has("ClusterType") ? ClusterInfo.ClusterType.valueOf(
jsonObject.get("ClusterType").getAsString().toUpperCase()) : null;
clusterInfo.bucketNum =
jsonObject.has("BucketNum") ? jsonObject.get("BucketNum").getAsLong() : 0L;
JsonArray
array =
jsonObject.has("ClusterCols") ? jsonObject.get("ClusterCols").getAsJsonArray() : null;
if (array != null) {
clusterInfo.clusterCols = new ArrayList();
for (int i = 0; i < array.size(); ++i) {
clusterInfo.clusterCols.add(array.get(i).getAsString());
}
}
if (jsonObject.has("SortCols")) {
array = jsonObject.get("SortCols").getAsJsonArray();
clusterInfo.sortCols = new ArrayList();
for (int i = 0; i < array.size(); ++i) {
JsonObject obj = array.get(i).getAsJsonObject();
if (obj != null) {
clusterInfo.sortCols.add(
new SortColumn(obj.get("col").getAsString(), obj.get("order").getAsString()));
}
}
}
return clusterInfo;
}
/**
* 增加分区
*
* @param spec 分区定义 {@link PartitionSpec}
* @throws OdpsException
*/
public void createPartition(PartitionSpec spec) throws OdpsException {
createPartition(spec, false);
}
/**
* 增加分区
*
* @param spec 分区定义 {@link PartitionSpec}
* @param ifNotExists 在创建分区时,如果为 false 而存在同名分区,则返回出错;若为 true,则无论是否存在同名分区,即使分区结构与要创建的目标分区结构不一致,均返回成功。已存在的同名分区的元信息不会被改动。
* @throws OdpsException
*/
public void createPartition(PartitionSpec spec, boolean ifNotExists) throws OdpsException {
StringBuilder sb = new StringBuilder();
sb.append("ALTER TABLE ").append(getCoordinate());
sb.append(" ADD");
if (ifNotExists) {
sb.append(" IF NOT EXISTS");
}
sb.append(" PARTITION (");
String[] keys = spec.keys().toArray(new String[0]);
for (int i = 0; i < keys.length; i++) {
sb.append(keys[i]).append("='").append(spec.get(keys[i])).append("'");
if (i + 1 < keys.length) {
sb.append(',');
}
}
sb.append(");");
// new SQLTask
String taskName = "SQLAddPartitionTask";
runSQL(taskName, sb.toString());
}
/**
* 删除指定分区
*
* @param spec 分区定义 {@link PartitionSpec}
* @throws OdpsException
*/
public void deletePartition(PartitionSpec spec) throws OdpsException {
deletePartition(spec, false);
}
/**
* 删除指定分区
*
* @param spec 分区定义 {@link PartitionSpec}
* @param ifExists 如果 false 而分区不存在,则返回异常;若为 true,无论分区是否存在,皆返回成功。
* @throws OdpsException
*/
public void deletePartition(PartitionSpec spec, boolean ifExists) throws OdpsException {
StringBuilder sb = new StringBuilder();
sb.append("ALTER TABLE ").append(getCoordinate());
sb.append(" DROP");
if (ifExists) {
sb.append(" IF EXISTS");
}
sb.append(" PARTITION(");
String[] keys = spec.keys().toArray(new String[0]);
for (int i = 0; i < keys.length; i++) {
sb.append(keys[i]).append("='").append(spec.get(keys[i])).append("'");
if (i + 1 < keys.length) {
sb.append(',');
}
}
sb.append(");");
// new SQLTask
String taskName = "SQLDropPartitionTask";
runSQL(taskName, sb.toString());
}
// for list partition response
@Root(name = "Partitions", strict = false)
private static class ListPartitionsResponse {
@ElementList(entry = "Partition", inline = true, required = false)
private List partitions = new LinkedList();
@Element(name = "Marker", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
private String marker;
@Element(name = "MaxItems", required = false)
private Integer maxItems;
}
@Root(strict = false)
private static class ListPartitionSpecsResponse {
@Element(name = "Marker", required = false)
@Convert(SimpleXmlUtils.EmptyStringConverter.class)
private String marker;
@Element(name = "MaxItems", required = false)
private Integer maxItems;
@ElementList(entry = "Partition", inline = true, required = false)
private List partitionSpecs = new LinkedList<>();
}
/**
* Get list of partition specs. The returned partition specs are ordered lexicographically.
*
* @return list of {@link PartitionSpec}
*/
public List getPartitionSpecs() throws OdpsException {
Map params = initParamsWithSchema();
params.put("partitions", null);
params.put("name", null);
String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
List partitionSpecs = new ArrayList<>();
ListPartitionSpecsResponse resp = client.request(ListPartitionSpecsResponse.class,
resource,
"GET",
params);
for (PartitionSpecModel partitionSpecModel : resp.partitionSpecs) {
partitionSpecs.add(new PartitionSpec(partitionSpecModel.partitionSpec, false));
}
return partitionSpecs;
}
/**
* 在Table上创建Shards
*
* @param shardCount 创建Shard的个数
*/
public void createShards(long shardCount) throws OdpsException {
StringBuilder sb = new StringBuilder();
// TODO: not sure this sql support schema
sb.append("ALTER TABLE ").append(getCoordinate());
sb.append(String.format(" INTO %d SHARDS;", shardCount));
String taskName = "SQLCreateShardsTask";
runSQL(taskName, sb.toString());
}
/**
* 获取分区迭代器
*
* @return {@link Partition} 分区迭代器
*/
public Iterator getPartitionIterator() {
return getPartitionIterator(null);
}
/**
* 获取分区迭代器
*
* @param spec 指定的上级分区 {@link PartitionSpec}
* @return {@link Partition}迭代器
*/
public Iterator getPartitionIterator(final PartitionSpec spec) {
return getPartitionIterator(spec, false, 1000L, Long.MAX_VALUE);
}
/**
* Get a partition iterator.
*
* @param spec Specify the values of some of the partition columns. The specified columns'
* indices should be continuous and start from 0.
* @param reverse Reverse the result. The original
* @param batchSize Max number of partitions to get per request. In case of null, the batch size
* will be decided by the server.
* @param limit Limit the number of returned partitions. In case of null, {@link Long#MAX_VALUE}
* will be used.
* @return A partition iterator.
*/
public Iterator getPartitionIterator(
PartitionSpec spec,
boolean reverse,
Long batchSize,
Long limit) {
if (limit != null && limit <= 0) {
throw new IllegalArgumentException("Argument 'limit' should be greater than 0");
}
if (batchSize != null && batchSize <= 0) {
throw new IllegalArgumentException("Argument 'batchSize' should be greater than 0");
}
final long finalLimit = limit == null ? Long.MAX_VALUE : limit;
return new ListIterator() {
long numPartitions = 0;
Map params = new HashMap<>();
@Override
public boolean hasNext() {
return super.hasNext() && numPartitions < finalLimit;
}
@Override
public Partition next() {
Partition partition = super.next();
numPartitions += 1;
return partition;
}
@Override
public String getMarker() {
return params.get("marker");
}
@Override
public List list(String marker, long maxItems) {
if (marker != null) {
params.put("marker", marker);
}
if (maxItems >= 0) {
params.put("maxitems", String.valueOf(maxItems));
}
return list();
}
@Override
protected List list() {
ArrayList partitions = new ArrayList<>();
params.put("partitions", null);
params.put("expectmarker", "true"); // since sprint-11
if (spec != null && !spec.isEmpty()) {
params.put("partition", spec.toString());
}
if (reverse) {
params.put("reverse", null);
}
if (params.get("maxitems") == null && batchSize != null) {
params.put("maxitems", batchSize.toString());
}
String lastMarker = params.get("marker");
if (params.containsKey("marker") && lastMarker.length() == 0) {
return null;
}
String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
try {
params.putAll(initParamsWithSchema());
ListPartitionsResponse
resp =
client.request(ListPartitionsResponse.class, resource, "GET", params);
for (PartitionModel partitionModel : resp.partitions) {
Partition t = new Partition(
partitionModel,
model.projectName,
model.schemaName,
model.name,
odps);
partitions.add(t);
}
params.put("marker", resp.marker);
} catch (OdpsException e) {
throw new RuntimeException(e.getMessage(), e);
}
return partitions;
}
};
}
/**
* 获取所有分区信息
*
* @return {@link Partition}列表
*/
public List getPartitions() {
ArrayList parts = new ArrayList<>();
Iterator it = getPartitionIterator();
while (it.hasNext()) {
parts.add(it.next());
}
return parts;
}
/**
* 获取指定分区信息
*
* @param spec 分区定义 {@link PartitionSpec}
* @return 分区信息 {@link Partition}
*/
public Partition getPartition(PartitionSpec spec) {
return new Partition(spec, model.projectName, model.schemaName, model.name, odps);
}
/**
* 判断指定分区是否存在
*
* @param spec 分区定义 {@link PartitionSpec}
* @return 如果指定分区存在,则返回true,否则返回false
* @throws OdpsException
*/
public boolean hasPartition(PartitionSpec spec) throws OdpsException {
try {
Partition part = getPartition(spec);
part.reload();
} catch (NoSuchObjectException e) {
return false;
}
return true;
}
/**
* 删除表数据
*
* @throws OdpsException
*/
public void truncate() throws OdpsException {
StringBuilder sb = new StringBuilder();
sb.append("TRUNCATE TABLE ").append(getCoordinate()).append(";");
String taskName = "SQLTruncateTask";
runSQL(taskName, sb.toString());
}
/**
* 判断是否 Partition 表
*
* @return 是否为 Partition 表
*/
public boolean isPartitioned() {
if (isVirtualView()) {
return false;
}
return !getSchema().getPartitionColumns().isEmpty();
}
private void runSQL(String query) throws OdpsException {
runSQL("AnonymousSQLTask", query);
}
private void runSQL(String taskName, String query) throws OdpsException {
Map hints = NameSpaceSchemaUtils.setSchemaFlagInHints(null, model.schemaName);
Instance i = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, hints, null);
i.waitForSuccess();
}
private HashMap initParamsWithSchema() throws OdpsException {
return NameSpaceSchemaUtils.initParamsWithSchema(model.schemaName);
}
private String getCoordinate() {
return NameSpaceSchemaUtils.getFullName(model.projectName, model.schemaName, model.name);
}
private Map getMvProperties() {
lazyLoad();
if (model.mvProperties == null) {
model.mvProperties = new HashMap<>();
}
return model.mvProperties;
}
public boolean isAutoRefreshEnabled() {
return Boolean.parseBoolean(getMvProperties().getOrDefault("enable_auto_refresh", "false"));
}
public Boolean isAutoSubstituteEnabled() {
String
autoSubstituteEnabledStr =
getMvProperties().getOrDefault("enable_auto_substitute", null);
return autoSubstituteEnabledStr == null ? null : Boolean.valueOf(autoSubstituteEnabledStr);
}
public Integer getRefreshInterval() {
String refreshIntervalStr = getMvProperties().getOrDefault("refresh_interval_minutes", null);
return refreshIntervalStr == null ? null : Integer.valueOf(refreshIntervalStr);
}
public String getRefreshCron() {
return getMvProperties().getOrDefault("refresh_cron", null);
}
public List> getRefreshHistory() {
lazyLoadExtendInfo();
return model.refreshHistory;
}
public boolean hasRowAccessPolicy() {
lazyLoadExtendInfo();
return model.hasRowAccessPolicy;
}
public List getPrimaryKey() {
lazyLoadExtendInfo();
return model.primaryKey;
}
public int getAcidDataRetainHours() {
lazyLoadExtendInfo();
return model.acidDataRetainHours;
}
public long getCdcSize() {
lazyLoadExtendInfo();
return model.cdcSize;
}
public long getCdcRecordNum() {
lazyLoadExtendInfo();
return model.cdcRecordNum;
}
public long getCdcLatestVersion() {
lazyLoadExtendInfo();
return model.cdcLatestVersion;
}
public Date getCdcLatestTimestamp() {
lazyLoadExtendInfo();
return model.cdcLatestTimestamp;
}
public Stream newStream(String streamName) throws OdpsException {
if (!isTransactional()) {
throw new IllegalArgumentException("only transactional table can attach stream");
}
StreamIdentifier
identifier =
StreamIdentifier.of(model.projectName, streamName);
odps.streams().create(identifier,
TableIdentifier.of(model.projectName, model.schemaName, model.name));
return odps.streams().get(identifier);
}
// update table methods
/**
* Modify the life cycle of an existing partitioned table or non-partitioned table.
*/
public void setLifeCycle(int days) throws OdpsException {
String sql = String.format("ALTER TABLE %s SET LIFECYCLE %d;", getCoordinate(), days);
runSQL(sql);
}
/**
* Only the Project Owner or users with the Super_Administrator role can execute commands that modify the table Owner.
*/
public void changeOwner(String newOwner) throws OdpsException {
String target = "table";
if (isVirtualView() || isMaterializedView()) {
target = "view";
}
runSQL(String.format("ALTER %s %s CHANGE OWNER TO %s;", target, getCoordinate(), OdpsCommonUtils.quoteStr(newOwner)));
}
/**
* ChangeComment Modify the comment content of the table.
*/
public void changeComment(String newComment) throws OdpsException {
runSQL(String.format("ALTER TABLE %s SET COMMENT %s;", getCoordinate(), OdpsCommonUtils.quoteStr(newComment)));
}
/**
* Touch can modify the LastModifiedTime of the table, making LastModifiedTime change to the current time
*/
public void touch() throws OdpsException {
runSQL(String.format("ALTER TABLE %s TOUCH;", getCoordinate()));
}
/**
* ChangeClusterInfo Modify the cluster information of the table.
*/
public void changeClusterInfo(ClusterInfo clusterInfo) throws OdpsException {
runSQL(String.format("ALTER TABLE %s %s;", getCoordinate(), clusterInfo.toString()));
}
/**
* Rename the table.
*/
public void rename(String newName) throws Exception {
String target = "table";
if (isVirtualView()) {
target = "view";
}
runSQL(String.format("ALTER %s %s RENAME TO %s;", target, getCoordinate(), OdpsCommonUtils.quoteRef(newName)));
model.name = newName;
}
/**
* Add new columns to the table.
*/
public void addColumns(List columns, boolean ifNotExists) throws Exception {
runSQL(generateAddColumnsSQL(columns, ifNotExists));
}
private String generateAddColumnsSQL(List columns, boolean ifNotExists) {
StringBuilder sb = new StringBuilder();
sb.append("ALTER TABLE ").append(getCoordinate()).append(" ADD COLUMNS ");
if (ifNotExists) {
sb.append("IF NOT EXISTS ");
}
sb.append("(");
StringJoiner joiner = new StringJoiner(", ");
for (Column column : columns) {
StringBuilder columnDef = new StringBuilder();
columnDef.append(String.format("%s %s", OdpsCommonUtils.quoteRef(column.getName()), column.getTypeInfo().getTypeName()));
if (column.getComment() != null && !column.getComment().isEmpty()) {
columnDef.append(String.format(" COMMENT %s", OdpsCommonUtils.quoteStr(column.getComment())));
}
joiner.add(columnDef.toString());
}
sb.append(joiner).append(");");
return sb.toString();
}
/**
* Drop columns from the table.
*/
public void dropColumns(List columnNames) throws Exception {
runSQL(generateDropColumnsSQL(columnNames));
}
private String generateDropColumnsSQL(List columnNames) {
StringJoiner joiner = new StringJoiner(", ");
for (String columnName : columnNames) {
joiner.add(OdpsCommonUtils.quoteRef(columnName));
}
return String.format("ALTER TABLE %s DROP COLUMNS %s;", getCoordinate(), joiner);
}
/**
* Change the type of an existing column in the table.
*/
public void alterColumnType(String columnName, TypeInfo columnType) throws Exception {
runSQL(String.format("ALTER TABLE %s CHANGE COLUMN %s %s %s;", getCoordinate(), columnName, columnName, columnType.getTypeName()));
}
/**
* Change the name of an existing column in the table.
*/
public void changeColumnName(String oldColumnName, String newColumnName) throws Exception {
runSQL(String.format("ALTER TABLE %s CHANGE COLUMN %s RENAME TO %s;", getCoordinate(), oldColumnName, newColumnName));
}
}