
com.aliyun.odps.Table Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlValue;
import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.Partition.PartitionModel;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.data.DefaultRecordReader;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.rest.JAXBUtils;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.task.SQLTask;
import com.aliyun.odps.type.TypeInfo;
import com.aliyun.odps.type.TypeInfoParser;
import com.aliyun.odps.utils.StringUtils;
/**
* Table表示ODPS中的表
*
* @author [email protected]
*/
public class Table extends LazyLoad {
/**
 * Data holder for one table description.
 *
 * The annotated members are mapped by JAXB to the "Table" XML document. The
 * members without annotations are assigned by Table.loadSchemaFromJson (and
 * loadReservedJson) from the JSON text carried in the Schema element.
 */
@XmlRootElement(name = "Table")
static class TableModel {
/** Wrapper for the "Schema" element; its text value is the JSON schema document. */
@XmlRootElement(name = "Schema")
static class Schema {
@XmlValue
String content;
}
@XmlElement(name = "Name")
String name;
@XmlElement(name = "TableId")
String ID;
@XmlAttribute(name = "format")
private String format;
@XmlElement(name = "Schema")
private Schema schema;
@XmlElement(name = "Comment")
String comment;
@XmlElement(name = "Owner")
String owner;
@XmlElement(name = "Project")
String projectName;
@XmlElement(name = "TableLabel")
String tableLabel;
@XmlElement(name = "CryptoAlgo")
String cryptoAlgoName;
@XmlElement(name = "CreationTime")
@XmlJavaTypeAdapter(JAXBUtils.DateBinding.class)
Date createdTime;
@XmlElement(name = "LastModifiedTime")
@XmlJavaTypeAdapter(JAXBUtils.DateBinding.class)
Date lastModifiedTime;
// Taken from the JSON key "lastDDLTime".
Date lastMetaModifiedTime;
boolean isVirtualView;
// Taken from the JSON key "isExternal".
boolean isExternalTable;
// -1 until the JSON key "lifecycle" has been read.
long life = -1L;
// -1 until the JSON key "hubLifecycle" has been read.
long hubLifecycle = -1L;
String viewText;
long size;
boolean isArchived;
long physicalSize;
long fileNum;
// reserved json string in extended info
String reserved;
// Only populated when the JSON reports "shardExist" together with a "shardInfo" object.
Shard shard;
// for external table extended info
String storageHandler;
String location;
String resources;
// Result of parsing the JSON text stored under "serDeProperties".
Map serDeProperties;
// for clustered info
ClusterInfo clusterInfo;
}
/**
 * Clustering description of a table: cluster type, bucket count, cluster columns
 * and sort columns. Instances are filled in by Table.parseClusterInfo.
 */
public static class ClusterInfo {
  // -1 means no bucket number has been assigned.
  long bucketNum = -1;
  String clusterType;
  // Column names; null when the source document had no "ClusterCols" array.
  List<String> clusterCols;
  // Sort columns; null when the source document had no "SortCols" array.
  List<SortColumn> sortCols;

  /** @return the cluster type string, or null if it was never set */
  public String getClusterType() {
    return clusterType;
  }

  /** @return the bucket number, or -1 if it was never set */
  public long getBucketNum() {
    return bucketNum;
  }

  /** @return the cluster column names, or null if none were provided */
  public List<String> getClusterCols() {
    return clusterCols;
  }

  /** @return the sort columns, or null if none were provided */
  public List<SortColumn> getSortCols() {
    return sortCols;
  }
}

/**
 * One sort column of a clustered table: a column name plus its sort order text.
 */
static class SortColumn {
  private final String name;
  private final String order;

  SortColumn(String name, String order) {
    this.name = name;
    this.order = order;
  }

  /** @return the column name */
  public String getName() {
    return name;
  }

  /** @return the sort order text as supplied to the constructor */
  public String getOrder() {
    return order;
  }

  /** @return the name and the order separated by a single space */
  @Override
  public String toString() {
    return String.format("%s %s", name, order);
  }
}
// Current table metadata; replaced as a whole by reload(TableModel).
private TableModel model;
// Parsed schema; assigned in reload(TableModel) when the model carries a schema document.
private TableSchema tableSchema;
// REST client taken from the Odps instance passed to the constructor.
private RestClient client;
// Set to true once lazyLoadExtendInfo() has fetched the "extended" description.
private boolean isExtendInfoLoaded;
// Initialised to false in the constructor; no read of this flag is visible in this class.
private boolean isShardInfoLoaded;
// Used by runSQL() to create task instances.
private Odps odps;
/**
 * Creates a table handle around an existing model.
 *
 * @param model   initial metadata; its project name is overwritten with {@code project}
 * @param project name of the project that owns the table
 * @param odps    ODPS entry point; its REST client is kept for later requests
 */
Table(TableModel model, String project, Odps odps) {
  model.projectName = project;
  this.model = model;
  this.odps = odps;
  this.client = odps.getRestClient();
  // Neither the extended description nor the shard information has been fetched yet.
  this.isShardInfoLoaded = false;
  this.isExtendInfoLoaded = false;
}
/**
 * Requests the table description with a GET on the table resource and applies it
 * through {@link #reload(TableModel)}.
 *
 * @throws OdpsException propagated from the request or from applying the result
 */
@Override
public void reload() throws OdpsException {
  final String tableResource =
      ResourceBuilder.buildTableResource(model.projectName, model.name);
  final TableModel fetched = client.request(TableModel.class, tableResource, "GET");
  reload(fetched);
}
/**
 * Replaces the current metadata with {@code model} and marks this object as loaded.
 * When the model carries a schema document, it is parsed and becomes the table schema.
 *
 * @param model the new table metadata
 * @throws OdpsException declared for callers; not thrown directly by this method
 */
public void reload(TableModel model) throws OdpsException {
  this.model = model;
  final TableModel.Schema schemaDocument = model.schema;
  if (schemaDocument != null) {
    this.tableSchema = loadSchemaFromJson(schemaDocument.content);
  }
  setLoaded(true);
}
/**
 * Returns the table name.
 *
 * @return the table name
 */
public String getName() {
  return this.model.name;
}
/**
 * Returns the table comment, calling {@code lazyLoad()} first when no comment is cached.
 *
 * @return the comment attached to the table
 */
public String getComment() {
  final boolean notCached = (model.comment == null);
  if (notCached) {
    lazyLoad();
  }
  // Read through the field again: loading may have replaced the model.
  return model.comment;
}
/**
 * Returns the user that owns the table, calling {@code lazyLoad()} first when no owner
 * is cached.
 *
 * @return the owning user
 */
public String getOwner() {
  if (null == this.model.owner) {
    this.lazyLoad();
  }
  return this.model.owner;
}
/**
 * Returns the creation time, calling {@code lazyLoad()} first when none is cached.
 *
 * @return the creation time
 */
public Date getCreatedTime() {
  final boolean notCached = (model.createdTime == null);
  if (notCached) {
    lazyLoad();
  }
  return model.createdTime;
}
/**
 * Returns the table-level label, calling {@code lazyLoad()} first when none is cached.
 *
 * @return the table label
 */
public String getTableLabel() {
  if (null == this.model.tableLabel) {
    this.lazyLoad();
  }
  return this.model.tableLabel;
}
/**
 * Returns the table ID, calling {@code lazyLoad()} first when none is cached.
 *
 * @return the table ID
 */
public String getTableID() {
  final boolean notCached = (model.ID == null);
  if (notCached) {
    lazyLoad();
  }
  return model.ID;
}
/**
 * Returns the name of the table's encryption algorithm, calling {@code lazyLoad()}
 * first when none is cached.
 *
 * @return the algorithm name
 */
public String getCryptoAlgoName() {
  if (null == this.model.cryptoAlgoName) {
    this.lazyLoad();
  }
  return this.model.cryptoAlgoName;
}
/**
 * Returns the highest label level of the table.
 *
 * A label has two parts:
 * 1. business category: C, S, B
 * 2. data level: 1, 2, 3, 4
 *
 * The two parts are orthogonal, i.e. C1, C2, C3, C4, S1, S2, S3, S4, B1, B2, B3, B4.
 *
 * Meaning of MaxLabel:
 * 1. MaxLabel = max(TableLabel, ColumnLabel), where max(...) is decided by the data
 *    level of the label: 4 > 3 > 2 > 1
 * 2. How MaxLabel is displayed:
 *    - the highest level occurs once: category + level, e.g. B4, C3, S2
 *    - the highest level occurs several times with a single category:
 *      category + level, e.g. B4, C3, S2
 *    - the highest level occurs several times with different categories:
 *      L + level, e.g. L4, L3
 *
 * @return the highest label, or an empty string when no label is set at all
 */
public String getMaxLabel() {
  List<String> labels = new ArrayList<String>();
  labels.add(getTableLabel());
  // Go through getSchema() rather than the tableSchema field: the field is still null
  // when the table label is already cached but no schema document has been parsed yet.
  TableSchema schema = getSchema();
  if (schema != null) {
    for (Column column : schema.getColumns()) {
      labels.add(column.getCategoryLabel());
    }
  }
  return calculateMaxLabel(labels);
}
/**
 * Computes the maximum label from a list of label strings.
 *
 * Only labels whose last character is an ASCII digit take part; that digit is the
 * level. The result is the category letter followed by the highest level. The
 * category becomes 'L' when the labels at the highest level disagree on their letter,
 * when a label consists of the digit alone, or when no letter was seen.
 *
 * @param labels label strings; null and empty entries are ignored
 * @return the maximum label, or an empty string when no entry contributed a level
 */
static String calculateMaxLabel(List<String> labels) {
  int maxLevel = 0;
  char category = '-';
  for (String label : labels) {
    if (label == null || label.isEmpty()) {
      continue;
    }
    char last = label.charAt(label.length() - 1);
    // Accept ASCII digits only. Character.isDigit() is also true for other Unicode
    // digits, for which (last - '0') would not be a value between 0 and 9.
    if (last < '0' || last > '9') {
      continue;
    }
    int level = last - '0';
    if (level < maxLevel) {
      continue;
    }
    if (level > maxLevel) {
      // A new highest level discards the category collected for the previous one.
      maxLevel = level;
      category = '-';
    }
    // label is only one num
    if (label.length() == 1) {
      category = 'L';
      continue;
    }
    // handle one or more letter before the level number
    for (int i = label.length() - 2; i >= 0; i--) {
      char c = label.charAt(i);
      if (Character.isLetter(c)) {
        c = Character.toUpperCase(c);
        if (category == '-') {
          category = c;
        } else if (category != c) {
          category = 'L';
        }
      }
    }
  }
  if (category == '-' && maxLevel == 0) {
    return "";
  }
  if (category == '-') {
    category = 'L';
  }
  return category + "" + maxLevel;
}
/**
 * Returns the time of the last metadata change, calling {@code lazyLoad()} first when
 * none is cached.
 *
 * @return the last modification time
 */
public Date getLastMetaModifiedTime() {
  final boolean notCached = (model.lastMetaModifiedTime == null);
  if (notCached) {
    lazyLoad();
  }
  return model.lastMetaModifiedTime;
}
/**
 * Returns the name of the {@link Project} the table belongs to.
 *
 * @return the project name
 */
public String getProject() {
  return this.model.projectName;
}
/**
 * Tells whether the table is a virtual view. Always calls {@code lazyLoad()} first.
 *
 * @return true for a virtual view, false otherwise
 */
public boolean isVirtualView() {
  this.lazyLoad();
  final boolean virtualView = this.model.isVirtualView;
  return virtualView;
}
/**
 * Tells whether the table is an external table. Always calls {@code lazyLoad()} first.
 *
 * @return true for an external table, false otherwise
 */
public boolean isExternalTable() {
  this.lazyLoad();
  final boolean external = this.model.isExternalTable;
  return external;
}
/**
 * Returns the text of the view, calling {@code lazyLoad()} first when none is cached.
 *
 * @return the view text
 */
public String getViewText() {
  if (null == this.model.viewText) {
    this.lazyLoad();
  }
  return this.model.viewText;
}
/**
 * Returns the time the data was last modified, calling {@code lazyLoad()} first when
 * none is cached.
 *
 * @return the last data modification time
 */
public Date getLastDataModifiedTime() {
  final boolean notCached = (model.lastModifiedTime == null);
  if (notCached) {
    lazyLoad();
  }
  return model.lastModifiedTime;
}
/**
 * Returns the internal storage size in bytes. Always calls {@code lazyLoad()} first.
 *
 * @return the storage size
 */
public long getSize() {
  this.lazyLoad();
  final long bytes = this.model.size;
  return bytes;
}
/**
 * Returns the table lifecycle in days. Always calls {@code lazyLoad()} first.
 *
 * @return the lifecycle value
 */
public long getLife() {
  this.lazyLoad();
  final long days = this.model.life;
  return days;
}
/**
 * Returns the datahub lifecycle of the table in days. Always calls {@code lazyLoad()}
 * first.
 *
 * @return the datahub lifecycle value
 */
public long getHubLifecycle() {
  this.lazyLoad();
  final long days = this.model.hubLifecycle;
  return days;
}
/**
 * Returns the table structure, calling {@code lazyLoad()} first when no schema has
 * been parsed yet.
 *
 * @return the {@link TableSchema} describing the table structure
 */
public TableSchema getSchema() {
  if (this.tableSchema != null) {
    return this.tableSchema;
  }
  lazyLoad();
  return this.tableSchema;
}
/**
 * Returns the raw JSON schema document of the table, calling {@code lazyLoad()} first
 * when no document is cached.
 *
 * @return the JSON schema text, or null when the model has no schema element even
 *         after loading
 */
public String getJsonSchema() {
  if (model.schema == null || model.schema.content == null) {
    lazyLoad();
  }
  // The model may still lack a schema element after loading; return null for that
  // case instead of dereferencing it.
  return model.schema == null ? null : model.schema.content;
}
/**
 * Tells whether an archive operation has been performed on the table. Loads the
 * extended description first if that has not happened yet.
 *
 * @return true if the table has been archived, false otherwise
 */
public boolean isArchived() {
  this.lazyLoadExtendInfo();
  final boolean archived = this.model.isArchived;
  return archived;
}
/**
 * Returns the physical size the table occupies on disk. Loads the extended
 * description first if that has not happened yet.
 *
 * @return the physical size
 */
public long getPhysicalSize() {
  this.lazyLoadExtendInfo();
  final long physical = this.model.physicalSize;
  return physical;
}
/**
 * Returns the number of Pangu files occupied by the table data. Loads the extended
 * description first if that has not happened yet.
 *
 * @return the file count
 */
public long getFileNum() {
  this.lazyLoadExtendInfo();
  final long files = this.model.fileNum;
  return files;
}
/**
 * Returns the data storage location of an external table, loading the extended
 * description first when no location is cached.
 *
 * @return the external table data location
 */
public String getLocation() {
  final boolean notCached = (model.location == null);
  if (notCached) {
    lazyLoadExtendInfo();
  }
  return model.location;
}
/**
 * Returns the storage handler of an external table, loading the extended description
 * first when no handler is cached.
 *
 * @return the external table storage handler
 */
public String getStorageHandler() {
  if (null == this.model.storageHandler) {
    this.lazyLoadExtendInfo();
  }
  return this.model.storageHandler;
}
/**
 * Returns the resources used by an external table, loading the extended description
 * first when none are cached.
 *
 * @return the resources used by the external table
 */
public String getResources() {
  final boolean notCached = (model.resources == null);
  if (notCached) {
    lazyLoadExtendInfo();
  }
  return model.resources;
}
/**
 * Returns the serialization/deserialization properties of an external table, loading
 * the extended description first when none are cached.
 *
 * @return the SerDe properties of the external table
 */
public Map getSerDeProperties() {
  if (null == this.model.serDeProperties) {
    this.lazyLoadExtendInfo();
  }
  return this.model.serDeProperties;
}
/**
 * Returns the reserved field of the extended information, a JSON string. Loads the
 * extended description first when no value is cached.
 *
 * @return the reserved field
 */
public String getReserved() {
  final boolean notCached = (model.reserved == null);
  if (notCached) {
    lazyLoadExtendInfo();
  }
  return model.reserved;
}
/**
 * Returns the cluster information of a cluster range table, loading the extended
 * description first when none is cached.
 *
 * @return the cluster info
 */
public ClusterInfo getClusterInfo() {
  if (null == this.model.clusterInfo) {
    this.lazyLoadExtendInfo();
  }
  return this.model.clusterInfo;
}
/**
 * Returns the shard of the table, calling {@code lazyLoad()} first when none is cached.
 *
 * @return the shard, or null if the table has no shard
 */
public Shard getShard() {
  final boolean notCached = (model.shard == null);
  if (notCached) {
    lazyLoad();
  }
  return model.shard;
}
/**
 * Reads data from the table without a partition or column restriction.
 *
 * @param limit
 *     maximum number of rows to read
 * @return a {@link RecordReader} over the rows
 * @throws OdpsException
 */
public RecordReader read(int limit) throws OdpsException {
  // No partition and no column list: delegate with both filters absent.
  final PartitionSpec noPartition = null;
  return read(noPartition, null, limit);
}
/**
 * Reads data from the table.
 * At most 10,000 records are returned; anything beyond that is truncated.
 * In addition, the data read must not exceed 10MB, otherwise an exception is thrown.
 *
 * @param partition
 *     the table partition {@link PartitionSpec}; pass null when no partition is wanted
 * @param columns
 *     names of the columns to read; pass null to read all columns
 * @param limit
 *     maximum number of rows to read; must not be negative
 * @return a {@link RecordReader} over the rows
 * @throws OdpsException if {@code limit} is negative or the request fails
 */
public RecordReader read(PartitionSpec partition, List<String> columns, int limit)
    throws OdpsException {
  if (limit < 0) {
    throw new OdpsException("limit number should >= 0.");
  }
  Map<String, String> params = new HashMap<String, String>();
  params.put("data", null);
  if (partition != null && partition.keys().size() > 0) {
    params.put("partition", partition.toString());
  }
  if (columns != null && !columns.isEmpty()) {
    // Comma-separated column names, built without repeated string concatenation.
    StringBuilder cols = new StringBuilder();
    boolean first = true;
    for (String name : columns) {
      if (!first) {
        cols.append(',');
      }
      cols.append(name);
      first = false;
    }
    params.put("cols", cols.toString());
  }
  // limit is known to be >= 0 here, so the row cap is always sent.
  params.put("linenum", String.valueOf(limit));
  String resource = ResourceBuilder.buildTableResource(model.projectName, getName());
  Response resp = client.request(resource, "GET", params, null, null);
  return new DefaultRecordReader(new ByteArrayInputStream(resp.getBody()), getSchema());
}
/**
 * Parses the JSON schema document of a table.
 *
 * Besides building the returned schema from the "columns" and "partitionKeys"
 * arrays, this method copies every recognised key that is present in the document
 * into the current model; keys that are absent leave the model value untouched.
 *
 * @param json the JSON schema text
 * @return a new TableSchema holding the parsed columns and partition columns
 * @throws RuntimeException wrapping any exception raised while parsing
 */
private TableSchema loadSchemaFromJson(String json) {
TableSchema s = new TableSchema();
try {
JSONObject tree = JSON.parseObject(json);
// Basic attributes.
String node = tree.getString("comment");
if (node != null) {
model.comment = node;
}
node = tree.getString("owner");
if (node != null) {
model.owner = node;
}
// Time values are multiplied by 1000 before being turned into Date objects.
Long node2 = tree.getLong("createTime");
if (node2 != null) {
model.createdTime = new Date(node2 * 1000);
}
node2 = tree.getLong("lastModifiedTime");
if (node2 != null) {
model.lastModifiedTime = new Date(node2 * 1000);
}
node2 = tree.getLong("lastDDLTime");
if (node2 != null) {
model.lastMetaModifiedTime = new Date(node2 * 1000);
}
Boolean node3 = tree.getBoolean("isVirtualView");
if (node3 != null) {
model.isVirtualView = node3;
}
node3 = tree.getBoolean("isExternal");
if (node3 != null) {
model.isExternalTable = node3;
}
node2 = tree.getLong("lifecycle");
if (node2 != null) {
model.life = node2;
}
node2 = tree.getLong("hubLifecycle");
if (node2 != null) {
model.hubLifecycle = node2;
}
node = tree.getString("viewText");
if (node != null) {
model.viewText = node;
}
node2 = tree.getLong("size");
if (node2 != null) {
model.size = node2;
}
// The following three keys start with an upper-case letter, unlike the ones above.
node3 = tree.getBoolean("IsArchived");
if (node3 != null) {
model.isArchived = node3;
}
node2 = tree.getLong("PhysicalSize");
if (node2 != null) {
model.physicalSize = node2;
}
node2 = tree.getLong("FileNum");
if (node2 != null) {
model.fileNum = node2;
}
// External table attributes.
node = tree.getString("storageHandler");
if (node != null) {
model.storageHandler = node;
}
node = tree.getString("location");
if (node != null) {
model.location = node;
}
node = tree.getString("resources");
if (node != null) {
model.resources = node;
}
// The value is read as text and parsed a second time into a map.
node = tree.getString("serDeProperties");
if (node != null) {
model.serDeProperties = (Map)JSON.parse(node);
}
// The shard is only kept when "shardExist" is true and a "shardInfo" object exists;
// when "shardExist" is present otherwise, any previous shard is cleared.
node3 = tree.getBoolean("shardExist");
if (node3 != null) {
boolean shardExist = node3;
JSONObject node4 = tree.getJSONObject("shardInfo");
if (shardExist && node4 != null) {
model.shard = Shard.parseShard(node4);
} else {
model.shard = null;
}
}
node = tree.getString("tableLabel");
if (node != null) {
model.tableLabel = node;
// Service will return 0 if nothing set
if (model.tableLabel.equals("0")) {
model.tableLabel = "";
}
}
// Regular columns, in document order.
JSONArray columnsNode = tree.getJSONArray("columns");
if (columnsNode != null) {
for (int i = 0; i < columnsNode.size(); ++i) {
JSONObject n = columnsNode.getJSONObject(i);
s.addColumn(parseColumn(n));
}
}
// Partition columns, in document order.
columnsNode = tree.getJSONArray("partitionKeys");
if (columnsNode != null) {
for (int i = 0; i < columnsNode.size(); ++i) {
JSONObject n = columnsNode.getJSONObject(i);
s.addPartitionColumn(parseColumn(n));
}
}
// The reserved text is kept verbatim and also parsed for cluster information.
node = tree.getString("Reserved");
if (node != null) {
model.reserved = node;
loadReservedJson(node);
}
} catch (Exception e) {
// Any failure above is rethrown unchecked with the original exception as cause.
throw new RuntimeException(e.getMessage(), e);
}
return s;
}
/**
 * Parses the reserved JSON text and stores the cluster information found in it on
 * the current model.
 *
 * @param reserved the reserved JSON string of the extended information
 */
private void loadReservedJson(String reserved) {
  final JSONObject parsed = JSON.parseObject(reserved);
  // load cluster info
  final ClusterInfo info = parseClusterInfo(parsed);
  model.clusterInfo = info;
}
/**
 * Builds a {@link ClusterInfo} from a JSON object.
 *
 * Keys read: "ClusterType", "BucketNum", "ClusterCols" (array of strings) and
 * "SortCols" (array of objects with "col" and "order").
 *
 * @param jsonObject the JSON object to read; may be null
 * @return the cluster info, or null when {@code jsonObject} is null or has no
 *         "ClusterType" value
 */
public static ClusterInfo parseClusterInfo(JSONObject jsonObject) {
  if (jsonObject == null) {
    return null;
  }
  String clusterType = jsonObject.getString("ClusterType");
  if (clusterType == null) {
    return null;
  }
  ClusterInfo clusterInfo = new ClusterInfo();
  clusterInfo.clusterType = clusterType;
  // getLong yields a boxed value that is null for a missing key (this file checks for
  // that elsewhere too); keep the default bucket number instead of unboxing null.
  Long bucketNum = jsonObject.getLong("BucketNum");
  if (bucketNum != null) {
    clusterInfo.bucketNum = bucketNum;
  }
  JSONArray array = jsonObject.getJSONArray("ClusterCols");
  if (array != null) {
    List<String> clusterCols = new ArrayList<String>(array.size());
    for (int i = 0; i < array.size(); ++i) {
      clusterCols.add(array.getString(i));
    }
    clusterInfo.clusterCols = clusterCols;
  }
  array = jsonObject.getJSONArray("SortCols");
  if (array != null) {
    List<SortColumn> sortCols = new ArrayList<SortColumn>(array.size());
    for (int i = 0; i < array.size(); ++i) {
      JSONObject obj = array.getJSONObject(i);
      // Null array entries are skipped.
      if (obj != null) {
        sortCols.add(new SortColumn(obj.getString("col"), obj.getString("order")));
      }
    }
    clusterInfo.sortCols = sortCols;
  }
  return clusterInfo;
}
/**
 * Adds a partition; equivalent to {@code createPartition(spec, false)}.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @throws OdpsException
 */
public void createPartition(PartitionSpec spec) throws OdpsException {
  final boolean ifNotExists = false;
  createPartition(spec, ifNotExists);
}
/**
 * Adds a partition by running an ALTER TABLE ... ADD PARTITION statement.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @param ifNotExists
 *     when false, an existing partition of the same name is an error; when true, the
 *     call succeeds whether or not such a partition exists, even if its structure
 *     differs from the requested one. The metadata of an existing partition is not
 *     changed.
 * @throws OdpsException
 */
public void createPartition(PartitionSpec spec, boolean ifNotExists) throws OdpsException {
  StringBuilder sql = new StringBuilder();
  sql.append("ALTER TABLE ").append(getProject()).append(".").append(getName());
  sql.append(" ADD");
  if (ifNotExists) {
    sql.append(" IF NOT EXISTS");
  }
  sql.append(" PARTITION (");
  // key='value' pairs separated by commas, in the order the spec reports its keys.
  boolean first = true;
  for (String key : spec.keys().toArray(new String[0])) {
    if (!first) {
      sql.append(',');
    }
    sql.append(key).append("='").append(spec.get(key)).append("'");
    first = false;
  }
  sql.append(");");
  // new SQLTask
  runSQL("SQLAddPartitionTask", sql.toString());
}
/**
 * Deletes the given partition; equivalent to {@code deletePartition(spec, false)}.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @throws OdpsException
 */
public void deletePartition(PartitionSpec spec) throws OdpsException {
  final boolean ifExists = false;
  deletePartition(spec, ifExists);
}
/**
 * Deletes the given partition by running an ALTER TABLE ... DROP PARTITION statement.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @param ifExists
 *     when false, a missing partition is an error; when true, the call succeeds
 *     whether or not the partition exists.
 * @throws OdpsException
 */
public void deletePartition(PartitionSpec spec, boolean ifExists) throws OdpsException {
  StringBuilder sql = new StringBuilder();
  sql.append("ALTER TABLE ").append(getProject()).append(".").append(getName());
  sql.append(" DROP");
  if (ifExists) {
    sql.append(" IF EXISTS");
  }
  sql.append(" PARTITION(");
  // key='value' pairs separated by commas, in the order the spec reports its keys.
  boolean first = true;
  for (String key : spec.keys().toArray(new String[0])) {
    if (!first) {
      sql.append(',');
    }
    sql.append(key).append("='").append(spec.get(key)).append("'");
    first = false;
  }
  sql.append(");");
  // new SQLTask
  runSQL("SQLDropPartitionTask", sql.toString());
}
// for list partition response
/**
 * JAXB binding for the "Partitions" document returned when listing partitions.
 */
@XmlRootElement(name = "Partitions")
private static class ListPartitionsResponse {
  // Declared with its element type so the binding and the callers both see
  // PartitionModel entries instead of a raw list.
  @XmlElement(name = "Partition")
  private List<PartitionModel> partitions = new LinkedList<PartitionModel>();
  // Value sent back as the "marker" parameter of the next list request.
  @XmlElement(name = "Marker")
  private String marker;
  @XmlElement(name = "MaxItems")
  private Integer maxItems;
}
/**
 * Creates shards on the table by running an ALTER TABLE ... INTO n SHARDS statement.
 *
 * @param shardCount
 *     number of shards to create
 * @throws OdpsException
 */
public void createShards(long shardCount) throws OdpsException {
  StringBuilder sb = new StringBuilder();
  sb.append("ALTER TABLE ").append(getProject()).append(".").append(getName());
  // StringBuilder.append(long) always writes ASCII digits. String.format("%d") applies
  // the default locale's digit localisation, which does not belong in SQL text.
  sb.append(" INTO ").append(shardCount).append(" SHARDS;");
  String taskName = "SQLCreateShardsTask";
  runSQL(taskName, sb.toString());
}
/**
 * Returns an iterator over all partitions of the table; equivalent to
 * {@code getPartitionIterator(null)}.
 *
 * @return an iterator of {@link Partition}
 */
public Iterator<Partition> getPartitionIterator() {
  return getPartitionIterator(null);
}
/**
 * Returns an iterator over the partitions of the table, fetched page by page.
 *
 * @param spec
 *     the parent partition {@link PartitionSpec} to list under; null or empty lists
 *     without a partition filter
 * @return an iterator of {@link Partition}
 */
public Iterator<Partition> getPartitionIterator(final PartitionSpec spec) {
  return new ListIterator<Partition>() {
    // Request parameters; "marker" carries the paging position between calls.
    Map<String, String> params = new HashMap<String, String>();

    @Override
    protected List<Partition> list() {
      ArrayList<Partition> partitions = new ArrayList<Partition>();
      params.put("partitions", null);
      params.put("expectmarker", "true"); // since sprint-11
      if (spec != null && !spec.isEmpty()) {
        params.put("partition", spec.toString());
      }
      if (params.containsKey("marker")) {
        // A previous page was fetched. An empty marker ends the listing; a null
        // marker (no Marker value in the response) is treated the same way instead
        // of being dereferenced.
        String lastMarker = params.get("marker");
        if (lastMarker == null || lastMarker.length() == 0) {
          return null;
        }
      }
      String resource = ResourceBuilder.buildTableResource(model.projectName, getName());
      try {
        ListPartitionsResponse resp =
            client.request(ListPartitionsResponse.class, resource, "GET", params);
        for (PartitionModel partitionModel : resp.partitions) {
          Partition t = new Partition(partitionModel, model.projectName, getName(), client);
          partitions.add(t);
        }
        params.put("marker", resp.marker);
      } catch (OdpsException e) {
        // list() declares no checked exception, so the failure is rethrown unchecked.
        throw new RuntimeException(e.getMessage(), e);
      }
      return partitions;
    }
  };
}
/**
 * Returns all partitions of the table, collected from {@link #getPartitionIterator()}.
 *
 * @return the list of {@link Partition}
 */
public List<Partition> getPartitions() {
  List<Partition> parts = new ArrayList<Partition>();
  Iterator<Partition> it = getPartitionIterator();
  while (it.hasNext()) {
    parts.add(it.next());
  }
  return parts;
}
/**
 * Returns a handle for the given partition. No request is made here.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @return the {@link Partition} handle
 */
public Partition getPartition(PartitionSpec spec) {
  final String project = model.projectName;
  final String table = getName();
  return new Partition(spec, project, table, client);
}
/**
 * Tells whether the given partition exists by reloading it.
 *
 * @param spec
 *     the partition definition {@link PartitionSpec}
 * @return true if the partition exists, false if reloading reports
 *         {@link NoSuchObjectException}
 * @throws OdpsException
 */
public boolean hasPartition(PartitionSpec spec) throws OdpsException {
  boolean exists = true;
  try {
    getPartition(spec).reload();
  } catch (NoSuchObjectException notFound) {
    // The reload reported that the partition does not exist.
    exists = false;
  }
  return exists;
}
/**
 * Deletes the table data by running a TRUNCATE TABLE statement.
 *
 * @throws OdpsException
 */
public void truncate() throws OdpsException {
  final String sql = "TRUNCATE TABLE " + getProject() + "." + getName() + ";";
  runSQL("SQLTruncateTask", sql);
}
/**
 * Tells whether this is a partitioned table. A virtual view is never reported as
 * partitioned.
 *
 * @return true if the schema has at least one partition column
 * @throws OdpsException
 */
public boolean isPartitioned() throws OdpsException {
  return !isVirtualView() && getSchema().getPartitionColumns().size() > 0;
}
/**
 * Runs a SQL statement as a named task and waits for it to succeed.
 *
 * @param taskName name given to the SQL task
 * @param query    the SQL text to run
 * @throws OdpsException propagated from creating the instance or waiting for it
 */
private void runSQL(String taskName, String query) throws OdpsException {
  final SQLTask sqlTask = new SQLTask();
  sqlTask.setName(taskName);
  sqlTask.setQuery(query);
  odps.instances().create(sqlTask).waitForSuccess();
}
/* private */
/**
 * Builds a {@link Column} from one JSON column description.
 *
 * Keys read: "name", "type", "comment" and the optional "label".
 *
 * @param node the JSON object describing the column
 * @return the parsed column; its label is null when "label" is absent, null or empty
 */
private Column parseColumn(JSONObject node) {
  String name = node.getString("name");
  // Upper-case with a fixed locale: the default-locale variant changes letters such as
  // 'i' under some locales (e.g. Turkish), which would corrupt the type name.
  String typeString = node.getString("type").toUpperCase(Locale.ROOT);
  TypeInfo typeInfo = TypeInfoParser.getTypeInfoFromTypeString(typeString);
  String comment = node.getString("comment");
  // A single lookup covers the absent, null and empty cases without dereferencing null.
  String label = node.getString("label");
  if (label != null && label.isEmpty()) {
    label = null;
  }
  return new Column(name, typeInfo, comment, label);
}
/**
 * Fetches the "extended" table description once and merges it into the current model.
 * Later calls return immediately.
 *
 * @throws ReloadException if the request fails
 */
private void lazyLoadExtendInfo() {
  if (this.isExtendInfoLoaded) {
    return;
  }
  Map<String, String> params = new LinkedHashMap<String, String>();
  params.put("extended", null);
  String resource = ResourceBuilder.buildTableResource(model.projectName, model.name);
  TableModel response;
  try {
    response = client.request(TableModel.class, resource, "GET", params);
  } catch (OdpsException e) {
    throw new ReloadException(e.getMessage(), e);
  }
  // Same guard as reload(TableModel): a response without a schema element has nothing
  // to merge and must not be dereferenced.
  if (response != null && response.schema != null) {
    // Called for its side effects on the model; the returned schema is not used.
    loadSchemaFromJson(response.schema.content);
  }
  this.isExtendInfoLoaded = true;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy