// com.qcloud.dlc.common.PartConverters (Maven / Gradle / Ivy)
package com.qcloud.dlc.common;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.convert.Convert;
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.StrUtil;
import com.tencentcloudapi.dlc.v20210125.models.DMSColumn;
import com.tencentcloudapi.dlc.v20210125.models.DMSColumnOrder;
import com.tencentcloudapi.dlc.v20210125.models.DMSColumnStatistic;
import com.tencentcloudapi.dlc.v20210125.models.DMSPartition;
import com.tencentcloudapi.dlc.v20210125.models.DMSPartitionColumnStatisticInfo;
import com.tencentcloudapi.dlc.v20210125.models.DMSSds;
import com.tencentcloudapi.dlc.v20210125.models.KVPair;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
/**
 * Static converters between Hive metastore Thrift objects and the DLC SDK models imported by
 * this file: parameter maps, partitions (including their storage descriptors) and column
 * statistics.
 */
public class PartConverters {

    /**
     * Converts a string map into an array of {@link KVPair}, one pair per map entry.
     *
     * @param map the entries to convert; may be {@code null} or empty
     * @return one pair per entry, in the map's iteration order, or {@code null} when the map is
     *         {@code null} or empty
     */
    public static KVPair[] convertMapToKVPair(Map<String, String> map) {
        if (map == null || map.isEmpty()) {
            return null;
        }
        KVPair[] pairs = new KVPair[map.size()];
        int index = 0;
        for (Entry<String, String> entry : map.entrySet()) {
            KVPair pair = new KVPair();
            pair.setKey(entry.getKey());
            pair.setValue(entry.getValue());
            pairs[index++] = pair;
        }
        return pairs;
    }

    /**
     * Converts an array of {@link KVPair} into a map keyed by each pair's key.
     *
     * @param kvPairs the pairs to convert; may be {@code null} or empty
     * @return a new mutable map, empty when {@code kvPairs} is {@code null} or empty; when a key
     *         occurs more than once the last occurrence wins
     */
    public static Map<String, String> convertKVPairToMap(KVPair[] kvPairs) {
        Map<String, String> map = new HashMap<>();
        if (ArrayUtil.isNotEmpty(kvPairs)) {
            for (KVPair kvPair : kvPairs) {
                map.put(kvPair.getKey(), kvPair.getValue());
            }
        }
        return map;
    }

    /**
     * Converts a Hive metastore {@link Partition} into a {@link DMSPartition}.
     *
     * <p>Parameters, values and timestamps are only set on the result when the source carries
     * a non-empty (respectively positive) value.
     *
     * @param partition the partition to convert; may be {@code null}
     * @return the converted partition, or {@code null} when {@code partition} is {@code null}
     */
    public static DMSPartition convertPartitionToDMSPartition(Partition partition) {
        if (null == partition) {
            return null;
        }
        DMSPartition dmsPartition = new DMSPartition();
        dmsPartition.setTableName(partition.getTableName());
        dmsPartition.setDatabaseName(partition.getDbName());
        KVPair[] pairs = convertMapToKVPair(partition.getParameters());
        if (ArrayUtil.isNotEmpty(pairs)) {
            dmsPartition.setParams(pairs);
        }
        if (partition.getCreateTime() > 0) {
            dmsPartition.setCreateTime(formatEpochSeconds(partition.getCreateTime()));
        }
        if (partition.getLastAccessTime() > 0) {
            // Format the last-access time itself, not the creation time.
            dmsPartition.setLastAccessTime(formatEpochSeconds(partition.getLastAccessTime()));
        }
        if (CollUtil.isNotEmpty(partition.getValues())) {
            dmsPartition.setValues(partition.getValues().toArray(new String[0]));
        }
        StorageDescriptor sd = partition.getSd();
        if (null != sd) {
            dmsPartition.setSds(toDmsSds(sd));
        }
        return dmsPartition;
    }

    /**
     * Converts a {@link DMSPartition} into a Hive metastore {@link Partition}.
     *
     * <p>Parameters, values and timestamps are only set on the result when the source carries
     * a non-empty (respectively non-blank) value.
     *
     * @param dmsPartition the partition to convert; may be {@code null}
     * @return the converted partition, or {@code null} when {@code dmsPartition} is {@code null}
     */
    public static Partition convertDMSPartitionToPartition(DMSPartition dmsPartition) {
        if (null == dmsPartition) {
            return null;
        }
        Partition partition = new Partition();
        partition.setTableName(dmsPartition.getTableName());
        partition.setDbName(dmsPartition.getDatabaseName());
        KVPair[] pairs = dmsPartition.getParams();
        if (ArrayUtil.isNotEmpty(pairs)) {
            partition.setParameters(convertKVPairToMap(pairs));
        }
        String createTime = dmsPartition.getCreateTime();
        if (StrUtil.isNotBlank(createTime)) {
            partition.setCreateTime(parseEpochSeconds(createTime));
        }
        String lastAccessTime = dmsPartition.getLastAccessTime();
        if (StrUtil.isNotBlank(lastAccessTime)) {
            partition.setLastAccessTime(parseEpochSeconds(lastAccessTime));
        }
        if (ArrayUtil.isNotEmpty(dmsPartition.getValues())) {
            partition.setValues(Arrays.asList(dmsPartition.getValues()));
        }
        DMSSds sds = dmsPartition.getSds();
        if (null != sds) {
            partition.setSd(toStorageDescriptor(sds));
        }
        return partition;
    }

    /**
     * Copies the statistics carried by a Hive {@link ColumnStatisticsObj} into a
     * {@link DMSColumnStatistic}.
     *
     * <p>Only the first statistics variant that is set (boolean, long, double, decimal, date,
     * string, binary - checked in that order) is copied. Low/high bounds are copied only when
     * they are set on the source, so an unset bound is never reported as zero and never
     * dereferenced.
     *
     * @param obj the Hive column statistics; must not be {@code null}
     * @return a new statistic holding the column name, column type and the copied values; only
     *         name and type are populated when {@code obj} has no statistics data
     */
    public static DMSColumnStatistic toColStatsRequest(ColumnStatisticsObj obj) {
        DMSColumnStatistic request = new DMSColumnStatistic();
        request.setColumnName(obj.getColName());
        request.setColumnType(obj.getColType());
        ColumnStatisticsData data = obj.getStatsData();
        if (data == null) {
            return request;
        }
        if (data.isSetBooleanStats()) {
            BooleanColumnStatsData boolStats = data.getBooleanStats();
            request.setNumTrues(boolStats.getNumTrues());
            request.setNumFalse(boolStats.getNumFalses());
            request.setNumNulls(boolStats.getNumNulls());
        } else if (data.isSetLongStats()) {
            LongColumnStatsData longStats = data.getLongStats();
            request.setNumNulls(longStats.getNumNulls());
            request.setNumDistinct(longStats.getNumDVs());
            if (longStats.isSetLowValue()) {
                request.setLongLowValue(longStats.getLowValue());
            }
            if (longStats.isSetHighValue()) {
                request.setLongHighValue(longStats.getHighValue());
            }
        } else if (data.isSetDoubleStats()) {
            DoubleColumnStatsData doubleStats = data.getDoubleStats();
            request.setNumNulls(doubleStats.getNumNulls());
            request.setNumDistinct(doubleStats.getNumDVs());
            if (doubleStats.isSetLowValue()) {
                request.setDoubleLowValue(Convert.toFloat(doubleStats.getLowValue()));
            }
            if (doubleStats.isSetHighValue()) {
                request.setDoubleHighValue(Convert.toFloat(doubleStats.getHighValue()));
            }
        } else if (data.isSetDecimalStats()) {
            DecimalColumnStatsData decimalStats = data.getDecimalStats();
            request.setNumNulls(decimalStats.getNumNulls());
            request.setNumDistinct(decimalStats.getNumDVs());
            if (decimalStats.isSetLowValue()) {
                request.setBigDecimalLowValue(toDecimalString(decimalStats.getLowValue()));
            }
            if (decimalStats.isSetHighValue()) {
                request.setBigDecimalHighValue(toDecimalString(decimalStats.getHighValue()));
            }
        } else if (data.isSetDateStats()) {
            DateColumnStatsData dateStats = data.getDateStats();
            request.setNumNulls(dateStats.getNumNulls());
            request.setNumDistinct(dateStats.getNumDVs());
            // Date bounds travel in the long low/high fields as days since epoch.
            if (dateStats.isSetLowValue()) {
                request.setLongLowValue(dateStats.getLowValue().getDaysSinceEpoch());
            }
            if (dateStats.isSetHighValue()) {
                request.setLongHighValue(dateStats.getHighValue().getDaysSinceEpoch());
            }
        } else if (data.isSetStringStats()) {
            StringColumnStatsData stringStats = data.getStringStats();
            request.setNumNulls(stringStats.getNumNulls());
            request.setNumDistinct(stringStats.getNumDVs());
            request.setMaxColLen(stringStats.getMaxColLen());
            request.setAvgColLen(Convert.toFloat(stringStats.getAvgColLen()));
        } else if (data.isSetBinaryStats()) {
            BinaryColumnStatsData binaryStats = data.getBinaryStats();
            request.setNumNulls(binaryStats.getNumNulls());
            request.setMaxColLen(binaryStats.getMaxColLen());
            request.setAvgColLen(Convert.toFloat(binaryStats.getAvgColLen()));
        }
        return request;
    }

    /**
     * Builds a Hive {@link ColumnStatisticsObj} from a {@link DMSColumnStatistic}, choosing the
     * statistics variant from the column type name.
     *
     * <p>Low/high bounds are copied only when they are non-null on the source.
     *
     * @param columnStatistic the DMS statistic; must not be {@code null} and must carry a
     *                        non-null column type
     * @return the Hive statistics object; its statistics data is {@code null} when the column
     *         type matches none of the type checks in this class
     */
    public static ColumnStatisticsObj toColumnStatisticsObj(DMSColumnStatistic columnStatistic) {
        String columnName = columnStatistic.getColumnName();
        String columnType = columnStatistic.getColumnType();
        ColumnStatisticsData statisticsData = null;
        // ref StatObjectConverter.java
        if (isBooleanType(columnType)) {
            BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
            boolStats.setNumFalses(columnStatistic.getNumFalse());
            boolStats.setNumTrues(columnStatistic.getNumTrues());
            boolStats.setNumNulls(columnStatistic.getNumNulls());
            statisticsData = ColumnStatisticsData.booleanStats(boolStats);
        } else if (isStringType(columnType)) {
            StringColumnStatsData stringStats = new StringColumnStatsData();
            stringStats.setNumNulls(columnStatistic.getNumNulls());
            stringStats.setAvgColLen(columnStatistic.getAvgColLen());
            stringStats.setMaxColLen(columnStatistic.getMaxColLen());
            stringStats.setNumDVs(columnStatistic.getNumDistinct());
            statisticsData = ColumnStatisticsData.stringStats(stringStats);
        } else if (isBinaryType(columnType)) {
            BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
            binaryStats.setNumNulls(columnStatistic.getNumNulls());
            binaryStats.setAvgColLen(columnStatistic.getAvgColLen());
            binaryStats.setMaxColLen(columnStatistic.getMaxColLen());
            statisticsData = ColumnStatisticsData.binaryStats(binaryStats);
        } else if (isLongType(columnType)) {
            LongColumnStatsData longStats = new LongColumnStatsData();
            longStats.setNumNulls(columnStatistic.getNumNulls());
            Long longHighValue = columnStatistic.getLongHighValue();
            if (longHighValue != null) {
                longStats.setHighValue(longHighValue);
            }
            Long longLowValue = columnStatistic.getLongLowValue();
            if (longLowValue != null) {
                longStats.setLowValue(longLowValue);
            }
            longStats.setNumDVs(columnStatistic.getNumDistinct());
            statisticsData = ColumnStatisticsData.longStats(longStats);
        } else if (isDoubleType(columnType)) {
            DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
            doubleStats.setNumNulls(columnStatistic.getNumNulls());
            Float doubleHighValue = columnStatistic.getDoubleHighValue();
            if (doubleHighValue != null) {
                doubleStats.setHighValue(Convert.toDouble(doubleHighValue));
            }
            Float doubleLowValue = columnStatistic.getDoubleLowValue();
            if (doubleLowValue != null) {
                doubleStats.setLowValue(Convert.toDouble(doubleLowValue));
            }
            doubleStats.setNumDVs(columnStatistic.getNumDistinct());
            statisticsData = ColumnStatisticsData.doubleStats(doubleStats);
        } else if (isDecimalType(columnType)) {
            DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
            decimalStats.setNumNulls(columnStatistic.getNumNulls());
            String decimalHighValue = columnStatistic.getBigDecimalHighValue();
            if (decimalHighValue != null) {
                decimalStats.setHighValue(toDecimal(decimalHighValue));
            }
            String decimalLowValue = columnStatistic.getBigDecimalLowValue();
            if (decimalLowValue != null) {
                decimalStats.setLowValue(toDecimal(decimalLowValue));
            }
            decimalStats.setNumDVs(columnStatistic.getNumDistinct());
            statisticsData = ColumnStatisticsData.decimalStats(decimalStats);
        } else if (isDateType(columnType)) {
            DateColumnStatsData dateStats = new DateColumnStatsData();
            dateStats.setNumNulls(columnStatistic.getNumNulls());
            // Same null guards as the long branch: a missing bound must not be unboxed.
            Long dateHighValue = columnStatistic.getLongHighValue();
            if (dateHighValue != null) {
                dateStats.setHighValue(
                        new org.apache.hadoop.hive.metastore.api.Date(dateHighValue));
            }
            Long dateLowValue = columnStatistic.getLongLowValue();
            if (dateLowValue != null) {
                dateStats.setLowValue(
                        new org.apache.hadoop.hive.metastore.api.Date(dateLowValue));
            }
            dateStats.setNumDVs(columnStatistic.getNumDistinct());
            statisticsData = ColumnStatisticsData.dateStats(dateStats);
        }
        return new ColumnStatisticsObj(columnName, columnType, statisticsData);
    }

    /**
     * Formats an epoch-seconds timestamp with {@link DatePattern#UTC_WITH_ZONE_OFFSET_PATTERN}
     * and inserts a ':' before the last two characters of the formatted text.
     * NOTE(review): presumably this turns an offset such as +0800 into +08:00 - confirm against
     * the pattern definition.
     */
    private static String formatEpochSeconds(int epochSeconds) {
        // Multiply as long: an int product overflows for present-day epoch seconds.
        String formatted = DateUtil.format(new Date(epochSeconds * 1000L),
                DatePattern.UTC_WITH_ZONE_OFFSET_PATTERN);
        int split = formatted.length() - 2;
        return formatted.substring(0, split) + ":" + formatted.substring(split);
    }

    /**
     * Inverse of {@link #formatEpochSeconds(int)}: drops the character in front of the last two
     * characters, parses the rest with {@link DatePattern#UTC_WITH_ZONE_OFFSET_PATTERN} and
     * returns the instant as whole epoch seconds.
     */
    private static int parseEpochSeconds(String timestamp) {
        String pre = StrUtil.subWithLength(timestamp, 0, timestamp.length() - 3);
        String end = StrUtil.subWithLength(timestamp, timestamp.length() - 2, 2);
        // getTime() is epoch milliseconds; millisecond() is only the 0-999 field of the second.
        long epochMillis = DateUtil.parse(pre + end, DatePattern.UTC_WITH_ZONE_OFFSET_PATTERN)
                .getTime();
        return (int) (epochMillis / 1000);
    }

    /** Converts a Hive storage descriptor (non-null) into its {@link DMSSds} form. */
    private static DMSSds toDmsSds(StorageDescriptor sd) {
        DMSSds dmsSds = new DMSSds();
        dmsSds.setLocation(sd.getLocation());
        dmsSds.setCompressed(sd.isCompressed());
        dmsSds.setInputFormat(sd.getInputFormat());
        dmsSds.setOutputFormat(sd.getOutputFormat());
        dmsSds.setNumBuckets(Convert.toLong(sd.getNumBuckets()));
        dmsSds.setStoredAsSubDirectories(sd.isStoredAsSubDirectories());
        if (CollUtil.isNotEmpty(sd.getBucketCols())) {
            dmsSds.setBucketCols(sd.getBucketCols().toArray(new String[0]));
        }
        SerDeInfo serdeInfo = sd.getSerdeInfo();
        if (null != serdeInfo) {
            dmsSds.setSerdeLib(serdeInfo.getSerializationLib());
            dmsSds.setSerdeName(serdeInfo.getName());
            KVPair[] serPairs = convertMapToKVPair(serdeInfo.getParameters());
            if (ArrayUtil.isNotEmpty(serPairs)) {
                dmsSds.setSerdeParams(serPairs);
            }
        }
        KVPair[] sdPairs = convertMapToKVPair(sd.getParameters());
        if (ArrayUtil.isNotEmpty(sdPairs)) {
            dmsSds.setParams(sdPairs);
        }
        List<FieldSchema> cols = sd.getCols();
        if (CollUtil.isNotEmpty(cols)) {
            DMSColumn[] dmsColumns = new DMSColumn[cols.size()];
            int index = 0;
            for (FieldSchema col : cols) {
                DMSColumn dmsColumn = new DMSColumn();
                dmsColumn.setName(col.getName());
                dmsColumn.setType(col.getType());
                dmsColumns[index++] = dmsColumn;
            }
            dmsSds.setCols(dmsColumns);
        }
        List<Order> sortCols = sd.getSortCols();
        // Guard on the sort columns themselves, independent of the regular columns.
        if (CollUtil.isNotEmpty(sortCols)) {
            DMSColumnOrder[] dmsColumnOrders = new DMSColumnOrder[sortCols.size()];
            int index = 0;
            for (Order sortCol : sortCols) {
                DMSColumnOrder dmsColumnOrder = new DMSColumnOrder();
                dmsColumnOrder.setCol(sortCol.getCol());
                dmsColumnOrder.setOrder(Convert.toLong(sortCol.getOrder()));
                dmsColumnOrders[index++] = dmsColumnOrder;
            }
            dmsSds.setSortColumns(dmsColumnOrders);
        }
        return dmsSds;
    }

    /**
     * Converts a {@link DMSSds} (non-null) into a Hive storage descriptor. Boxed values that
     * are {@code null} on the source are left unset on the result instead of being unboxed.
     */
    private static StorageDescriptor toStorageDescriptor(DMSSds sds) {
        StorageDescriptor storageDescriptor = new StorageDescriptor();
        storageDescriptor.setLocation(sds.getLocation());
        if (sds.getCompressed() != null) {
            storageDescriptor.setCompressed(sds.getCompressed());
        }
        storageDescriptor.setInputFormat(sds.getInputFormat());
        storageDescriptor.setOutputFormat(sds.getOutputFormat());
        Integer numBuckets = Convert.toInt(sds.getNumBuckets());
        if (numBuckets != null) {
            storageDescriptor.setNumBuckets(numBuckets);
        }
        if (sds.getStoredAsSubDirectories() != null) {
            storageDescriptor.setStoredAsSubDirectories(sds.getStoredAsSubDirectories());
        }
        String[] bucketCols = sds.getBucketCols();
        if (ArrayUtil.isNotEmpty(bucketCols)) {
            storageDescriptor.setBucketCols(Arrays.asList(bucketCols));
        }
        SerDeInfo serdeInfo = new SerDeInfo();
        serdeInfo.setSerializationLib(sds.getSerdeLib());
        serdeInfo.setName(sds.getSerdeName());
        serdeInfo.setParameters(convertKVPairToMap(sds.getSerdeParams()));
        // Attach the serde info; without this call the values set above are lost.
        storageDescriptor.setSerdeInfo(serdeInfo);
        storageDescriptor.setParameters(convertKVPairToMap(sds.getParams()));
        DMSColumn[] sdsCols = sds.getCols();
        if (ArrayUtil.isNotEmpty(sdsCols)) {
            List<FieldSchema> cols = new LinkedList<>();
            for (DMSColumn dmsColumn : sdsCols) {
                FieldSchema fieldSchema = new FieldSchema();
                fieldSchema.setName(dmsColumn.getName());
                fieldSchema.setType(dmsColumn.getType());
                cols.add(fieldSchema);
            }
            storageDescriptor.setCols(cols);
        }
        DMSColumnOrder[] sortColumns = sds.getSortColumns();
        if (ArrayUtil.isNotEmpty(sortColumns)) {
            List<Order> sortColList = new LinkedList<>();
            for (DMSColumnOrder sortColumn : sortColumns) {
                Order order = new Order();
                order.setCol(sortColumn.getCol());
                Integer direction = Convert.toInt(sortColumn.getOrder());
                if (direction != null) {
                    order.setOrder(direction);
                }
                sortColList.add(order);
            }
            storageDescriptor.setSortCols(sortColList);
        }
        return storageDescriptor;
    }

    /** Returns whether {@code value} begins with {@code prefix}, ignoring case. */
    private static boolean startsWithIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Matches the type name "boolean", ignoring case. */
    private static boolean isBooleanType(String columnType) {
        return columnType.equalsIgnoreCase("boolean");
    }

    /**
     * Matches "string" and any type name starting with "varchar" or "char", ignoring case
     * throughout so that all comparisons in this class treat case the same way.
     */
    private static boolean isStringType(String columnType) {
        return columnType.equalsIgnoreCase("string")
                || startsWithIgnoreCase(columnType, "varchar")
                || startsWithIgnoreCase(columnType, "char");
    }

    /** Matches the type name "binary", ignoring case. */
    private static boolean isBinaryType(String columnType) {
        return columnType.equalsIgnoreCase("binary");
    }

    /** Matches "bigint", "int", "smallint", "tinyint" and "timestamp", ignoring case. */
    private static boolean isLongType(String columnType) {
        return columnType.equalsIgnoreCase("bigint")
                || columnType.equalsIgnoreCase("int")
                || columnType.equalsIgnoreCase("smallint")
                || columnType.equalsIgnoreCase("tinyint")
                || columnType.equalsIgnoreCase("timestamp");
    }

    /** Matches "double" and "float", ignoring case. */
    private static boolean isDoubleType(String columnType) {
        return columnType.equalsIgnoreCase("double")
                || columnType.equalsIgnoreCase("float");
    }

    /**
     * Matches "bigdecimal" and any type name starting with "decimal", ignoring case, so that a
     * parameterized name such as "decimal(10,2)" is recognized as well.
     */
    private static boolean isDecimalType(String columnType) {
        return startsWithIgnoreCase(columnType, "decimal")
                || columnType.equalsIgnoreCase("bigdecimal");
    }

    /** Matches the type name "date", ignoring case. */
    private static boolean isDateType(String columnType) {
        return columnType.equalsIgnoreCase("date");
    }

    /** Renders a Thrift {@link Decimal} (unscaled bytes plus scale) as a decimal string. */
    private static String toDecimalString(Decimal d) {
        return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString();
    }

    /** Parses a decimal string into a Thrift {@link Decimal} (unscaled bytes plus scale). */
    private static Decimal toDecimal(String s) {
        BigDecimal d = new BigDecimal(s);
        return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale());
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy