// com.alibaba.ververica.connectors.odps.util.PartitionConditionParser Maven / Gradle / Ivy
package com.alibaba.ververica.connectors.odps.util;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.api.TableException;
import com.aliyun.odps.Partition;
import com.aliyun.odps.PartitionSpec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Parses the partition parameter value, which contains one or more conditions.
 * Four types of conditions are supported:
 * 1. A condition containing 'max_pt()': it selects the max partition from the given partitions.
 * Notice: at most two partition levels are supported.
 * example 1: partition='max_pt()': get the max partition
 * example 2: partition='pt=max_pt(),ds=20180710' or `partition`='ds=20180710,pt=max_pt()':
 * get the max partition at pt which satisfies ds=20180710 at the same time
 * 2. A condition containing 'max_pt_with_done()': it selects the max partition which is accompanied
 * by a .done flag partition.
 * Notice: at most two partition levels are supported.
 * example 1: partition='max_pt_with_done()': get the max partition which is accompanied by a .done flag partition
 * example 2: partition='pt=max_pt_with_done(),ds=20180710' or `partition`='ds=20180710,pt=max_pt_with_done()':
 * get the max partition at pt which is accompanied by a .done flag partition and satisfies ds=20180710
 * at the same time
 * 3. A condition starting with 'regex:': it selects the partitions which match the regex pattern.
 * example: partition='regex:ds=2017030*'
 * 4. A normal string which contains none of 'max_pt()', 'max_pt_with_done()' or 'regex:';
 * it is matched in the same way as a regex condition without the 'regex:' prefix.
 */
public class PartitionConditionParser {

    private static final Logger LOGGER = LoggerFactory.getLogger(PartitionConditionParser.class);

    private static final String MAX_PT_KEYWORD = "max_pt()";
    private static final String MAX_PT_WITH_DONE_KEYWORD = "max_pt_with_done()";
    private static final String REGEX_PT_KEYWORD = "regex:";

    /**
     * Selects the partitions which satisfy at least one of the given conditions.
     *
     * <p>Each distinct condition is evaluated independently against the full partition list:
     * a condition containing {@code max_pt()} or {@code max_pt_with_done()} marks at most one
     * partition; any other condition is compiled as a regular expression (after stripping an
     * optional {@code regex:} prefix and expanding every {@code *}) and marks every non-.done
     * partition whose spec string matches it entirely.
     *
     * @param partitions all given partitions
     * @param conditions all conditions to filter the given partition list; must not be null
     * @return the partitions, in their original order, which satisfy one or more conditions;
     *         the input list itself is returned unchanged when it is null or empty
     * @throws TableException if a max-partition condition is malformed
     */
    public static List<Partition> filter(
            List<Partition> partitions,
            List<String> conditions) {
        if (partitions == null || partitions.isEmpty()) {
            return partitions;
        }

        Set<String> distinctConditions = new HashSet<>(conditions);
        // One flag per input partition, so the result keeps the input order and has no duplicates
        // even when several conditions select the same partition.
        boolean[] matchedFlags = new boolean[partitions.size()];

        // Filter partitions based on conditions one by one.
        for (String condition : distinctConditions) {
            if (condition.contains(MAX_PT_KEYWORD)) {
                Tuple2<Partition, Integer> maxPartition = getMaxPartition(partitions, condition, false);
                if (maxPartition != null) {
                    matchedFlags[maxPartition.f1] = true;
                }
            } else if (condition.contains(MAX_PT_WITH_DONE_KEYWORD)) {
                Tuple2<Partition, Integer> maxPtWithDone = getMaxPartition(partitions, condition, true);
                if (maxPtWithDone != null) {
                    matchedFlags[maxPtWithDone.f1] = true;
                }
            } else {
                String regexCondition = condition;
                if (regexCondition.startsWith(REGEX_PT_KEYWORD)) {
                    // compatible with old version
                    regexCondition = regexCondition.substring(REGEX_PT_KEYWORD.length());
                }
                // Every '*' in the condition is expanded to "any sequence of characters".
                regexCondition = regexCondition.replaceAll("\\*", "([\\\\w\\\\W]*)");
                Pattern pattern = Pattern.compile(regexCondition);
                for (int index = 0; index < partitions.size(); index++) {
                    Partition p = partitions.get(index);
                    // ignore .done partitions
                    if (!OdpsUtils.isDoneFlagPartition(p)) {
                        String partStr = OdpsUtils.partitionSpecToString(p.getPartitionSpec());
                        if (pattern.matcher(partStr).matches()) {
                            matchedFlags[index] = true;
                        }
                    }
                }
            }
        }

        List<Partition> matchedPartitions = new ArrayList<>();
        for (int index = 0; index < partitions.size(); index++) {
            if (matchedFlags[index]) {
                matchedPartitions.add(partitions.get(index));
            }
        }
        return matchedPartitions;
    }

    /**
     * Finds the max partition which satisfies the given max-partition condition.
     *
     * @param partitions all given partitions, including .done flag partitions
     * @param maxPtCondition a condition containing {@code max_pt()} or {@code max_pt_with_done()}
     * @param withDoneFlag whether the selected partition must be accompanied by a .done flag
     *        partition, i.e. a flag partition whose spec string equals the partition's spec string
     *        followed by {@code OdpsUtils.DONE_FLAG}
     * @return the selected partition paired with its index in {@code partitions}, or null if no
     *         partition qualifies
     * @throws TableException if the condition is malformed
     */
    private static Tuple2<Partition, Integer> getMaxPartition(
            List<Partition> partitions,
            String maxPtCondition,
            boolean withDoneFlag) {
        // Validate the condition first so that a malformed one fails before any sorting work.
        MaxPartParseResult parseResult = validateAndParseMaxPartStr(maxPtCondition);
        String specifiedPartSpec = parseResult.specifiedPartSpec;
        String specifiedPartColumn = parseResult.specifiedPartColumn;

        // Split the input into data partitions (kept with their original index) and the spec
        // strings of the .done flag partitions.
        List<Tuple2<Partition, Integer>> partitionsWithIndice = new ArrayList<>();
        Set<String> donePartStrs = new HashSet<>();
        for (int index = 0; index < partitions.size(); index++) {
            Partition p = partitions.get(index);
            if (OdpsUtils.isDoneFlagPartition(p)) {
                donePartStrs.add(OdpsUtils.partitionSpecToString(p.getPartitionSpec()));
            } else {
                partitionsWithIndice.add(new Tuple2<>(p, index));
            }
        }

        // sorts the partitions into descending order
        Collections.sort(partitionsWithIndice, (p1, p2) -> {
            PartitionSpec spec1 = p1.f0.getPartitionSpec();
            PartitionSpec spec2 = p2.f0.getPartitionSpec();
            for (String key : spec1.keys()) {
                // Operands are swapped on purpose: larger values sort first.
                int compare = spec2.get(key).compareTo(spec1.get(key));
                if (compare != 0) {
                    return compare;
                }
            }
            return 0;
        });

        // The list is in descending order, so the first qualifying partition is the max one.
        for (Tuple2<Partition, Integer> candidate : partitionsWithIndice) {
            PartitionSpec spec = candidate.f0.getPartitionSpec();
            String specStr = OdpsUtils.partitionSpecToString(spec);

            boolean satisfiesSpecifiedPart = specifiedPartSpec == null
                    || (specStr.contains(specifiedPartSpec) && spec.get(specifiedPartColumn) != null);
            if (!satisfiesSpecifiedPart) {
                continue;
            }
            if (!withDoneFlag || donePartStrs.contains(specStr + OdpsUtils.DONE_FLAG)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Validates a max-partition condition and splits it into its parts.
     *
     * <p>Accepted forms, where KEYWORD is {@code max_pt()} or {@code max_pt_with_done()}:
     * <ul>
     *   <li>{@code KEYWORD}: no fixed partition spec and no column;</li>
     *   <li>{@code column=KEYWORD}: no fixed partition spec, the given column;</li>
     *   <li>{@code column=KEYWORD,other} or {@code other,column=KEYWORD}: {@code other} is the
     *       fixed partition spec, {@code column} is the max-partition column.</li>
     * </ul>
     *
     * @param maxPtCondition the condition to parse
     * @return the parsed result
     * @throws TableException if the condition matches none of the accepted forms
     */
    private static MaxPartParseResult validateAndParseMaxPartStr(String maxPtCondition) {
        String[] subParts = maxPtCondition.split(",");
        if (subParts.length == 2) {
            if (subParts[0].contains(MAX_PT_KEYWORD) || subParts[0].contains(MAX_PT_WITH_DONE_KEYWORD)) {
                String maxPtColumn = parseSpecifiedMaxPartColumn(subParts[0]);
                if (maxPtColumn != null) {
                    return new MaxPartParseResult(subParts[1], maxPtColumn);
                }
            } else if (subParts[1].contains(MAX_PT_KEYWORD) || subParts[1].contains(MAX_PT_WITH_DONE_KEYWORD)) {
                String maxPtColumn = parseSpecifiedMaxPartColumn(subParts[1]);
                if (maxPtColumn != null) {
                    return new MaxPartParseResult(subParts[0], maxPtColumn);
                }
            }
        } else if (subParts.length == 1) {
            if (maxPtCondition.equals(MAX_PT_KEYWORD) || maxPtCondition.equals(MAX_PT_WITH_DONE_KEYWORD)) {
                return new MaxPartParseResult(null, null);
            } else {
                String maxPtColumn = parseSpecifiedMaxPartColumn(maxPtCondition);
                if (maxPtColumn != null) {
                    return new MaxPartParseResult(null, maxPtColumn);
                }
            }
        }
        LOGGER.error("Partition condition format is invalid! Input partition is {}", maxPtCondition);
        throw new TableException("Partition specific format is invalid!");
    }

    /**
     * Extracts the column name from a {@code column=max_pt()} or
     * {@code column=max_pt_with_done()} expression.
     *
     * @param maxPartSubStr the expression to parse
     * @return the column name, or null if the expression does not have that exact form
     */
    private static String parseSpecifiedMaxPartColumn(String maxPartSubStr) {
        String[] kv = maxPartSubStr.split("=");
        if (kv.length == 2 && (kv[1].equals(MAX_PT_KEYWORD) || kv[1].equals(MAX_PT_WITH_DONE_KEYWORD))) {
            return kv[0];
        } else {
            return null;
        }
    }

    /** Immutable holder for the parts of a parsed max-partition condition. */
    private static class MaxPartParseResult {
        /** The fixed part of the condition (e.g. {@code ds=20180710}), or null if there is none. */
        private final String specifiedPartSpec;
        /** The column the max-partition keyword is assigned to, or null if none is named. */
        private final String specifiedPartColumn;

        private MaxPartParseResult(String specifiedPartSpec, String specifiedPartColumn) {
            this.specifiedPartSpec = specifiedPartSpec;
            this.specifiedPartColumn = specifiedPartColumn;
        }
    }

    /** Utility class; not meant to be instantiated. */
    private PartitionConditionParser() {
    }
}