All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.alibaba.ververica.connectors.odps.util.PartitionConditionParser Maven / Gradle / Ivy

There is a newer version: 1.17-vvr-8.0.8
Show newest version
package com.alibaba.ververica.connectors.odps.util;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.api.TableException;

import com.aliyun.odps.Partition;
import com.aliyun.odps.PartitionSpec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Parses the partition parameter value and selects the partitions it refers to. The value
 * contains one or more conditions; a partition is selected when at least one condition selects it.
 *
 * <p>Four kinds of condition are supported:
 *
 * <p>1. A condition containing 'max_pt()' selects the max partition among the given partitions.
 * Notice: at most two partition levels are supported.
 * example 1: partition='max_pt()': get max partition
 * example 2: partition='pt=max_pt(),ds=20180710' or partition='ds=20180710,pt=max_pt()':
 * get max partition at pt, which satisfies ds=20180710 at the same time
 *
 * <p>2. A condition containing 'max_pt_with_done()' selects the max partition which is accompanied
 * by a .done flag partition.
 * Notice: at most two partition levels are supported.
 * example 1: partition='max_pt_with_done()': get max partition which has a .done flag partition
 * example 2: partition='pt=max_pt_with_done(),ds=20180710' or
 * partition='ds=20180710,pt=max_pt_with_done()': get max partition at pt which has a .done flag
 * partition and satisfies ds=20180710 at the same time
 *
 * <p>3. A condition starting with 'regex:' selects every partition whose spec string matches the
 * pattern that follows the prefix. Each '*' in the pattern stands for "any sequence of characters".
 * example: partition='regex:ds=2017030*'
 *
 * <p>4. Any other string, i.e. one that contains neither 'max_pt()' nor 'max_pt_with_done()' and
 * does not start with 'regex:'. It is handled exactly like kind 3, just without the prefix.
 *
 * <p>"Max" means first in descending order of the partition values, compared as strings.
 */
public class PartitionConditionParser {

	private static final Logger LOGGER = LoggerFactory.getLogger(PartitionConditionParser.class);

	private static final String MAX_PT_KEYWORD = "max_pt()";

	private static final String MAX_PT_WITH_DONE_KEYWORD = "max_pt_with_done()";

	private static final String REGEX_PT_KEYWORD = "regex:";

	/**
	 * Selects the partitions which satisfy at least one of the given conditions.
	 *
	 * <p>Duplicate conditions are evaluated once. The result keeps the order of {@code partitions}
	 * and contains every selected partition exactly once, even when several conditions select it.
	 * .done flag partitions are never selected.
	 *
	 * @param partitions all given partitions
	 * @param conditions all conditions to filter the given partition list; must not be null
	 * @return {@code partitions} itself when it is null or empty, otherwise a new list holding the
	 *     partitions which satisfy one or more conditions
	 * @throws TableException if a max_pt()/max_pt_with_done() condition is malformed
	 */
	public static List<Partition> filter(
			List<Partition> partitions,
			List<String> conditions) {
		if (partitions == null || partitions.isEmpty()) {
			return partitions;
		}
		Set<String> distinctConditions = new HashSet<>(conditions);
		// matchedFlags[i] is set once any condition selects partitions.get(i)
		boolean[] matchedFlags = new boolean[partitions.size()];
		// Filter partitions based on conditions one by one.
		for (String condition : distinctConditions) {
			boolean plainMaxPt = condition.contains(MAX_PT_KEYWORD);
			if (plainMaxPt || condition.contains(MAX_PT_WITH_DONE_KEYWORD)) {
				// 'max_pt()' takes precedence: the done flag is only required when the condition
				// does not contain the plain keyword.
				Tuple2<Partition, Integer> maxPartition =
						getMaxPartition(partitions, condition, !plainMaxPt);
				if (maxPartition != null) {
					matchedFlags[maxPartition.f1] = true;
				}
			} else {
				Pattern pattern = compilePartitionPattern(condition);
				for (int index = 0; index < partitions.size(); index++) {
					Partition p = partitions.get(index);
					// ignore .done partitions
					if (!OdpsUtils.isDoneFlagPartition(p)) {
						String partStr = OdpsUtils.partitionSpecToString(p.getPartitionSpec());
						if (pattern.matcher(partStr).matches()) {
							matchedFlags[index] = true;
						}
					}
				}
			}
		}
		List<Partition> matchedPartitions = new ArrayList<>();
		for (int index = 0; index < partitions.size(); index++) {
			if (matchedFlags[index]) {
				matchedPartitions.add(partitions.get(index));
			}
		}
		return matchedPartitions;
	}

	/**
	 * Turns a regex-style or plain condition into the pattern a partition spec string has to match.
	 *
	 * @param condition the condition, optionally prefixed with 'regex:'
	 * @return the compiled pattern, with every '*' widened to "any sequence of characters"
	 */
	private static Pattern compilePartitionPattern(String condition) {
		String regexCondition = condition;
		if (regexCondition.startsWith(REGEX_PT_KEYWORD)) {
			// compatible with old version
			regexCondition = regexCondition.substring(REGEX_PT_KEYWORD.length());
		}
		// every '*' becomes the group ([\w\W]*), which matches any run of characters
		regexCondition = regexCondition.replaceAll("\\*", "([\\\\w\\\\W]*)");
		return Pattern.compile(regexCondition);
	}

	/**
	 * Finds the max partition which satisfies the given max_pt()/max_pt_with_done() condition.
	 *
	 * @param partitions all given partitions, including .done flag partitions
	 * @param maxPtCondition the condition to evaluate
	 * @param withDoneFlag whether the selected partition must be accompanied by a .done flag partition
	 * @return the selected partition paired with its index in {@code partitions}, or null if no
	 *     partition satisfies the condition
	 * @throws TableException if the condition is malformed
	 */
	private static Tuple2<Partition, Integer> getMaxPartition(
			List<Partition> partitions, String maxPtCondition, boolean withDoneFlag) {
		// Split the input into candidate partitions (with their original index) and the spec
		// strings of the .done flag partitions.
		List<Tuple2<Partition, Integer>> partitionsWithIndice = new ArrayList<>();
		Set<String> donePartStrs = new HashSet<>();
		for (int index = 0; index < partitions.size(); index++) {
			Partition p = partitions.get(index);
			if (OdpsUtils.isDoneFlagPartition(p)) {
				donePartStrs.add(OdpsUtils.partitionSpecToString(p.getPartitionSpec()));
			} else {
				partitionsWithIndice.add(new Tuple2<>(p, index));
			}
		}
		// sorts the partitions into descending order, comparing the values key by key
		Collections.sort(partitionsWithIndice, (p1, p2) -> {
			PartitionSpec spec1 = p1.f0.getPartitionSpec();
			PartitionSpec spec2 = p2.f0.getPartitionSpec();
			for (String key : spec1.keys()) {
				int compare = spec2.get(key).compareTo(spec1.get(key));
				if (compare != 0) {
					return compare;
				}
			}
			return 0;
		});

		MaxPartParseResult parseResult = validateAndParseMaxPartStr(maxPtCondition);
		String specifiedPartSpec = parseResult.specifiedPartSpec;
		String specifiedPartColumn = parseResult.specifiedPartColumn;
		// The list is in descending order, so the first partition passing all checks is the max.
		for (Tuple2<Partition, Integer> candidate : partitionsWithIndice) {
			PartitionSpec spec = candidate.f0.getPartitionSpec();
			String specStr = OdpsUtils.partitionSpecToString(spec);
			// Without an extra spec every partition qualifies; otherwise the spec string has to
			// contain the extra spec and the partition has to own the max_pt column.
			boolean satisfiesSpec = specifiedPartSpec == null
					|| (specStr.contains(specifiedPartSpec) && spec.get(specifiedPartColumn) != null);
			if (!satisfiesSpec) {
				continue;
			}
			if (!withDoneFlag || donePartStrs.contains(specStr + OdpsUtils.DONE_FLAG)) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Validates a max_pt()/max_pt_with_done() condition and splits it into its parts.
	 *
	 * <p>Accepted forms are the bare keyword, 'column=keyword', and the two-part forms
	 * 'column=keyword,spec' and 'spec,column=keyword'.
	 *
	 * @param maxPtCondition the condition to parse
	 * @return the parse result; both fields are null for a bare keyword, and the extra spec is null
	 *     for the single-part 'column=keyword' form
	 * @throws TableException if the condition has none of the accepted forms
	 */
	private static MaxPartParseResult validateAndParseMaxPartStr(String maxPtCondition) {
		String[] subParts = maxPtCondition.split(",");
		if (subParts.length == 2) {
			if (containsMaxPtKeyword(subParts[0])) {
				String maxPtColumn = parseSpecifiedMaxPartColumn(subParts[0]);
				if (maxPtColumn != null) {
					return new MaxPartParseResult(subParts[1], maxPtColumn);
				}
			} else if (containsMaxPtKeyword(subParts[1])) {
				String maxPtColumn = parseSpecifiedMaxPartColumn(subParts[1]);
				if (maxPtColumn != null) {
					return new MaxPartParseResult(subParts[0], maxPtColumn);
				}
			}
		} else if (subParts.length == 1) {
			if (maxPtCondition.equals(MAX_PT_KEYWORD) || maxPtCondition.equals(MAX_PT_WITH_DONE_KEYWORD)) {
				return new MaxPartParseResult(null, null);
			} else {
				String maxPtColumn = parseSpecifiedMaxPartColumn(maxPtCondition);
				if (maxPtColumn != null) {
					return new MaxPartParseResult(null, maxPtColumn);
				}
			}
		}
		LOGGER.error("Partition condition format is invalid! Input partition is {}", maxPtCondition);
		throw new TableException("Partition specific format is invalid!");
	}

	/** Tells whether the text contains one of the two max partition keywords. */
	private static boolean containsMaxPtKeyword(String text) {
		return text.contains(MAX_PT_KEYWORD) || text.contains(MAX_PT_WITH_DONE_KEYWORD);
	}

	/**
	 * Extracts the column name from a 'column=keyword' expression.
	 *
	 * @param maxPartSubStr the expression to parse
	 * @return the column name, or null if the expression is not exactly 'column=max_pt()' or
	 *     'column=max_pt_with_done()'
	 */
	private static String parseSpecifiedMaxPartColumn(String maxPartSubStr) {
		String[] kv = maxPartSubStr.split("=");
		if (kv.length == 2 && (kv[1].equals(MAX_PT_KEYWORD) || kv[1].equals(MAX_PT_WITH_DONE_KEYWORD))) {
			return kv[0];
		} else {
			return null;
		}
	}

	/** Result of parsing a max_pt()/max_pt_with_done() condition. */
	private static class MaxPartParseResult {

		/** The extra partition spec the max partition has to satisfy, or null if there is none. */
		private final String specifiedPartSpec;

		/** The column the max partition keyword was assigned to, or null for a bare keyword. */
		private final String specifiedPartColumn;

		private MaxPartParseResult(String specifiedPartSpec, String specifiedPartColumn) {
			this.specifiedPartSpec = specifiedPartSpec;
			this.specifiedPartColumn = specifiedPartColumn;
		}
	}

	/** Utility class, not meant to be instantiated. */
	private PartitionConditionParser() {

	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy