package com.alibaba.ververica.connectors.odps.dim;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
import org.apache.flink.table.runtime.typeutils.RowDataTypeInfo;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.FlinkRuntimeException;

import com.alibaba.ververica.connectors.common.dim.DimJoinFetcher;
import com.alibaba.ververica.connectors.common.dim.cache.AllCache;
import com.alibaba.ververica.connectors.common.dim.cache.CacheConfig;
import com.alibaba.ververica.connectors.common.dim.reload.CacheAllReloadConf;
import com.alibaba.ververica.connectors.common.dim.reload.SerializableRunnable;
import com.alibaba.ververica.connectors.common.errorcode.ConnectorErrors;
import com.alibaba.ververica.connectors.common.exception.ErrorUtils;
import com.alibaba.ververica.connectors.common.util.DateUtil;
import com.alibaba.ververica.connectors.odps.OdpsConf;
import com.alibaba.ververica.connectors.odps.schema.ODPSColumn;
import com.alibaba.ververica.connectors.odps.type.ODPSType;
import com.alibaba.ververica.connectors.odps.util.OdpsMetadataProvider;
import com.alibaba.ververica.connectors.odps.util.OdpsUtils;
import com.alibaba.ververica.connectors.odps.util.PartitionConditionParser;
import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.Partition;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Table;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.tunnel.TableTunnel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Row fetcher for an ODPS table used as a dimension (lookup) table. The whole
 * table, or a single matched partition, is loaded into an in-memory cache and
 * periodically reloaded by a background runner.
 */
public class ODPSCachedRowFetcher extends DimJoinFetcher
		implements FlatMapFunction<RowData, RowData>, ResultTypeQueryable<RowData> {

	private static final Logger LOG = LoggerFactory.getLogger(ODPSCachedRowFetcher.class);

	private final String sqlTableName;
	private final OdpsConf odpsConf;
	private final String tableName;
	private final RowType rowType;
	private final ODPSColumn[] selectedColumns;
	protected final int fieldLength;
	private final boolean isPartitionedTable;
	protected final List<Integer> sourceKeys;
	protected final List<Integer> targetKeys;
	private final LogicalType[] keyTypes;
	private int maxRetries = 10;
	private List<String> specificPartitions;
	private Long maxRowCount;

	private RowDataSerializer serializer;

	public ODPSCachedRowFetcher(
			String sqlTableName,
			OdpsConf odpsConf,
			String tableName,
			RowType rowType,
			ODPSColumn[] selectedColumns,
			String[] lookupKeys,
			boolean isPartitionedTable,
			TableSchema tableSchema,
			CacheConfig cacheConfig,
			List<String> specificPartitions,
			Long maxRowCount) {
		super(sqlTableName, lookupKeys, cacheConfig.getCacheStrategy());
		this.sqlTableName = sqlTableName;
		this.odpsConf = odpsConf;
		this.tableName = tableName;
		this.rowType = rowType;
		this.selectedColumns = selectedColumns;
		this.isPartitionedTable = isPartitionedTable;
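		// Register the full-table cache reload runner: the table (or one matched
		// partition) is loaded into memory and refreshed once per cache TTL,
		// except inside the configured blacklist time ranges.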
		CacheAllReloadConf reloadConf = new CacheAllReloadConf(
				cacheConfig.getTimeRangeBlacklist(),
				cacheConfig.getCacheScanLimit(),
				cacheConfig.getCacheStrategy().getTtlMs());

		setAllCacheReloadRunner(new ReloadCacheRunner(), reloadConf);

		this.sourceKeys = new ArrayList<>();
		this.targetKeys = new ArrayList<>();
		this.serializer = new RowDataSerializer(null, rowType);
		this.fieldLength = selectedColumns.length;
		this.keyTypes = new LogicalType[lookupKeys.length];

		String[] fieldNames = tableSchema.getFieldNames();
		for (int i = 0; i < lookupKeys.length; i++) {
			sourceKeys.add(i);
			int targetIdx = getColumnIndex(lookupKeys[i], fieldNames);
			if (targetIdx < 0) {
				throw new TableException("Column: " + lookupKeys[i] + " doesn't exists.");
			}
			targetKeys.add(targetIdx);
			keyTypes[i] = rowType.getTypeAt(targetIdx);
		}
		this.specificPartitions = specificPartitions;
		this.maxRowCount = maxRowCount;
	}
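
	// A minimal usage sketch, hedged: every input below is assumed to be built
	// elsewhere, and the table name, lookup key "id", partition condition
	// "ds=20240101" and row limit are illustrative placeholders only.
	//
	//   ODPSCachedRowFetcher fetcher = new ODPSCachedRowFetcher(
	//           "dim_table", odpsConf, "dim_table", rowType, selectedColumns,
	//           new String[]{"id"}, true, tableSchema, cacheConfig,
	//           java.util.Collections.singletonList("ds=20240101"), 100_000L);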

	@Override
	public void openConnection(Configuration parameters) {
		// no need to prepare connection here.
	}

	@Override
	public void closeConnection() {
		// no need to close connection here.
	}

	@Override
	public void flatMap(RowData row, Collector<RowData> collector) throws Exception {
		Object key = getSourceKey(row);
		if (key == null) {
			LOG.debug("Join ODPS on an empty key of row: {}", row);
			return;
		}
		while (!one2oneAllCacheHandler.isLoadedOrThrowException()) {
			// the cache has not finished its initial load yet, so wait briefly
			Thread.sleep(10);
		}
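		// Read under the cache's read lock so that the reload thread, which
		// presumably swaps in a freshly loaded cache under the write lock, cannot
		// switch caches in the middle of this lookup.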
		one2oneAllCacheHandler.lock.readLock().lock();
		RowData cachedRow;
		try {
			cachedRow = one2oneAllCacheHandler.get(key);
		} finally {
			one2oneAllCacheHandler.lock.readLock().unlock();
		}
		if (cachedRow != null) {
			collector.collect(cachedRow);
		}
	}

	@Override
	public TypeInformation<RowData> getProducedType() {
		return RowDataTypeInfo.of(rowType);
	}

	protected Object getSourceKey(RowData source) {
		return getKey(source, sourceKeys, keyTypes);
	}

	protected Object prepareCacheKey(RowData target) {
		return getKey(target, targetKeys, keyTypes);
	}

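	/**
	 * Reloads the entire ODPS table (or the single matched partition) into the
	 * all-cache through the tunnel download API, retrying with linear backoff
	 * on transient failures.
	 */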
	private class ReloadCacheRunner extends SerializableRunnable {

		@Override
		public void run() {
			int attemptNum = 1;
			AllCache allCache = one2oneAllCacheHandler;
			while (attemptNum <= maxRetries) {
				try {
					long currentTime = System.currentTimeMillis();
					if (DateUtil.isTimeInRange(reloadConf.timeRangeBlackList, currentTime)) {
						if (allCache.isLoaded()) {
							LOG.info("Current time {} is in reload black list, so try to reload cache next time.", currentTime);
							return;
						} else {
							LOG.info("Current time {} is in reload black list, but this is the first time to load cache, so still load.", currentTime);
							// not return
						}
					}

					LOG.info("Reloading all data from ODPS '{}' ...", sqlTableName);
					allCache.initialize();
					// reload all data...
					long startTime = System.nanoTime();
					long readCount = 0L;
					// create a new scan every time
					TableTunnel tunnel = OdpsUtils.createTableTunnel(odpsConf);
					String project = odpsConf.getProject();
					PartitionSpec currentPartitionSpec = null;
					final TableTunnel.DownloadSession downloadSession;
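					// For a partitioned table exactly one partition must match the
					// configured conditions, and the download session is scoped to it;
					// a non-partitioned table is downloaded in full.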
					if (isPartitionedTable) {
						if (specificPartitions != null && !specificPartitions.isEmpty()) {
							Odps odps = OdpsUtils.initOdps(odpsConf);
							Table table = OdpsMetadataProvider.getTable(odps, project, tableName);
							final List<Partition> matchedPartitions = PartitionConditionParser
									.filter(table.getPartitions(), specificPartitions);

							if (matchedPartitions == null || matchedPartitions.isEmpty()) {
								LOG.warn(
										"Did not find any matching partition for table {}.{}, partition condition is {}. No data will be fetched until the next reload period.",
										project, tableName, specificPartitions);
								allCache.switchCache();
								break;
							} else if (matchedPartitions.size() > 1) {
								LOG.error(
										"Found multiple matching partitions for table {}.{}, partition condition is {}!",
										project, tableName, specificPartitions);
								throw new TableException("Only a single partition is supported when an ODPS table is used as a dimension table!");
							} else {
								currentPartitionSpec = matchedPartitions.get(0).getPartitionSpec();
								downloadSession = tunnel
										.createDownloadSession(project, tableName, currentPartitionSpec);
							}
						} else {
							LOG.error("Does not specify any partition for table {}.{}.", project, tableName);
							throw new TableException("specified partitions cannot be empty for partition table!");
						}
					} else {
						downloadSession = tunnel.createDownloadSession(project, tableName);
					}

					LOG.info("Session Status is : " + downloadSession.getStatus().toString());
					long count = downloadSession.getRecordCount();

					LOG.info("RecordCount is: " + count);
					if (count <= 0) {
						LOG.warn(
								"RecordCount is: {} for table {}.{}, partition condition is {}! And don't fetch data until next period.",
								count, project, tableName, specificPartitions);
						allCache.switchCache();
						break;
					}
					if (count > maxRowCount) {
						LOG.error(
								"Record count {} exceeds the max limit {} for table {}.{}, partition condition is {}!",
								count, maxRowCount, project, tableName, specificPartitions);
						ErrorUtils.throwException(
								ConnectorErrors.INST.odpsTableExceedMaxRowCountError(
										tableName,
										currentPartitionSpec != null ? currentPartitionSpec.toString() : null,
										String.valueOf(maxRowCount)));
					}
					List<Column> columns = new ArrayList<>();
					for (ODPSColumn column : selectedColumns) {
						if (column != null && !column.isPartition()) {
							columns.add(new Column(column.getName(), column.getType()));
						}
					}
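					// Read the full range [0, count) and convert each ODPS Record into
					// a GenericRowData; partition columns are not part of the tunnel
					// records, so they are filled in from the partition spec.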
					RecordReader recordReader = downloadSession.openRecordReader(0, count, false, columns);
					Record record;
					while ((record = recordReader.read()) != null) {
						readCount++;
						GenericRowData resultRow = new GenericRowData(fieldLength);
						// parse to row
						for (int idx = 0; idx < fieldLength; idx++) {
							ODPSColumn column = selectedColumns[idx];
							String columnName = column.getName();
							ODPSType odpsType = ODPSType.valueOf(column.getType().name());
							if (column.isPartition()) {
								odpsType.setRowField(resultRow, idx, currentPartitionSpec.get(columnName));
							} else {
								odpsType.setRowField(resultRow, idx, record, columnName);
							}
						}
						Object key = prepareCacheKey(resultRow);
						// TODO: should an exception be thrown when duplicate rows share the same unique key?
						one2oneAllCacheHandler.put(key, serializer.copy(resultRow));
					}
					recordReader.close();

					long endTime = System.nanoTime();
					LOG.info("Loaded {} rows from ODPS '{}' into cache, used {}ms.",
							readCount,
							sqlTableName,
							(endTime - startTime) / 1_000_000);

					allCache.switchCache();
					break;
				} catch (Throwable t) {
					// catch all throwable errors
					FlinkRuntimeException e = new FlinkRuntimeException("Error occurred in reload thread.", t);
					if (t instanceof InterruptedException || t instanceof OutOfMemoryError) {
						LOG.error("Error happens when scanning all data from ODPS.", e);
						allCache.setException(e);
					} else {
						attemptNum++;
						if (attemptNum <= maxRetries) {
							LOG.warn(
									"Error occurred while scanning all data from ODPS, retrying (attempt {}).",
									attemptNum,
									e);
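							// Linear backoff: wait attemptNum seconds before the next attempt.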
							try {
								Thread.sleep(1000 * attemptNum);
							} catch (InterruptedException e1) {
								LOG.error("Cache reload thread is interrupted", e1);
								ErrorUtils.throwException(
										"Thread sleep may be interrupted by main thread.",
										e1);
							}
						} else {
							LOG.error("Error happens when scanning all data from ODPS.", e);
							allCache.setException(e);
						}
					}
				}
			}
		}
	}
}