All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.alibaba.ververica.connectors.odps.source.OdpsDynamicTableSource Maven / Gradle / Ivy

There is a newer version: 1.17-vvr-8.0.8
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.ververica.connectors.odps.source;

import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.InputFormatProvider;
import org.apache.flink.table.connector.source.LookupTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;
import org.apache.flink.table.connector.source.TableFunctionProvider;
import org.apache.flink.table.connector.source.abilities.SupportsProjectionPushDown;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.table.runtime.typeutils.RowDataTypeInfo;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.Preconditions;

import com.alibaba.ververica.connectors.common.dim.LookupFunctionWrapper;
import com.alibaba.ververica.connectors.common.dim.cache.CacheConfig;
import com.alibaba.ververica.connectors.common.errorcode.ConnectorErrors;
import com.alibaba.ververica.connectors.common.exception.ErrorUtils;
import com.alibaba.ververica.connectors.odps.ODPSStreamSource;
import com.alibaba.ververica.connectors.odps.OdpsConf;
import com.alibaba.ververica.connectors.odps.dim.ODPSCachedRowFetcher;
import com.alibaba.ververica.connectors.odps.schema.ODPSColumn;
import com.alibaba.ververica.connectors.odps.schema.ODPSTableSchema;
import com.alibaba.ververica.connectors.odps.util.OdpsMetadataProvider;
import com.alibaba.ververica.connectors.odps.util.OdpsUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Table;

import java.util.List;

import static org.apache.flink.util.Preconditions.checkArgument;

/**
 * A {@link DynamicTableSource} backed by an ODPS table.
 *
 * <p>The source can be used in two ways:
 * <ul>
 *   <li>as a scan source: an {@link ODPSStreamSource} when the execution mode is {@code "stream"},
 *       otherwise an {@link ODPSInputFormat};</li>
 *   <li>as a lookup (dimension) source, which requires the "ALL" cache strategy.</li>
 * </ul>
 *
 * <p>Projection push-down is supported for top-level fields only. The pushed-down projection is
 * kept in {@link #selectedColumns} and is preserved by {@link #copy()}.
 *
 * <p>Note that constructing an instance fetches the table metadata through
 * {@link OdpsMetadataProvider} in order to validate the declared columns.
 */
public class OdpsDynamicTableSource
		implements ScanTableSource, LookupTableSource, SupportsProjectionPushDown {

	private final String executionMode;

	private final boolean exitAfterFinish;
	private final int sleepTime;
	private final int retryTime;

	private final String tableName;
	private final OdpsConf odpsConf;
	private final String table;

	// partitions requested by the user; may be empty for a non-partitioned table
	private final List<String> userSpecificPartitions;

	// columns that are actually read; narrowed by applyProjection
	private ODPSColumn[] selectedColumns;

	// odps table schema information
	protected final boolean isPartitionedTable;

	private final CacheConfig cacheConfig;
	private final Long maxRowCount;
	private final TableSchema tableSchema;

	/**
	 * Creates the source and validates the declared schema against the ODPS table metadata.
	 *
	 * @param executionMode          {@code "stream"} selects the streaming source function, any
	 *                               other value selects the input format
	 * @param exitAfterFinish        whether the streaming source should exit once reading finished
	 * @param tableName              name of the table as used in summaries and by the lookup fetcher
	 * @param odpsConf               ODPS connection configuration
	 * @param table                  name of the ODPS table to read
	 * @param tableSchema            declared Flink schema; every field must exist in the ODPS table
	 * @param userSpecificPartitions partitions to read; must be non-empty for a partitioned table
	 * @param cacheConfig            cache configuration used by the lookup source
	 * @param maxRowCount            row limit handed to the lookup fetcher
	 * @param sleepTime              sleep time handed to the streaming source
	 * @param retryTime              retry time handed to the streaming source
	 * @throws TableException           if the table metadata cannot be fetched or a declared
	 *                                  column does not exist in the ODPS table
	 * @throws IllegalArgumentException if the table is a view, or if it is partitioned and no
	 *                                  partition was specified
	 */
	public OdpsDynamicTableSource(
		String executionMode,
		boolean exitAfterFinish,
		String tableName,
		OdpsConf odpsConf,
		String table,
		TableSchema tableSchema,
		List<String> userSpecificPartitions,
		CacheConfig cacheConfig,
		Long maxRowCount,
		int sleepTime,
		int retryTime) {
		this.executionMode = executionMode;
		this.exitAfterFinish = exitAfterFinish;
		this.tableName = tableName;
		this.odpsConf = odpsConf;
		this.table = table;
		Odps odps = OdpsUtils.initOdps(odpsConf);
		ODPSTableSchema originTableSchema;
		try {
			Table t = OdpsMetadataProvider.getTable(odps, odpsConf.getProject(), table);
			com.aliyun.odps.TableSchema schema = t.getSchema();
			boolean isView = t.isVirtualView();
			originTableSchema = new ODPSTableSchema(schema.getColumns(), schema.getPartitionColumns(), isView);
			checkArgument(!originTableSchema.isView(), "view is not supported yet!");
			this.isPartitionedTable = originTableSchema.isPartition();
		} catch (OdpsException e) {
			throw new TableException("Failed to get table schema !", e);
		}

		// A null partition list is reported with the same message as an empty one
		// instead of surfacing as a bare NullPointerException.
		Preconditions.checkArgument(
			!isPartitionedTable || (userSpecificPartitions != null && !userSpecificPartitions.isEmpty()),
			"Partition should be specified for partitioned ODPS source.");

		// Resolve every declared field against the ODPS schema, keeping the declared order.
		String[] returnFieldsName = tableSchema.getFieldNames();
		int fieldsNum = returnFieldsName.length;
		this.selectedColumns = new ODPSColumn[fieldsNum];
		for (int idx = 0; idx < fieldsNum; idx++) {
			String fieldName = returnFieldsName[idx];
			ODPSColumn column = originTableSchema.getColumn(fieldName);
			if (column == null) {
				throw new TableException("Unknown column " + fieldName + "!");
			}
			selectedColumns[idx] = column;
		}
		this.userSpecificPartitions = userSpecificPartitions;
		this.cacheConfig = cacheConfig;
		this.maxRowCount = maxRowCount;
		this.tableSchema = tableSchema;
		this.sleepTime = sleepTime;
		this.retryTime = retryTime;
	}

	/**
	 * Builds the runtime provider used for scanning the table.
	 *
	 * @param scanContext planner-provided context (not used)
	 * @return a source-function provider in {@code "stream"} mode, an input-format provider otherwise
	 */
	@Override
	public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
		if (executionMode.equals("stream")) {
			RowDataTypeInfo rowTypeInfo = OdpsUtils.deriveRowType(tableSchema, selectedColumns);
			ODPSStreamSource sourceFunc = new ODPSStreamSource(
				odpsConf,
				table,
				selectedColumns,
				userSpecificPartitions,
				rowTypeInfo,
				sleepTime,
				retryTime);
			if (exitAfterFinish) {
				sourceFunc.enableExitAfterReadFinished();
			}
			// second argument is the provider's "bounded" flag
			return SourceFunctionProvider.of(sourceFunc, false);
		} else {
			return InputFormatProvider.of(
				new ODPSInputFormat(
					odpsConf,
					table,
					selectedColumns,
					isPartitionedTable,
					userSpecificPartitions.toArray(new String[0])));
		}
	}

	/**
	 * Builds the runtime provider used for lookup joins.
	 *
	 * <p>Only the "ALL" cache strategy is accepted, and every lookup key must reference a
	 * top-level field.
	 *
	 * @param context planner-provided context carrying the lookup key paths
	 * @return a table-function provider wrapping an {@link ODPSCachedRowFetcher}
	 * @throws IllegalArgumentException if a lookup key is a nested field path
	 */
	@Override
	public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext context) {
		if (!cacheConfig.getCacheStrategy().isAllCache()) {
			ErrorUtils.throwException(
				ConnectorErrors.INST.onlySupportedCacheStrategyError("ODPS dim", "ALL"));
		}
		// Translate each single-element key path into the name of the referenced field.
		int[][] keys = context.getKeys();
		String[] fieldNames = tableSchema.getFieldNames();
		String[] lookupKeys = new String[keys.length];
		for (int i = 0; i < lookupKeys.length; i++) {
			int[] innerKeyArr = keys[i];
			Preconditions.checkArgument(innerKeyArr.length == 1,
					"ODPS do not support nested lookup keys");
			lookupKeys[i] = fieldNames[innerKeyArr[0]];
		}
		RowType rowType = (RowType) tableSchema.toPhysicalRowDataType().getLogicalType();
		TableFunction lookupFunc =
			new LookupFunctionWrapper(new ODPSCachedRowFetcher(
				tableName,
				odpsConf,
				table,
				rowType,
				selectedColumns,
				lookupKeys,
				isPartitionedTable,
				tableSchema,
				cacheConfig,
				userSpecificPartitions,
				maxRowCount));
		return TableFunctionProvider.of(lookupFunc);
	}

	/**
	 * Creates a copy of this source, including any projection that was already pushed down.
	 *
	 * @return a new source with the same configuration and the same selected columns
	 */
	@Override
	public DynamicTableSource copy() {
		OdpsDynamicTableSource copied = new OdpsDynamicTableSource(
			executionMode, exitAfterFinish, tableName, odpsConf, table, tableSchema,
			userSpecificPartitions, cacheConfig, maxRowCount, sleepTime, retryTime);
		// The constructor recomputes selectedColumns from the full schema; carry over the
		// current (possibly projected) selection so the copy does not lose the push-down.
		copied.selectedColumns = selectedColumns.clone();
		return copied;
	}

	/**
	 * Returns a short description of this source.
	 *
	 * @return the table name, followed by the first requested partition when there is one
	 */
	@Override
	public String asSummaryString() {
		// Non-partitioned tables may have no partition at all; avoid get(0) on an empty list.
		if (userSpecificPartitions == null || userSpecificPartitions.isEmpty()) {
			return String.format("ODPS-Source: %s", tableName);
		}
		return String.format("ODPS-Source: %s-%s", tableName, userSpecificPartitions.get(0));
	}

	@Override
	public boolean supportsNestedProjection() {
		return false;
	}

	/**
	 * Narrows the selected columns to the given projection.
	 *
	 * <p>Indices are resolved against the currently selected columns. The selection is replaced
	 * only after every entry has been validated, so a rejected projection leaves this source
	 * unchanged.
	 *
	 * @param fields projected field paths; each path must contain exactly one index
	 * @throws IllegalArgumentException if {@code fields} is null, empty, longer than the current
	 *                                  selection, contains a nested path, or an index is out of range
	 */
	@Override
	public void applyProjection(int[][] fields) {
		checkArgument(
				fields != null && fields.length != 0 && fields.length <= selectedColumns.length,
				"project fields cannot be null or empty, " +
						"project fields number cannot be more than origin fields length!");
		ODPSColumn[] originColumns = this.selectedColumns;
		ODPSColumn[] projectedColumns = new ODPSColumn[fields.length];
		for (int i = 0; i < fields.length; i++) {
			checkArgument(fields[i] != null && fields[i].length == 1,
					"Nested projection push down is not supported yet.");
			int fieldOrd = fields[i][0];
			checkArgument(fieldOrd >= 0 && fieldOrd < originColumns.length,
					"Projected field index " + fieldOrd + " is out of range [0, "
							+ originColumns.length + ").");
			projectedColumns[i] = originColumns[fieldOrd];
		}
		this.selectedColumns = projectedColumns;
	}

	@Override
	public ChangelogMode getChangelogMode() {
		return ChangelogMode.insertOnly();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy