com.alibaba.ververica.connectors.odps.sink.DynamicPartitionOdpsWriter

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.ververica.connectors.odps.sink;

import org.apache.flink.types.Row;

import com.alibaba.ververica.connectors.odps.OdpsConf;
import com.alibaba.ververica.connectors.odps.util.OdpsUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Table;
import com.aliyun.odps.data.RecordWriter;
import com.aliyun.odps.tunnel.TableTunnel;
import com.aliyun.odps.tunnel.io.TunnelBufferedWriter;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * ODPS writer that dynamically writes records to different partitions of
 * an ODPS table, based on the partition field values of each record.
 */
public class DynamicPartitionOdpsWriter extends OdpsWriter {

	private static final Logger LOGGER = LoggerFactory.getLogger(DynamicPartitionOdpsWriter.class);
	private static final String NULL_PARTITION = "NULL";

	private final List<Integer> dynamicCols;
	private final String[] fieldNames;
	private final int dynamicPartitionLimit;
	private transient Map<String, TableTunnel.UploadSession> sessionMap;
	private transient Map<String, RecordWriter> writerMap;
	private transient Map<String, AtomicInteger> cachedCountMap;
	private transient Table tableClient;

	public DynamicPartitionOdpsWriter(
			String[] fieldNames,
			int dynamicPartitionLimit,
			long flushIntervalMs,
			OdpsConf odpsConf,
			String table,
			String partition) {
		super(flushIntervalMs, odpsConf, table);
		this.fieldNames = fieldNames;
		dynamicCols = new ArrayList<>();
		String[] partNames = partition.split(",");
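		// Record the index of every field that serves as a dynamic partition column.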
		for (int i = 0; i < fieldNames.length; i++) {
			for (String partName : partNames) {
				if (partName.trim().equalsIgnoreCase(fieldNames[i])) {
					dynamicCols.add(i);
				}
			}
		}
		this.dynamicPartitionLimit = dynamicPartitionLimit;
	}

	@Override
	public void open() {
		LOGGER.info("Opening writer ...");
		super.open();
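		// Per-partition state, keyed by partition spec string: upload session, buffered writer and cached record count.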
		this.sessionMap = new HashMap<>();
		this.writerMap = new HashMap<>();
		this.cachedCountMap = new HashMap<>();
		Odps odps = OdpsUtils.initOdps(odpsConf);
		String project = odpsConf.getProject();
		this.tableClient = odps.tables().get(project, table);
		LOGGER.info("Finish to open writer.");
	}

	@Override
	public synchronized void flush(boolean commit) {
		LOGGER.info("Flushing writer [{}] ...", commit);
		for (Map.Entry<String, RecordWriter> writer : writerMap.entrySet()) {
			String partition = writer.getKey();
			try {
				((TunnelBufferedWriter) writer.getValue()).flush();
				cachedCountMap.get(partition).set(0);
			} catch (Throwable e) {
				LOGGER.error("Fail to flush writer for partition {}.", partition, e);
				throw new RuntimeException(e);
			}
		}
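		// When commit is requested, also commit every upload session so the flushed data becomes visible in ODPS.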
		if (commit) {
			LOGGER.info("Prepare to commit to odps with dynamic partition, partition count: {}.", sessionMap.size());
			for (Map.Entry sessionEntry : sessionMap.entrySet()) {
				TableTunnel.UploadSession session = sessionEntry.getValue();
				String partition = sessionEntry.getKey();
				try {
					session.commit();
					LOGGER.info("Commit to odps && sessionId {} for partition {}.", session.getId(), partition);
				} catch (Throwable e) {
					LOGGER.error("Fail to commit to odps && sessionId {} for partition {}.", session.getId(),
							partition);
					throw new RuntimeException(e);
				}
			}
			sessionMap.clear();
		}
		writerMap.clear();
		cachedCountMap.clear();
		LOGGER.info("Finish to flush writer.");
	}

	@Override
	public void close() {
		LOGGER.info("Closing writer ...");
		super.close();
		synchronized (this) {
			for (Map.Entry<String, RecordWriter> writer : writerMap.entrySet()) {
				try {
					writer.getValue().close();
				} catch (Throwable e) {
					LOGGER.error("Fail to close writer for partition key {}!" + writer.getKey(), e);
					throw new RuntimeException(e);
				}
			}
			for (Map.Entry<String, TableTunnel.UploadSession> session : sessionMap.entrySet()) {
				try {
					session.getValue().commit();
				} catch (Throwable e) {
					LOGGER.error("Fail to commit odps upload session for partition key {} !", session.getKey(), e);
					throw new RuntimeException(e);
				}
			}
			writerMap.clear();
			sessionMap.clear();
		}
		LOGGER.info("Finish to close writer.");
	}

	@Override
	public synchronized void checkAndFlush(String dynamicPartKey) {
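		// batchCount is inherited from OdpsWriter; when any partition's cached
		// record count exceeds it, flush all partition writers without committing.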
		if (cachedCountMap.get(dynamicPartKey).incrementAndGet() > batchCount) {
			flush(false);
		}
	}

	@Override
	public TableTunnel.UploadSession getUploadSession(String dynamicPartKey) {
		if (StringUtils.isEmpty(dynamicPartKey)) {
			dynamicPartKey = getNullPartitionKey();
		}
		if (sessionMap.containsKey(dynamicPartKey)) {
			return sessionMap.get(dynamicPartKey);
		} else {
			if (sessionMap.size() >= dynamicPartitionLimit) {
				LOGGER.error("Too many dynamic partitions: {}, which exceeds the size limit: {}!", sessionMap.size(),
						dynamicPartitionLimit);
				throw new RuntimeException("Too many dynamic partitions: "
						+ sessionMap.size()
						+ ", which exceeds the size limit: " + dynamicPartitionLimit);
			}

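			// Make sure the target partition exists server-side before opening an upload session against it.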
			PartitionSpec partSpec = new PartitionSpec(dynamicPartKey);
			try {
				if (!tableClient.hasPartition(partSpec)) {
					LOGGER.info("Creates a new partition {} in table {}.", partSpec, table);
					tableClient.createPartition(partSpec, true);
				}
			} catch (Throwable e) {
				LOGGER.error("Fail to create partition {} in table {}.", partSpec, table);
				throw new RuntimeException(e);
			}

			TableTunnel.UploadSession uploadSession = createUploadSession(partSpec);
			sessionMap.put(dynamicPartKey, uploadSession);
			return uploadSession;
		}
	}

	@Override
	public RecordWriter getRecordWriter(String dynamicPartKey) {
		if (StringUtils.isEmpty(dynamicPartKey)) {
			dynamicPartKey = getNullPartitionKey();
		}
		RecordWriter recordWriter;
		if (writerMap.containsKey(dynamicPartKey)) {
			recordWriter = writerMap.get(dynamicPartKey);
		} else {
			if (writerMap.size() >= dynamicPartitionLimit) {
				LOGGER.error("Too many dynamic partitions: {}, which exceeds the size limit: {}!", sessionMap.size(),
						dynamicPartitionLimit);
				throw new RuntimeException("Too many dynamic partitions: "
						+ sessionMap.size()
						+ ", which exceeds the size limit: " + dynamicPartitionLimit);
			}
			TableTunnel.UploadSession uploadSession = getUploadSession(dynamicPartKey);
			recordWriter = createRecordWriter(uploadSession);
			writerMap.put(dynamicPartKey, recordWriter);
			LOGGER.info("Created upload session id {} for partition key {}.", uploadSession.getId(), dynamicPartKey);

			AtomicInteger currentCachedCount = new AtomicInteger(0);
			cachedCountMap.put(dynamicPartKey, currentCachedCount);
		}
		return recordWriter;
	}

	public String getPartitionKey(Row row) {
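		// Builds a PartitionSpec-style string such as "dt=20240101,region=cn" from the row's dynamic partition fields.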
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < dynamicCols.size(); i++) {
			sb.append(fieldNames[dynamicCols.get(i)]);
			sb.append("=");
			sb.append(row.getField(dynamicCols.get(i)));
			if (i < dynamicCols.size() - 1) {
				sb.append(",");
			}
		}
		return sb.toString();
	}

	private String getNullPartitionKey() {
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < dynamicCols.size(); i++) {
			sb.append(fieldNames[dynamicCols.get(i)]);
			sb.append("=");
			sb.append(NULL_PARTITION);
			if (i < dynamicCols.size() - 1) {
				sb.append(",");
			}
		}
		return sb.toString();
	}

}
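
For reference, here is a minimal end-to-end usage sketch. The driver class below is hypothetical and not part of the connector: the OdpsConf setup, table name, field names and Row values are illustrative assumptions, and the Row-to-Record conversion relies on the standard UploadSession.newRecord() API.

import org.apache.flink.types.Row;

import com.alibaba.ververica.connectors.odps.OdpsConf;
import com.alibaba.ververica.connectors.odps.sink.DynamicPartitionOdpsWriter;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordWriter;

public class DynamicPartitionWriterExample {

	public static void main(String[] args) throws Exception {
		// Hypothetical configuration: fill in project, endpoint and
		// credentials for your environment before running.
		OdpsConf odpsConf = null; // replace with a real, configured OdpsConf

		String[] fieldNames = {"id", "name", "dt"};
		DynamicPartitionOdpsWriter writer = new DynamicPartitionOdpsWriter(
				fieldNames,
				100,      // dynamicPartitionLimit: max partitions held open at once
				30_000L,  // flushIntervalMs
				odpsConf,
				"my_table",
				"dt");    // comma-separated dynamic partition columns
		writer.open();

		Row row = Row.of(1L, "alice", "20240101");
		String partKey = writer.getPartitionKey(row);    // e.g. "dt=20240101"
		RecordWriter recordWriter = writer.getRecordWriter(partKey);

		// Convert the Row into an ODPS Record using the session's table schema.
		Record record = writer.getUploadSession(partKey).newRecord();
		record.set("id", row.getField(0));
		record.set("name", row.getField(1));
		recordWriter.write(record);
		writer.checkAndFlush(partKey);                   // flush once a batch fills up

		writer.flush(true);                              // flush and commit all sessions
		writer.close();
	}
}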