/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.alibaba.ververica.connectors.odps.sink;

import org.apache.flink.types.Row;

import com.alibaba.ververica.connectors.odps.OdpsConf;
import com.alibaba.ververica.connectors.odps.util.OdpsUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Table;
import com.aliyun.odps.data.RecordWriter;
import com.aliyun.odps.tunnel.TableTunnel;
import com.aliyun.odps.tunnel.io.TunnelBufferedWriter;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * An ODPS writer that dynamically writes records to different partitions of
 * an ODPS table, based on the partition field values of each record.
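 *
 * <p>A minimal usage sketch (illustrative only: {@code odpsConf}, {@code rows} and the
 * {@code toOdpsRecord} conversion helper are assumed to exist here, and exception
 * handling is omitted):
 *
 * <pre>{@code
 * DynamicPartitionOdpsWriter writer = new DynamicPartitionOdpsWriter(
 *         new String[] {"id", "name", "dt"}, // row field names; "dt" is the partition column
 *         100,                               // dynamicPartitionLimit
 *         5000L,                             // flushIntervalMs
 *         odpsConf, "my_table", "dt");
 * writer.open();
 * for (Row row : rows) {
 *     String partKey = writer.getPartitionKey(row);             // e.g. "dt=20240101"
 *     writer.getRecordWriter(partKey).write(toOdpsRecord(row)); // route to that partition
 *     writer.checkAndFlush(partKey);                            // flush when the batch fills up
 * }
 * writer.flush(true); // flush remaining buffers and commit all upload sessions
 * writer.close();
 * }</pre>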
*/
public class DynamicPartitionOdpsWriter extends OdpsWriter {
private static final Logger LOGGER = LoggerFactory.getLogger(DynamicPartitionOdpsWriter.class);
private static final String NULL_PARTITION = "NULL";
    private final List<Integer> dynamicCols;
private final String[] fieldNames;
private final int dynamicPartitionLimit;
    private transient Map<String, TableTunnel.UploadSession> sessionMap;
    private transient Map<String, RecordWriter> writerMap;
    private transient Map<String, AtomicInteger> cachedCountMap;
private transient Table tableClient;
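
    /**
     * Creates a dynamic-partition writer. (Parameter descriptions below are inferred
     * from how the arguments are used in this class and its base class.)
     *
     * @param fieldNames            names of all fields in the incoming rows
     * @param dynamicPartitionLimit maximum number of partitions that may be open concurrently
     * @param flushIntervalMs       flush interval in milliseconds, passed to the base writer
     * @param odpsConf              ODPS connection configuration
     * @param table                 name of the target ODPS table
     * @param partition             comma-separated names of the dynamic partition columns
     */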
public DynamicPartitionOdpsWriter(
String[] fieldNames,
int dynamicPartitionLimit,
long flushIntervalMs,
OdpsConf odpsConf,
String table,
String partition) {
super(flushIntervalMs, odpsConf, table);
this.fieldNames = fieldNames;
dynamicCols = new ArrayList<>();
String[] partNames = partition.split(",");
for (int i = 0; i < fieldNames.length; i++) {
for (String partName : partNames) {
if (partName.trim().equalsIgnoreCase(fieldNames[i])) {
dynamicCols.add(i);
}
}
}
this.dynamicPartitionLimit = dynamicPartitionLimit;
}
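
    /** Initializes the per-partition caches and the ODPS table client. */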
@Override
public void open() {
LOGGER.info("Opening writer ...");
super.open();
this.sessionMap = new HashMap<>();
this.writerMap = new HashMap<>();
this.cachedCountMap = new HashMap<>();
Odps odps = OdpsUtils.initOdps(odpsConf);
String project = odpsConf.getProject();
this.tableClient = odps.tables().get(project, table);
LOGGER.info("Finish to open writer.");
}
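
    /**
     * Flushes every buffered per-partition writer and resets its record counter. When
     * {@code commit} is true, also commits and clears all open upload sessions. The
     * writer and counter caches are cleared in either case.
     */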
@Override
public synchronized void flush(boolean commit) {
LOGGER.info("Flushing writer [{}] ...", commit);
for (Map.Entry writer : writerMap.entrySet()) {
String partition = writer.getKey();
try {
((TunnelBufferedWriter) writer.getValue()).flush();
cachedCountMap.get(partition).set(0);
} catch (Throwable e) {
LOGGER.error("Fail to flush writer for partition {}.", partition, e);
throw new RuntimeException(e);
}
}
if (commit) {
LOGGER.info("Prepare to commit to odps with dynamic partition, partition count: {}.", sessionMap.size());
for (Map.Entry sessionEntry : sessionMap.entrySet()) {
TableTunnel.UploadSession session = sessionEntry.getValue();
String partition = sessionEntry.getKey();
try {
session.commit();
LOGGER.info("Commit to odps && sessionId {} for partition {}.", session.getId(), partition);
} catch (Throwable e) {
LOGGER.error("Fail to commit to odps && sessionId {} for partition {}.", session.getId(),
partition);
throw new RuntimeException(e);
}
}
sessionMap.clear();
}
writerMap.clear();
cachedCountMap.clear();
LOGGER.info("Finish to flush writer.");
}
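
    /**
     * Closes all open record writers, commits any remaining upload sessions, and
     * clears the per-partition caches.
     */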
@Override
public void close() {
LOGGER.info("Closing writer ...");
super.close();
synchronized (this) {
            for (Map.Entry<String, RecordWriter> writer : writerMap.entrySet()) {
try {
writer.getValue().close();
} catch (Throwable e) {
LOGGER.error("Fail to close writer for partition key {}!" + writer.getKey(), e);
throw new RuntimeException(e);
}
}
            for (Map.Entry<String, TableTunnel.UploadSession> session : sessionMap.entrySet()) {
try {
session.getValue().commit();
} catch (Throwable e) {
LOGGER.error("Fail to commit odps upload session for partition key {} !", session.getKey(), e);
throw new RuntimeException(e);
}
}
writerMap.clear();
sessionMap.clear();
}
LOGGER.info("Finish to close writer.");
}
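
    /**
     * Increments the buffered-record counter for the given partition key and, once it
     * exceeds the inherited {@code batchCount} threshold, flushes all buffered writers
     * without committing.
     */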
@Override
public synchronized void checkAndFlush(String dynamicPartKey) {
if (cachedCountMap.get(dynamicPartKey).incrementAndGet() > batchCount) {
flush(false);
}
}
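
    /**
     * Returns the upload session for the given partition key (an empty key is mapped to
     * the NULL partition), creating the target partition and a new session on first use.
     * Fails when a new session is requested while the cache already holds
     * {@code dynamicPartitionLimit} sessions.
     */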
@Override
public TableTunnel.UploadSession getUploadSession(String dynamicPartKey) {
if (StringUtils.isEmpty(dynamicPartKey)) {
dynamicPartKey = getNullPartitionKey();
}
if (sessionMap.containsKey(dynamicPartKey)) {
return sessionMap.get(dynamicPartKey);
} else {
if (sessionMap.size() >= dynamicPartitionLimit) {
LOGGER.error("Too many dynamic partitions: {}, which exceeds the size limit: {}!", sessionMap.size(),
dynamicPartitionLimit);
throw new RuntimeException("Too many dynamic partitions: "
+ sessionMap.size()
+ ", which exceeds the size limit: " + dynamicPartitionLimit);
}
PartitionSpec partSpec = new PartitionSpec(dynamicPartKey);
try {
if (!tableClient.hasPartition(partSpec)) {
LOGGER.info("Creates a new partition {} in table {}.", partSpec, table);
tableClient.createPartition(partSpec, true);
}
} catch (Throwable e) {
LOGGER.error("Fail to create partition {} in table {}.", partSpec, table);
throw new RuntimeException(e);
}
TableTunnel.UploadSession uploadSession = createUploadSession(partSpec);
sessionMap.put(dynamicPartKey, uploadSession);
return uploadSession;
}
}
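
    /**
     * Returns the record writer for the given partition key (an empty key is mapped to
     * the NULL partition), creating the writer and its record counter on first use and
     * enforcing {@code dynamicPartitionLimit}.
     */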
@Override
public RecordWriter getRecordWriter(String dynamicPartKey) {
if (StringUtils.isEmpty(dynamicPartKey)) {
dynamicPartKey = getNullPartitionKey();
}
RecordWriter recordWriter;
if (writerMap.containsKey(dynamicPartKey)) {
recordWriter = writerMap.get(dynamicPartKey);
} else {
if (writerMap.size() >= dynamicPartitionLimit) {
LOGGER.error("Too many dynamic partitions: {}, which exceeds the size limit: {}!", sessionMap.size(),
dynamicPartitionLimit);
throw new RuntimeException("Too many dynamic partitions: "
+ sessionMap.size()
+ ", which exceeds the size limit: " + dynamicPartitionLimit);
}
TableTunnel.UploadSession uploadSession = getUploadSession(dynamicPartKey);
recordWriter = createRecordWriter(uploadSession);
writerMap.put(dynamicPartKey, recordWriter);
LOGGER.info("Created upload session id {} for partition key {}.", uploadSession.getId(), dynamicPartKey);
AtomicInteger currentCachedCount = new AtomicInteger(0);
cachedCountMap.put(dynamicPartKey, currentCachedCount);
}
return recordWriter;
}
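
    /**
     * Builds the partition spec string for a row from its dynamic partition fields,
     * e.g. {@code "dt=20240101,region=cn"} (example values are illustrative).
     */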
public String getPartitionKey(Row row) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < dynamicCols.size(); i++) {
sb.append(fieldNames[dynamicCols.get(i)]);
sb.append("=");
sb.append(row.getField(dynamicCols.get(i)));
if (i < dynamicCols.size() - 1) {
sb.append(",");
}
}
return sb.toString();
}
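
    /**
     * Builds a partition spec string that maps every dynamic partition column to the
     * literal {@code "NULL"} value; used when a record yields an empty partition key.
     */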
private String getNullPartitionKey() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < dynamicCols.size(); i++) {
sb.append(fieldNames[dynamicCols.get(i)]);
sb.append("=");
sb.append(NULL_PARTITION);
if (i < dynamicCols.size() - 1) {
sb.append(",");
}
}
return sb.toString();
}
}