// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// io.github.shanqiang.sp.dimension.OdpsDimensionTable Maven / Gradle / Ivy

package io.github.shanqiang.sp.dimension;

import io.github.shanqiang.exception.UnknownTypeException;
import io.github.shanqiang.table.Index;
import io.github.shanqiang.table.Table;
import io.github.shanqiang.table.TableBuilder;
import io.github.shanqiang.table.Type;
import com.aliyun.odps.Instance;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Partition;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.task.SQLTask;
import com.aliyun.odps.tunnel.TableTunnel;
import io.github.shanqiang.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

/**
 * A {@link DimensionTable} whose rows are downloaded from a single partition of an Aliyun ODPS
 * table through {@link TableTunnel} and reloaded on a fixed delay.
 *
 * <p>Constructing an instance immediately schedules the first load on a dedicated
 * single-threaded scheduler. Each load builds a fresh {@link Table}, indexes it by the primary
 * key columns, assigns the result to {@code tableIndex} and then invokes {@code callback}.
 * A failed load is retried every 10 seconds until it succeeds.
 *
 * <p>No reference to the scheduler is kept, so this class offers no way to stop the refresh.
 */
public class OdpsDimensionTable extends DimensionTable {
    private static final Logger logger = LoggerFactory.getLogger(OdpsDimensionTable.class);

    /** Normalized partition value that asks for the newest partition to be resolved on every load. */
    private static final String MAX_PT = "max_pt()";
    /** Pause between two attempts after a failed load. */
    private static final long RETRY_DELAY_MILLIS = 10_000;
    /** Minimum time between two "have loaded N rows" progress log lines. */
    private static final long PROGRESS_LOG_INTERVAL_MILLIS = 5000;

    private final String useProject;
    private final String projectName;
    private final String tableName;
    private final String partitionColumnName;
    private final String partition;
    private final String ak;
    private final String sk;
    private final Duration refreshInterval;
    private final Map<String, Type> columnTypeMap;
    private final String[] primaryKeyColumnNames;
    private final String myName;

    /**
     * Creates a dimension table that uses {@code projectName} both as the ODPS default project
     * and as the project that owns the table.
     *
     * @see #OdpsDimensionTable(String, String, String, String, String, String, String, String,
     *      Duration, Map, String...)
     */
    public OdpsDimensionTable(String endPoint,
                              String projectName,
                              String tableName,
                              String partitionColumnName,
                              String partition,
                              String ak,
                              String sk,
                              Duration refreshInterval,
                              Map<String, Type> columnTypeMap,
                              String... primaryKeyColumnNames) {
        this(endPoint, projectName, projectName, tableName, partitionColumnName, partition, ak, sk, refreshInterval,
                columnTypeMap, primaryKeyColumnNames);
    }

    /**
     * Creates the dimension table and starts its periodic background load.
     *
     * @param endPoint              ODPS endpoint passed to {@link Odps#setEndpoint(String)}
     * @param useProject            project set as the ODPS default project
     * @param projectName           project that owns the table to download
     * @param tableName             name of the table to download
     * @param partitionColumnName   name of the partition column
     * @param partition             partition value to download; spaces are removed and the value
     *                              is lower-cased, and {@code max_pt()} selects the newest
     *                              partition at each load
     * @param ak                    access key id of the Aliyun account
     * @param sk                    access key secret of the Aliyun account
     * @param refreshInterval       delay between the end of one load and the start of the next
     * @param columnTypeMap         columns to read and their types; its iteration order defines
     *                              the column positions, and the map is not copied
     * @param primaryKeyColumnNames columns the index is built on; at least one is required
     * @throws NullPointerException     if any argument other than {@code endPoint} is null
     * @throws IllegalArgumentException if {@code columnTypeMap} or {@code primaryKeyColumnNames}
     *                                  is empty
     */
    public OdpsDimensionTable(String endPoint,
                              final String useProject,
                              String projectName,
                              String tableName,
                              String partitionColumnName,
                              String partition,
                              String ak,
                              String sk,
                              Duration refreshInterval,
                              Map<String, Type> columnTypeMap,
                              String... primaryKeyColumnNames) {
        this.useProject = requireNonNull(useProject, "useProject");
        this.projectName = requireNonNull(projectName, "projectName");
        this.tableName = requireNonNull(tableName, "tableName");
        this.partitionColumnName = requireNonNull(partitionColumnName, "partitionColumnName");
        this.partition = requireNonNull(partition, "partition").replace(" ", "").toLowerCase();
        this.ak = requireNonNull(ak, "ak");
        this.sk = requireNonNull(sk, "sk");
        this.refreshInterval = requireNonNull(refreshInterval, "refreshInterval");
        this.columnTypeMap = requireNonNull(columnTypeMap, "columnTypeMap");
        if (columnTypeMap.isEmpty()) {
            throw new IllegalArgumentException("columnTypeMap must contain at least one column");
        }
        // Copy the varargs array: it is read on the background thread at every refresh.
        this.primaryKeyColumnNames = requireNonNull(primaryKeyColumnNames, "primaryKeyColumnNames").clone();
        if (this.primaryKeyColumnNames.length < 1) {
            throw new IllegalArgumentException("at least one primary key column name is required");
        }

        this.myName = format("%s: %s.%s", this.getClass().getSimpleName(), projectName, tableName);

        // loadWithRetry never lets an exception escape: scheduleWithFixedDelay would silently
        // cancel all further executions if the task threw.
        new ScheduledThreadPoolExecutor(1, Threads.threadsNamed(myName))
                .scheduleWithFixedDelay(() -> loadWithRetry(endPoint),
                        0, refreshInterval.toMillis(), TimeUnit.MILLISECONDS);
    }

    /**
     * Loads the table, retrying every {@link #RETRY_DELAY_MILLIS} until one attempt succeeds.
     *
     * <p>Retries run in a loop rather than by recursion so that a long outage cannot grow the
     * stack. If the thread is interrupted while waiting, the interrupt status is restored and the
     * method returns without further attempts.
     */
    private void loadWithRetry(String endPoint) {
        while (true) {
            try {
                load(endPoint);
                return;
            } catch (Throwable t) {
                logger.error("", t);
            }

            try {
                Thread.sleep(RETRY_DELAY_MILLIS);
            } catch (InterruptedException e) {
                logger.error("", e);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Performs one full download of the configured partition and publishes the result through
     * {@code tableIndex} and {@code callback}.
     */
    private void load(String endPoint) throws Exception {
        long pre = System.currentTimeMillis();
        logger.info("begin to load {}", myName);

        TableBuilder tableBuilder = new TableBuilder(columnTypeMap);

        Account account = new AliyunAccount(ak, sk);
        Odps odps = new Odps(account);
        odps.setEndpoint(endPoint);
        odps.setDefaultProject(useProject);

        String ptSpec = MAX_PT.equals(partition)
                ? getMaxPtSpec(odps, projectName, tableName)
                : partition;

        TableTunnel tunnel = new TableTunnel(odps);
        TableTunnel.DownloadSession downloadSession = tunnel.createDownloadSession(projectName,
                tableName,
                new PartitionSpec(format("%s='%s'", partitionColumnName, ptSpec)));
        long recordNum = downloadSession.getRecordCount();
        logger.info("{} will download {} records", myName, recordNum);

        try (RecordReader recordReader = downloadSession.openRecordReader(0, recordNum)) {
            int row = 0;
            Record record;
            while ((record = recordReader.read()) != null) {
                // debug(row) is inherited; a true result stops reading before this row is added.
                if (debug(row)) {
                    break;
                }

                appendRecord(tableBuilder, record);
                row++;

                long now = System.currentTimeMillis();
                if (now - pre > PROGRESS_LOG_INTERVAL_MILLIS) {
                    logger.info("{} have loaded {} rows", myName, row);
                    pre = now;
                }
            }

            Table table = tableBuilder.build();
            Index index = table.createIndex(primaryKeyColumnNames);
            tableIndex = new TableIndex(table, index);
            callback(this);
            logger.info("end to load {}, rows: {}, index.size: {}", myName, row, index.getColumns2Rows().size());
        }
    }

    /**
     * Appends one value per configured column of {@code record} to {@code tableBuilder}; the
     * column position is the column's position in the iteration order of {@code columnTypeMap}.
     */
    private void appendRecord(TableBuilder tableBuilder, Record record) {
        int column = 0;
        for (Map.Entry<String, Type> entry : columnTypeMap.entrySet()) {
            String columnName = entry.getKey();
            Type type = entry.getValue();
            switch (type) {
                case INT: {
                    // INT columns are read as bigint from the record and narrowed.
                    Long value = record.getBigint(columnName);
                    tableBuilder.append(column, value == null ? null : value.intValue());
                    break;
                }
                case BIGINT:
                    tableBuilder.append(column, record.getBigint(columnName));
                    break;
                case DOUBLE:
                    tableBuilder.append(column, record.getDouble(columnName));
                    break;
                case VARBYTE:
                    tableBuilder.append(column, record.getString(columnName));
                    break;
                default:
                    throw new UnknownTypeException(type.name());
            }
            column++;
        }
    }

    /**
     * Resolves the newest partition value by running {@code select max_pt('project.table');}
     * and returning the first column of the first result record.
     *
     * <p>The project and table names are concatenated into the SQL text unescaped.
     * NOTE(review): this assumes both come from trusted configuration — confirm.
     *
     * @throws OdpsException         if the SQL task fails
     * @throws IllegalStateException if the query returns no record
     */
    private static String getMaxPtSpec(Odps odps, String projectName, String tableName) throws OdpsException {
        Instance instance = SQLTask.run(odps, "select max_pt('" + projectName + "." + tableName + "');");
        instance.waitForSuccess();
        List<Record> records = SQLTask.getResult(instance);
        if (records.isEmpty()) {
            throw new IllegalStateException("max_pt returned no record for " + projectName + "." + tableName);
        }
        return records.get(0).getString(0);
    }

    /**
     * Alternative to {@link #getMaxPtSpec} that lists the table's partitions and returns the
     * greatest value (by {@link String#compareTo}) of each partition's first partition key.
     * Not called from within this class.
     *
     * <p>Side effect: sets the default project of {@code odps} to {@code projectName}.
     *
     * @return the greatest partition value, or {@code null} if the table has no partitions
     */
    private static String getMaxPtSpecWithoutSql(Odps odps, String projectName, String tableName) {
        odps.setDefaultProject(projectName);
        List<Partition> partitions = odps.tables().get(tableName).getPartitions();
        String maxPtSpec = null;
        for (Partition partition : partitions) {
            PartitionSpec partitionSpec = partition.getPartitionSpec();
            String ptSpec = partitionSpec.get(partitionSpec.keys().iterator().next());
            if (null == maxPtSpec || maxPtSpec.compareTo(ptSpec) < 0) {
                maxPtSpec = ptSpec;
            }
        }
        return maxPtSpec;
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy