All downloads are free. The search and download features use the official Maven repository.

com.aliyun.odps.tunnel.impl.StreamUploadSessionImpl Maven / Gradle / Ivy

package com.aliyun.odps.tunnel.impl;

import static com.aliyun.odps.tunnel.HttpHeaders.HEADER_ODPS_REQUEST_ID;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Table;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.data.ArrayRecord;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.exceptions.SchemaMismatchException;
import com.aliyun.odps.tunnel.Configuration;
import com.aliyun.odps.tunnel.HttpHeaders;
import com.aliyun.odps.tunnel.TableTunnel;
import com.aliyun.odps.tunnel.TunnelConstants;
import com.aliyun.odps.tunnel.TunnelException;
import com.aliyun.odps.tunnel.io.CompressOption;
import com.aliyun.odps.tunnel.io.ProtobufRecordPack;
import com.aliyun.odps.tunnel.io.StreamRecordPackImpl;
import com.aliyun.odps.tunnel.io.TunnelRetryHandler;
import com.aliyun.odps.utils.ConnectionWatcher;
import com.aliyun.odps.utils.StringUtils;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;

public class StreamUploadSessionImpl extends StreamSessionBase implements TableTunnel.StreamUploadSession {
    /**
     * Builder that collects the settings of a {@link StreamUploadSessionImpl} and creates it.
     *
     * <p>Settings that are not declared here (schema name, partition spec, create-partition flag,
     * slot number, schema version and the schema-mismatch switch) are read from the parent
     * builder when {@link #build()} runs.
     */
    public static class Builder extends TableTunnel.StreamUploadSession.Builder {
        private String projectName;
        private String tableName;
        // NOTE(review): exposed through its accessors, but build() does not forward it.
        private CompressOption compressOption = new CompressOption();
        // NOTE(review): never read inside this builder.
        private boolean p2pMode = false;
        // Columns whose names are sent to the server as the z-order column list.
        private List<Column> zorderColumns;
        private Configuration config;

        /** @return the project name set on this builder, or {@code null} if none was set */
        public String getProjectName() {
            return projectName;
        }

        /**
         * @param projectName project that owns the target table
         * @return this builder, for chaining
         */
        public StreamUploadSessionImpl.Builder setProjectName(String projectName) {
            this.projectName = projectName;
            return this;
        }

        /** @return the table name set on this builder, or {@code null} if none was set */
        public String getTableName() {
            return tableName;
        }

        /**
         * @param tableName name of the target table
         * @return this builder, for chaining
         */
        public StreamUploadSessionImpl.Builder setTableName(String tableName) {
            this.tableName = tableName;
            return this;
        }

        /** @return the compress option held by this builder */
        public CompressOption getCompressOption() {
            return compressOption;
        }

        /**
         * @param compressOption compress option to store on this builder
         * @return this builder, for chaining
         */
        public StreamUploadSessionImpl.Builder setCompressOption(CompressOption compressOption) {
            this.compressOption = compressOption;
            return this;
        }

        /** @return the z-order columns set on this builder, or {@code null} if none were set */
        public List<Column> getZorderColumns() {
            return zorderColumns;
        }

        /**
         * @param zorderColumns columns passed to the session as its z-order columns
         * @return this builder, for chaining
         */
        public StreamUploadSessionImpl.Builder setZorderColumns(List<Column> zorderColumns) {
            this.zorderColumns = zorderColumns;
            return this;
        }

        /** @return the tunnel configuration set on this builder, or {@code null} if none was set */
        public Configuration getConfig() {
            return config;
        }

        /**
         * @param config tunnel configuration handed to the session
         * @return this builder, for chaining
         */
        public StreamUploadSessionImpl.Builder setConfig(Configuration config) {
            this.config = config;
            return this;
        }

        /**
         * Creates the session from the values collected so far.
         *
         * @return a new {@link StreamUploadSessionImpl}
         * @throws TunnelException if the session constructor fails
         */
        public TableTunnel.StreamUploadSession build() throws TunnelException {
            return new StreamUploadSessionImpl(config,
                    projectName,
                    getSchemaName(),
                    tableName,
                    getPartitionSpec(),
                    isCreatePartition(),
                    getSlotNum(),
                    zorderColumns,
                    getSchemaVersion(),
                    allowSchemaMismatch);
        }
    }

    /** Slots of the current session; replaced each time the session is created or reloaded. */
    protected StreamUploadSessionImpl.Slots slots;
    /** When {@code true}, upload connections are opened against the slot's own address. */
    private boolean p2pMode = false;
    /** Z-order columns; their names are sent as a comma-separated request parameter. */
    private List<Column> columns;
    /** Inverse of the {@code allowSchemaMismatch} constructor argument; sent with every upload. */
    private boolean checkLatestSchema;

    /**
     * Stores the session settings, builds the REST client and creates the session on the server.
     *
     * @param conf tunnel configuration; also used to build the REST client
     * @param projectName project that owns the table
     * @param schemaName schema that contains the table
     * @param tableName target table
     * @param partitionSpec partition specification, sent with uploads when non-empty
     * @param cretaPartition whether the create-partition parameter is sent when the session is
     *     created (the name is a historical misspelling of "createPartition")
     * @param slotNum requested slot count; only sent when greater than zero
     * @param zorderColumns z-order columns, may be {@code null}
     * @param schemaVersion schema version to use, may be {@code null} or empty
     * @param allowSchemaMismatch when {@code false}, the latest-schema check is enabled
     * @throws TunnelException if the session cannot be created
     */
    public StreamUploadSessionImpl(Configuration conf,
                                   String projectName,
                                   String schemaName,
                                   String tableName,
                                   String partitionSpec,
                                   boolean cretaPartition,
                                   long slotNum,
                                   List<Column> zorderColumns,
                                   String schemaVersion,
                                   boolean allowSchemaMismatch) throws TunnelException {
        this.config = conf;
        this.projectName = projectName;
        this.schemaName = schemaName;
        this.tableName = tableName;
        this.partitionSpec = partitionSpec;
        this.columns = zorderColumns;
        this.httpClient = Util.newRestClient(conf, projectName);
        this.schemaVersion = schemaVersion;
        this.checkLatestSchema = !allowSchemaMismatch;

        // Because of the server-side design, a freshly created session may not be based on the
        // latest table schema. That is confusing when schema mismatches are not allowed and the
        // caller did not pin a schema version, so in that case the session is re-created until
        // it is based on the table's latest schema version.
        if (this.checkLatestSchema && StringUtils.isNullOrEmpty(schemaVersion)) {
            initiateUntilUseLatestSchema(slotNum, cretaPartition);
        } else {
            initiate(slotNum, cretaPartition);
        }
    }

    /**
     * Creates the session, retrying while the server reports {@code NoSuchSchema}.
     *
     * <p>Before every attempt {@code schemaVersion} is refreshed from
     * {@code table.getSchemaVersion()}. A {@code NoSuchSchema} failure is followed by a 5 second
     * pause and another attempt; any other failure is rethrown at once. After 59 failed attempts
     * (about 5 minutes) a {@link TunnelException} is thrown that carries the last failure as its
     * cause.
     *
     * @param slotNum requested slot count, forwarded to {@code initiate}
     * @param createPartition whether the create-partition parameter is sent
     * @throws TunnelException if the session cannot be created or the wait is interrupted
     */
    private void initiateUntilUseLatestSchema(long slotNum, boolean createPartition)
        throws TunnelException {
        final int maxAttempts = 59;
        final long pauseSeconds = 5;

        Table table = config.getOdps().tables().get(projectName, schemaName, tableName);
        OdpsException lastFailure = null;

        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            schemaVersion = table.getSchemaVersion();
            try {
                initiate(slotNum, createPartition);
                return;
            } catch (OdpsException e) {
                if (!"NoSuchSchema".equals(e.getErrorCode())) {
                    throw e;
                }
                // Remember why this attempt failed so the final error can report it.
                lastFailure = e;
            }
            try {
                TimeUnit.SECONDS.sleep(pauseSeconds);
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
                throw new TunnelException("Interrupt when initiate session.", interrupted);
            }
        }

        schemaVersion = table.getSchemaVersion();
        throw new TunnelException(
            "Cannot init session use latest schema version: " + schemaVersion, lastFailure);
    }

    /**
     * Sends the "create stream upload session" POST request and loads the returned slots.
     *
     * @param slotNum requested slot count; the slot-number header is only sent when it is
     *     greater than zero
     * @param createPartition whether the create-partition parameter is added to the request
     * @throws TunnelException if the request fails or the response body is not a JSON object
     */
    private void initiate(long slotNum, boolean createPartition) throws TunnelException {
        HashMap<String, String> params = getCommonParams();

        if (createPartition) {
            params.put(TunnelConstants.CREATE_PARTITION, "");
        }

        if (columns != null && !columns.isEmpty()) {
            params.put(TunnelConstants.ZORDER_COLUMNS, getColumnString());
        }

        if (schemaVersion != null && !schemaVersion.isEmpty()) {
            params.put(TunnelConstants.SCHEMA_VERSION, this.schemaVersion);
        }

        HashMap<String, String> headers = getCommonHeaders();
        if (slotNum > 0) {
            headers.put(HttpHeaders.HEADER_ODPS_SLOT_NUM, String.valueOf(slotNum));
        }

        StreamSessionBase.HttpResult result =
            httpRequest(headers, params, "POST", "create stream upload session");

        try {
            JsonObject tree = new JsonParser().parse(result.body).getAsJsonObject();
            this.slots = new Slots(loadFromJson(result.requestId, tree, false));
        } catch (JsonSyntaxException | IllegalStateException e) {
            // getAsJsonObject() raises IllegalStateException when the body is valid JSON but not
            // an object (for example an empty body); report it with the request id as well.
            throw new TunnelException(result.requestId, "Invalid json content: '" + result.body + "'", e);
        }
    }

    /**
     * Sends the "get stream upload session" GET request for the current session id and schema
     * version, then replaces {@code slots} with the slots from the response.
     *
     * @throws TunnelException if the request fails or the response body is not a JSON object
     */
    private void reload() throws TunnelException {
        HashMap<String, String> params = getCommonParams();
        params.put(TunnelConstants.UPLOADID, id);
        params.put(TunnelConstants.SCHEMA_VERSION, schemaVersion);

        HashMap<String, String> headers = getCommonHeaders();

        StreamSessionBase.HttpResult result =
            httpRequest(headers, params, "GET", "get stream upload session");

        try {
            JsonObject tree = new JsonParser().parse(result.body).getAsJsonObject();
            this.slots = new Slots(loadFromJson(result.requestId, tree, true));
        } catch (JsonSyntaxException | IllegalStateException e) {
            // getAsJsonObject() raises IllegalStateException when the body is valid JSON but not
            // an object (for example an empty body); report it with the request id as well.
            throw new TunnelException(result.requestId, "Invalid json content: '" + result.body + "'", e);
        }
    }

    /**
     * Brings the local slot routing in line with what the server reported.
     *
     * <p>If the reported slot count differs from the local one, the whole session is reloaded.
     * Otherwise only the given slot's routed server is updated, and only when it changed.
     *
     * @param slot the slot that was just used
     * @param server routed server reported by the server for that slot
     * @param slotNum slot count reported by the server
     * @throws TunnelException if reloading the session fails
     */
    public void reloadSlots(Slot slot, String server, int slotNum) throws TunnelException {
        boolean slotCountChanged = slots.getSlotNum() != slotNum;
        if (slotCountChanged) {
            // the slot count changed, so every route has to be fetched again
            reload();
            return;
        }

        // same slot count: the slot may still have been rescheduled to another server
        String routedServer = slot.getServer();
        if (routedServer.equals(server)) {
            return;
        }
        slot.setServer(server);
    }

    /**
     * The slots of a session, handed out in round-robin order starting at a random position.
     *
     * <p>{@link #iterator()} always returns the same iterator instance, so consecutive callers
     * continue the same rotation. The iterator never runs out while at least one slot exists;
     * with no slots, {@code hasNext()} is {@code false} and {@code next()} returns {@code null}.
     */
    static class Slots implements Iterable<Slot> {
        private final List<Slot> slots;
        // Index of the slot returned by the next call to next(); -1 when there are no slots.
        private int curSlotIndex;
        private final Iterator<Slot> iter;

        /**
         * @param slots the slots to rotate over; must not be {@code null}
         * @throws TunnelException declared for callers; not thrown by this implementation
         */
        public Slots(List<Slot> slots) throws TunnelException {
            this.slots = slots;
            this.curSlotIndex = slots.isEmpty() ? -1 : new Random().nextInt(slots.size());

            // round robin iterator
            this.iter = new Iterator<Slot>() {
                @Override
                public boolean hasNext() {
                    return curSlotIndex >= 0;
                }

                @Override
                public synchronized Slot next() {
                    if (!hasNext()) {
                        return null;
                    }
                    // wrap around once the end of the list has been passed
                    if (curSlotIndex >= Slots.this.slots.size()) {
                        curSlotIndex = 0;
                    }
                    return Slots.this.slots.get(curSlotIndex++);
                }
            };
        }

        /** @return the shared round-robin iterator of this instance */
        @Override
        public Iterator<Slot> iterator() {
            return iter;
        }

        /** @return the number of slots */
        public int getSlotNum() {
            return slots.size();
        }
    }

    /**
     * Opens the PUT connection used to upload one block through the given slot.
     *
     * @param compress compression option; its algorithm selects the content-encoding header
     * @param slot slot to upload through; supplies the slot id, routed server and, in p2p mode,
     *     the address to connect to
     * @param size body size in bytes; a negative value selects chunked transfer encoding
     * @param recordCount number of records in the block; only sent when greater than zero
     * @return the opened connection
     * @throws TunnelException if the compression algorithm is not supported or, in p2p mode, the
     *     client endpoint is not a valid URI
     * @throws OdpsException declared by the signature; presumably from the underlying connect
     *     call
     * @throws IOException declared by the signature; presumably from the underlying connect call
     */
    private Connection getConnection(CompressOption compress, Slot slot, long size, long recordCount)
            throws OdpsException, IOException {
        HashMap<String, String> params = new HashMap<>();

        params.put(TunnelConstants.UPLOADID, id);
        params.put(TunnelConstants.SLOT_ID, slot.getSlot());
        params.put(TunnelConstants.SCHEMA_VERSION, schemaVersion);

        if (this.partitionSpec != null && this.partitionSpec.length() > 0) {
            params.put(TunnelConstants.RES_PARTITION, partitionSpec);
        }

        if (recordCount > 0) {
            params.put(TunnelConstants.RECORD_COUNT, String.valueOf(recordCount));
        }

        if (columns != null && !columns.isEmpty()) {
            params.put(TunnelConstants.ZORDER_COLUMNS, getColumnString());
        }

        if (!StringUtils.isNullOrEmpty(config.getQuotaName())) {
            params.put(TunnelConstants.PARAM_QUOTA_NAME, config.getQuotaName());
        }
        params.put(TunnelConstants.PARAM_CHECK_LATEST_SCHEMA, String.valueOf(checkLatestSchema));

        HashMap<String, String> headers = getCommonHeaders();

        if (size < 0) {
            headers.put(Headers.TRANSFER_ENCODING, Headers.CHUNKED);
        } else {
            headers.put(Headers.CONTENT_LENGTH, String.valueOf(size));
        }

        headers.put(Headers.CONTENT_TYPE, "application/octet-stream");
        headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(TunnelConstants.VERSION));
        headers.put(HttpHeaders.HEADER_ODPS_SLOT_NUM, String.valueOf(slots.getSlotNum()));

        switch (compress.algorithm) {
            case ODPS_RAW: {
                // uncompressed: no content-encoding header
                break;
            }
            case ODPS_ZLIB: {
                headers.put(Headers.CONTENT_ENCODING, "deflate");
                break;
            }
            case ODPS_SNAPPY: {
                headers.put(Headers.CONTENT_ENCODING, "x-snappy-framed");
                break;
            }
            case ODPS_LZ4_FRAME: {
                headers.put(Headers.CONTENT_ENCODING, "x-lz4-frame");
                break;
            }
            default: {
                throw new TunnelException("unsupported compression option.");
            }
        }

        headers.put(HttpHeaders.HEADER_ODPS_ROUTED_SERVER, slot.getServer());

        if (p2pMode) {
            // p2p mode: keep the endpoint's scheme but connect to the slot's own address
            try {
                URI u = new URI(httpClient.getEndpoint());
                return httpClient.connect(getResource(), "PUT", params, headers,
                        u.getScheme() + "://" + slot.getIp());
            } catch (URISyntaxException e) {
                // keep the parse failure as the cause so the offending position is not lost
                throw new TunnelException("Invalid endpoint: " + httpClient.getEndpoint(), e);
            }
        } else {
            return httpClient.connect(getResource(), "PUT", params, headers);
        }
    }

    /**
     * Opens an HTTP connection, writes the pack data and closes the connection, without a
     * timeout.
     *
     * @param pack
     *     the pack data to upload
     * @return the value returned by {@link #writeBlock(ProtobufRecordPack, long)}
     * @throws IOException if the upload fails
     */
    public String writeBlock(ProtobufRecordPack pack)
            throws IOException {
        // a timeout of 0 means "no timeout"
        final long noTimeout = 0;
        return writeBlock(pack, noTimeout);
    }

    /**
     * Opens an HTTP connection, writes the pack data and closes the connection.
     *
     * <p>Each attempt takes the next slot from the round-robin iterator, opens a connection for
     * it and sends the pack; the attempt is run through a {@link TunnelRetryHandler}.
     *
     * @param pack
     *     the pack data to upload
     * @param timeout
     *     timeout in milliseconds; 0 means no timeout
     * @return the request id header of the successful response
     * @throws IOException if the upload fails; checked exceptions other than
     *     {@code IOException} are wrapped into one
     */
    public String writeBlock(ProtobufRecordPack pack, long timeout)
            throws IOException {
        final TunnelRetryHandler retryHandler = new TunnelRetryHandler(config);
        try {
            return retryHandler.executeWithRetry(() -> {
                Connection connection = null;
                try {
                    final Slot target = slots.iterator().next();
                    connection = getConnection(
                        pack.getCompressOption(), target, pack.getTotalBytes(), pack.getSize());
                    return sendBlock(pack, connection, target, timeout);
                } finally {
                    if (connection != null) {
                        try {
                            connection.disconnect();
                        } catch (IOException ignored) {
                            // best effort: a failed disconnect must not hide the upload result
                        }
                    }
                }
            });
        } catch (RuntimeException | IOException passThrough) {
            throw passThrough;
        } catch (Exception other) {
            throw new IOException(other.getMessage(), other);
        }
    }

    /**
     * Writes the pack's bytes to an open connection and evaluates the response.
     *
     * <p>On success the slot routing is refreshed from the routed-server and slot-number
     * response headers.
     *
     * @param pack pack whose protobuf stream is written to the connection
     * @param conn open connection; must not be {@code null}
     * @param slot slot the connection was opened for
     * @param timeout timeout in milliseconds; the connection is only watched when it is greater
     *     than zero
     * @return the request id header of the response
     * @throws SocketTimeoutException if writing failed and the watcher reports that the
     *     connection timed out; the original failure is attached as the cause
     * @throws SchemaMismatchException if the server answers with status 412 and error code
     *     {@code SchemaModified}
     * @throws IOException if {@code conn} is {@code null}, writing fails, or the response is not
     *     OK
     * @throws TunnelException if refreshing the slots fails
     */
    private String sendBlock(ProtobufRecordPack pack, Connection conn, Slot slot, long timeout)
            throws IOException, TunnelException {
        if (null == conn) {
            throw new IOException("Invalid connection");
        }
        ByteArrayOutputStream baos = pack.getProtobufStream();
        if (timeout > 0) {
            ConnectionWatcher.getInstance().mark(conn, timeout);
        }
        Response response = null;
        try {
            baos.writeTo(conn.getOutputStream());
            conn.getOutputStream().close();
            baos.close();
            response = conn.getResponse();
        } catch (Throwable tr) {
            if (timeout > 0 && ConnectionWatcher.getInstance().checkTimedOut(conn)) {
                SocketTimeoutException timedOut = new SocketTimeoutException(
                    "Flush time exceeded timeout user set: " + timeout + "ms");
                // SocketTimeoutException has no cause-taking constructor
                timedOut.initCause(tr);
                throw timedOut;
            }
            throw tr;
        } finally {
            if (timeout > 0) {
                ConnectionWatcher.getInstance().release(conn);
            }
        }
        if (!response.isOK()) {
            TunnelException exception = new TunnelException(response.getHeader(HEADER_ODPS_REQUEST_ID),
                                                      conn.getInputStream(),
                                                      response.getStatus());
            // Constant-first comparisons: the error code or status may be absent when the error
            // body could not be interpreted, and that must not turn into a NullPointerException.
            if ("SchemaModified".equals(exception.getErrorCode()) &&
                Integer.valueOf(412).equals(exception.getStatus())) {
                throw new SchemaMismatchException("SchemaModified",
                                                  response.getHeader(
                                                      HttpHeaders.HEADER_ODPS_TUNNEL_LATEST_SCHEMA_VERSION));
            }
            throw new IOException(exception.getMessage(), exception);
        }

        reloadSlots(slot,
                    response.getHeader(HttpHeaders.HEADER_ODPS_ROUTED_SERVER),
                    Integer.parseInt(response.getHeader(HttpHeaders.HEADER_ODPS_SLOT_NUM)));

        return response.getHeader(HEADER_ODPS_REQUEST_ID);
    }

    /**
     * Joins the names of the z-order columns with commas.
     *
     * @return the comma-separated column names; an empty string when there are no columns
     */
    private String getColumnString() {
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (Column column : columns) {
            joined.append(separator).append(column.getName());
            // every name after the first is preceded by a comma
            separator = ",";
        }
        return joined.toString();
    }

    /**
     * @return the id of this session, the value sent as the upload id request parameter
     */
    @Override
    public String getId() {
        return this.id;
    }

    /**
     * Switches p2p mode on or off. In p2p mode upload connections are opened against the
     * address of the chosen slot instead of the client's default endpoint.
     *
     * @param mode {@code true} to enable p2p mode
     */
    @Override
    public void setP2pMode(boolean mode) {
        p2pMode = mode;
    }

    /**
     * @return the table schema held by this session
     */
    @Override
    public TableSchema getSchema() {
        return this.schema;
    }

    /**
     * @return the schema version this session currently uses
     */
    @Override
    public String getSchemaVersion() {
        return this.schemaVersion;
    }

    /**
     * @return the quota name held by this session
     */
    @Override
    public String getQuotaName() {
        return this.quotaName;
    }

    /**
     * Creates a record pack bound to this session that uses the {@code ODPS_RAW} algorithm.
     *
     * @return a new record pack
     * @throws IOException if the pack cannot be created
     */
    @Override
    public TableTunnel.StreamRecordPack newRecordPack() throws IOException {
        CompressOption rawOption =
            new CompressOption(CompressOption.CompressAlgorithm.ODPS_RAW, 0, 0);
        return new StreamRecordPackImpl(this, rawOption);
    }

    /**
     * Creates a record pack bound to this session with the given compress option.
     *
     * @param option compress option handed to the new pack
     * @return a new record pack
     * @throws IOException if the pack cannot be created
     */
    @Override
    public TableTunnel.StreamRecordPack newRecordPack(CompressOption option) throws IOException {
        TableTunnel.StreamRecordPack recordPack = new StreamRecordPackImpl(this, option);
        return recordPack;
    }

    /**
     * Creates an empty record whose columns are those of this session's schema.
     *
     * @return a new {@link ArrayRecord}
     */
    @Override
    public Record newRecord() {
        Column[] schemaColumns = schema.getColumns().toArray(new Column[0]);
        return new ArrayRecord(schemaColumns);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy