All downloads are FREE. The search and download features use the official Maven repository.

com.aliyun.odps.table.write.impl.batch.TableBatchWriteSessionImpl Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.table.write.impl.batch;

import static com.aliyun.odps.table.utils.ConfigConstants.VERSION_1;
import static com.aliyun.odps.tunnel.HttpHeaders.HEADER_ODPS_REQUEST_ID;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.arrow.vector.VectorSchemaRoot;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.table.DataFormat;
import com.aliyun.odps.table.DataSchema;
import com.aliyun.odps.table.SessionStatus;
import com.aliyun.odps.table.TableIdentifier;
import com.aliyun.odps.table.configuration.ArrowOptions;
import com.aliyun.odps.table.configuration.DynamicPartitionOptions;
import com.aliyun.odps.table.configuration.WriterOptions;
import com.aliyun.odps.table.distribution.Distribution;
import com.aliyun.odps.table.distribution.Distributions;
import com.aliyun.odps.table.enviroment.EnvironmentSettings;
import com.aliyun.odps.table.enviroment.ExecutionEnvironment;
import com.aliyun.odps.table.order.NullOrdering;
import com.aliyun.odps.table.order.SortDirection;
import com.aliyun.odps.table.order.SortOrder;
import com.aliyun.odps.table.write.BatchWriter;
import com.aliyun.odps.table.write.TableWriteCapabilities;
import com.aliyun.odps.table.write.WriterAttemptId;
import com.aliyun.odps.table.write.WriterCommitMessage;
import com.aliyun.odps.table.utils.HttpUtils;
import com.aliyun.odps.table.utils.Preconditions;
import com.aliyun.odps.table.utils.ConfigConstants;
import com.aliyun.odps.table.utils.TableRetryHandler;
import com.aliyun.odps.table.utils.SchemaUtils;
import com.aliyun.odps.table.utils.SessionUtils;
import com.aliyun.odps.tunnel.io.TunnelRetryHandler;
import com.aliyun.odps.tunnel.TunnelException;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;

public class TableBatchWriteSessionImpl extends TableBatchWriteSessionBase {

    /** Class-wide SLF4J logger. */
    private static final Logger logger = LoggerFactory.getLogger(TableBatchWriteSessionImpl.class.getName());

    /** HTTP client used for all session requests; created on first use by {@link #ensureInitialized()}. */
    private transient RestClient restClient;

    /** Handler whose {@code executeWithRetry} runs the create and commit requests; created on first use by {@link #ensureInitialized()}. */
    private transient TunnelRetryHandler retryHandler;

    /**
     * Builds a write session from the full set of write options. Every argument is
     * forwarded unchanged to the {@link TableBatchWriteSessionBase} constructor; this
     * class adds no logic of its own here.
     *
     * @param identifier              table to write to
     * @param partitionSpec           target partition specification
     * @param overwrite               overwrite flag sent in the session request
     * @param dynamicPartitionOptions dynamic-partition settings sent in the session request
     * @param arrowOptions            Arrow timestamp/datetime unit settings
     * @param capabilities            write capabilities advertised by the caller
     * @param settings                environment settings used to build the HTTP client
     * @param maxFieldSize            value sent as {@code MaxFieldSize} in the session request
     * @throws IOException declared by the superclass constructor
     */
    public TableBatchWriteSessionImpl(TableIdentifier identifier,
                                      PartitionSpec partitionSpec,
                                      boolean overwrite,
                                      DynamicPartitionOptions dynamicPartitionOptions,
                                      ArrowOptions arrowOptions,
                                      TableWriteCapabilities capabilities,
                                      EnvironmentSettings settings,
                                      long maxFieldSize) throws IOException {
        super(identifier, partitionSpec, overwrite,
                dynamicPartitionOptions, arrowOptions, capabilities, settings, maxFieldSize);
    }

    /**
     * Builds a session object from a table identifier and a session id. All arguments
     * are forwarded unchanged to the {@link TableBatchWriteSessionBase} constructor.
     * NOTE(review): presumably used to re-attach to an already created session —
     * confirm against the base class.
     *
     * @param identifier table the session belongs to
     * @param sessionId  id of the write session
     * @param settings   environment settings used to build the HTTP client
     * @throws IOException declared by the superclass constructor
     */
    public TableBatchWriteSessionImpl(TableIdentifier identifier,
                                      String sessionId,
                                      EnvironmentSettings settings) throws IOException {
        super(identifier, sessionId, settings);
    }

    /**
     * Creates the write session on the server.
     *
     * <p>POSTs the JSON produced by {@link #generateWriteSessionRequest()} to the
     * table-session resource (through the retry handler) and loads the JSON response
     * into this object's fields via {@link #loadResultFromJson(String)}.
     *
     * @throws IOException wrapping any failure, including a non-OK HTTP response
     *                     (reported as a {@link TunnelException} cause)
     */
    @Override
    protected void initSession() throws IOException {
        ensureInitialized();

        Map<String, String> headers = HttpUtils.createCommonHeader(settings);
        headers.put(Headers.CONTENT_TYPE, "application/json");

        Map<String, String> params = HttpUtils.createCommonParams(settings);
        params.put(ConfigConstants.SESSION_TYPE, getType().toString());

        try {
            String req = generateWriteSessionRequest();
            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Write table '%s'.\n"
                        + "Session request:\n"
                        + "%s", identifier.toString(), req));
            }

            Response resp = retryHandler.executeWithRetry(() -> restClient.stringRequest(
                    ResourceBuilder.buildTableSessionResource(
                            ConfigConstants.VERSION_1,
                            identifier.getProject(),
                            identifier.getSchema(),
                            identifier.getTable(),
                            null),
                    "POST", params, headers, req));

            if (!resp.isOK()) {
                throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID),
                        new ByteArrayInputStream(resp.getBody()), resp.getStatus());
            }

            // Decode explicitly as UTF-8: the body is JSON, and the no-charset String
            // constructor would use the platform default, corrupting non-ASCII text.
            String response = new String(resp.getBody(), StandardCharsets.UTF_8);
            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Write table '%s'.\n"
                        + "Session response:\n"
                        + "%s", identifier.toString(), response));
            }
            loadResultFromJson(response);
        } catch (Exception e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Fetches the current state of this session from the server.
     *
     * <p>Issues a GET on the table-session resource for {@code sessionId} directly on
     * the REST client (not through the retry handler), loads the JSON response into
     * this object's fields and returns the raw response text.
     *
     * @return the JSON response body
     * @throws IOException wrapping any failure, including a non-OK HTTP response;
     *                     the message names the endpoint that was contacted
     */
    @Override
    protected String reloadSession() throws IOException {
        ensureInitialized();

        Preconditions.checkString(this.sessionId, "Table write session id");

        Map<String, String> headers = HttpUtils.createCommonHeader(settings);

        Map<String, String> params = HttpUtils.createCommonParams(settings);
        params.put(ConfigConstants.SESSION_TYPE, getType().toString());

        try {
            Response resp = restClient.request(ResourceBuilder.buildTableSessionResource(
                            ConfigConstants.VERSION_1,
                            identifier.getProject(),
                            identifier.getSchema(),
                            identifier.getTable(),
                            sessionId),
                    "GET", params, headers, null);
            if (!resp.isOK()) {
                throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID),
                        new ByteArrayInputStream(resp.getBody()), resp.getStatus());
            }

            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            String response = new String(resp.getBody(), StandardCharsets.UTF_8);
            loadResultFromJson(response);
            return response;
        } catch (Exception e) {
            throw new IOException("Failed to reload table write session with endpoint: "
                    + restClient.getEndpoint(), e);
        }
    }

    /**
     * Creates an Arrow writer for one block of this session.
     *
     * <p>The block number is validated against the caller-supplied limit in
     * {@code options} when one is present; otherwise against the limit reported by
     * the server, if any.
     *
     * @param blockNumber index of the block to write
     * @param attemptId   attempt id of the writer
     * @param options     writer options; may carry a user-defined max block number
     * @return a new {@link ArrowWriterImpl} bound to this session and block
     * @throws IOException if {@code blockNumber} reaches the applicable max block number
     */
    @Override
    public BatchWriter<VectorSchemaRoot> createArrowWriter(long blockNumber,
                                                           WriterAttemptId attemptId,
                                                           WriterOptions options) throws IOException {
        Preconditions.checkString(sessionId, "Table write session id");
        // The original reported "Table write session id" here, which mislabelled a
        // missing identifier as a missing session id.
        Preconditions.checkNotNull(identifier, "Table identifier");
        Preconditions.checkLong(blockNumber, 0, "Block number");
        Preconditions.checkNotNull(attemptId, "Attempt id");
        Preconditions.checkNotNull(options, "Writer options");

        if (options.maxBlockNumber().isPresent()) {
            // A user-defined limit takes precedence; the server limit is then not consulted.
            if (blockNumber >= options.maxBlockNumber().get()) {
                throw new IOException("User defined max block writer number: " + options.maxBlockNumber().get());
            }
        } else if (maxBlockNumber().isPresent()
                && blockNumber >= maxBlockNumber().get()) {
            throw new IOException("Server max block writer number: " + maxBlockNumber().get());
        }

        return new ArrowWriterImpl(
                sessionId, identifier, requiredSchema, blockNumber, attemptId, options, arrowOptions);
    }

    /**
     * Tells whether the given data format is among the formats recorded for this session.
     *
     * @param dataFormat format to look up
     * @return {@code true} only if a format set has been loaded and it contains
     *         {@code dataFormat}; {@code false} when no set has been loaded
     */
    @Override
    public boolean supportsDataFormat(DataFormat dataFormat) {
        return supportDataFormats != null && supportDataFormats.contains(dataFormat);
    }

    /**
     * Commits the session with the given writer commit messages.
     *
     * <p>POSTs the commit request through the retry handler and loads the response.
     * While the session status is {@code NORMAL} or {@code COMMITTING}, the session
     * is polled with {@link #reloadSession()} at the interval given by
     * {@code HttpUtils.getAsyncIntervalInMills(settings)} until it leaves those
     * states or {@code HttpUtils.getAsyncTimeoutInSeconds(settings)} has elapsed.
     *
     * @param messages commit messages produced by the writers; {@code null} entries are skipped
     * @throws IllegalArgumentException if {@code messages} is {@code null}
     * @throws IOException if the request fails, the wait times out or is interrupted,
     *                     or the session does not end in {@code COMMITTED}
     */
    @Override
    public void commit(WriterCommitMessage[] messages) throws IOException {
        ensureInitialized();

        if (messages == null) {
            throw new IllegalArgumentException("Invalid argument: messages.");
        }

        Preconditions.checkString(sessionId, "Table write session id");
        Map<String, String> headers = HttpUtils.createCommonHeader(settings);
        headers.put(Headers.CONTENT_TYPE, "application/json");

        Map<String, String> params = HttpUtils.createCommonParams(settings);
        params.put(ConfigConstants.SESSION_ID, sessionId);

        try {
            String commitRequest = generateCommitRequest(messages);
            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Commit table '%s'.\n"
                        + "Session request:\n"
                        + "%s", identifier.toString(), commitRequest));
            }

            Response resp = retryHandler.executeWithRetry(
                    () -> restClient.stringRequest(ResourceBuilder.buildTableCommitResource(
                                    VERSION_1,
                                    identifier.getProject(),
                                    identifier.getSchema(),
                                    identifier.getTable()),
                            "POST", params, headers, commitRequest));

            if (!resp.isOK()) {
                throw new TunnelException(resp.getHeader(HEADER_ODPS_REQUEST_ID),
                        new ByteArrayInputStream(resp.getBody()), resp.getStatus());
            }

            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            String response = new String(resp.getBody(), StandardCharsets.UTF_8);
            loadResultFromJson(response);

            if (sessionStatus != SessionStatus.COMMITTED) {
                long asyncIntervalInMills = HttpUtils.getAsyncIntervalInMills(settings);
                long asyncTimeoutInMills = HttpUtils.getAsyncTimeoutInSeconds(settings) * 1000L;
                long startTime = System.currentTimeMillis();

                while (sessionStatus == SessionStatus.NORMAL ||
                        sessionStatus == SessionStatus.COMMITTING) {
                    Thread.sleep(asyncIntervalInMills);

                    // Parameterized logging: nothing is formatted unless TRACE is enabled.
                    logger.trace("Async commit table: '{}', session id: {}", identifier, sessionId);

                    response = reloadSession();

                    // Only treat the deadline as a timeout when the commit is still in
                    // flight. If the reload just observed a terminal status, fall through
                    // so COMMITTED succeeds and a failure is reported as a failure.
                    boolean stillInProgress = sessionStatus == SessionStatus.NORMAL
                            || sessionStatus == SessionStatus.COMMITTING;
                    if (stillInProgress
                            && System.currentTimeMillis() - startTime >= asyncTimeoutInMills) {
                        throw new IOException(
                                String.format(
                                        "Commit table write session timeout.\n"
                                                + "Table identifier: %s.\n"
                                                + "Session status: %s.\n"
                                                + "Session id: %s.\n"
                                                + "Error message: %s.",
                                        identifier.toString(),
                                        sessionStatus,
                                        sessionId,
                                        errorMessage));
                    }
                }
            }

            if (sessionStatus != SessionStatus.COMMITTED) {
                throw new IOException(
                        String.format(
                                "Commit table write session failed.\n"
                                        + "Table identifier: %s.\n"
                                        + "Session status: %s.\n"
                                        + "Session id: %s.\n"
                                        + "Error message: %s.",
                                identifier.toString(),
                                sessionStatus,
                                sessionId,
                                errorMessage));
            }

            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Commit table '%s' success.\n"
                        + "Session response:\n"
                        + "%s", identifier.toString(), response));
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can still observe the
            // interruption; the generic catch below would otherwise swallow it.
            Thread.currentThread().interrupt();
            throw new IOException(
                    "Interrupted while waiting for commit of table write session: " + sessionId, e);
        } catch (Exception e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Currently a no-op: this implementation does not release or delete anything yet.
     */
    @Override
    public void cleanup() {
        // TODO: clean up
    }

    /**
     * Lazily builds the transient collaborators of this session: the REST client
     * (with a retry logger that emits a warning per retry) and the retry handler
     * that wraps it. Each is created only if it is currently {@code null}.
     */
    private void ensureInitialized() {
        if (restClient == null) {
            ExecutionEnvironment environment = ExecutionEnvironment.create(settings);
            restClient = environment.createHttpClient(identifier.getProject());

            RestClient.RetryLogger retryLogger = new RestClient.RetryLogger() {
                @Override
                public void onRetryLog(Throwable e, long retryCount, long retrySleepTime) {
                    // The sleep time is divided by 1000 and reported as seconds.
                    long sleepSeconds = retrySleepTime / 1000;
                    String text = String.format(
                            "Start retry for table write: %s, retryCount: %d, will retry in %d seconds.",
                            identifier.toString(), retryCount, sleepSeconds);
                    logger.warn(text, e);
                }
            };
            restClient.setRetryLogger(retryLogger);
        }

        if (retryHandler == null) {
            retryHandler = new TableRetryHandler(restClient);
        }
    }

    /**
     * Serializes the session-creation request body.
     *
     * <p>The JSON object carries, in this order: {@code PartitionSpec},
     * {@code Overwrite}, {@code ArrowOptions}, {@code DynamicPartitionOptions},
     * {@code SupportWriteCluster} and {@code MaxFieldSize}.
     *
     * @return the request as a JSON string, written without HTML escaping
     */
    private String generateWriteSessionRequest() {
        JsonObject body = new JsonObject();

        String partitionText = targetPartitionSpec.toString(false, true);
        body.add("PartitionSpec", new JsonPrimitive(partitionText));
        body.add("Overwrite", new JsonPrimitive(overwrite));

        JsonObject arrowSection = new JsonObject();
        arrowSection.addProperty("TimestampUnit", arrowOptions.getTimestampUnit().toString());
        arrowSection.addProperty("DatetimeUnit", arrowOptions.getDateTimeUnit().toString());
        body.add("ArrowOptions", arrowSection);

        JsonObject dynamicSection = new JsonObject();
        dynamicSection.addProperty("InvalidStrategy", dynamicPartitionOptions.getInvalidStrategy().toString());
        dynamicSection.addProperty("InvalidLimit", dynamicPartitionOptions.getInvalidLimit());
        dynamicSection.addProperty("DynamicPartitionLimit", dynamicPartitionOptions.getDynamicPartitionLimit());
        body.add("DynamicPartitionOptions", dynamicSection);

        // Cluster writes are advertised when either bucket kind is supported.
        boolean clusterWriteSupported = writeCapabilities.supportHashBuckets()
                || writeCapabilities.supportRangeBuckets();
        body.add("SupportWriteCluster", new JsonPrimitive(clusterWriteSupported));

        body.addProperty("MaxFieldSize", maxFieldSize);

        return new GsonBuilder().disableHtmlEscaping().create().toJson(body);
    }

    /**
     * Serializes the commit request body: a JSON object whose {@code CommitMessages}
     * array holds the commit message of every non-null entry, in input order.
     *
     * @param messages writer commit messages; {@code null} entries are skipped and every
     *                 other entry is cast to {@link WriterCommitMessageImpl}
     * @return the request as a JSON string
     */
    private String generateCommitRequest(WriterCommitMessage[] messages) {
        JsonArray encoded = new JsonArray();
        for (int i = 0; i < messages.length; i++) {
            WriterCommitMessage candidate = messages[i];
            if (candidate != null) {
                encoded.add(new JsonPrimitive(
                        ((WriterCommitMessageImpl) candidate).getCommitMessage()));
            }
        }

        JsonObject body = new JsonObject();
        body.add("CommitMessages", encoded);
        return body.toString();
    }

    /**
     * Parses a session response and copies every field that is present into this
     * object's state. Fields absent from the JSON leave the current value untouched.
     *
     * <p>Recognized top-level keys: {@code SessionId}, {@code ExpirationTime},
     * {@code SessionType}, {@code SessionStatus}, {@code Message}, {@code DataSchema},
     * {@code SupportedDataFormat}, {@code MaxBlockNumber}, {@code RequiredOrdering}
     * and {@code RequiredDistribution}.
     *
     * @param json response body returned by the server
     * @throws TunnelException if the text cannot be parsed, a value is not a known
     *                         enum constant, or the session type does not match this session
     */
    private void loadResultFromJson(String json) throws TunnelException {
        try {
            JsonObject tree = new JsonParser().parse(json).getAsJsonObject();

            // session id
            if (tree.has("SessionId")) {
                sessionId = tree.get("SessionId").getAsString();
            }

            // expiration time
            if (tree.has("ExpirationTime")) {
                expirationTime = tree.get("ExpirationTime").getAsLong();
            }

            // session type
            if (tree.has("SessionType")) {
                String sessionType = tree.get("SessionType").getAsString();
                // Locale.ROOT keeps case mapping stable regardless of the JVM default
                // locale (e.g. Turkish maps 'I' to a dotless 'ı').
                if (!getType().toString().equals(sessionType.toLowerCase(Locale.ROOT))) {
                    throw new UnsupportedOperationException("Unsupported session type: " + sessionType);
                }
            }

            // status
            if (tree.has("SessionStatus")) {
                String status = tree.get("SessionStatus").getAsString().toUpperCase(Locale.ROOT);
                sessionStatus = SessionStatus.valueOf(status);
            }

            // error message
            if (tree.has("Message")) {
                errorMessage = tree.get("Message").getAsString();
            }

            // schema
            if (tree.has("DataSchema")) {
                JsonObject dataSchema = tree.get("DataSchema").getAsJsonObject();
                List<Column> schemaColumns = new ArrayList<>();
                List<String> partitionKeys = new ArrayList<>();

                if (dataSchema.has("DataColumns")) {
                    JsonArray dataColumns = dataSchema.get("DataColumns").getAsJsonArray();
                    for (int i = 0; i < dataColumns.size(); ++i) {
                        JsonObject column = dataColumns.get(i).getAsJsonObject();
                        schemaColumns.add(SchemaUtils.parseColumn(column));
                    }
                }

                if (dataSchema.has("PartitionColumns")) {
                    JsonArray partitionColumns = dataSchema.get("PartitionColumns").getAsJsonArray();
                    for (int i = 0; i < partitionColumns.size(); ++i) {
                        JsonObject column = partitionColumns.get(i).getAsJsonObject();
                        Column partitionCol = SchemaUtils.parseColumn(column);
                        // Partition columns are appended after the data columns and
                        // additionally recorded by name as partition keys.
                        schemaColumns.add(partitionCol);
                        partitionKeys.add(partitionCol.getName());
                    }
                }

                requiredSchema = DataSchema.newBuilder()
                        .columns(schemaColumns)
                        .partitionBy(partitionKeys)
                        .build();
            }

            // data format
            if (tree.has("SupportedDataFormat")) {
                supportDataFormats = new HashSet<>();
                JsonArray formats = tree.get("SupportedDataFormat").getAsJsonArray();
                formats.forEach(format -> supportDataFormats.add(
                        SessionUtils.parseDataFormat(format.getAsJsonObject())));
            }

            // max block num
            if (tree.has("MaxBlockNumber")) {
                maxBlockNumber = tree.get("MaxBlockNumber").getAsLong();
            }

            // required ordering
            if (tree.has("RequiredOrdering")) {
                JsonArray orders = tree.get("RequiredOrdering").getAsJsonArray();
                List<SortOrder> sortOrders = new ArrayList<>();
                orders.forEach(order -> sortOrders.add(parseOrders(order.getAsJsonObject())));
                requiredSortOrders = sortOrders.toArray(new SortOrder[0]);
            }

            // required distribution
            if (tree.has("RequiredDistribution")) {
                JsonObject distribution = tree.get("RequiredDistribution").getAsJsonObject();
                Distribution.Type type = Distribution.Type.UNSPECIFIED;
                if (distribution.has("Type")) {
                    type = Distribution.Type.valueOf(
                            distribution.get("Type").getAsString().toUpperCase(Locale.ROOT));
                }
                if (type.equals(Distribution.Type.UNSPECIFIED)) {
                    requiredDistribution = Distributions.unspecified();
                } else if (type.equals(Distribution.Type.HASH) ||
                        type.equals(Distribution.Type.RANGE)) {
                    List<String> clusterKeys = new ArrayList<>();
                    if (distribution.has("ClusterKeys")) {
                        JsonArray keys = distribution.get("ClusterKeys").getAsJsonArray();
                        keys.forEach(key -> clusterKeys.add(key.getAsString()));
                    }
                    // -1 is passed on when the response carries no bucket count.
                    int bucketsNumber = -1;
                    if (distribution.has("BucketsNumber")) {
                        bucketsNumber = distribution.get("BucketsNumber").getAsInt();
                    }
                    requiredDistribution = Distributions.clustered(clusterKeys, type, bucketsNumber);
                }
                // Any other distribution type leaves requiredDistribution unchanged.
            }
        } catch (Exception e) {
            throw new TunnelException("Invalid session response: \n" + json, e);
        }
    }

    /**
     * Converts one entry of the {@code RequiredOrdering} array into a {@link SortOrder}.
     *
     * @param order JSON object that must contain {@code Name}; {@code SortDirection}
     *              and {@code NullOrdering} are optional and default to
     *              {@link SortDirection#NONE} and {@link NullOrdering#ANY}
     * @return the parsed sort order
     * @throws IllegalArgumentException if a direction or null-ordering value is not a
     *                                  known enum constant (raised by {@code valueOf})
     */
    private SortOrder parseOrders(JsonObject order) {
        Preconditions.checkArgument(order.has("Name"), "Sort order must has name");
        String name = order.get("Name").getAsString();
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale so
        // that Enum.valueOf sees the exact constant name (e.g. under a Turkish locale).
        SortDirection sortDirection = order.has("SortDirection") ?
                SortDirection.valueOf(order.get("SortDirection").getAsString().toUpperCase(Locale.ROOT))
                : SortDirection.NONE;
        NullOrdering nullOrdering = order.has("NullOrdering") ?
                NullOrdering.valueOf(order.get("NullOrdering").getAsString().toUpperCase(Locale.ROOT))
                : NullOrdering.ANY;
        return new SortOrder(name, sortDirection, nullOrdering);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy