All downloads are free. Search and download functionality uses the official Maven repository.

com.aliyun.odps.table.read.impl.batch.SplitArrowReaderImpl Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.table.read.impl.batch;

import java.io.IOException;
import java.util.Map;

import org.apache.arrow.vector.VectorSchemaRoot;

import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.table.TableIdentifier;
import com.aliyun.odps.table.arrow.ArrowReader;
import com.aliyun.odps.table.arrow.ArrowReaderFactory;
import com.aliyun.odps.table.configuration.CompressionCodec;
import com.aliyun.odps.table.configuration.ReaderOptions;
import com.aliyun.odps.table.enviroment.ExecutionEnvironment;
import com.aliyun.odps.table.metrics.Metrics;
import com.aliyun.odps.table.metrics.count.BytesCount;
import com.aliyun.odps.table.metrics.count.RecordCount;
import com.aliyun.odps.table.read.SplitReader;
import com.aliyun.odps.table.read.split.InputSplit;
import com.aliyun.odps.table.read.split.InputSplitWithIndex;
import com.aliyun.odps.table.read.split.InputSplitWithRowRange;
import com.aliyun.odps.table.utils.ConfigConstants;
import com.aliyun.odps.table.utils.HttpUtils;
import com.aliyun.odps.table.utils.TableRetryHandler;
import com.aliyun.odps.tunnel.HttpHeaders;
import com.aliyun.odps.tunnel.TunnelException;
import com.aliyun.odps.tunnel.io.TunnelRetryHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link SplitReader} that opens one HTTP connection for a single {@link InputSplit} and
 * hands the response stream to an {@link ArrowReader} obtained from
 * {@link ArrowReaderFactory#getRecordBatchReader}.
 *
 * <p>The connection is opened in the constructor and released by {@link #close()}. The class
 * keeps unsynchronized mutable state; NOTE(review): it is presumably meant to be used from a
 * single thread - confirm against callers.
 *
 * <p>NOTE(review): {@code SplitReader} is implemented here without type arguments, exactly as
 * in the original declaration - confirm whether the interface is generic.
 */
public class SplitArrowReaderImpl implements SplitReader {

    private static final Logger logger = LoggerFactory.getLogger(SplitArrowReaderImpl.class.getName());

    /** Reader created over the connection's input stream. */
    private final ArrowReader reader;
    /** Currently open connection, or {@code null} once it has been disconnected. */
    private Connection connection;
    /** Set once {@link #close()} has started releasing resources. */
    private boolean isClosed;
    private Metrics metrics;
    private BytesCount bytesCount;
    private RecordCount recordCount;
    /** Request id taken from the response header; used in error logs. */
    private String requestId;

    /**
     * Opens the connection for {@code split} and creates the reader on top of its stream.
     *
     * @param identifier table whose data is read
     * @param split      split to read; must be an {@link InputSplitWithRowRange} or an
     *                   {@link InputSplitWithIndex}
     * @param options    reader options (settings, compression codec, batch limits, data format)
     * @throws IOException                   if the connection cannot be opened, the response is
     *                                       not OK, or the reader cannot be created
     * @throws UnsupportedOperationException if {@code split} is of any other type
     */
    public SplitArrowReaderImpl(TableIdentifier identifier,
                                InputSplit split,
                                ReaderOptions options) throws IOException {
        openReaderConnection(identifier, split, options);
        initMetrics();
        this.isClosed = false;

        ArrowReader arrowReader;
        try {
            arrowReader = ArrowReaderFactory.getRecordBatchReader(connection.getInputStream(), options);
        } catch (Throwable t) {
            // The connection is already open at this point; without this cleanup a failure
            // here would leak it, because the caller never gets an instance to close().
            disconnectQuietly(t);
            throw t;
        }
        this.reader = arrowReader;
    }

    /**
     * Advances the underlying reader to the next batch.
     *
     * @return the value returned by {@code reader.nextBatch()}
     * @throws IOException if the reader fails; the failure is logged with the request id first
     */
    @Override
    public boolean hasNext() throws IOException {
        try {
            return this.reader.nextBatch();
        } catch (IOException e) {
            logger.error("Get next record batch failed, requestId=" + requestId, e);
            throw e;
        }
    }

    /**
     * Returns the reader's current batch and updates the metrics.
     *
     * <p>Every call adds the batch's row count to the record counter, so calling this more
     * than once for the same batch counts its rows more than once.
     *
     * @return the current {@link VectorSchemaRoot} of the underlying reader
     */
    @Override
    public VectorSchemaRoot get() {
        VectorSchemaRoot root = reader.getCurrentValue();
        recordCount.inc(root.getRowCount());
        bytesCount.setValue(reader.bytesRead());
        return root;
    }

    /**
     * Closes the reader and disconnects the connection. Calls after the first one are no-ops.
     *
     * @throws IOException if closing the reader or disconnecting fails
     */
    @Override
    public void close() throws IOException {
        if (isClosed) {
            return;
        }
        // Mark closed up front so a failing close cannot be re-entered on a half-released state.
        isClosed = true;
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            // Always release the connection, even when closing the reader failed.
            disconnect();
        }
    }

    /** Returns the metrics object holding the byte and record counters of this reader. */
    @Override
    public Metrics currentMetricsValues() {
        return this.metrics;
    }

    /** Creates the byte and record counters and registers both in a fresh {@link Metrics}. */
    private void initMetrics() {
        this.bytesCount = new BytesCount();
        this.recordCount = new RecordCount();
        this.metrics = new Metrics();
        metrics.register(bytesCount);
        metrics.register(recordCount);
    }

    /**
     * Builds the request for {@code split}, executes it through the retry handler and stores
     * the resulting connection and request id.
     *
     * @throws IOException                   wrapping any failure while connecting, or a
     *                                       {@link TunnelException} when the response is not OK
     * @throws UnsupportedOperationException if the split type is not supported (thrown before
     *                                       any connection is attempted, and not wrapped)
     */
    private void openReaderConnection(TableIdentifier identifier,
                                      InputSplit split,
                                      ReaderOptions options) throws IOException {
        RestClient restClient = ExecutionEnvironment.create(options.getSettings())
                .createHttpClient(identifier.getProject());
        restClient.setRetryLogger(new RestClient.RetryLogger() {
            @Override
            public void onRetryLog(Throwable e, long retryCount, long retrySleepTime) {
                logger.warn(String.format("Reader retry for session: %s, " +
                                "retryCount: %d, will retry in %d seconds.",
                        split.getSessionId(), retryCount, retrySleepTime / 1000), e);
            }
        });

        TunnelRetryHandler retryHandler = new TableRetryHandler(restClient);

        Map<String, String> headers = buildHeaders(options);
        Map<String, String> params = buildParams(split, options);

        try {
            String resource = ResourceBuilder.buildTableDataResource(
                    ConfigConstants.VERSION_1,
                    identifier.getProject(),
                    identifier.getSchema(),
                    identifier.getTable());

            Response resp = retryHandler.executeWithRetry(() -> {
                try {
                    connection = restClient.connect(resource, "GET", params, headers);
                    return connection.getResponse();
                } catch (Exception e) {
                    // Release this attempt's connection without masking the real failure.
                    disconnectQuietly(e);
                    throw e;
                }
            });

            if (!resp.isOK()) {
                TunnelException err = new TunnelException(connection.getInputStream());
                err.setRequestId(resp.getHeader(HttpHeaders.HEADER_ODPS_REQUEST_ID));
                throw err;
            }
            this.requestId = resp.getHeader(HttpHeaders.HEADER_ODPS_REQUEST_ID);
        } catch (Exception e) {
            // A failing disconnect is attached as suppressed so the original cause is still
            // the one that gets logged and rethrown.
            disconnectQuietly(e);
            logger.error("Open split reader failed", e);
            throw new IOException(e.getMessage(), e);
        }
    }

    /** Builds the common request headers and adds Accept-Encoding for ZSTD or LZ4_FRAME. */
    private static Map<String, String> buildHeaders(ReaderOptions options) {
        Map<String, String> headers = HttpUtils.createCommonHeader(options.getSettings());
        CompressionCodec codec = options.getCompressionCodec();
        // Constant-first equals keeps the comparison safe if no codec is configured.
        if (CompressionCodec.ZSTD.equals(codec)) {
            headers.put(Headers.ACCEPT_ENCODING, CompressionCodec.ZSTD.toString());
        } else if (CompressionCodec.LZ4_FRAME.equals(codec)) {
            headers.put(Headers.ACCEPT_ENCODING, CompressionCodec.LZ4_FRAME.toString());
        }
        return headers;
    }

    /**
     * Builds the query parameters: session id, split location (row range or split index),
     * batch limits and data format.
     *
     * @throws UnsupportedOperationException if {@code split} is neither an
     *                                       {@link InputSplitWithRowRange} nor an
     *                                       {@link InputSplitWithIndex}
     */
    private static Map<String, String> buildParams(InputSplit split, ReaderOptions options) {
        Map<String, String> params = HttpUtils.createCommonParams(options.getSettings());
        params.put(ConfigConstants.SESSION_ID, split.getSessionId());

        if (split instanceof InputSplitWithRowRange) {
            InputSplitWithRowRange rowRangeInputSplit = (InputSplitWithRowRange) split;
            params.put(ConfigConstants.ROW_INDEX,
                    String.valueOf(rowRangeInputSplit.getRowRange().getStartIndex()));
            params.put(ConfigConstants.ROW_COUNT,
                    String.valueOf(rowRangeInputSplit.getRowRange().getNumRecord()));
        } else if (split instanceof InputSplitWithIndex) {
            InputSplitWithIndex indexedInputSplit = (InputSplitWithIndex) split;
            params.put(ConfigConstants.SPLIT_INDEX,
                    String.valueOf(indexedInputSplit.getSplitIndex()));
        } else {
            throw new UnsupportedOperationException("Unsupported split type: " + split);
        }

        params.put(ConfigConstants.MAX_BATCH_ROWS,
                String.valueOf(options.getBatchRowCount()));
        // A raw-size limit of 0 is treated as "not set" and is not sent.
        if (options.getBatchRawSize() != 0L) {
            params.put(ConfigConstants.MAX_BATCH_RAW_SIZE, String.valueOf(options.getBatchRawSize()));
        }
        params.put(ConfigConstants.DATA_FORMAT_TYPE,
                options.getDataFormat().getType().toString());
        params.put(ConfigConstants.DATA_FORMAT_VERSION,
                options.getDataFormat().getVersion().toString());
        return params;
    }

    /**
     * Disconnects the current connection, if any. The field is cleared first, so repeated
     * calls do not disconnect the same connection twice.
     */
    private void disconnect() throws IOException {
        Connection current = connection;
        connection = null;
        if (current != null) {
            current.disconnect();
        }
    }

    /**
     * Disconnects on an error path. A failure to disconnect is recorded as a suppressed
     * exception on {@code primary} instead of replacing it.
     */
    private void disconnectQuietly(Throwable primary) {
        try {
            disconnect();
        } catch (IOException e) {
            primary.addSuppressed(e);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy