// com.aliyun.odps.table.write.impl.batch.ArrowWriterImpl (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.table.write.impl.batch;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.rest.ResourceBuilder;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.table.DataSchema;
import com.aliyun.odps.table.TableIdentifier;
import com.aliyun.odps.table.arrow.ArrowWriter;
import com.aliyun.odps.table.arrow.ArrowWriterFactory;
import com.aliyun.odps.table.configuration.ArrowOptions;
import com.aliyun.odps.table.configuration.WriterOptions;
import com.aliyun.odps.table.enviroment.ExecutionEnvironment;
import com.aliyun.odps.table.metrics.Metrics;
import com.aliyun.odps.table.metrics.count.BytesCount;
import com.aliyun.odps.table.metrics.count.RecordCount;
import com.aliyun.odps.table.utils.ConfigConstants;
import com.aliyun.odps.table.utils.HttpUtils;
import com.aliyun.odps.table.utils.SchemaUtils;
import com.aliyun.odps.table.utils.TableRetryHandler;
import com.aliyun.odps.table.write.BatchWriter;
import com.aliyun.odps.table.write.WriterAttemptId;
import com.aliyun.odps.table.write.WriterCommitMessage;
import com.aliyun.odps.tunnel.HttpHeaders;
import com.aliyun.odps.tunnel.TunnelException;
import com.aliyun.odps.tunnel.io.TunnelRetryHandler;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import static com.aliyun.odps.table.utils.ConfigConstants.VERSION_1;
import static com.aliyun.odps.tunnel.HttpHeaders.HEADER_ODPS_REQUEST_ID;
/**
 * {@link BatchWriter} that sends Arrow {@link VectorSchemaRoot} batches for a single block of a
 * table write session over an HTTP {@code POST} connection using chunked transfer encoding.
 *
 * <p>The connection is opened lazily on the first {@link #write(VectorSchemaRoot)} call. Calling
 * {@link #close()} (or {@link #commit()}, which delegates to it) finishes the upload, reads the
 * server response and, on success, builds the {@link WriterCommitMessage} for this block.
 *
 * <p>The class holds mutable, unsynchronized state.
 */
public class ArrowWriterImpl implements BatchWriter<VectorSchemaRoot> {

    private static final Logger logger = LoggerFactory.getLogger(ArrowWriterImpl.class);

    private boolean isClosed;
    private final long blockNumber;
    private final WriterOptions writerOptions;
    private final Schema arrowSchema;
    private final String sessionId;
    private final TableIdentifier identifier;
    private final WriterAttemptId attemptId;

    /** HTTP connection backing {@link #batchWriter}; {@code null} until the first write. */
    private Connection connection;
    /** Arrow batch serializer bound to the connection's output stream; created lazily. */
    private ArrowWriter batchWriter;
    /** Result of a successful {@link #close()}; stays {@code null} if nothing was written. */
    private WriterCommitMessage commitMessage;

    private Metrics metrics;
    private BytesCount bytesCount;
    private RecordCount recordCount;

    /**
     * Creates a writer for one block of a write session. No network connection is opened here.
     *
     * @param sessionId     identifier of the write session the data belongs to
     * @param identifier    target table (project, schema, table)
     * @param schema        data schema whose columns are converted to the Arrow schema
     * @param blockNumber   block number sent with the upload request
     * @param attemptId     attempt whose attempt number is sent with the upload request
     * @param writerOptions writer configuration (settings, allocator, chunk size, data format)
     * @param arrowOptions  options used when converting the columns to an Arrow schema
     */
    public ArrowWriterImpl(String sessionId,
                           TableIdentifier identifier,
                           DataSchema schema,
                           long blockNumber,
                           WriterAttemptId attemptId,
                           WriterOptions writerOptions,
                           ArrowOptions arrowOptions) {
        this.sessionId = sessionId;
        this.identifier = identifier;
        this.blockNumber = blockNumber;
        this.attemptId = attemptId;
        this.writerOptions = writerOptions;
        this.arrowSchema = SchemaUtils.toArrowSchema(schema.getColumns(), arrowOptions);
        this.isClosed = false;
        initMetrics();
    }

    /**
     * Creates a new, empty {@link VectorSchemaRoot} for this writer's Arrow schema, allocated
     * from the buffer allocator configured in the writer options.
     */
    @Override
    public VectorSchemaRoot newElement() {
        return VectorSchemaRoot.create(arrowSchema, writerOptions.getBufferAllocator());
    }

    /**
     * Writes one batch, opening the upload connection first if this is the first write.
     * Updates the record and byte counters after a successful write.
     *
     * @param root batch to send
     * @throws IOException if the writer is already closed, the connection cannot be opened, or
     *                     the batch cannot be written; when the server has already answered with
     *                     an error, the cause is the corresponding {@link TunnelException}
     */
    @Override
    public void write(VectorSchemaRoot root) throws IOException {
        if (isClosed) {
            throw new IOException("Arrow writer is closed");
        }

        if (batchWriter == null) {
            batchWriter = ArrowWriterFactory.getRecordBatchWriter(
                openWriterConnection(sessionId, identifier, blockNumber, attemptId),
                writerOptions);
        }

        try {
            batchWriter.writeBatch(root);
            recordCount.inc(root.getRowCount());
            bytesCount.setValue(batchWriter.bytesWritten());
        } catch (IOException e) {
            // Prefer the server's error report over the local stream failure when one exists.
            Response response = connection.getResponse();
            if (response != null && !response.isOK()) {
                IOException serverError = toIOException(response);
                // Keep the local failure visible instead of silently dropping it.
                serverError.addSuppressed(e);
                throw serverError;
            }
            throw new IOException("ArrowHttpOutputStream Serialize Exception", e);
        }
    }

    /**
     * Aborts the upload by disconnecting the underlying connection, if one was opened.
     *
     * @throws IOException if disconnecting fails
     */
    @Override
    public void abort() throws IOException {
        disconnect();
    }

    /**
     * Closes the writer and returns the commit message produced by the server.
     *
     * @return the commit message, or {@code null} if no batch was ever written
     * @throws IOException if closing the writer fails
     */
    @Override
    @Nullable
    public WriterCommitMessage commit() throws IOException {
        close();
        return commitMessage;
    }

    /**
     * Finishes the upload (if any batch was written), validates the server response and stores
     * the resulting commit message. Subsequent calls are no-ops.
     *
     * @throws IOException if finishing the stream fails, no response is available, the server
     *                     answered with an error, or the response body cannot be parsed
     */
    @Override
    public void close() throws IOException {
        if (isClosed) {
            return;
        }

        try {
            if (batchWriter != null) {
                batchWriter.close();
                Response response = connection.getResponse();
                if (response == null) {
                    // write() treats a null response as possible; fail clearly rather than NPE.
                    throw new IOException("Arrow writer close failed: no response available");
                }
                if (!response.isOK()) {
                    throw toIOException(response);
                }
                commitMessage = new WriterCommitMessageImpl(
                    blockNumber, loadResultFromJson(connection.getInputStream()));
            }
        } finally {
            // Mark closed first so a failing disconnect cannot leave the writer re-closable.
            isClosed = true;
            disconnect();
        }
    }

    /** Returns the metrics object holding this writer's byte and record counters. */
    @Override
    public Metrics currentMetricsValues() {
        return this.metrics;
    }

    /** Creates the byte and record counters and registers both with a fresh metrics object. */
    private void initMetrics() {
        this.bytesCount = new BytesCount();
        this.recordCount = new RecordCount();
        this.metrics = new Metrics();
        this.metrics.register(bytesCount);
        this.metrics.register(recordCount);
    }

    /**
     * Opens the chunked {@code POST} connection for this block, with retry, and returns its
     * output stream. The opened connection is stored in {@link #connection}.
     *
     * @param sessionId   session the data resource is built for
     * @param identifier  target table
     * @param blockNumber block number request parameter
     * @param attemptId   attempt whose number is sent as a request parameter
     * @return output stream of the opened connection
     * @throws IOException if the connection cannot be established
     */
    private OutputStream openWriterConnection(String sessionId,
                                              TableIdentifier identifier,
                                              long blockNumber,
                                              WriterAttemptId attemptId)
            throws IOException {
        Map<String, String> headers = new HashMap<>();
        headers.put(Headers.TRANSFER_ENCODING, Headers.CHUNKED);
        headers.put(Headers.CONTENT_TYPE, "application/octet-stream");

        if (writerOptions.getSettings() != null && writerOptions.getSettings().getTags()
                .isPresent()) {
            headers.put(HttpHeaders.HEADER_ODPS_TUNNEL_TAGS,
                        String.join(",", writerOptions.getSettings().getTags().get()));
        }

        // TODO: compress

        Map<String, String> params = HttpUtils.createCommonParams(writerOptions.getSettings());
        params.put(ConfigConstants.BLOCK_NUMBER, Long.toString(blockNumber));
        params.put(ConfigConstants.ATTEMPT_NUMBER, Integer.toString(attemptId.getAttemptNumber()));
        params.put(ConfigConstants.DATA_FORMAT_TYPE,
                   writerOptions.getDataFormat().getType().toString());
        params.put(ConfigConstants.DATA_FORMAT_VERSION,
                   writerOptions.getDataFormat().getVersion().toString());

        String resource = ResourceBuilder.buildTableSessionDataResource(
            VERSION_1,
            identifier.getProject(),
            identifier.getSchema(),
            identifier.getTable(),
            sessionId);

        try {
            RestClient restClient = ExecutionEnvironment.create(writerOptions.getSettings())
                .createHttpClient(identifier.getProject());
            restClient.setChunkSize(writerOptions.getChunkSize());
            restClient.setRetryLogger(new RestClient.RetryLogger() {
                @Override
                public void onRetryLog(Throwable e, long retryCount, long retrySleepTime) {
                    logger.warn(String.format("Writer retry for session: %s, " +
                                              "retryCount: %d, will retry in %d seconds.",
                                              sessionId, retryCount, retrySleepTime / 1000), e);
                }
            });

            TunnelRetryHandler retryHandler = new TableRetryHandler(restClient);
            return retryHandler.executeWithRetry(() -> {
                try {
                    this.connection = restClient.connect(resource, "POST", params, headers);
                    return connection.getOutputStream();
                } catch (Exception e) {
                    // Drop the half-open connection before the retry handler sees the failure.
                    disconnect();
                    throw e;
                }
            });
        } catch (Exception e) {
            disconnect();
            logger.error("Open writer failed", e);
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Reads the response body as JSON and extracts its {@code CommitMessage} member.
     * The stream is always closed.
     *
     * @param is response body stream
     * @return the {@code CommitMessage} value, or an empty string if the member is absent
     * @throws IOException if reading or parsing the body fails
     */
    private String loadResultFromJson(InputStream is) throws IOException {
        String result = "";
        try {
            String json = IOUtils.readStreamAsString(is);
            JsonObject tree = new JsonParser().parse(json).getAsJsonObject();
            if (tree.has("CommitMessage")) {
                result = tree.get("CommitMessage").getAsString();
            }
        } catch (Exception e) {
            throw new IOException("Parse writer commit response failed", e);
        } finally {
            if (is != null) {
                is.close();
            }
        }
        return result;
    }

    /**
     * Converts a non-OK server response into an {@link IOException} whose cause is a
     * {@link TunnelException} built from the request id header, response body and status.
     */
    private IOException toIOException(Response response) throws IOException {
        TunnelException exception = new TunnelException(
            response.getHeader(HEADER_ODPS_REQUEST_ID),
            connection.getInputStream(),
            response.getStatus());
        return new IOException(exception.getMessage(), exception);
    }

    /** Disconnects the current connection, if one has been opened. */
    private void disconnect() throws IOException {
        if (connection != null) {
            connection.disconnect();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy