All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.snowflake.client.jdbc.SnowflakeResultSetSerializableV1 Maven / Gradle / Ivy

There is a newer version: 3.21.0
Show newest version
/*
 * Copyright (c) 2012-2019 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.client.jdbc;

import static net.snowflake.client.core.Constants.GB;
import static net.snowflake.client.core.Constants.MB;
import static net.snowflake.client.core.SessionUtil.*;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
import net.snowflake.client.core.*;
import net.snowflake.client.jdbc.telemetry.NoOpTelemetryClient;
import net.snowflake.client.jdbc.telemetry.Telemetry;
import net.snowflake.client.log.ArgSupplier;
import net.snowflake.client.log.SFLogger;
import net.snowflake.client.log.SFLoggerFactory;
import net.snowflake.common.core.SFBinaryFormat;
import net.snowflake.common.core.SnowflakeDateTimeFormat;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowStreamReader;

/**
 * This object is an intermediate object between result JSON from GS and ResultSet. Originally, it
 * is created from result JSON. And it can also be serializable. Logically, it stands for a part of
 * ResultSet.
 *
 * <p>A typical result JSON data section consists of the content of the first chunk file and file
 * metadata for the rest of chunk files e.g. URL, chunk size, etc. So this object consists of one
 * chunk data and a list of chunk file entries. In actual cases, it may only include chunk data or
 * chunk files entries.
 *
 * <p>This object is serializable, so it can be distributed to other threads or worker nodes for
 * distributed processing.
 */
public class SnowflakeResultSetSerializableV1
    implements SnowflakeResultSetSerializable, Serializable {
  private static final long serialVersionUID = 1L;

  static final SFLogger logger = SFLoggerFactory.getLogger(SnowflakeResultSetSerializableV1.class);

  static final ObjectMapper mapper = ObjectMapperFactory.getObjectMapper();

  // Threshold compared against Runtime.maxMemory() when sizing Arrow buffers.
  private static final long LOW_MAX_MEMORY = GB;

  /**
   * Serializable value holder describing one result chunk file: where to fetch it from, how many
   * rows it carries, and its size before and after decompression.
   */
  public static class ChunkFileMetadata implements Serializable {
    private static final long serialVersionUID = 1L;

    String fileURL;
    int rowCount;
    int compressedByteSize;
    int uncompressedByteSize;

    /**
     * @param fileURL location the chunk file is downloaded from
     * @param rowCount number of rows in the chunk file
     * @param compressedByteSize size of the chunk file as stored, in bytes
     * @param uncompressedByteSize size of the chunk file after decompression, in bytes
     */
    public ChunkFileMetadata(
        String fileURL, int rowCount, int compressedByteSize, int uncompressedByteSize) {
      this.fileURL = fileURL;
      this.rowCount = rowCount;
      this.compressedByteSize = compressedByteSize;
      this.uncompressedByteSize = uncompressedByteSize;
    }

    public String getFileURL() {
      return fileURL;
    }

    public void setFileURL(String fileURL) {
      this.fileURL = fileURL;
    }

    public int getRowCount() {
      return rowCount;
    }

    public int getCompressedByteSize() {
      return compressedByteSize;
    }

    public int getUncompressedByteSize() {
      return uncompressedByteSize;
    }

    /** Renders the row count and both sizes; the file URL is intentionally not part of the text. */
    @Override
    public String toString() {
      return "RowCount: "
          + rowCount
          + ", CompressedSize: "
          + compressedByteSize
          + ", UnCompressedSize: "
          + uncompressedByteSize;
    }
  }

  // Below fields are for the data fields that this object wraps
  // For ARROW, firstChunkStringData is BASE64-encoded arrow file.
  // For JSON, it's string data for the json.
String firstChunkStringData; int firstChunkRowCount; int chunkFileCount; List chunkFileMetadatas = new ArrayList<>(); // below fields are used for building a ChunkDownloader which // uses http client to download chunk files int resultPrefetchThreads; String qrmk; Map chunkHeadersMap = new HashMap<>(); // Below fields are from session or statement SnowflakeConnectString snowflakeConnectionString; OCSPMode ocspMode; HttpClientSettingsKey httpClientKey; int networkTimeoutInMilli; boolean isResultColumnCaseInsensitive; int resultSetType; int resultSetConcurrency; int resultSetHoldability; // Below are some metadata fields parsed from the result JSON node String queryId; String finalDatabaseName; String finalSchemaName; String finalRoleName; String finalWarehouseName; SFStatementType statementType; boolean totalRowCountTruncated; Map parameters = new HashMap<>(); int columnCount; private List resultColumnMetadata = new ArrayList<>(); long resultVersion; int numberOfBinds; boolean arrayBindSupported; long sendResultTime; List metaDataOfBinds = new ArrayList<>(); QueryResultFormat queryResultFormat; boolean treatNTZAsUTC; boolean formatDateWithTimezone; boolean useSessionTimezone; // Below fields are transient, they are generated from parameters transient TimeZone timeZone; transient Optional possibleSession = Optional.empty(); transient boolean honorClientTZForTimestampNTZ; transient SnowflakeDateTimeFormat timestampNTZFormatter; transient SnowflakeDateTimeFormat timestampLTZFormatter; transient SnowflakeDateTimeFormat timestampTZFormatter; transient SnowflakeDateTimeFormat dateFormatter; transient SnowflakeDateTimeFormat timeFormatter; transient SFBinaryFormat binaryFormatter; transient long memoryLimit; // Below fields are transient, they are generated on the fly. 
transient JsonNode firstChunkRowset = null; // only used for JSON result transient ChunkDownloader chunkDownloader = null; transient RootAllocator rootAllocator = null; // only used for ARROW result transient SFResultSetMetaData resultSetMetaData = null; transient ResultStreamProvider resultStreamProvider = new DefaultResultStreamProvider(); /** Default constructor. */ public SnowflakeResultSetSerializableV1() {} /** * This is copy constructor. * *

NOTE: The copy is NOT deep copy. * * @param toCopy the source object to be copied. */ private SnowflakeResultSetSerializableV1(SnowflakeResultSetSerializableV1 toCopy) { // Below fields are for the data fields that this object wraps this.firstChunkStringData = toCopy.firstChunkStringData; this.firstChunkRowCount = toCopy.firstChunkRowCount; this.chunkFileCount = toCopy.chunkFileCount; this.chunkFileMetadatas = toCopy.chunkFileMetadatas; // below fields are used for building a ChunkDownloader this.resultPrefetchThreads = toCopy.resultPrefetchThreads; this.qrmk = toCopy.qrmk; this.chunkHeadersMap = toCopy.chunkHeadersMap; // Below fields are from session or statement this.snowflakeConnectionString = toCopy.snowflakeConnectionString; this.ocspMode = toCopy.ocspMode; this.httpClientKey = toCopy.httpClientKey; this.networkTimeoutInMilli = toCopy.networkTimeoutInMilli; this.isResultColumnCaseInsensitive = toCopy.isResultColumnCaseInsensitive; this.resultSetType = toCopy.resultSetType; this.resultSetConcurrency = toCopy.resultSetConcurrency; this.resultSetHoldability = toCopy.resultSetHoldability; this.treatNTZAsUTC = toCopy.treatNTZAsUTC; this.formatDateWithTimezone = toCopy.formatDateWithTimezone; this.useSessionTimezone = toCopy.useSessionTimezone; // Below are some metadata fields parsed from the result JSON node this.queryId = toCopy.queryId; this.finalDatabaseName = toCopy.finalDatabaseName; this.finalSchemaName = toCopy.finalSchemaName; this.finalRoleName = toCopy.finalRoleName; this.finalWarehouseName = toCopy.finalWarehouseName; this.statementType = toCopy.statementType; this.totalRowCountTruncated = toCopy.totalRowCountTruncated; this.parameters = toCopy.parameters; this.columnCount = toCopy.columnCount; this.resultColumnMetadata = toCopy.resultColumnMetadata; this.resultVersion = toCopy.resultVersion; this.numberOfBinds = toCopy.numberOfBinds; this.arrayBindSupported = toCopy.arrayBindSupported; this.sendResultTime = toCopy.sendResultTime; this.metaDataOfBinds 
= toCopy.metaDataOfBinds; this.queryResultFormat = toCopy.queryResultFormat; this.possibleSession = toCopy.possibleSession; // Below fields are transient, they are generated from parameters this.timeZone = toCopy.timeZone; this.honorClientTZForTimestampNTZ = toCopy.honorClientTZForTimestampNTZ; this.timestampNTZFormatter = toCopy.timestampNTZFormatter; this.timestampLTZFormatter = toCopy.timestampLTZFormatter; this.timestampTZFormatter = toCopy.timestampTZFormatter; this.dateFormatter = toCopy.dateFormatter; this.timeFormatter = toCopy.timeFormatter; this.binaryFormatter = toCopy.binaryFormatter; this.memoryLimit = toCopy.memoryLimit; // Below fields are transient, they are generated on the fly. this.firstChunkRowset = toCopy.firstChunkRowset; this.chunkDownloader = toCopy.chunkDownloader; this.rootAllocator = toCopy.rootAllocator; this.resultSetMetaData = toCopy.resultSetMetaData; this.resultStreamProvider = toCopy.resultStreamProvider; } public void setRootAllocator(RootAllocator rootAllocator) { this.rootAllocator = rootAllocator; } public void setQueryResultFormat(QueryResultFormat queryResultFormat) { this.queryResultFormat = queryResultFormat; } public void setChunkFileCount(int chunkFileCount) { this.chunkFileCount = chunkFileCount; } public void setFristChunkStringData(String firstChunkStringData) { this.firstChunkStringData = firstChunkStringData; } public void setChunkDownloader(ChunkDownloader chunkDownloader) { this.chunkDownloader = chunkDownloader; } public void setResultStreamProvider(ResultStreamProvider resultStreamProvider) { this.resultStreamProvider = resultStreamProvider; } public ResultStreamProvider getResultStreamProvider() { return resultStreamProvider; } public SFResultSetMetaData getSFResultSetMetaData() { return resultSetMetaData; } public int getResultSetType() { return resultSetType; } public int getResultSetConcurrency() { return resultSetConcurrency; } public int getResultSetHoldability() { return resultSetHoldability; } public 
SnowflakeConnectString getSnowflakeConnectString() { return snowflakeConnectionString; } public OCSPMode getOCSPMode() { return ocspMode; } public HttpClientSettingsKey getHttpClientKey() { return httpClientKey; } public String getQrmk() { return qrmk; } public int getNetworkTimeoutInMilli() { return networkTimeoutInMilli; } public int getResultPrefetchThreads() { return resultPrefetchThreads; } public long getMemoryLimit() { return memoryLimit; } public Map getChunkHeadersMap() { return chunkHeadersMap; } public List getChunkFileMetadatas() { return chunkFileMetadatas; } public RootAllocator getRootAllocator() { return rootAllocator; } public QueryResultFormat getQueryResultFormat() { return queryResultFormat; } public int getChunkFileCount() { return chunkFileCount; } public boolean isArrayBindSupported() { return arrayBindSupported; } public String getQueryId() { return queryId; } public String getFinalDatabaseName() { return finalDatabaseName; } public String getFinalSchemaName() { return finalSchemaName; } public String getFinalRoleName() { return finalRoleName; } public String getFinalWarehouseName() { return finalWarehouseName; } public SFStatementType getStatementType() { return statementType; } public boolean isTotalRowCountTruncated() { return totalRowCountTruncated; } public Map getParameters() { return parameters; } public int getColumnCount() { return columnCount; } public List getResultColumnMetadata() { return resultColumnMetadata; } public JsonNode getAndClearFirstChunkRowset() { JsonNode firstChunkRowset = this.firstChunkRowset; this.firstChunkRowset = null; return firstChunkRowset; } public int getFirstChunkRowCount() { return firstChunkRowCount; } public long getResultVersion() { return resultVersion; } public int getNumberOfBinds() { return numberOfBinds; } public ChunkDownloader getChunkDownloader() { return chunkDownloader; } public SnowflakeDateTimeFormat getTimestampNTZFormatter() { return timestampNTZFormatter; } public 
SnowflakeDateTimeFormat getTimestampLTZFormatter() { return timestampLTZFormatter; } public SnowflakeDateTimeFormat getTimestampTZFormatter() { return timestampTZFormatter; } public SnowflakeDateTimeFormat getDateFormatter() { return dateFormatter; } public SnowflakeDateTimeFormat getTimeFormatter() { return timeFormatter; } public TimeZone getTimeZone() { return timeZone; } public boolean isHonorClientTZForTimestampNTZ() { return honorClientTZForTimestampNTZ; } public SFBinaryFormat getBinaryFormatter() { return binaryFormatter; } public long getSendResultTime() { return sendResultTime; } public List getMetaDataOfBinds() { return metaDataOfBinds; } public String getFirstChunkStringData() { return firstChunkStringData; } public boolean getTreatNTZAsUTC() { return treatNTZAsUTC; } public boolean getFormatDateWithTimeZone() { return formatDateWithTimezone; } public boolean getUseSessionTimezone() { return useSessionTimezone; } public Optional getSession() { return possibleSession; } /** * A factory function to create SnowflakeResultSetSerializable object from result JSON node, using * the DefaultResultStreamProvider. * * @param rootNode result JSON node received from GS * @param sfSession the Snowflake session * @param sfStatement the Snowflake statement * @return processed ResultSetSerializable object * @throws SnowflakeSQLException if failed to parse the result JSON node */ public static SnowflakeResultSetSerializableV1 create( JsonNode rootNode, SFBaseSession sfSession, SFBaseStatement sfStatement) throws SnowflakeSQLException { return create(rootNode, sfSession, sfStatement, new DefaultResultStreamProvider()); } /** * A factory function to create SnowflakeResultSetSerializable object from result JSON node, with * an overrideable ResultStreamProvider. 
* * @param rootNode result JSON node received from GS * @param sfSession the Snowflake session * @param sfStatement the Snowflake statement * @param resultStreamProvider a ResultStreamProvider for computing a custom data source for * result-file streams * @return processed ResultSetSerializable object * @throws SnowflakeSQLException if failed to parse the result JSON node */ public static SnowflakeResultSetSerializableV1 create( JsonNode rootNode, SFBaseSession sfSession, SFBaseStatement sfStatement, ResultStreamProvider resultStreamProvider) throws SnowflakeSQLException { SnowflakeResultSetSerializableV1 resultSetSerializable = new SnowflakeResultSetSerializableV1(); logger.debug("Entering create()"); SnowflakeUtil.checkErrorAndThrowException(rootNode); // get the query id resultSetSerializable.queryId = rootNode.path("data").path("queryId").asText(); JsonNode databaseNode = rootNode.path("data").path("finalDatabaseName"); resultSetSerializable.finalDatabaseName = databaseNode.isNull() ? null : databaseNode.asText(); JsonNode schemaNode = rootNode.path("data").path("finalSchemaName"); resultSetSerializable.finalSchemaName = schemaNode.isNull() ? null : schemaNode.asText(); JsonNode roleNode = rootNode.path("data").path("finalRoleName"); resultSetSerializable.finalRoleName = roleNode.isNull() ? null : roleNode.asText(); JsonNode warehouseNode = rootNode.path("data").path("finalWarehouseName"); resultSetSerializable.finalWarehouseName = warehouseNode.isNull() ? 
null : warehouseNode.asText(); resultSetSerializable.statementType = SFStatementType.lookUpTypeById(rootNode.path("data").path("statementTypeId").asLong()); resultSetSerializable.totalRowCountTruncated = rootNode.path("data").path("totalTruncated").asBoolean(); resultSetSerializable.possibleSession = Optional.ofNullable(sfSession); logger.debug("query id: {}", resultSetSerializable.queryId); Optional queryResultFormat = QueryResultFormat.lookupByName(rootNode.path("data").path("queryResultFormat").asText()); resultSetSerializable.queryResultFormat = queryResultFormat.orElse(QueryResultFormat.JSON); // extract parameters resultSetSerializable.parameters = SessionUtil.getCommonParams(rootNode.path("data").path("parameters")); // initialize column metadata resultSetSerializable.columnCount = rootNode.path("data").path("rowtype").size(); for (int i = 0; i < resultSetSerializable.columnCount; i++) { JsonNode colNode = rootNode.path("data").path("rowtype").path(i); SnowflakeColumnMetadata columnMetadata = SnowflakeUtil.extractColumnMetadata( colNode, sfSession.isJdbcTreatDecimalAsInt(), sfSession); resultSetSerializable.resultColumnMetadata.add(columnMetadata); logger.debug("Get column metadata: {}", (ArgSupplier) () -> columnMetadata.toString()); } resultSetSerializable.resultStreamProvider = resultStreamProvider; // process the content of first chunk. 
if (resultSetSerializable.queryResultFormat == QueryResultFormat.ARROW) { resultSetSerializable.firstChunkStringData = rootNode.path("data").path("rowsetBase64").asText(); resultSetSerializable.rootAllocator = new RootAllocator(Long.MAX_VALUE); // Set first chunk row count from firstChunkStringData resultSetSerializable.setFirstChunkRowCountForArrow(); } else { resultSetSerializable.firstChunkRowset = rootNode.path("data").path("rowset"); if (resultSetSerializable.firstChunkRowset == null || resultSetSerializable.firstChunkRowset.isMissingNode()) { resultSetSerializable.firstChunkRowCount = 0; resultSetSerializable.firstChunkStringData = null; } else { resultSetSerializable.firstChunkRowCount = resultSetSerializable.firstChunkRowset.size(); resultSetSerializable.firstChunkStringData = resultSetSerializable.firstChunkRowset.toString(); } } logger.debug("First chunk row count: {}", resultSetSerializable.firstChunkRowCount); // parse file chunks resultSetSerializable.parseChunkFiles(rootNode, sfStatement); // result version JsonNode versionNode = rootNode.path("data").path("version"); if (!versionNode.isMissingNode()) { resultSetSerializable.resultVersion = versionNode.longValue(); } // number of binds JsonNode numberOfBindsNode = rootNode.path("data").path("numberOfBinds"); if (!numberOfBindsNode.isMissingNode()) { resultSetSerializable.numberOfBinds = numberOfBindsNode.intValue(); } JsonNode arrayBindSupported = rootNode.path("data").path("arrayBindSupported"); resultSetSerializable.arrayBindSupported = !arrayBindSupported.isMissingNode() && arrayBindSupported.asBoolean(); // time result sent by GS (epoch time in millis) JsonNode sendResultTimeNode = rootNode.path("data").path("sendResultTime"); if (!sendResultTimeNode.isMissingNode()) { resultSetSerializable.sendResultTime = sendResultTimeNode.longValue(); } logger.debug("result version={}", resultSetSerializable.resultVersion); // Bind parameter metadata JsonNode bindData = 
rootNode.path("data").path("metaDataOfBinds"); if (!bindData.isMissingNode()) { List returnVal = new ArrayList<>(); for (JsonNode child : bindData) { int precision = child.path("precision").asInt(); boolean nullable = child.path("nullable").asBoolean(); int scale = child.path("scale").asInt(); int byteLength = child.path("byteLength").asInt(); int length = child.path("length").asInt(); String name = child.path("name").asText(); String type = child.path("type").asText(); MetaDataOfBinds param = new MetaDataOfBinds(precision, nullable, scale, byteLength, length, name, type); returnVal.add(param); } resultSetSerializable.metaDataOfBinds = returnVal; } // setup fields from sessions. resultSetSerializable.ocspMode = sfSession.getOCSPMode(); resultSetSerializable.httpClientKey = sfSession.getHttpClientKey(); resultSetSerializable.snowflakeConnectionString = sfSession.getSnowflakeConnectionString(); resultSetSerializable.networkTimeoutInMilli = sfSession.getNetworkTimeoutInMilli(); resultSetSerializable.isResultColumnCaseInsensitive = sfSession.isResultColumnCaseInsensitive(); resultSetSerializable.treatNTZAsUTC = sfSession.getTreatNTZAsUTC(); resultSetSerializable.formatDateWithTimezone = sfSession.getFormatDateWithTimezone(); resultSetSerializable.useSessionTimezone = sfSession.getUseSessionTimezone(); // setup transient fields from parameter resultSetSerializable.setupFieldsFromParameters(); // The chunk downloader will start prefetching // first few chunk files in background thread(s) resultSetSerializable.chunkDownloader = (resultSetSerializable.chunkFileCount > 0) ? 
new SnowflakeChunkDownloader(resultSetSerializable) : new SnowflakeChunkDownloader.NoOpChunkDownloader(); // Setup ResultSet metadata resultSetSerializable.resultSetMetaData = new SFResultSetMetaData( resultSetSerializable.getResultColumnMetadata(), resultSetSerializable.queryId, sfSession, resultSetSerializable.isResultColumnCaseInsensitive, resultSetSerializable.timestampNTZFormatter, resultSetSerializable.timestampLTZFormatter, resultSetSerializable.timestampTZFormatter, resultSetSerializable.dateFormatter, resultSetSerializable.timeFormatter); return resultSetSerializable; } /** * Some fields are generated from this.parameters, so generate them from this.parameters instead * of serializing them. */ private void setupFieldsFromParameters() { String sqlTimestampFormat = (String) ResultUtil.effectiveParamValue(this.parameters, "TIMESTAMP_OUTPUT_FORMAT"); // Special handling of specialized formatters, use a helper function this.timestampNTZFormatter = ResultUtil.specializedFormatter( this.parameters, "timestamp_ntz", "TIMESTAMP_NTZ_OUTPUT_FORMAT", sqlTimestampFormat); this.timestampLTZFormatter = ResultUtil.specializedFormatter( this.parameters, "timestamp_ltz", "TIMESTAMP_LTZ_OUTPUT_FORMAT", sqlTimestampFormat); this.timestampTZFormatter = ResultUtil.specializedFormatter( this.parameters, "timestamp_tz", "TIMESTAMP_TZ_OUTPUT_FORMAT", sqlTimestampFormat); String sqlDateFormat = (String) ResultUtil.effectiveParamValue(this.parameters, "DATE_OUTPUT_FORMAT"); this.dateFormatter = SnowflakeDateTimeFormat.fromSqlFormat(sqlDateFormat); logger.debug( "sql date format: {}, java date format: {}", sqlDateFormat, (ArgSupplier) () -> this.dateFormatter.toSimpleDateTimePattern()); String sqlTimeFormat = (String) ResultUtil.effectiveParamValue(this.parameters, "TIME_OUTPUT_FORMAT"); this.timeFormatter = SnowflakeDateTimeFormat.fromSqlFormat(sqlTimeFormat); logger.debug( "sql time format: {}, java time format: {}", sqlTimeFormat, (ArgSupplier) () -> 
this.timeFormatter.toSimpleDateTimePattern()); String timeZoneName = (String) ResultUtil.effectiveParamValue(this.parameters, "TIMEZONE"); this.timeZone = TimeZone.getTimeZone(timeZoneName); this.honorClientTZForTimestampNTZ = (boolean) ResultUtil.effectiveParamValue( this.parameters, "CLIENT_HONOR_CLIENT_TZ_FOR_TIMESTAMP_NTZ"); logger.debug("Honoring client TZ for timestamp_ntz? {}", this.honorClientTZForTimestampNTZ); String binaryFmt = (String) ResultUtil.effectiveParamValue(this.parameters, "BINARY_OUTPUT_FORMAT"); this.binaryFormatter = SFBinaryFormat.getSafeOutputFormat(binaryFmt); } /** * Parse the chunk file nodes from result JSON node * * @param rootNode result JSON node received from GS * @param sfStatement the snowflake statement */ private void parseChunkFiles(JsonNode rootNode, SFBaseStatement sfStatement) { JsonNode chunksNode = rootNode.path("data").path("chunks"); if (!chunksNode.isMissingNode()) { this.chunkFileCount = chunksNode.size(); // Try to get the Query Result Master Key JsonNode qrmkNode = rootNode.path("data").path("qrmk"); this.qrmk = qrmkNode.isMissingNode() ? null : qrmkNode.textValue(); // Determine the prefetch thread count and memoryLimit if (this.chunkFileCount > 0) { logger.debug("#chunks={}, initialize chunk downloader", this.chunkFileCount); adjustMemorySettings(sfStatement); // Parse chunk header JsonNode chunkHeaders = rootNode.path("data").path("chunkHeaders"); if (chunkHeaders != null && !chunkHeaders.isMissingNode()) { Iterator> chunkHeadersIter = chunkHeaders.fields(); while (chunkHeadersIter.hasNext()) { Map.Entry chunkHeader = chunkHeadersIter.next(); logger.debug( "add header key={}, value={}", chunkHeader.getKey(), chunkHeader.getValue().asText()); this.chunkHeadersMap.put(chunkHeader.getKey(), chunkHeader.getValue().asText()); } } // parse chunk files metadata e.g. 
url and row count for (int idx = 0; idx < this.chunkFileCount; idx++) { JsonNode chunkNode = chunksNode.get(idx); String url = chunkNode.path("url").asText(); int rowCount = chunkNode.path("rowCount").asInt(); int compressedSize = chunkNode.path("compressedSize").asInt(); int uncompressedSize = chunkNode.path("uncompressedSize").asInt(); this.chunkFileMetadatas.add( new ChunkFileMetadata(url, rowCount, compressedSize, uncompressedSize)); logger.debug( "add chunk, url={} rowCount={} " + "compressedSize={} uncompressedSize={}", url, rowCount, compressedSize, uncompressedSize); } } } } private void adjustMemorySettings(SFBaseStatement sfStatement) { this.resultPrefetchThreads = DEFAULT_CLIENT_PREFETCH_THREADS; if (this.statementType.isSelect() && this.parameters.containsKey(CLIENT_ENABLE_CONSERVATIVE_MEMORY_USAGE) && (boolean) this.parameters.get(CLIENT_ENABLE_CONSERVATIVE_MEMORY_USAGE)) { // use conservative memory settings this.resultPrefetchThreads = sfStatement.getConservativePrefetchThreads(); this.memoryLimit = sfStatement.getConservativeMemoryLimit(); int chunkSize = (int) this.parameters.get(CLIENT_RESULT_CHUNK_SIZE); logger.debug( "enable conservative memory usage with prefetchThreads = {} and memoryLimit = {} and " + "resultChunkSize = {}", this.resultPrefetchThreads, this.memoryLimit, chunkSize); } else { // prefetch threads if (this.parameters.get(CLIENT_PREFETCH_THREADS) != null) { this.resultPrefetchThreads = (int) this.parameters.get(CLIENT_PREFETCH_THREADS); } this.memoryLimit = initMemoryLimit(this.parameters); } long maxChunkSize = (int) this.parameters.get(CLIENT_RESULT_CHUNK_SIZE) * MB; if (queryResultFormat == QueryResultFormat.ARROW && Runtime.getRuntime().maxMemory() < LOW_MAX_MEMORY && memoryLimit * 2 + maxChunkSize > Runtime.getRuntime().maxMemory()) { memoryLimit = Runtime.getRuntime().maxMemory() / 2 - maxChunkSize; logger.debug( "To avoid OOM for arrow buffer allocation, " + "memoryLimit {} should be less than half of the " + "maxMemory {} 
+ maxChunkSize {}", memoryLimit, Runtime.getRuntime().maxMemory(), maxChunkSize); } } /** * Calculate memory limit in bytes * * @param parameters The parameters for result JSON node * @return memory limit in bytes */ private static long initMemoryLimit(Map parameters) { // default setting long memoryLimit = DEFAULT_CLIENT_MEMORY_LIMIT * 1024 * 1024; if (parameters.get(CLIENT_MEMORY_LIMIT) != null) { // use the settings from the customer memoryLimit = (int) parameters.get(CLIENT_MEMORY_LIMIT) * 1024L * 1024L; } long maxMemoryToUse = Runtime.getRuntime().maxMemory() * 8 / 10; if ((int) parameters.get(CLIENT_MEMORY_LIMIT) == DEFAULT_CLIENT_MEMORY_LIMIT) { // if the memory limit is the default value and best effort memory is enabled // set the memory limit to 80% of the maximum as the best effort memoryLimit = Math.max(memoryLimit, maxMemoryToUse); } // always make sure memoryLimit <= 80% of the maximum memoryLimit = Math.min(memoryLimit, maxMemoryToUse); logger.debug("Set allowed memory usage to {} bytes", memoryLimit); return memoryLimit; } /** * Setup all transient fields based on serialized fields and System Runtime. * * @throws SQLException if fails to setup any transient fields */ private void setupTransientFields() throws SQLException { // Setup transient fields from serialized fields setupFieldsFromParameters(); // Setup memory limitation from parameters and System Runtime. this.memoryLimit = initMemoryLimit(this.parameters); this.resultStreamProvider = new DefaultResultStreamProvider(); // Create below transient fields on the fly. if (QueryResultFormat.ARROW.equals(this.queryResultFormat)) { this.rootAllocator = new RootAllocator(Long.MAX_VALUE); this.firstChunkRowset = null; } else { this.rootAllocator = null; try { this.firstChunkRowset = (this.firstChunkStringData != null) ? 
mapper.readTree(this.firstChunkStringData) : null; } catch (IOException ex) { throw new SnowflakeSQLLoggedException( possibleSession.orElse(/* session = */ null), "The JSON data is invalid. The error is: " + ex.getMessage()); } } // Setup ResultSet metadata this.resultSetMetaData = new SFResultSetMetaData( this.getResultColumnMetadata(), this.queryId, null, // This is session less this.isResultColumnCaseInsensitive, this.timestampNTZFormatter, this.timestampLTZFormatter, this.timestampTZFormatter, this.dateFormatter, this.timeFormatter); // Allocate chunk downloader if necessary chunkDownloader = (this.chunkFileCount > 0) ? new SnowflakeChunkDownloader(this) : new SnowflakeChunkDownloader.NoOpChunkDownloader(); } /** * Split this object into small pieces based on the user specified data size. * * @param maxSizeInBytes the expected max data size wrapped in the result ResultSetSerializables * object. NOTE: if a result chunk size is greater than this value, the ResultSetSerializable * object will include one result chunk. * @return a list of SnowflakeResultSetSerializable * @throws SQLException if fails to split objects. */ public List splitBySize(long maxSizeInBytes) throws SQLException { List resultSetSerializables = new ArrayList<>(); if (this.chunkFileMetadatas.isEmpty() && this.firstChunkStringData == null) { throw new SnowflakeSQLLoggedException( this.possibleSession.orElse(/* session = */ null), "The Result Set serializable is invalid."); } // In the beginning, only the first data chunk is included in the result // serializable, so the chunk files are removed from the copy. // NOTE: make sure to handle the case that the first data chunk doesn't // exist. 
SnowflakeResultSetSerializableV1 curResultSetSerializable = new SnowflakeResultSetSerializableV1(this); curResultSetSerializable.chunkFileMetadatas = new ArrayList<>(); curResultSetSerializable.chunkFileCount = 0; for (int idx = 0; idx < this.chunkFileCount; idx++) { ChunkFileMetadata curChunkFileMetadata = this.getChunkFileMetadatas().get(idx); // If the serializable object has reach the max size, // save current one and create new one. if ((curResultSetSerializable.getUncompressedDataSizeInBytes() > 0) && (maxSizeInBytes < (curResultSetSerializable.getUncompressedDataSizeInBytes() + curChunkFileMetadata.getUncompressedByteSize()))) { resultSetSerializables.add(curResultSetSerializable); // Create new result serializable and reset it as empty curResultSetSerializable = new SnowflakeResultSetSerializableV1(this); curResultSetSerializable.chunkFileMetadatas = new ArrayList<>(); curResultSetSerializable.chunkFileCount = 0; curResultSetSerializable.firstChunkStringData = null; curResultSetSerializable.firstChunkRowCount = 0; curResultSetSerializable.firstChunkRowset = null; } // Append this chunk file to result serializable object curResultSetSerializable.getChunkFileMetadatas().add(curChunkFileMetadata); curResultSetSerializable.chunkFileCount++; } // Add the last result serializable object into result. resultSetSerializables.add(curResultSetSerializable); return resultSetSerializables; } /** * Get ResultSet from the ResultSet Serializable object so that the user can access the data. * * @param resultSetRetrieveConfig The extra info to retrieve the result set. * @return a ResultSet which represents for the data wrapped in the object */ public ResultSet getResultSet(ResultSetRetrieveConfig resultSetRetrieveConfig) throws SQLException { // Adjust OCSP cache server if necessary. 
try { SessionUtil.resetOCSPUrlIfNecessary(resultSetRetrieveConfig.getSfFullURL()); } catch (IOException e) { throw new SnowflakeSQLLoggedException( /*session = */ null, // There is no connection ErrorCode.INTERNAL_ERROR, "Hit exception when adjusting OCSP cache server. The original message is: " + e.getMessage()); } return getResultSetInternal(resultSetRetrieveConfig.getProxyProperties()); } /** * Get ResultSet from the ResultSet Serializable object so that the user can access the data. * *

This API is used by the Spark connector from 2.6.0 to 2.8.1. It is deprecated from
   * sc:2.8.2/jdbc:3.12.12 since Sept 2020. It is safe to remove it after Sept 2022.
   *
   * @return a ResultSet which represents for the data wrapped in the object
   * @deprecated Please use new interface function getResultSet(ResultSetRetrieveConfig)
   */
  @Deprecated
  public ResultSet getResultSet() throws SQLException {
    // No proxy properties are supplied on this legacy entry point.
    final Properties noProxyInfo = null;
    return getResultSetInternal(noProxyInfo);
  }

  /**
   * Get ResultSet from the ResultSet Serializable object so that the user can access the data.
   *
   *

This API is used by spark spark connector from 2.6.0 to 2.8.1. It is deprecated from * sc:2.8.2/jdbc:3.12.12 since Sept 2020. It is safe to remove it after Sept 2022. * * @param info The proxy sever information if proxy is necessary. * @return a ResultSet which represents for the data wrapped in the object * @deprecated Please use new interface function getResultSet(ResultSetRetrieveConfig) */ @Deprecated public ResultSet getResultSet(Properties info) throws SQLException { return getResultSetInternal(info); } /** * Get ResultSet from the ResultSet Serializable object so that the user can access the data. * * @param info The proxy sever information if proxy is necessary. * @return a ResultSet which represents for the data wrapped in the object */ private ResultSet getResultSetInternal(Properties info) throws SQLException { // Setup proxy info if necessary this.httpClientKey = SnowflakeUtil.convertProxyPropertiesToHttpClientKey(ocspMode, info); // Setup transient fields setupTransientFields(); // This result set is sessionless, so it doesn't support telemetry. Telemetry telemetryClient = new NoOpTelemetryClient(); // The use case is distributed processing, so sortResult is not necessary. boolean sortResult = false; // Setup base result set. SFBaseResultSet sfBaseResultSet = null; switch (getQueryResultFormat()) { case ARROW: { sfBaseResultSet = new SFArrowResultSet(this, telemetryClient, sortResult); break; } case JSON: { sfBaseResultSet = new SFResultSet(this, telemetryClient, sortResult); break; } default: throw new SnowflakeSQLLoggedException( this.possibleSession.orElse(/*session = */ null), ErrorCode.INTERNAL_ERROR, "Unsupported query result format: " + getQueryResultFormat().name()); } // Create result set SnowflakeResultSetV1 resultSetV1 = new SnowflakeResultSetV1(sfBaseResultSet, this); return resultSetV1; } // Set the row count for first result chunk by parsing the chunk data. 
private void setFirstChunkRowCountForArrow() throws SnowflakeSQLException { firstChunkRowCount = 0; // If the first chunk doesn't exist or empty, set it as 0 if (firstChunkStringData == null || firstChunkStringData.isEmpty()) { firstChunkRowCount = 0; } // Parse the Arrow result chunk else if (getQueryResultFormat().equals(QueryResultFormat.ARROW)) { // Below code is developed based on SFArrowResultSet.buildFirstChunk // and ArrowResultChunk.readArrowStream() byte[] bytes = Base64.getDecoder().decode(firstChunkStringData); VectorSchemaRoot root = null; RootAllocator localRootAllocator = (rootAllocator != null) ? rootAllocator : new RootAllocator(Long.MAX_VALUE); try (ByteArrayInputStream is = new ByteArrayInputStream(bytes); ArrowStreamReader reader = new ArrowStreamReader(is, localRootAllocator)) { root = reader.getVectorSchemaRoot(); while (reader.loadNextBatch()) { firstChunkRowCount += root.getRowCount(); root.clear(); } } catch (Exception ex) { throw new SnowflakeSQLLoggedException( possibleSession.orElse(/* session = */ null), ErrorCode.INTERNAL_ERROR, "Fail to retrieve row count for first arrow chunk: " + ex.getCause()); } finally { if (root != null) { root.clear(); } } } else { // This shouldn't happen throw new SnowflakeSQLLoggedException( this.possibleSession.orElse(/*session = */ null), ErrorCode.INTERNAL_ERROR, "setFirstChunkRowCountForArrow() should only be called for Arrow."); } } /** * Retrieve total row count included in the the ResultSet Serializable object. * *

GS sends the data of first chunk and metadata of the other chunk if exist to client, so this * function calculates the row count for all of them. * * @return the total row count from metadata */ public long getRowCount() throws SQLException { // Get row count for first chunk if it exists. long totalRowCount = firstChunkRowCount; // Get row count from chunk file metadata for (ChunkFileMetadata chunkFileMetadata : chunkFileMetadatas) { totalRowCount += chunkFileMetadata.rowCount; } return totalRowCount; } /** * Retrieve compressed data size in the the ResultSet Serializable object. * *

GS sends the data of first chunk and metadata of the other chunks if exist to client, so * this function calculates the data size for all of them. NOTE: if first chunk exists, this * function uses its uncompressed data size as its compressed data size in this calculation though * it is not compressed. * * @return the total compressed data size in bytes from metadata */ public long getCompressedDataSizeInBytes() throws SQLException { long totalCompressedDataSize = 0; // Count the data size for the first chunk if it exists. if (firstChunkStringData != null) { totalCompressedDataSize += firstChunkStringData.length(); } for (ChunkFileMetadata chunkFileMetadata : chunkFileMetadatas) { totalCompressedDataSize += chunkFileMetadata.compressedByteSize; } return totalCompressedDataSize; } /** * Retrieve Uncompressed data size in the the ResultSet Serializable object. * *

<p>GS sends the data of the first chunk and the metadata of the other chunks, if they exist,
 * to the client, so this function calculates the data size for all of them.
 *
 * @return the total uncompressed data size in bytes from metadata
 */
public long getUncompressedDataSizeInBytes() throws SQLException {
  // The first chunk, when present, is counted by the length of its string data.
  long total = (firstChunkStringData == null) ? 0 : firstChunkStringData.length();

  // Add the uncompressed size recorded for every chunk file.
  for (ChunkFileMetadata metadata : chunkFileMetadatas) {
    total += metadata.uncompressedByteSize;
  }
  return total;
}

/**
 * Returns a multi-line summary: whether a first chunk is present, its row count, the query
 * result format, the chunk file count, and one tab-indented line per chunk file metadata.
 */
public String toString() {
  StringBuilder summary = new StringBuilder(16 * 1024);
  summary
      .append("hasFirstChunk: ")
      .append(this.firstChunkStringData != null)
      .append("\n")
      .append("RowCountInFirstChunk: ")
      .append(this.firstChunkRowCount)
      .append("\n")
      .append("queryResultFormat: ")
      .append(this.queryResultFormat)
      .append("\n")
      .append("chunkFileCount: ")
      .append(this.chunkFileCount)
      .append("\n");

  // One line per chunk file.
  for (ChunkFileMetadata metadata : chunkFileMetadatas) {
    summary.append("\t").append(metadata.toString()).append("\n");
  }
  return summary.toString();
}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy