
// org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder (Maven / Gradle / Ivy artifact listing)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.adapter.jdbc;
import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE;
import java.math.RoundingMode;
import java.util.Calendar;
import java.util.Map;
import java.util.function.Function;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
/**
* This class builds {@link JdbcToArrowConfig}s.
*/
public class JdbcToArrowConfigBuilder {
private Calendar calendar;
private BufferAllocator allocator;
private boolean includeMetadata;
private boolean reuseVectorSchemaRoot;
private Map arraySubTypesByColumnIndex;
private Map arraySubTypesByColumnName;
private Map explicitTypesByColumnIndex;
private Map explicitTypesByColumnName;
private Map schemaMetadata;
private Map> columnMetadataByColumnIndex;
private int targetBatchSize;
private Function jdbcToArrowTypeConverter;
private JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter;
private RoundingMode bigDecimalRoundingMode;
/**
* Default constructor for the JdbcToArrowConfigBuilder}
.
* Use the setter methods for the allocator and calendar; the allocator must be
* set. Otherwise, {@link #build()} will throw a {@link NullPointerException}.
*/
public JdbcToArrowConfigBuilder() {
this.allocator = null;
this.calendar = null;
this.includeMetadata = false;
this.reuseVectorSchemaRoot = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
this.explicitTypesByColumnIndex = null;
this.explicitTypesByColumnName = null;
this.schemaMetadata = null;
this.columnMetadataByColumnIndex = null;
this.bigDecimalRoundingMode = null;
}
/**
* Constructor for the JdbcToArrowConfigBuilder
. The
* allocator is required, and a {@link NullPointerException}
* will be thrown if it is null
.
*
* The allocator is used to construct Arrow vectors from the JDBC ResultSet.
* The calendar is used to determine the time zone of {@link java.sql.Timestamp}
* fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
* {@link java.sql.Timestamp} fields to a single, common time zone when reading
* from the result set.
*
*
* @param allocator The Arrow Vector memory allocator.
* @param calendar The calendar to use when constructing timestamp fields.
*/
public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) {
this();
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
this.allocator = allocator;
this.calendar = calendar;
this.includeMetadata = false;
this.reuseVectorSchemaRoot = false;
this.targetBatchSize = DEFAULT_TARGET_BATCH_SIZE;
}
/**
* Constructor for the JdbcToArrowConfigBuilder
. Both the
* allocator and calendar are required. A {@link NullPointerException}
* will be thrown if either of those arguments is null
.
*
* The allocator is used to construct Arrow vectors from the JDBC ResultSet.
* The calendar is used to determine the time zone of {@link java.sql.Timestamp}
* fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
* {@link java.sql.Timestamp} fields to a single, common time zone when reading
* from the result set.
*
*
* The includeMetadata
argument, if true
will cause
* various information about each database field to be added to the Vector
* Schema's field metadata.
*
*
* @param allocator The Arrow Vector memory allocator.
* @param calendar The calendar to use when constructing timestamp fields.
*/
public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) {
this(allocator, calendar);
this.includeMetadata = includeMetadata;
}
/**
* Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
*
* @param allocator the allocator to set.
* @exception NullPointerException if allocator
is null.
*/
public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
this.allocator = allocator;
return this;
}
/**
* Sets the {@link Calendar} to use when constructing timestamp fields in the
* Arrow schema, and reading time-based fields from the JDBC ResultSet
.
*
* @param calendar the calendar to set.
*/
public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
this.calendar = calendar;
return this;
}
/**
* Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
*
* @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata.
* @return This instance of the JdbcToArrowConfig
, for chaining.
*/
public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
this.includeMetadata = includeMetadata;
return this;
}
/**
* Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
* The column index is 1-based, to match the JDBC column index.
*
* @param map The mapping.
* @return This instance of the JdbcToArrowConfig
, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map map) {
this.arraySubTypesByColumnIndex = map;
return this;
}
/**
* Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
*
* @param map The mapping.
* @return This instance of the JdbcToArrowConfig
, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map map) {
this.arraySubTypesByColumnName = map;
return this;
}
/**
* Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for column types.
*
* This can be useful to override type information from JDBC drivers that provide incomplete type info,
* e.g. DECIMAL with precision = scale = 0.
*
* The column index is 1-based, to match the JDBC column index.
* @param map The mapping.
*/
public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) {
this.explicitTypesByColumnIndex = map;
return this;
}
/**
* Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for column types.
*
* This can be useful to override type information from JDBC drivers that provide incomplete type info,
* e.g. DECIMAL with precision = scale = 0.
* @param map The mapping.
*/
public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) {
this.explicitTypesByColumnName = map;
return this;
}
/**
* Set the target number of rows to convert at once.
*
* Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once.
*/
public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
this.targetBatchSize = targetBatchSize;
return this;
}
/**
* Set the function used to convert JDBC types to Arrow types.
*
* Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}.
*/
public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
Function jdbcToArrowTypeConverter) {
this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter;
return this;
}
/**
* Set the function used to get a JDBC consumer for a given type.
*
* Defaults to wrapping {@link
* JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, FieldVector, JdbcToArrowConfig)}.
*/
public JdbcToArrowConfigBuilder setJdbcConsumerGetter(
JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) {
this.jdbcConsumerGetter = jdbcConsumerGetter;
return this;
}
/**
* Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration,
* or to allocate a new one.
*/
public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) {
this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
return this;
}
/**
* Set metadata for schema.
*/
public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) {
this.schemaMetadata = schemaMetadata;
return this;
}
/**
* Set metadata from columnIndex->meta map on per field basis.
*/
public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex(
Map> columnMetadataByColumnIndex) {
this.columnMetadataByColumnIndex = columnMetadataByColumnIndex;
return this;
}
/**
* Set the rounding mode used when the scale of the actual value does not match the declared scale.
*
* By default, an error is raised in such cases.
*/
public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) {
this.bigDecimalRoundingMode = bigDecimalRoundingMode;
return this;
}
/**
* This builds the {@link JdbcToArrowConfig} from the provided
* {@link BufferAllocator} and {@link Calendar}.
*
* @return The built {@link JdbcToArrowConfig}
* @throws NullPointerException if either the allocator or calendar was not set.
*/
public JdbcToArrowConfig build() {
return new JdbcToArrowConfig(
allocator,
calendar,
includeMetadata,
reuseVectorSchemaRoot,
arraySubTypesByColumnIndex,
arraySubTypesByColumnName,
targetBatchSize,
jdbcToArrowTypeConverter,
jdbcConsumerGetter,
explicitTypesByColumnIndex,
explicitTypesByColumnName,
schemaMetadata,
columnMetadataByColumnIndex,
bigDecimalRoundingMode);
}
}