Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.vectorized;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.util.DateTimeUtils;
import org.apache.spark.sql.types.*;
import org.apache.spark.sql.vectorized.ColumnarArray;
import org.apache.spark.sql.vectorized.ColumnarBatch;
import org.apache.spark.sql.vectorized.ColumnarMap;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;
/**
* Utilities to help manipulate data associate with ColumnVectors. These should be used mostly
* for debugging or other non-performance critical paths.
* These utilities are mostly used to convert ColumnVectors into other formats.
*/
public class ColumnVectorUtils {
/**
* Populates the entire `col` with `row[fieldIdx]`
*/
public static void populate(WritableColumnVector col, InternalRow row, int fieldIdx) {
int capacity = col.capacity;
DataType t = col.dataType();
if (row.isNullAt(fieldIdx)) {
col.putNulls(0, capacity);
} else {
if (t == DataTypes.BooleanType) {
col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
} else if (t == DataTypes.BinaryType) {
col.putByteArray(0, row.getBinary(fieldIdx));
} else if (t == DataTypes.ByteType) {
col.putBytes(0, capacity, row.getByte(fieldIdx));
} else if (t == DataTypes.ShortType) {
col.putShorts(0, capacity, row.getShort(fieldIdx));
} else if (t == DataTypes.IntegerType) {
col.putInts(0, capacity, row.getInt(fieldIdx));
} else if (t == DataTypes.LongType) {
col.putLongs(0, capacity, row.getLong(fieldIdx));
} else if (t == DataTypes.FloatType) {
col.putFloats(0, capacity, row.getFloat(fieldIdx));
} else if (t == DataTypes.DoubleType) {
col.putDoubles(0, capacity, row.getDouble(fieldIdx));
} else if (t == DataTypes.StringType) {
UTF8String v = row.getUTF8String(fieldIdx);
byte[] bytes = v.getBytes();
for (int i = 0; i < capacity; i++) {
col.putByteArray(i, bytes);
}
} else if (t instanceof DecimalType) {
DecimalType dt = (DecimalType)t;
Decimal d = row.getDecimal(fieldIdx, dt.precision(), dt.scale());
if (dt.precision() <= Decimal.MAX_INT_DIGITS()) {
col.putInts(0, capacity, (int)d.toUnscaledLong());
} else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) {
col.putLongs(0, capacity, d.toUnscaledLong());
} else {
final BigInteger integer = d.toJavaBigDecimal().unscaledValue();
byte[] bytes = integer.toByteArray();
for (int i = 0; i < capacity; i++) {
col.putByteArray(i, bytes, 0, bytes.length);
}
}
} else if (t instanceof CalendarIntervalType) {
CalendarInterval c = (CalendarInterval)row.get(fieldIdx, t);
col.getChild(0).putInts(0, capacity, c.months);
col.getChild(1).putLongs(0, capacity, c.microseconds);
} else if (t instanceof DateType) {
col.putInts(0, capacity, row.getInt(fieldIdx));
} else if (t instanceof TimestampType) {
col.putLongs(0, capacity, row.getLong(fieldIdx));
} else {
throw new RuntimeException(String.format("DataType %s is not supported" +
" in column vectorized reader.", t.sql()));
}
}
}
/**
* Returns the array data as the java primitive array.
* For example, an array of IntegerType will return an int[].
* Throws exceptions for unhandled schemas.
*/
public static int[] toJavaIntArray(ColumnarArray array) {
for (int i = 0; i < array.numElements(); i++) {
if (array.isNullAt(i)) {
throw new RuntimeException("Cannot handle NULL values.");
}
}
return array.toIntArray();
}
public static Map toJavaIntMap(ColumnarMap map) {
int[] keys = toJavaIntArray(map.keyArray());
int[] values = toJavaIntArray(map.valueArray());
assert keys.length == values.length;
Map result = new HashMap<>();
for (int i = 0; i < keys.length; i++) {
result.put(keys[i], values[i]);
}
return result;
}
private static void appendValue(WritableColumnVector dst, DataType t, Object o) {
if (o == null) {
if (t instanceof CalendarIntervalType) {
dst.appendStruct(true);
} else {
dst.appendNull();
}
} else {
if (t == DataTypes.BooleanType) {
dst.appendBoolean((Boolean) o);
} else if (t == DataTypes.ByteType) {
dst.appendByte((Byte) o);
} else if (t == DataTypes.ShortType) {
dst.appendShort((Short) o);
} else if (t == DataTypes.IntegerType) {
dst.appendInt((Integer) o);
} else if (t == DataTypes.LongType) {
dst.appendLong((Long) o);
} else if (t == DataTypes.FloatType) {
dst.appendFloat((Float) o);
} else if (t == DataTypes.DoubleType) {
dst.appendDouble((Double) o);
} else if (t == DataTypes.StringType) {
byte[] b =((String)o).getBytes(StandardCharsets.UTF_8);
dst.appendByteArray(b, 0, b.length);
} else if (t instanceof DecimalType) {
DecimalType dt = (DecimalType) t;
Decimal d = Decimal.apply((BigDecimal) o, dt.precision(), dt.scale());
if (dt.precision() <= Decimal.MAX_INT_DIGITS()) {
dst.appendInt((int) d.toUnscaledLong());
} else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) {
dst.appendLong(d.toUnscaledLong());
} else {
final BigInteger integer = d.toJavaBigDecimal().unscaledValue();
byte[] bytes = integer.toByteArray();
dst.appendByteArray(bytes, 0, bytes.length);
}
} else if (t instanceof CalendarIntervalType) {
CalendarInterval c = (CalendarInterval)o;
dst.appendStruct(false);
dst.getChild(0).appendInt(c.months);
dst.getChild(1).appendInt(c.days);
dst.getChild(2).appendLong(c.microseconds);
} else if (t instanceof DateType) {
dst.appendInt(DateTimeUtils.fromJavaDate((Date)o));
} else {
throw new UnsupportedOperationException("Type " + t);
}
}
}
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) {
if (t instanceof ArrayType) {
ArrayType at = (ArrayType)t;
if (src.isNullAt(fieldIdx)) {
dst.appendNull();
} else {
List