com.clickzetta.platform.arrow.ArrowSchema Maven / Gradle / Ivy
Show all versions of clickzetta-java Show documentation
package com.clickzetta.platform.arrow;
import com.google.common.base.Preconditions;
import cz.proto.DataType;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import java.util.*;
/**
 * Column metadata for an Arrow {@link Schema}: lookup of columns by position and by name,
 * the Arrow type and the original cz {@link DataType} of every column, and the positions of
 * the key columns.
 *
 * <p>All state is held in {@code volatile} fields that {@link #applyNewArrowSchema(ArrowSchema)}
 * replaces one at a time. Readers do not take the lock, so a reader running concurrently with
 * an update may observe some fields from the old schema and some from the new one.
 */
public class ArrowSchema {

    /**
     * Positions (indexes into {@link #columnsByIndex}) of the key columns.
     * Key columns are primaryKey | sortKey | clusterKey | partitionKey columns.
     */
    private volatile Set<Integer> keyColumnsIndex;

    /**
     * Mapping of column index to column.
     */
    private volatile List<Field> columnsByIndex;

    /**
     * Mapping of exact (case-sensitive) column name to column index.
     */
    private volatile Map<String, Integer> columnsByName;

    /**
     * Mapping of column index to Arrow type.
     */
    private volatile Map<Integer, ArrowType> columnTypesByIndex;

    /**
     * Mapping of case-folded column name to column index, used for case-insensitive lookup.
     * Every column is registered under both its lower-case and its upper-case form,
     * e.g. {@code column1} is stored as {@code column1} and {@code COLUMN1}.
     */
    private volatile Map<String, Integer> columnsByNameWithCase;

    /**
     * Arrow Schema of columns.
     */
    private volatile Schema schema;

    /**
     * Length in bytes of {@code schema.toByteArray()}, computed once at construction.
     */
    private volatile int rowSize;

    // -------------------------------------------------------------------------
    // Source DataType Info
    // -------------------------------------------------------------------------

    /**
     * Mapping of column index to original type of cz Schema.
     */
    private volatile Map<Integer, DataType> columnOriginalDataTypesByIndex;

    /**
     * Builds the lookup tables for the given columns.
     *
     * @param originalTypes  original cz data type of each column, in column order
     * @param fields         Arrow field of each column, in column order
     * @param keyFieldsIndex positions of the key columns within {@code fields}; copied
     * @throws IllegalArgumentException if {@code originalTypes} and {@code fields} differ in
     *                                  size, or if two fields share the same exact name
     */
    public ArrowSchema(List<DataType> originalTypes, List<Field> fields, Set<Integer> keyFieldsIndex) {
        Preconditions.checkArgument(originalTypes.size() == fields.size(),
                "originalTypes size (%s) must match fields size (%s)",
                originalTypes.size(), fields.size());

        int columnCount = fields.size();
        this.columnsByIndex = new ArrayList<>(columnCount);
        this.columnsByName = new HashMap<>();
        this.columnsByNameWithCase = new HashMap<>(columnCount * 2);
        this.columnTypesByIndex = new HashMap<>();
        this.columnOriginalDataTypesByIndex = new HashMap<>();
        this.keyColumnsIndex = new HashSet<>(keyFieldsIndex);

        // Both lists have the same size (checked above), so one pass fills every table.
        for (int index = 0; index < columnCount; index++) {
            Field field = fields.get(index);
            String name = field.getName();

            this.columnOriginalDataTypesByIndex.put(index, originalTypes.get(index));
            this.columnsByIndex.add(field);
            if (this.columnsByName.put(name, index) != null) {
                throw new IllegalArgumentException(String.format("Column names must be unique: %s", field));
            }
            // Used only as a case-insensitive fallback when the exact name does not match.
            // Locale.ROOT keeps the folding independent of the JVM default locale
            // (e.g. Turkish dotted/dotless i), matching the folding done in getColumnIndex.
            this.columnsByNameWithCase.put(name.toLowerCase(Locale.ROOT), index);
            this.columnsByNameWithCase.put(name.toUpperCase(Locale.ROOT), index);
            this.columnTypesByIndex.put(index, field.getType());
        }

        this.schema = new Schema(fields);
        this.rowSize = getSchemaRowSize();
    }

    /**
     * Replaces the whole state of this instance with the state of {@code newArrowSchema}.
     * The collections are shared with {@code newArrowSchema}, not copied.
     *
     * <p>The lock only serializes concurrent calls of this method; readers are not blocked
     * and may see a partially applied update.
     *
     * @param newArrowSchema schema whose state is adopted
     * @throws NullPointerException if {@code newArrowSchema} is null
     */
    public void applyNewArrowSchema(ArrowSchema newArrowSchema) {
        Objects.requireNonNull(newArrowSchema, "newArrowSchema");
        synchronized (this) {
            this.keyColumnsIndex = newArrowSchema.keyColumnsIndex;
            this.columnsByIndex = newArrowSchema.columnsByIndex;
            this.columnsByName = newArrowSchema.columnsByName;
            this.columnTypesByIndex = newArrowSchema.columnTypesByIndex;
            this.columnsByNameWithCase = newArrowSchema.columnsByNameWithCase;
            this.schema = newArrowSchema.schema;
            this.rowSize = newArrowSchema.rowSize;
            this.columnOriginalDataTypesByIndex = newArrowSchema.columnOriginalDataTypesByIndex;
        }
    }

    /**
     * Returns the length in bytes of the schema's {@code toByteArray()} form.
     * NOTE(review): this is the size of the serialized schema, stored as "rowSize" -
     * confirm that callers really want this value as a row-size estimate.
     */
    private int getSchemaRowSize() {
        return this.schema.toByteArray().length;
    }

    /**
     * @return the Arrow schema built from the column fields
     */
    public Schema getSchema() {
        return schema;
    }

    /**
     * @return the value computed by {@code schema.toByteArray().length} at construction
     */
    public int getRowSize() {
        return this.rowSize;
    }

    /**
     * @return a new list holding the columns in index order; changes to the returned list do
     *         not affect this schema
     */
    public List<Field> getColumns() {
        return new ArrayList<>(this.columnsByIndex);
    }

    /**
     * Tells whether a column with exactly this name exists. Unlike
     * {@link #getColumnIndex(String)}, this check is case-sensitive.
     *
     * @param columnName exact column name
     * @return {@code true} if a column is registered under exactly {@code columnName}
     */
    public boolean hasColumn(String columnName) {
        return this.columnsByName.containsKey(columnName);
    }

    /**
     * Resolves a column name to its index. The exact name is tried first; if it is not found,
     * the lower-case and then the upper-case form of the name are tried against the
     * case-insensitive table.
     *
     * @param columnName column name to resolve
     * @return index of the column
     * @throws IllegalArgumentException if the name is null or no column matches
     */
    public int getColumnIndex(String columnName) {
        Integer index = columnName == null ? null : this.columnsByName.get(columnName);
        if (index == null && columnName != null) {
            // Read the volatile field once so both fallback lookups hit the same map.
            Map<String, Integer> foldedNames = this.columnsByNameWithCase;
            index = foldedNames.get(columnName.toLowerCase(Locale.ROOT));
            if (index == null) {
                index = foldedNames.get(columnName.toUpperCase(Locale.ROOT));
            }
        }
        if (index == null) {
            throw new IllegalArgumentException(String.format("Unknown column: %s", columnName));
        }
        return index;
    }

    /**
     * @param idx column index
     * @return the column at {@code idx}
     * @throws IndexOutOfBoundsException if {@code idx} is not a valid column index
     */
    public Field getColumnByIndex(int idx) {
        return this.columnsByIndex.get(idx);
    }

    /**
     * @param idx column index
     * @return the original cz data type registered for {@code idx}, or {@code null} if none
     */
    public DataType getColumnOriginalTypeByIndex(int idx) {
        return this.columnOriginalDataTypesByIndex.get(idx);
    }

    /**
     * Looks up a column by name using the same matching rules as
     * {@link #getColumnIndex(String)}.
     *
     * @param columnName column name to resolve
     * @return the matching column
     * @throws IllegalArgumentException if no column matches
     */
    public Field getColumn(String columnName) {
        return columnsByIndex.get(getColumnIndex(columnName));
    }

    /**
     * @return number of columns
     */
    public int getColumnCount() {
        return this.columnsByIndex.size();
    }

    /**
     * @return the internal index-to-Arrow-type map itself (not a copy)
     */
    public Map<Integer, ArrowType> getColumnTypesByIndex() {
        return columnTypesByIndex;
    }

    /**
     * @return the internal set of key column indexes itself (not a copy)
     */
    public Set<Integer> getKeyColumnsIndex() {
        return keyColumnsIndex;
    }

    @Override
    public String toString() {
        return "ArrowSchema{" +
                "keyColumnsIndex=" + keyColumnsIndex +
                ", columnsByIndex=" + columnsByIndex +
                ", columnsByName=" + columnsByName +
                ", columnTypesByIndex=" + columnTypesByIndex +
                ", columnsByNameWithCase=" + columnsByNameWithCase +
                ", schema=" + schema +
                ", rowSize=" + rowSize +
                ", columnOriginalDataTypesByIndex=" + columnOriginalDataTypesByIndex +
                '}';
    }
}