All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.clickzetta.platform.arrow.ArrowSchema Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
package com.clickzetta.platform.arrow;

import com.google.common.base.Preconditions;
import cz.proto.DataType;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

import java.util.*;

public class ArrowSchema {

  /**
   * The key columns contains (primaryKey|sortKey|clusterKey|partitionKey).
   * 

* Keep all key field index in all arrowFields list (columnsByIndex). */ private volatile Set keyColumnsIndex; /** * Mapping of column index to column. */ private volatile List columnsByIndex; /** * Mapping of column name to index. */ private volatile Map columnsByName; /** * Mapping of column index to type. */ private volatile Map columnTypesByIndex; /** * Mapping of column name to index with case Insensitive. * such as. column1 will be COLUMN1 or column1. */ private volatile Map columnsByNameWithCase; /** * Arrow Schema of columns. */ private volatile Schema schema; private volatile int rowSize; // ------------------------------------------------------------------------- // Source DataType Info // ------------------------------------------------------------------------- /** * Mapping of column index to original type of cz Schema. */ private volatile Map columnOriginalDataTypesByIndex; public ArrowSchema(List originalTypes, List fields, Set keyFieldsIndex) { Preconditions.checkArgument(originalTypes.size() == fields.size()); this.columnsByIndex = new ArrayList<>(); this.columnsByName = new HashMap<>(); this.columnsByNameWithCase = new HashMap<>(fields.size() * 2); this.columnTypesByIndex = new HashMap<>(); this.columnOriginalDataTypesByIndex = new HashMap<>(); this.keyColumnsIndex = new HashSet<>(); this.keyColumnsIndex.addAll(keyFieldsIndex); for (int index = 0; index < originalTypes.size(); index++) { this.columnOriginalDataTypesByIndex.put(index, originalTypes.get(index)); } for (int index = 0; index < fields.size(); index++) { Field field = fields.get(index); columnsByIndex.add(field); if (this.columnsByName.put(field.getName(), index) != null) { throw new IllegalArgumentException(String.format("Column names must be unique: %s", field)); } // only used for case senstive match when use row.setValue. this.columnsByNameWithCase.put(field.getName().toLowerCase(), index); this.columnsByNameWithCase.put(field.getName().toUpperCase(), index); this.columnTypesByIndex.put(index, field.getType()); } this.schema = new Schema(fields); this.rowSize = getSchemaRowSize(); } public void applyNewArrowSchema(ArrowSchema newArrowSchema) { synchronized (this) { this.keyColumnsIndex = newArrowSchema.keyColumnsIndex; this.columnsByIndex = newArrowSchema.columnsByIndex; this.columnsByName = newArrowSchema.columnsByName; this.columnTypesByIndex = newArrowSchema.columnTypesByIndex; this.columnsByNameWithCase = newArrowSchema.columnsByNameWithCase; this.schema = newArrowSchema.schema; this.rowSize = newArrowSchema.rowSize; this.columnOriginalDataTypesByIndex = newArrowSchema.columnOriginalDataTypesByIndex; } } private int getSchemaRowSize() { return this.schema.toByteArray().length; } public Schema getSchema() { return schema; } public int getRowSize() { return this.rowSize; } public List getColumns() { return new ArrayList<>(this.columnsByIndex); } public boolean hasColumn(String columnName) { return this.columnsByName.containsKey(columnName); } public int getColumnIndex(String columnName) { Integer index = this.columnsByName.get(columnName); if (index == null) { index = this.columnsByNameWithCase.get(columnName.toLowerCase()) != null ? this.columnsByNameWithCase.get(columnName.toLowerCase()) : this.columnsByNameWithCase.get(columnName.toUpperCase()); } if (index == null) { throw new IllegalArgumentException(String.format("Unknown column: %s", columnName)); } return index; } public Field getColumnByIndex(int idx) { return this.columnsByIndex.get(idx); } public DataType getColumnOriginalTypeByIndex(int idx) { return this.columnOriginalDataTypesByIndex.get(idx); } public Field getColumn(String columnName) { return columnsByIndex.get(getColumnIndex(columnName)); } public int getColumnCount() { return this.columnsByIndex.size(); } public Map getColumnTypesByIndex() { return columnTypesByIndex; } public Set getKeyColumnsIndex() { return keyColumnsIndex; } @Override public String toString() { return "ArrowSchema{" + "keyColumnsIndex=" + keyColumnsIndex + ", columnsByIndex=" + columnsByIndex + ", columnsByName=" + columnsByName + ", columnTypesByIndex=" + columnTypesByIndex + ", columnsByNameWithCase=" + columnsByNameWithCase + ", schema=" + schema + ", rowSize=" + rowSize + ", columnOriginalDataTypesByIndex=" + columnOriginalDataTypesByIndex + '}'; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy