package org.yamcs.yarch;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yamcs.LimitExceededException;
import org.yamcs.utils.ByteArray;
import org.yamcs.utils.DatabaseCorruptionException;
import org.yamcs.utils.IndexedList;
import org.yamcs.utils.StringConverter;
import org.yamcs.yarch.PartitioningSpec._type;
import org.yamcs.yarch.streamsql.ColumnNotFoundException;
import org.yamcs.yarch.streamsql.GenericStreamSqlException;
import org.yamcs.yarch.streamsql.StreamSqlException;
import org.yamcs.yarch.streamsql.StreamSqlException.ErrCode;
import com.google.common.collect.BiMap;
/**
* A table definition consists of a (key,value) pair of tuple definitions. A tuple has to contain all the columns of
* the key, while it may contain only a subset of the columns of the value (i.e. it is effectively a sparse table).
*
*
* The key is encoded as a byte array of all the key columns in order. The value is encoded as a byte array of the
* value columns present in the tuple, each preceded by the id of its data type (1 byte) and its column index (3
* bytes).
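*
* For example, as computed in serializeValue below, a value column with data type id T stored at column index i is
* preceded by the 4-byte tag (T << 24) | i, and the whole encoded value part is terminated by the marker -1.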
*
* The secondary index key is encoded as a byte array of all the columns in order, each preceded by the id of its
* data type, with the first bit set to 1 for the columns that are present and 0 for the columns that are not
* present (i.e. null).
*
* A table can also be partitioned according to the partitioningSpec.
*
*/
public class TableDefinition {
static Logger log = LoggerFactory.getLogger(TableDefinition.class.getName());
static final int MAX_NUM_COLS = 0x00FFFFFF;
/*
* table version history
* 0: yamcs version < 3.0
* 1: - the histograms were stored in a separate rocksdb database.
* - pp table contained a column ppgroup instead of group
* 2: - the PROTOBUF(org.yamcs.protobuf.Pvalue$ParameterValue) is replaced by PARAMETER_VALUE in the pp table
* 3: November 2020 (Yamcs 5.3)
* - changed serialization to preserve order of negative values in the key
* - first of the 4 bytes column index preceding the value is the datatype
*
*/
public static final int CURRENT_FORMAT_VERSION = 3;
private final int formatVersion;
// the definition of keys and values columns
private volatile IndexedList<String, TableColumnDefinition> keyDef;
private volatile IndexedList<String, TableColumnDefinition> valueDef;
// these are all columns used in histograms or secondary indices
private volatile IndexedList<String, TableColumnDefinition> histoIdx;
// keyDef+valueDef
private volatile TupleDefinition tupleDef;
private YarchDatabaseInstance ydb;
// compressed and column family name are actually storage dependent
// if we ever support a secondary storage, we should move them into some sort of options
private boolean compressed;
private String cfName;
private PartitioningSpec partitioningSpec = PartitioningSpec.noneSpec();
private String storageEngineName = YarchDatabase.RDB_ENGINE_NAME;
private String name;
private List<String> histoColumns;
private List<String> secondaryIndex;
// these are the value columns which are autoincrement.
private List<TableColumnDefinition> autoIncrementValues;
/**
* Used when creating an empty table via sql.
*
* @param name
* @param tdef
* @param primaryKey
* @throws StreamSqlException
*/
public TableDefinition(String name, TupleDefinition tdef, List<String> primaryKey) throws StreamSqlException {
this.name = name;
this.formatVersion = CURRENT_FORMAT_VERSION;
keyDef = new IndexedList<>();
for (String s : primaryKey) {
ColumnDefinition cd = tdef.getColumn(s);
if (cd == null) {
throw new ColumnNotFoundException(s);
}
TableColumnDefinition tcd = getTcd(cd);
keyDef.add(cd.getName(), tcd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
}
valueDef = new IndexedList<>(tdef.size() - keyDef.size());
for (ColumnDefinition cd : tdef.getColumnDefinitions()) {
if (!keyDef.hasKey(cd.getName())) {
TableColumnDefinition tcd = getTcd(cd);
valueDef.add(cd.getName(), tcd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
}
}
computeTupleDef();
computeAutoincrValues();
computeHistoIdx();
}
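/*
 * Usage sketch, not part of the original source: creating the definition of a table whose primary key is
 * (gentime, seqNum). Assumes TupleDefinition.addColumn(String, DataType) as used elsewhere in yarch.
 *
 * TupleDefinition tdef = new TupleDefinition();
 * tdef.addColumn("gentime", DataType.TIMESTAMP);
 * tdef.addColumn("seqNum", DataType.INT);
 * tdef.addColumn("packet", DataType.BINARY);
 * TableDefinition tblDef = new TableDefinition("tm", tdef, java.util.Arrays.asList("gentime", "seqNum"));
 */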
private TableColumnDefinition getTcd(ColumnDefinition cd) {
if (cd instanceof TableColumnDefinition) {
return (TableColumnDefinition) cd;
} else {
return new TableColumnDefinition(cd);
}
}
/**
* Used when creating the table from the serialized data on disk
*
*/
public TableDefinition(int formatVersion, List<TableColumnDefinition> key, List<TableColumnDefinition> value) {
this.keyDef = new IndexedList<>(key.size());
this.formatVersion = formatVersion;
for (TableColumnDefinition tcd : key) {
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
keyDef.add(tcd.getName(), tcd);
}
this.valueDef = new IndexedList<>(value.size());
for (TableColumnDefinition tcd : value) {
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
valueDef.add(tcd.getName(), tcd);
}
computeTupleDef();
computeAutoincrValues();
computeHistoIdx();
}
public void setDb(YarchDatabaseInstance ydb) {
this.ydb = ydb;
}
private void computeAutoincrValues() {
for (TableColumnDefinition tcd : valueDef) {
if (tcd.isAutoIncrement()) {
if (autoIncrementValues == null) {
autoIncrementValues = new ArrayList<>();
}
autoIncrementValues.add(tcd);
}
}
}
/**
* Time-based partitions can only be on the first column of the key (which has to be of type TIMESTAMP);
* value-based partitions can be on any other mandatory column.
*
* @param pspec
*/
public void setPartitioningSpec(PartitioningSpec pspec) throws StreamSqlException {
if ((pspec.type == PartitioningSpec._type.TIME) || (pspec.type == PartitioningSpec._type.TIME_AND_VALUE)) {
ColumnDefinition cd = keyDef.get(pspec.timeColumn);
if (cd == null) {
throw new GenericStreamSqlException(
"time partition specified on a column not part of the primary key: '" + pspec.timeColumn + "'");
}
if (cd.getType() != DataType.TIMESTAMP) {
throw new GenericStreamSqlException("time partition specified on a column of type " + cd.getType());
}
if (!keyDef.get(0).getName().equals(pspec.timeColumn)) {
throw new GenericStreamSqlException(
"time partition supported only on the first column of the primary key");
}
}
if ((pspec.type == PartitioningSpec._type.VALUE) || (pspec.type == PartitioningSpec._type.TIME_AND_VALUE)) {
ColumnDefinition c = getColumnDefinition(pspec.valueColumn);
if (c == null) {
throw new ColumnNotFoundException(pspec.valueColumn);
}
pspec.setValueColumnType(c.getType());
}
this.partitioningSpec = pspec;
}
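/*
 * Usage sketch, not part of the original source: enabling time+value partitioning on the table defined above.
 * Assumes the static factory timeAndValueSpec(String, String), a sibling of the noneSpec() factory used for
 * the default value of partitioningSpec.
 *
 * PartitioningSpec pspec = PartitioningSpec.timeAndValueSpec("gentime", "seqNum");
 * tblDef.setPartitioningSpec(pspec);
 */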
private void computeTupleDef() {
TupleDefinition tmp = new TupleDefinition();
for (ColumnDefinition cd : keyDef) {
tmp.addColumn(cd);
}
for (ColumnDefinition cd : valueDef) {
tmp.addColumn(cd);
}
tupleDef = tmp;
}
private void computeHistoIdx() {
IndexedList<String, TableColumnDefinition> tmp = new IndexedList<>();
TableColumnDefinition tcd = keyDef.get(0);
if (histoColumns != null) {
tmp.add(tcd.getName(), tcd);
for (String s : histoColumns) {
if (!tmp.hasKey(s)) {
tmp.add(s, getColumnDefinition(s));
}
}
}
if (secondaryIndex != null) {
for (String s : secondaryIndex) {
if (!tmp.hasKey(s)) {
tmp.add(s, getColumnDefinition(s));
}
}
}
histoIdx = tmp;
}
public List<TableColumnDefinition> getKeyDefinition() {
return keyDef.getList();
}
public List<TableColumnDefinition> getValueDefinition() {
return valueDef.getList();
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public TupleDefinition getTupleDefinition() {
return tupleDef;
}
/**
* Checks that the table definition is valid:
* - no STRING column in the primary key except as the last column in the list (otherwise the binary sorting
* does not work properly)
* - AUTO_INCREMENT is only supported for columns of type long
*
* @throws StreamSqlException
*/
public void validate() throws StreamSqlException {
for (TableColumnDefinition tcd : keyDef) {
if (tcd.isAutoIncrement() && tcd.getType() != DataType.LONG) {
throw new StreamSqlException(ErrCode.NOT_SUPPORTED,
"AUTO_INCREMENT is only supported for columns of type long.");
}
}
for (TableColumnDefinition tcd : valueDef) {
if (tcd.isAutoIncrement() && tcd.getType() != DataType.LONG) {
throw new StreamSqlException(ErrCode.NOT_SUPPORTED,
"AUTO_INCREMENT is only supported for columns of type long.");
}
}
}
/**
* Generate a new table row by transforming the key part of the tuple into a byte array to be written to disk. The
* tuple must contain each column from the key and they are written in order (such that sorting is according to the
* definition of the primary key).
*
* In addition, it stores into the returned row all the values for the columns used in histograms or indices
*
* @param t
* @return a row containing the histogram and secondary index column values as well as the generated key
* @throws YarchException
*/
public Row generateRow(Tuple t) throws YarchException {
Row tableTuple = new Row(histoIdx);
ByteArray byteArray = new ByteArray();
for (int keyIdx = 0; keyIdx < keyDef.size(); keyIdx++) {
TableColumnDefinition tableCd = keyDef.get(keyIdx);
String colName = tableCd.getName();
int tIdx = t.getColumnIndex(colName);
Object value;
if (tIdx < 0) {
if (tableCd.isAutoIncrement()) {
value = tableCd.getSequence().next();
} else {
throw new IllegalArgumentException("Tuple does not have mandatory column '" + colName + "'");
}
} else {
ColumnDefinition tupleCd = t.getColumnDefinition(tIdx);
Object v = t.getColumn(tIdx);
value = DataType.castAs(tupleCd.type, tableCd.type, v);
}
tableCd.serializeValue(byteArray, value);
setSertupleValue(tableTuple, colName, value);
}
tableTuple.setKey(byteArray.toArray());
return tableTuple;
}
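/*
 * Key layout sketch, not part of the original source: for a primary key (gentime TIMESTAMP, seqNum INT)
 * the generated key is simply the concatenation
 *
 *   [gentime bytes][seqNum bytes]
 *
 * where each column is written with an order-preserving encoding, so that sorting the byte arrays
 * lexicographically sorts the rows according to the primary key definition (format version 3 also
 * preserves the order of negative values, see the version history above).
 */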
/**
* adds all missing columns to the value part and serializes the table definition to disk
*/
private synchronized void addMissingValueColumns(TupleDefinition tdef) {
IndexedList<String, TableColumnDefinition> valueDef1 = new IndexedList<>(valueDef);
if (valueDef.size() >= MAX_NUM_COLS) {
throw new LimitExceededException(
"The number of value columns in table " + name + " has reached the maximum " + MAX_NUM_COLS);
}
for (int i = 0; i < tdef.size(); i++) {
ColumnDefinition cd = tdef.getColumn(i);
if (keyDef.hasKey(cd.getName())) {
continue;
}
int cidx = valueDef.getIndex(cd.getName());
if (cidx == -1) {
TableColumnDefinition tcd = new TableColumnDefinition(cd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
valueDef1.add(tcd.name, tcd);
}
}
ydb.saveTableDefinition(this, keyDef.getList(), valueDef1.getList());
valueDef = valueDef1;
computeTupleDef();
}
/**
* Renames column and serializes the table definition to disk.
*
* Commented out because not safe (can only be used when nobody is using the table)
*
* @param oldName
* - old name of the column
* @param newName
* - new name of the column
*
* public synchronized void renameColumn(String oldName, String newName) { if (keyDef.hasKey(oldName)) {
* keyDef.changeKey(oldName, newName); } else if (valueDef.hasKey(oldName)) { valueDef.changeKey(oldName,
* newName); } else { throw new IllegalArgumentException("no column named '" + oldName + "'"); }
*
* if(secondaryIndexDef.hasKey(oldName)) { keyDef.changeKey(oldName, newName); }
*
* if (oldName.equals(partitioningSpec.timeColumn)) { PartitioningSpec newSpec = new
* PartitioningSpec(partitioningSpec.type, newName, partitioningSpec.valueColumn);
* newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema()); partitioningSpec =
* newSpec; } else if (oldName.equals(partitioningSpec.valueColumn)) { PartitioningSpec newSpec = new
* PartitioningSpec(partitioningSpec.type, partitioningSpec.timeColumn, newName);
* newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema()); partitioningSpec =
* newSpec; }
*
* int idx = histoColumns.indexOf(oldName); if (idx != -1) { histoColumns.set(idx, newName); }
* ydb.saveTableDefinition(this, keyDef.getList(), valueDef.getList()); }
*/
/**
* Adds a value to an enum column and writes the updated table definition to disk
*
*/
private Short addEnumValue(String columnName, String value) {
TableColumnDefinition tdef = getColumnDefinition(columnName);
TableColumnDefinition tdef1 = new TableColumnDefinition(tdef);
short x = tdef1.addEnumValue(value);
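// copy-on-write: build modified copies of the definitions and publish them by swapping the volatile
// references only after the new definition has been persisted, so concurrent readers always see a
// consistent state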
IndexedList<String, TableColumnDefinition> keyDef1 = keyDef;
IndexedList<String, TableColumnDefinition> valueDef1 = valueDef;
IndexedList<String, TableColumnDefinition> histoIdx1 = histoIdx;
int idx = keyDef.getIndex(columnName);
if (idx >= 0) {
keyDef1 = new IndexedList<>(keyDef);
keyDef1.set(idx, tdef1);
} else {
idx = valueDef.getIndex(columnName);
assert (idx >= 0);
valueDef1 = new IndexedList<>(valueDef);
valueDef1.set(idx, tdef1);
}
ydb.saveTableDefinition(this, keyDef1.getList(), valueDef1.getList());
idx = histoIdx.getIndex(columnName);
if (idx >= 0) {
histoIdx1 = new IndexedList<>(histoIdx);
histoIdx1.set(idx, tdef1);
}
keyDef = keyDef1;
valueDef = valueDef1;
histoIdx = histoIdx1;
computeTupleDef();
return x;
}
/**
* Gets the enum index corresponding to a value of an enum column, creating it (and persisting the updated
* table definition) if it does not exist.
*
* @return the index of the value in the column's enumeration
*/
public Short addAndGetEnumValue(String columnName, String value) {
TableColumnDefinition tdef = getColumnDefinition(columnName);
if (tdef == null) {
throw new IllegalArgumentException("No column named '" + columnName + "'");
}
if (value == null) {
throw new NullPointerException("Enum value cannot be null");
}
Short enumValue = tdef.getEnumIndex(value);
if (enumValue == null) {
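// check-lock-recheck: another thread may have added the value between the unsynchronized read
// above and acquiring the lock, so re-read the index before creating a new entry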
synchronized (this) {
enumValue = tdef.getEnumIndex(value);
if (enumValue == null) {
enumValue = addEnumValue(columnName, value);
}
}
}
return enumValue;
}
/**
* Same as {@link #serializeValue(Tuple, Row)} but encodes the output into a user-provided byte array
*
* @param tuple
* @param sertuple
* @param byteArray
*/
public void serializeValue(Tuple tuple, Row sertuple, ByteArray byteArray) {
TupleDefinition tdef = tuple.getDefinition();
int length = byteArray.size();
for (int i = 0; i < tdef.size(); i++) {
ColumnDefinition tupleCd = tdef.getColumn(i);
if (keyDef.hasKey(tupleCd.getName())) {
continue;
}
int cidx = valueDef.getIndex(tupleCd.getName());
if (cidx == -1) { // call this function again after adding the
// missing columns to the table
addMissingValueColumns(tdef);
byteArray.reset(length);
serializeValue(tuple, sertuple, byteArray);
return;
}
TableColumnDefinition tableCd = valueDef.get(cidx);
Object v = tuple.getColumn(i);
if (v == null) {
continue;
}
Object v1 = DataType.castAs(tupleCd.type, tableCd.type, v);
cidx = (tableCd.type.getTypeId() << 24) | cidx;
byteArray.addInt(cidx);
tableCd.serializeValue(byteArray, v1);
setSertupleValue(sertuple, tupleCd.getName(), v1);
}
// add values for all the autoincrements which are not part of the tuple
if (autoIncrementValues != null) {
for (TableColumnDefinition tcd : autoIncrementValues) {
if (!tuple.hasColumn(tcd.getName())) {
long v = tcd.getSequence().next();
int cidx = (tcd.type.getTypeId() << 24) | valueDef.getIndex(tcd.getName());
byteArray.addInt(cidx);
tcd.serializeValue(byteArray, v);
setSertupleValue(sertuple, tcd.getName(), v);
}
}
}
// add a final -1 eof marker
byteArray.addInt(-1);
}
/**
* Transform the value part of the tuple into a byte array to be written on disk. Each column is preceded by a
* 4-byte tag combining the id of its data type (1 byte) and its column index (3 bytes).
*
* If there are columns in the tuple which are not in the valueDef, they are added and the TableDefinition is
* serialized on disk.
*
* Columns whose values are null are not serialized but their definition is still added to the table definition if
* not present already.
*
* @param tuple
* @param sertuple
* - if not null, store all the values of the columns to this tuple as written to the database (possibly
* after some data casting)
* @return the serialized version of the value part of the tuple
*
*/
public byte[] serializeValue(Tuple tuple, Row sertuple) {
ByteArray byteArray = new ByteArray();
serializeValue(tuple, sertuple, byteArray);
return byteArray.toArray();
}
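/*
 * Encoded value layout sketch, not part of the original source, derived from serializeValue above
 * (tag = (typeId << 24) | columnIndex):
 *
 *   [tag c0: 4 bytes][serialized c0][tag c1: 4 bytes][serialized c1]...[-1: 4 bytes]
 *
 * e.g. a value column stored at index 5 whose data type id is 4 gets the tag 0x04000005.
 */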
private void setSertupleValue(Row sertuple, String colName, Object value) {
if (sertuple != null) {
int idx = sertuple.getIndex(colName);
if (idx >= 0) {
sertuple.set(idx, value);
}
}
}
public Tuple deserialize(byte[] k, byte[] v) {
TupleDefinition tdef = new TupleDefinition();
ArrayList