package org.yamcs.yarch;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yamcs.LimitExceededException;
import org.yamcs.utils.ByteArray;
import org.yamcs.utils.DatabaseCorruptionException;
import org.yamcs.utils.IndexedList;
import org.yamcs.utils.StringConverter;
import org.yamcs.yarch.PartitioningSpec._type;
import org.yamcs.yarch.streamsql.ColumnNotFoundException;
import org.yamcs.yarch.streamsql.GenericStreamSqlException;
import org.yamcs.yarch.streamsql.StreamSqlException;
import org.yamcs.yarch.streamsql.StreamSqlException.ErrCode;
import com.google.common.collect.BiMap;
/**
* A table definition consists of a (key,value) pair of tuple definitions. A tuple has to contain all the columns of
* the key, while it may contain only a subset of the columns of the value (i.e. it is effectively a sparse table).
*
*
* The key is encoded as a byte array of all the key columns in order. The value is encoded as a byte array of the
* value columns present in the tuple, each preceded by the id of its data type (1 byte) and its column index (3
* bytes).
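*
* For example, as computed in serializeValue below, a value column with data type id T stored at column index i is
* preceded by the 4-byte tag (T << 24) | i, and the whole encoded value part is terminated by the marker -1.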
*
* The secondary index key is encoded as a byte array of all the columns in order, each preceded by the id of its
* data type, with the first bit set to 1 for the columns that are present and 0 for the columns that are not
* present (i.e. null).
*
* A table can also be partitioned according to the partitioningSpec.
*
*/
public class TableDefinition {
static Logger log = LoggerFactory.getLogger(TableDefinition.class.getName());
static final int MAX_NUM_COLS = 0x00FFFFFF;
/*
* table version history
* 0: yamcs version < 3.0
* 1: - the histograms were stored in a separate rocksdb database.
* - pp table contained a column ppgroup instead of group
* 2: - the PROTOBUF(org.yamcs.protobuf.Pvalue$ParameterValue) is replaced by PARAMETER_VALUE in the pp table
* 3: November 2020 (Yamcs 5.3)
* - changed serialization to preserve order of negative values in the key
* - first of the 4 bytes column index preceding the value is the datatype
*
*/
public static final int CURRENT_FORMAT_VERSION = 3;
private final int formatVersion;
// the definition of keys and values columns
private volatile IndexedList<String, TableColumnDefinition> keyDef;
private volatile IndexedList<String, TableColumnDefinition> valueDef;
// these are all columns used in histograms or secondary indices
private volatile IndexedList<String, TableColumnDefinition> histoIdx;
// keyDef+valueDef
private volatile TupleDefinition tupleDef;
private YarchDatabaseInstance ydb;
// compressed and column family name are actually storage dependent
// if we ever support a secondary storage, we should move them into some sort of options
private boolean compressed;
private String cfName;
private PartitioningSpec partitioningSpec = PartitioningSpec.noneSpec();
private String storageEngineName = YarchDatabase.RDB_ENGINE_NAME;
private String name;
private List<String> histoColumns;
private List<String> secondaryIndex;
// these are the value columns which are autoincrement.
private List<TableColumnDefinition> autoIncrementValues;
/**
* Used when creating an empty table via sql.
*
* @param name
* @param tdef
* @param primaryKey
* @throws StreamSqlException
*/
public TableDefinition(String name, TupleDefinition tdef, List<String> primaryKey) throws StreamSqlException {
this.name = name;
this.formatVersion = CURRENT_FORMAT_VERSION;
keyDef = new IndexedList<>();
for (String s : primaryKey) {
ColumnDefinition cd = tdef.getColumn(s);
if (cd == null) {
throw new ColumnNotFoundException(s);
}
TableColumnDefinition tcd = getTcd(cd);
keyDef.add(cd.getName(), tcd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
}
valueDef = new IndexedList<>(tdef.size() - keyDef.size());
for (ColumnDefinition cd : tdef.getColumnDefinitions()) {
if (!keyDef.hasKey(cd.getName())) {
TableColumnDefinition tcd = getTcd(cd);
valueDef.add(cd.getName(), tcd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
}
}
computeTupleDef();
computeAutoincrValues();
computeHistoIdx();
}
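/*
 * Usage sketch, not part of the original source: creating the definition of a table whose primary key is
 * (gentime, seqNum). Assumes TupleDefinition.addColumn(String, DataType) as used elsewhere in yarch.
 *
 * TupleDefinition tdef = new TupleDefinition();
 * tdef.addColumn("gentime", DataType.TIMESTAMP);
 * tdef.addColumn("seqNum", DataType.INT);
 * tdef.addColumn("packet", DataType.BINARY);
 * TableDefinition tblDef = new TableDefinition("tm", tdef, java.util.Arrays.asList("gentime", "seqNum"));
 */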
private TableColumnDefinition getTcd(ColumnDefinition cd) {
if (cd instanceof TableColumnDefinition) {
return (TableColumnDefinition) cd;
} else {
return new TableColumnDefinition(cd);
}
}
/**
* Used when creating the table from the serialized data on disk
*
*/
public TableDefinition(int formatVersion, List<TableColumnDefinition> key, List<TableColumnDefinition> value) {
this.keyDef = new IndexedList<>(key.size());
this.formatVersion = formatVersion;
for (TableColumnDefinition tcd : key) {
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
keyDef.add(tcd.getName(), tcd);
}
this.valueDef = new IndexedList<>(value.size());
for (TableColumnDefinition tcd : value) {
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
valueDef.add(tcd.getName(), tcd);
}
computeTupleDef();
computeAutoincrValues();
computeHistoIdx();
}
public void setDb(YarchDatabaseInstance ydb) {
this.ydb = ydb;
}
private void computeAutoincrValues() {
for (TableColumnDefinition tcd : valueDef) {
if (tcd.isAutoIncrement()) {
if (autoIncrementValues == null) {
autoIncrementValues = new ArrayList<>();
}
autoIncrementValues.add(tcd);
}
}
}
/**
* Time-based partitions can only be on the first column of the key (which has to be of type TIMESTAMP);
* value-based partitions can be on any other mandatory column.
*
* @param pspec
*/
public void setPartitioningSpec(PartitioningSpec pspec) throws StreamSqlException {
if ((pspec.type == PartitioningSpec._type.TIME) || (pspec.type == PartitioningSpec._type.TIME_AND_VALUE)) {
ColumnDefinition cd = keyDef.get(pspec.timeColumn);
if (cd == null) {
throw new GenericStreamSqlException(
"time partition specified on a column not part of the primary key: '" + pspec.timeColumn + "'");
}
if (cd.getType() != DataType.TIMESTAMP) {
throw new GenericStreamSqlException("time partition specified on a column of type " + cd.getType());
}
if (!keyDef.get(0).getName().equals(pspec.timeColumn)) {
throw new GenericStreamSqlException(
"time partition supported only on the first column of the primary key");
}
}
if ((pspec.type == PartitioningSpec._type.VALUE) || (pspec.type == PartitioningSpec._type.TIME_AND_VALUE)) {
ColumnDefinition c = getColumnDefinition(pspec.valueColumn);
if (c == null) {
throw new ColumnNotFoundException(pspec.valueColumn);
}
pspec.setValueColumnType(c.getType());
}
this.partitioningSpec = pspec;
}
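/*
 * Usage sketch, not part of the original source: enabling time+value partitioning on the table defined above.
 * Assumes the static factory timeAndValueSpec(String, String), a sibling of the noneSpec() factory used for
 * the default value of partitioningSpec.
 *
 * PartitioningSpec pspec = PartitioningSpec.timeAndValueSpec("gentime", "seqNum");
 * tblDef.setPartitioningSpec(pspec);
 */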
private void computeTupleDef() {
TupleDefinition tmp = new TupleDefinition();
for (ColumnDefinition cd : keyDef) {
tmp.addColumn(cd);
}
for (ColumnDefinition cd : valueDef) {
tmp.addColumn(cd);
}
tupleDef = tmp;
}
private void computeHistoIdx() {
IndexedList<String, TableColumnDefinition> tmp = new IndexedList<>();
TableColumnDefinition tcd = keyDef.get(0);
if (histoColumns != null) {
tmp.add(tcd.getName(), tcd);
for (String s : histoColumns) {
if (!tmp.hasKey(s)) {
tmp.add(s, getColumnDefinition(s));
}
}
}
if (secondaryIndex != null) {
for (String s : secondaryIndex) {
if (!tmp.hasKey(s)) {
tmp.add(s, getColumnDefinition(s));
}
}
}
histoIdx = tmp;
}
public List<TableColumnDefinition> getKeyDefinition() {
return keyDef.getList();
}
public List<TableColumnDefinition> getValueDefinition() {
return valueDef.getList();
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public TupleDefinition getTupleDefinition() {
return tupleDef;
}
/**
* Checks that the table definition is valid:
* - no STRING column in the primary key except as the last column in the list (otherwise the binary sorting
* does not work properly)
* - AUTO_INCREMENT is only supported for columns of type long
*
* @throws StreamSqlException
*/
public void validate() throws StreamSqlException {
for (TableColumnDefinition tcd : keyDef) {
if (tcd.isAutoIncrement() && tcd.getType() != DataType.LONG) {
throw new StreamSqlException(ErrCode.NOT_SUPPORTED,
"AUTO_INCREMENT is only supported for columns of type long.");
}
}
for (TableColumnDefinition tcd : valueDef) {
if (tcd.isAutoIncrement() && tcd.getType() != DataType.LONG) {
throw new StreamSqlException(ErrCode.NOT_SUPPORTED,
"AUTO_INCREMENT is only supported for columns of type long.");
}
}
}
/**
* Generate a new table row by transforming the key part of the tuple into a byte array to be written to disk. The
* tuple must contain each column from the key and they are written in order (such that sorting is according to the
* definition of the primary key).
*
* In addition, it stores into the returned row all the values for the columns used in histograms or indices
*
* @param t
* @return a row containing the histogram and secondary index column values as well as the generated key
* @throws YarchException
*/
public Row generateRow(Tuple t) throws YarchException {
Row tableTuple = new Row(histoIdx);
ByteArray byteArray = new ByteArray();
for (int keyIdx = 0; keyIdx < keyDef.size(); keyIdx++) {
TableColumnDefinition tableCd = keyDef.get(keyIdx);
String colName = tableCd.getName();
int tIdx = t.getColumnIndex(colName);
Object value;
if (tIdx < 0) {
if (tableCd.isAutoIncrement()) {
value = tableCd.getSequence().next();
} else {
throw new IllegalArgumentException("Tuple does not have mandatory column '" + colName + "'");
}
} else {
ColumnDefinition tupleCd = t.getColumnDefinition(tIdx);
Object v = t.getColumn(tIdx);
value = DataType.castAs(tupleCd.type, tableCd.type, v);
}
tableCd.serializeValue(byteArray, value);
setSertupleValue(tableTuple, colName, value);
}
tableTuple.setKey(byteArray.toArray());
return tableTuple;
}
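/*
 * Key layout sketch, not part of the original source: for a primary key (gentime TIMESTAMP, seqNum INT)
 * the generated key is simply the concatenation
 *
 *   [gentime bytes][seqNum bytes]
 *
 * where each column is written with an order-preserving encoding, so that sorting the byte arrays
 * lexicographically sorts the rows according to the primary key definition (format version 3 also
 * preserves the order of negative values, see the version history above).
 */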
/**
* adds all missing columns to the value part and serializes the table definition to disk
*/
private synchronized void addMissingValueColumns(TupleDefinition tdef) {
IndexedList<String, TableColumnDefinition> valueDef1 = new IndexedList<>(valueDef);
if (valueDef.size() >= MAX_NUM_COLS) {
throw new LimitExceededException(
"The number of value columns in table " + name + " has reached the maximum " + MAX_NUM_COLS);
}
for (int i = 0; i < tdef.size(); i++) {
ColumnDefinition cd = tdef.getColumn(i);
if (keyDef.hasKey(cd.getName())) {
continue;
}
int cidx = valueDef.getIndex(cd.getName());
if (cidx == -1) {
TableColumnDefinition tcd = new TableColumnDefinition(cd);
tcd.setSerializer(ColumnSerializerFactory.getColumnSerializer(this, tcd));
valueDef1.add(tcd.name, tcd);
}
}
ydb.saveTableDefinition(this, keyDef.getList(), valueDef1.getList());
valueDef = valueDef1;
computeTupleDef();
}
/**
* Renames column and serializes the table definition to disk.
*
* Commented out because not safe (can only be used when nobody is using the table)
*
* @param oldName
* - old name of the column
* @param newName
* - new name of the column
*
* public synchronized void renameColumn(String oldName, String newName) { if (keyDef.hasKey(oldName)) {
* keyDef.changeKey(oldName, newName); } else if (valueDef.hasKey(oldName)) { valueDef.changeKey(oldName,
* newName); } else { throw new IllegalArgumentException("no column named '" + oldName + "'"); }
*
* if(secondaryIndexDef.hasKey(oldName)) { keyDef.changeKey(oldName, newName); }
*
* if (oldName.equals(partitioningSpec.timeColumn)) { PartitioningSpec newSpec = new
* PartitioningSpec(partitioningSpec.type, newName, partitioningSpec.valueColumn);
* newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema()); partitioningSpec =
* newSpec; } else if (oldName.equals(partitioningSpec.valueColumn)) { PartitioningSpec newSpec = new
* PartitioningSpec(partitioningSpec.type, partitioningSpec.timeColumn, newName);
* newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema()); partitioningSpec =
* newSpec; }
*
* int idx = histoColumns.indexOf(oldName); if (idx != -1) { histoColumns.set(idx, newName); }
* ydb.saveTableDefinition(this, keyDef.getList(), valueDef.getList()); }
*/
/**
* Adds a value to an enum column and writes the updated table definition to disk
*
*/
private Short addEnumValue(String columnName, String value) {
TableColumnDefinition tdef = getColumnDefinition(columnName);
TableColumnDefinition tdef1 = new TableColumnDefinition(tdef);
short x = tdef1.addEnumValue(value);
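// copy-on-write: build modified copies of the definitions and publish them by swapping the volatile
// references only after the new definition has been persisted, so concurrent readers always see a
// consistent state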
IndexedList<String, TableColumnDefinition> keyDef1 = keyDef;
IndexedList<String, TableColumnDefinition> valueDef1 = valueDef;
IndexedList<String, TableColumnDefinition> histoIdx1 = histoIdx;
int idx = keyDef.getIndex(columnName);
if (idx >= 0) {
keyDef1 = new IndexedList<>(keyDef);
keyDef1.set(idx, tdef1);
} else {
idx = valueDef.getIndex(columnName);
assert (idx >= 0);
valueDef1 = new IndexedList<>(valueDef);
valueDef1.set(idx, tdef1);
}
ydb.saveTableDefinition(this, keyDef1.getList(), valueDef1.getList());
idx = histoIdx.getIndex(columnName);
if (idx >= 0) {
histoIdx1 = new IndexedList<>(histoIdx);
histoIdx1.set(idx, tdef1);
}
keyDef = keyDef1;
valueDef = valueDef1;
histoIdx = histoIdx1;
computeTupleDef();
return x;
}
/**
* Gets the enum index corresponding to a value of an enum column, creating it (and persisting the updated
* table definition) if it does not exist.
*
* @return the index of the value in the column's enumeration
*/
public Short addAndGetEnumValue(String columnName, String value) {
TableColumnDefinition tdef = getColumnDefinition(columnName);
if (tdef == null) {
throw new IllegalArgumentException("No column named '" + columnName + "'");
}
if (value == null) {
throw new NullPointerException("Enum value cannot be null");
}
Short enumValue = tdef.getEnumIndex(value);
if (enumValue == null) {
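// check-lock-recheck: another thread may have added the value between the unsynchronized read
// above and acquiring the lock, so re-read the index before creating a new entry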
synchronized (this) {
enumValue = tdef.getEnumIndex(value);
if (enumValue == null) {
enumValue = addEnumValue(columnName, value);
}
}
}
return enumValue;
}
/**
* Same as {@link #serializeValue(Tuple, Row)} but encodes the output into a user-provided byte array
*
* @param tuple
* @param sertuple
* @param byteArray
*/
public void serializeValue(Tuple tuple, Row sertuple, ByteArray byteArray) {
TupleDefinition tdef = tuple.getDefinition();
int length = byteArray.size();
for (int i = 0; i < tdef.size(); i++) {
ColumnDefinition tupleCd = tdef.getColumn(i);
if (keyDef.hasKey(tupleCd.getName())) {
continue;
}
int cidx = valueDef.getIndex(tupleCd.getName());
if (cidx == -1) { // call this function again after adding the
// missing columns to the table
addMissingValueColumns(tdef);
byteArray.reset(length);
serializeValue(tuple, sertuple, byteArray);
return;
}
TableColumnDefinition tableCd = valueDef.get(cidx);
Object v = tuple.getColumn(i);
if (v == null) {
continue;
}
Object v1 = DataType.castAs(tupleCd.type, tableCd.type, v);
cidx = (tableCd.type.getTypeId() << 24) | cidx;
byteArray.addInt(cidx);
tableCd.serializeValue(byteArray, v1);
setSertupleValue(sertuple, tupleCd.getName(), v1);
}
// add values for all the autoincrements which are not part of the tuple
if (autoIncrementValues != null) {
for (TableColumnDefinition tcd : autoIncrementValues) {
if (!tuple.hasColumn(tcd.getName())) {
long v = tcd.getSequence().next();
int cidx = (tcd.type.getTypeId() << 24) | valueDef.getIndex(tcd.getName());
byteArray.addInt(cidx);
tcd.serializeValue(byteArray, v);
setSertupleValue(sertuple, tcd.getName(), v);
}
}
}
// add a final -1 eof marker
byteArray.addInt(-1);
}
/**
* Transform the value part of the tuple into a byte array to be written on disk. Each column is preceded by a
* 4-byte tag combining the id of its data type (1 byte) and its column index (3 bytes).
*
* If there are columns in the tuple which are not in the valueDef, they are added and the TableDefinition is
* serialized on disk.
*
* Columns whose values are null are not serialized but their definition is still added to the table definition if
* not present already.
*
* @param tuple
* @param sertuple
* - if not null, store all the values of the columns to this tuple as written to the database (possibly
* after some data casting)
* @return the serialized version of the value part of the tuple
*
*/
public byte[] serializeValue(Tuple tuple, Row sertuple) {
ByteArray byteArray = new ByteArray();
serializeValue(tuple, sertuple, byteArray);
return byteArray.toArray();
}
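/*
 * Encoded value layout sketch, not part of the original source, derived from serializeValue above
 * (tag = (typeId << 24) | columnIndex):
 *
 *   [tag c0: 4 bytes][serialized c0][tag c1: 4 bytes][serialized c1]...[-1: 4 bytes]
 *
 * e.g. a value column stored at index 5 whose data type id is 4 gets the tag 0x04000005.
 */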
private void setSertupleValue(Row sertuple, String colName, Object value) {
if (sertuple != null) {
int idx = sertuple.getIndex(colName);
if (idx >= 0) {
sertuple.set(idx, value);
}
}
}
public Tuple deserialize(byte[] k, byte[] v) {
TupleDefinition tdef = new TupleDefinition();
ArrayList