io.deephaven.benchmark.generator.ColumnDefs Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of deephaven-benchmark Show documentation
Deephaven Benchmark
The newest version!
/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */
package io.deephaven.benchmark.generator;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Contains column definitions used to generate data and schemas. Columns are described by name, type, and data range
 * (e.g. "[1-100]", "str[1-100]ing"). Values are retrieved during data generation either randomly or incrementally
 * through the range. The same seed is used for the random generator each time this class is instantiated.
 * <p>
 * Note: All possible data values are loaded up front to prevent object creation during data production. This can take
 * a considerable amount of memory for larger scales, especially for generated strings.
 */
public class ColumnDefs {
    // Value cache size fixed at construction (1024 when the public no-arg constructor is used).
    final int valueCacheSize;
    // Column definitions in the order they were added. Typed so that element access needs no casts.
    final List<ColumnDef> columns = new ArrayList<>();
    // Distribution name for columns that do not define their own; replaced through setDefaultDistribution.
    private String defaultDistribution = "random";

    public ColumnDefs() {
        this(1024);
    }

    /**
     * Create an empty set of column definitions with an explicit value cache size. The size is stored as given; no
     * validation is performed here.
     *
     * @param valueCacheSize the value cache size to keep for this set
     */
    ColumnDefs(int valueCacheSize) {
        this.valueCacheSize = valueCacheSize;
    }

    /**
     * Get the number of column definitions currently held by this set.
     *
     * @return the number of column definitions
     */
    public int getCount() {
        return columns.size();
    }

    /**
     * Set the default column distribution for columns that do not have a distribution defined. The name is stored as
     * given; it is not validated by this method.
     *
     * @param distribution the distribution name (e.g. {@code random | incremental})
     */
    public void setDefaultDistribution(String distribution) {
        defaultDistribution = distribution;
    }

    /**
     * Get the maximum possible number of values represented by the ranges in all column definitions, i.e. the size
     * of the largest range in this set. For example, given two column ranges [1-10] and [10-30], the result is the
     * size of [10-30], the larger of the two.
     * <p>
     * An empty set yields 0 instead of failing with a {@link NoSuchElementException}.
     *
     * @return the maximum number of values defined by any column in this set, or 0 if there are no columns
     */
    public long getMaxValueCount() {
        return columns.stream()
                .mapToLong(c -> c.maker().getDefSize())
                .max()
                .orElse(0L); // no columns: nothing can be generated, so report zero rather than throw
    }

    /**
     * Build a comma-separated list of the column names in this set, each wrapped in double quotes. Names appear in
     * the order the columns were added; an empty set produces an empty string.
     *
     * @return the quoted column names joined with commas
     */
    public String getQuotedColumns() {
        var quotedNames = new StringJoiner(",");
        for (ColumnDef column : columns) {
            quotedNames.add("\"" + column.name() + "\"");
        }
        return quotedNames.toString();
    }

    /**
     * Get a map containing the name and type for each column in this definition set. The map iterates in the order
     * the columns were added. If two columns share a name, the later type replaces the earlier one while the entry
     * keeps its original position.
     *
     * @return column names mapped to column types; a new map on each call
     */
    public Map<String, String> toTypeMap() {
        Map<String, String> typeMap = new LinkedHashMap<>();
        for (ColumnDef column : columns) {
            typeMap.put(column.name(), column.type());
        }
        return typeMap;
    }

    /**
     * Register a new column definition at the end of this set.
     *
     * @param name the column name
     * @param type the column type
     * @param valueDef the range data (ex. "[1-10]", "str[1-100]ing")
     * @param distribution a distribution function (e.g. random, incremental) to use instead of the default, or
     *        {@code null} when no override is wanted
     * @return this
     * @throws RuntimeException if {@code type} is not one of the supported column types
     */
    public ColumnDefs add(String name, String type, String valueDef, String distribution) {
        final Maker valueMaker = getMaker(type, valueDef);
        // The maker receives the full column description alongside the requested distribution.
        final String columnKey = name + ':' + type + ':' + valueDef;
        valueMaker.setDistribution(distribution, columnKey);
        columns.add(new ColumnDef(name, type, valueDef, valueMaker));
        return this;
    }

    /**
     * Add a new column definition without a distribution override. Equivalent to calling
     * {@code add(name, type, valueDef, null)}.
     *
     * @param name the column name
     * @param type the column type
     * @param valueDef the range data (ex. "[1-10]", "str[1-100]ing")
     * @return this
     */
    public ColumnDefs add(String name, String type, String valueDef) {
        return add(name, type, valueDef, null);
    }

    /**
     * Produce the next value for the column at the given position, following that column's distribution.
     *
     * @param columnIndex the index of the column
     * @param seed a value to use to get the next value (e.g. row id)
     * @param max the maximum value that could be used as a seed (e.g. row count)
     * @return the next value according to the column definition
     * @throws IndexOutOfBoundsException if {@code columnIndex} does not refer to a column in this set
     */
    public Object nextValue(int columnIndex, long seed, long max) {
        final ColumnDef column = columns.get(columnIndex);
        final Maker valueMaker = column.maker();
        return valueMaker.next(seed, max);
    }

    /**
     * Get the column definitions as a string: a header line followed by one comma-separated line per column. Lines
     * always end with {@code \n}, intentionally avoiding OS-specific line endings, and the distribution name is
     * lower-cased independently of the default locale so the text is the same on every machine.
     * <p>
     * Note: This method is used to write table definitions for comparison to the file system. Do not change without
     * understanding the impact.
     *
     * @return a string describing this column definition
     */
    public String describe() {
        var text = new StringBuilder("name,type,values,distribution\n");
        for (ColumnDef column : columns) {
            // Locale.ROOT: a default locale such as Turkish would otherwise turn 'I' into a dotless 'ı'.
            var distribution = column.maker().getDistribution().toLowerCase(Locale.ROOT);
            text.append(String.join(",", column.name(), column.type(), column.valueDef(), distribution))
                    .append('\n');
        }
        return text.toString();
    }

    /**
     * Create the value maker for a column type. The type name is matched case-insensitively, using locale-independent
     * lower-casing so that names such as "INT" resolve the same way under every default locale.
     *
     * @param type the column type name: string, long, int, double, float or timestamp-millis
     * @param valueDef the value definition to parse (ex. "[1-10]", "str[1-100]ing")
     * @return a maker of the matching kind built on the parsed value definition
     * @throws RuntimeException if {@code type} is not one of the supported names
     */
    private Maker getMaker(String type, String valueDef) {
        // The value definition is parsed before the type is checked, as before.
        ValueDef def = parseValueDef(valueDef);
        return switch (type.toLowerCase(Locale.ROOT)) {
            case "string" -> new StringMaker(def);
            case "long" -> new LongMaker(def);
            case "int" -> new IntMaker(def);
            case "double" -> new DoubleMaker(def);
            case "float" -> new FloatMaker(def);
            case "timestamp-millis" -> new TimestampMaker(def);
            default -> throw new RuntimeException("Invalid field type: " + type);
        };
    }

    // Matches a whole value definition that contains a bracketed numeric range, e.g. "[1-10]" or "str[1-100]ing".
    // Group 1 is the bracketed text, group 2 the range start, group 3 the range end. Because the leading ".*" is
    // greedy, the last range is the one captured when a definition contains several.
    private static final Pattern RANGE_PATTERN = Pattern.compile(".*(\\[([0-9]+)-([0-9]+)\\]).*");

    /**
     * Parse a value definition such as "[1-10]" or "str[1-100]ing".
     * <p>
     * When the definition contains no bracketed range, the result is {@code new ValueDef(0, 1, null, valueDef, true)}.
     * Otherwise the result carries the range start, the number of values in the range (both bounds counted), the
     * bracketed text and the full definition, with the last argument {@code false}.
     *
     * @param valueDef the value definition text
     * @return the parsed value definition
     * @throws NumberFormatException if a range bound does not fit in a {@code long}
     */
    private ValueDef parseValueDef(String valueDef) {
        Matcher match = RANGE_PATTERN.matcher(valueDef);
        if (!match.matches()) {
            return new ValueDef(0, 1, null, valueDef, true);
        }
        long rangeStart = Long.parseLong(match.group(2));
        long rangeEnd = Long.parseLong(match.group(3)) + 1; // written end is inclusive, so make it exclusive
        // NOTE(review): a reversed range such as "[10-1]" yields a negative size here, as it did before; confirm
        // whether ValueDef is expected to cope with that.
        return new ValueDef(rangeStart, rangeEnd - rangeStart, match.group(1), valueDef, false);
    }

    /**
     * A single column definition as stored by this set.
     *
     * @param name the column name
     * @param type the column type name as supplied by the caller (e.g. "int", "string")
     * @param valueDef the unparsed value definition (ex. "[1-10]", "str[1-100]ing")
     * @param maker the value maker created for this column
     */
    record ColumnDef(String name, String type, String valueDef, Maker maker) {
    }

    /**
     * Maker for "string" columns: each value is the string the value definition yields for an index.
     */
    class StringMaker extends Maker {
        StringMaker(ValueDef def) {
            super(def);
        }

        /** Return the string the value definition provides for the given index. */
        @Override
        String value(long index) {
            return def.getString(index);
        }
    }

    /**
     * Maker for "long" columns: each value is the long the value definition yields for an index.
     */
    class LongMaker extends Maker {
        LongMaker(ValueDef def) {
            super(def);
        }

        /** Return the definition's long for the given index, boxed. */
        @Override
        Long value(long index) {
            final long raw = def.getLong(index);
            return Long.valueOf(raw);
        }
    }

    /**
     * Maker for "int" columns: each value is the definition's long for an index, narrowed to an int.
     */
    class IntMaker extends Maker {
        IntMaker(ValueDef def) {
            super(def);
        }

        /**
         * Return the definition's long for the given index as an Integer. The narrowing cast keeps only the low 32
         * bits, so values outside the int range wrap silently.
         */
        @Override
        Integer value(long index) {
            final long raw = def.getLong(index);
            return Integer.valueOf((int) raw);
        }
    }

    /**
     * Maker for "double" columns: each value is the definition's long for an index, converted to a double.
     */
    class DoubleMaker extends Maker {
        DoubleMaker(ValueDef def) {
            super(def);
        }

        /** Return the definition's long for the given index widened to a Double. */
        @Override
        Double value(long index) {
            final long raw = def.getLong(index);
            return Double.valueOf((double) raw);
        }
    }

    /**
     * Maker for "float" columns: each value is the definition's long for an index, converted to a float.
     */
    class FloatMaker extends Maker {
        FloatMaker(ValueDef def) {
            super(def);
        }

        /** Return the definition's long for the given index converted to a Float. */
        @Override
        Float value(long index) {
            final long raw = def.getLong(index);
            return Float.valueOf((float) raw);
        }
    }

    /**
     * Maker for "timestamp-millis" columns: each value is the definition's long for an index, returned unchanged.
     */
    class TimestampMaker extends Maker {
        TimestampMaker(ValueDef def) {
            super(def);
        }

        /** Return the definition's long for the given index, boxed. */
        @Override
        Long value(long index) {
            final long raw = def.getLong(index);
            return Long.valueOf(raw);
        }
    }

    abstract class Maker {
        final List