All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pinot.spi.data.FieldSpec Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.spi.data;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.Serializable;
import java.math.BigDecimal;
import java.sql.Timestamp;
import javax.annotation.Nullable;
import org.apache.pinot.spi.utils.BooleanUtils;
import org.apache.pinot.spi.utils.BytesUtils;
import org.apache.pinot.spi.utils.EqualityUtils;
import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.spi.utils.TimestampUtils;


/**
 * The FieldSpec class contains all specs related to any field (column) in {@link Schema}.
 * 

There are 3 types of FieldSpec: * {@link DimensionFieldSpec}, {@link MetricFieldSpec}, {@link TimeFieldSpec} *

Specs stored are as followings: *

- Name: name of the field. *

- DataType: type of the data stored (e.g. INTEGER, LONG, FLOAT, DOUBLE, STRING). *

- IsSingleValueField: single-value or multi-value field. *

- DefaultNullValue: when no value found for this field, use this value. Stored in string format. *

- VirtualColumnProvider: the virtual column provider to use for this field. */ @SuppressWarnings("unused") public abstract class FieldSpec implements Comparable, Serializable { public static final int DEFAULT_MAX_LENGTH = 512; public static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_INT = Integer.MIN_VALUE; public static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_LONG = Long.MIN_VALUE; public static final Float DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT = Float.NEGATIVE_INFINITY; public static final Double DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE = Double.NEGATIVE_INFINITY; public static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN = 0; public static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP = 0L; public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_STRING = "null"; public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_JSON = "null"; public static final byte[] DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES = new byte[0]; public static final BigDecimal DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL = BigDecimal.ZERO; public static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0; public static final Long DEFAULT_METRIC_NULL_VALUE_OF_LONG = 0L; public static final Float DEFAULT_METRIC_NULL_VALUE_OF_FLOAT = 0.0F; public static final Double DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE = 0.0D; public static final BigDecimal DEFAULT_METRIC_NULL_VALUE_OF_BIG_DECIMAL = BigDecimal.ZERO; public static final String DEFAULT_METRIC_NULL_VALUE_OF_STRING = "null"; public static final byte[] DEFAULT_METRIC_NULL_VALUE_OF_BYTES = new byte[0]; protected String _name; protected DataType _dataType; protected boolean _isSingleValueField = true; // NOTE: for STRING column, this is the max number of characters; for BYTES column, this is the max number of bytes private int _maxLength = DEFAULT_MAX_LENGTH; protected Object _defaultNullValue; private transient String _stringDefaultNullValue; // Transform function to generate this column, can be based on other columns @Deprecated // 
Set this in TableConfig -> IngestionConfig -> TransformConfigs protected String _transformFunction; protected String _virtualColumnProvider; // Default constructor required by JSON de-serializer. DO NOT REMOVE. public FieldSpec() { } public FieldSpec(String name, DataType dataType, boolean isSingleValueField) { this(name, dataType, isSingleValueField, DEFAULT_MAX_LENGTH, null); } public FieldSpec(String name, DataType dataType, boolean isSingleValueField, @Nullable Object defaultNullValue) { this(name, dataType, isSingleValueField, DEFAULT_MAX_LENGTH, defaultNullValue); } public FieldSpec(String name, DataType dataType, boolean isSingleValueField, int maxLength, @Nullable Object defaultNullValue) { _name = name; _dataType = dataType; _isSingleValueField = isSingleValueField; _maxLength = maxLength; setDefaultNullValue(defaultNullValue); } public abstract FieldType getFieldType(); public String getName() { return _name; } // Required by JSON de-serializer. DO NOT REMOVE. public void setName(String name) { _name = name; } public DataType getDataType() { return _dataType; } // Required by JSON de-serializer. DO NOT REMOVE. public void setDataType(DataType dataType) { _dataType = dataType; _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue); } public boolean isSingleValueField() { return _isSingleValueField; } // Required by JSON de-serializer. DO NOT REMOVE. public void setSingleValueField(boolean isSingleValueField) { _isSingleValueField = isSingleValueField; } public int getMaxLength() { return _maxLength; } // Required by JSON de-serializer. DO NOT REMOVE. public void setMaxLength(int maxLength) { _maxLength = maxLength; } public String getVirtualColumnProvider() { return _virtualColumnProvider; } public void setVirtualColumnProvider(String virtualColumnProvider) { _virtualColumnProvider = virtualColumnProvider; } /** * Returns whether the column is virtual. 
Virtual columns are constructed while loading the segment, thus do not exist * in the record, nor should be persisted to the disk. *

Identify a column as virtual if the virtual column provider is configured. */ @JsonIgnore public boolean isVirtualColumn() { return _virtualColumnProvider != null && !_virtualColumnProvider.isEmpty(); } public Object getDefaultNullValue() { return _defaultNullValue; } public String getDefaultNullValueString() { return getStringValue(_defaultNullValue); } /** * Helper method to return the String value for the given object. * This is required as not all data types have a toString() (eg byte[]). * * @param value Value for which String value needs to be returned * @return String value for the object. */ protected static String getStringValue(Object value) { if (value instanceof byte[]) { return BytesUtils.toHexString((byte[]) value); } else { return value.toString(); } } // Required by JSON de-serializer. DO NOT REMOVE. public void setDefaultNullValue(@Nullable Object defaultNullValue) { if (defaultNullValue != null) { _stringDefaultNullValue = getStringValue(defaultNullValue); } if (_dataType != null) { _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue); } } public static Object getDefaultNullValue(FieldType fieldType, DataType dataType, @Nullable String stringDefaultNullValue) { if (stringDefaultNullValue != null) { return dataType.convert(stringDefaultNullValue); } else { switch (fieldType) { case METRIC: switch (dataType) { case INT: return DEFAULT_METRIC_NULL_VALUE_OF_INT; case LONG: return DEFAULT_METRIC_NULL_VALUE_OF_LONG; case FLOAT: return DEFAULT_METRIC_NULL_VALUE_OF_FLOAT; case DOUBLE: return DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE; case BIG_DECIMAL: return DEFAULT_METRIC_NULL_VALUE_OF_BIG_DECIMAL; case STRING: return DEFAULT_METRIC_NULL_VALUE_OF_STRING; case BYTES: return DEFAULT_METRIC_NULL_VALUE_OF_BYTES; default: throw new IllegalStateException("Unsupported metric data type: " + dataType); } case DIMENSION: case TIME: case DATE_TIME: switch (dataType) { case INT: return DEFAULT_DIMENSION_NULL_VALUE_OF_INT; case LONG: 
return DEFAULT_DIMENSION_NULL_VALUE_OF_LONG; case FLOAT: return DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT; case DOUBLE: return DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE; case BOOLEAN: return DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN; case TIMESTAMP: return DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP; case STRING: return DEFAULT_DIMENSION_NULL_VALUE_OF_STRING; case JSON: return DEFAULT_DIMENSION_NULL_VALUE_OF_JSON; case BYTES: return DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES; case BIG_DECIMAL: return DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL; default: throw new IllegalStateException("Unsupported dimension/time data type: " + dataType); } default: throw new IllegalStateException("Unsupported field type: " + fieldType); } } } /** * Transform function if defined else null. * Deprecated. Use TableConfig -> IngestionConfig -> TransformConfigs */ @Deprecated public String getTransformFunction() { return _transformFunction; } // Required by JSON de-serializer. DO NOT REMOVE. /** * Deprecated. Use TableConfig -> IngestionConfig -> TransformConfigs */ @Deprecated public void setTransformFunction(@Nullable String transformFunction) { _transformFunction = transformFunction; } /** * Returns the {@link ObjectNode} representing the field spec. *

Only contains fields with non-default value. *

NOTE: here we use {@link ObjectNode} to preserve the insertion order. */ public ObjectNode toJsonObject() { ObjectNode jsonObject = JsonUtils.newObjectNode(); jsonObject.put("name", _name); jsonObject.put("dataType", _dataType.name()); if (!_isSingleValueField) { jsonObject.put("singleValueField", false); } if (_maxLength != DEFAULT_MAX_LENGTH) { jsonObject.put("maxLength", _maxLength); } appendDefaultNullValue(jsonObject); appendTransformFunction(jsonObject); return jsonObject; } protected void appendDefaultNullValue(ObjectNode jsonNode) { assert _defaultNullValue != null; String key = "defaultNullValue"; if (!_defaultNullValue.equals(getDefaultNullValue(getFieldType(), _dataType, null))) { switch (_dataType) { case INT: jsonNode.put(key, (Integer) _defaultNullValue); break; case LONG: jsonNode.put(key, (Long) _defaultNullValue); break; case FLOAT: jsonNode.put(key, (Float) _defaultNullValue); break; case DOUBLE: jsonNode.put(key, (Double) _defaultNullValue); break; case BIG_DECIMAL: jsonNode.put(key, (BigDecimal) _defaultNullValue); break; case BOOLEAN: jsonNode.put(key, (Integer) _defaultNullValue == 1); break; case TIMESTAMP: jsonNode.put(key, new Timestamp((Long) _defaultNullValue).toString()); break; case STRING: case JSON: jsonNode.put(key, (String) _defaultNullValue); break; case BYTES: jsonNode.put(key, BytesUtils.toHexString((byte[]) _defaultNullValue)); break; default: throw new IllegalStateException("Unsupported data type: " + this); } } } protected void appendTransformFunction(ObjectNode jsonNode) { if (_transformFunction != null) { jsonNode.put("transformFunction", _transformFunction); } } @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") @Override public boolean equals(Object o) { if (EqualityUtils.isSameReference(this, o)) { return true; } if (EqualityUtils.isNullOrNotSameClass(this, o)) { return false; } FieldSpec that = (FieldSpec) o; return EqualityUtils.isEqual(_name, that._name) && EqualityUtils.isEqual(_dataType, that._dataType) && 
EqualityUtils .isEqual(_isSingleValueField, that._isSingleValueField) && EqualityUtils .isEqual(getStringValue(_defaultNullValue), getStringValue(that._defaultNullValue)) && EqualityUtils .isEqual(_maxLength, that._maxLength) && EqualityUtils.isEqual(_transformFunction, that._transformFunction) && EqualityUtils.isEqual(_virtualColumnProvider, that._virtualColumnProvider); } @Override public int hashCode() { int result = EqualityUtils.hashCodeOf(_name); result = EqualityUtils.hashCodeOf(result, _dataType); result = EqualityUtils.hashCodeOf(result, _isSingleValueField); result = EqualityUtils.hashCodeOf(result, getStringValue(_defaultNullValue)); result = EqualityUtils.hashCodeOf(result, _maxLength); result = EqualityUtils.hashCodeOf(result, _transformFunction); result = EqualityUtils.hashCodeOf(result, _virtualColumnProvider); return result; } /** * The FieldType enum is used to demonstrate the real world business logic for a column. *

DIMENSION: columns used to filter records. *

METRIC: columns used to apply aggregation on. METRIC field only contains numeric data. *

TIME: time column (at most one per {@link Schema}). TIME field can be used to prune *

DATE_TIME: time column (at most one per {@link Schema}). TIME field can be used to * prune * segments, otherwise treated the same as DIMENSION field. */ public enum FieldType { DIMENSION, METRIC, TIME, DATE_TIME, COMPLEX } /** * The DataType enum is used to demonstrate the data type of a field. */ @SuppressWarnings("rawtypes") public enum DataType { // LIST is for complex lists which is different from multi-value column of primitives // STRUCT, MAP and LIST are composable to form a COMPLEX field INT(Integer.BYTES, true, true), LONG(Long.BYTES, true, true), FLOAT(Float.BYTES, true, true), DOUBLE(Double.BYTES, true, true), BIG_DECIMAL(true, true), BOOLEAN(INT, false, true), TIMESTAMP(LONG, false, true), STRING(false, true), JSON(STRING, false, false), BYTES(false, false), STRUCT(false, false), MAP(false, false), LIST(false, false); private final DataType _storedType; private final int _size; private final boolean _sortable; private final boolean _numeric; DataType(boolean numeric, boolean sortable) { _storedType = this; _size = -1; _sortable = sortable; _numeric = numeric; } DataType(DataType storedType, boolean numeric, boolean sortable) { _storedType = storedType; _size = storedType._size; _sortable = sortable; _numeric = numeric; } DataType(int size, boolean numeric, boolean sortable) { _storedType = this; _size = size; _sortable = sortable; _numeric = numeric; } /** * Returns the data type stored in Pinot. *

Pinot internally stores data (physical) in INT, LONG, FLOAT, DOUBLE, STRING, BYTES type, other data types * (logical) will be stored as one of these types. *

Stored type should be used when reading the physical stored values from Dictionary, Forward Index etc. */ public DataType getStoredType() { return _storedType; } /** * Returns {@code true} if the data type is of fixed width (INT, LONG, FLOAT, DOUBLE, BOOLEAN, TIMESTAMP), * {@code false} otherwise. */ public boolean isFixedWidth() { return _size >= 0; } /** * Returns the number of bytes needed to store the data type. */ public int size() { if (_size >= 0) { return _size; } throw new IllegalStateException("Cannot get number of bytes for: " + this); } /** * Returns {@code true} if the data type is numeric (INT, LONG, FLOAT, DOUBLE, BIG_DECIMAL), {@code false} * otherwise. */ public boolean isNumeric() { return _numeric; } /** * Converts the given string value to the data type. Returns byte[] for BYTES. */ public Object convert(String value) { try { switch (this) { case INT: return Integer.valueOf(value); case LONG: return Long.valueOf(value); case FLOAT: return Float.valueOf(value); case DOUBLE: return Double.valueOf(value); case BIG_DECIMAL: return new BigDecimal(value); case BOOLEAN: return BooleanUtils.toInt(value); case TIMESTAMP: return TimestampUtils.toMillisSinceEpoch(value); case STRING: case JSON: return value; case BYTES: return BytesUtils.toBytes(value); default: throw new IllegalStateException(); } } catch (Exception e) { throw new IllegalArgumentException(String.format("Cannot convert value: '%s' to type: %s", value, this)); } } /** * Converts the given string value to the data type. Returns ByteArray for BYTES. 
*/ public Comparable convertInternal(String value) { try { switch (this) { case INT: return Integer.valueOf(value); case LONG: return Long.valueOf(value); case FLOAT: return Float.valueOf(value); case DOUBLE: return Double.valueOf(value); case BIG_DECIMAL: return new BigDecimal(value); case BOOLEAN: return BooleanUtils.toInt(value); case TIMESTAMP: return TimestampUtils.toMillisSinceEpoch(value); case STRING: case JSON: return value; case BYTES: return BytesUtils.toByteArray(value); default: throw new IllegalStateException(); } } catch (Exception e) { throw new IllegalArgumentException(String.format("Cannot convert value: '%s' to type: %s", value, this)); } } /** * Checks whether the data type can be a sorted column. */ public boolean canBeASortedColumn() { return _sortable; } } @Override public int compareTo(FieldSpec otherSpec) { // Sort fieldspecs based on their name return _name.compareTo(otherSpec._name); } /*** * Return true if it is backward compatible with the old FieldSpec. * Backward compatibility requires * all other fields except DefaultNullValue and Max Length should be retained. * * @param oldFieldSpec * @return */ public boolean isBackwardCompatibleWith(FieldSpec oldFieldSpec) { return EqualityUtils.isEqual(_name, oldFieldSpec._name) && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType) && EqualityUtils.isEqual(_isSingleValueField, oldFieldSpec._isSingleValueField); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy