All downloads are free. Search and download functionality uses the official Maven repository.

io.prestosql.plugin.kafka.encoder.raw.RawRowEncoder Maven / Gradle / Ivy

There is a newer version: 350
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kafka.encoder.raw;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import io.prestosql.plugin.kafka.encoder.AbstractRowEncoder;
import io.prestosql.plugin.kafka.encoder.EncoderColumnHandle;
import io.prestosql.spi.connector.ConnectorSession;
import io.prestosql.spi.type.Type;
import io.prestosql.spi.type.VarcharType;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.spi.type.BooleanType.BOOLEAN;
import static io.prestosql.spi.type.DoubleType.DOUBLE;
import static io.prestosql.spi.type.IntegerType.INTEGER;
import static io.prestosql.spi.type.RealType.REAL;
import static io.prestosql.spi.type.SmallintType.SMALLINT;
import static io.prestosql.spi.type.TinyintType.TINYINT;
import static java.lang.Integer.parseInt;
import static java.lang.String.format;

/**
 * Row encoder for the Kafka connector's {@code raw} message format.
 *
 * <p>Each column is written into a fixed-size {@link ByteBuffer} at the byte range given by
 * its mapping ({@code "start"} or {@code "start:end"}). The column mappings must tile the
 * buffer exactly: the first column starts at offset 0 and every following column starts
 * where the previous one ended. The buffer uses {@link ByteBuffer}'s default byte order
 * (big-endian).
 *
 * <p>NOTE(review): every {@code append*} method writes the full width of its Java primitive
 * (for example {@link #appendLong(long)} always writes 8 bytes), independent of the column's
 * declared {@link FieldType}. Which {@code append*} method is invoked for which column type
 * is decided by the superclass, which is not visible here - confirm that narrower field
 * types (e.g. BIGINT mapped as BYTE) are handled as intended.
 */
public class RawRowEncoder
        extends AbstractRowEncoder
{
    /**
     * Binary field widths that can be selected through a column's {@code dataFormat}.
     */
    private enum FieldType
    {
        BYTE(Byte.SIZE),
        SHORT(Short.SIZE),
        INT(Integer.SIZE),
        LONG(Long.SIZE),
        FLOAT(Float.SIZE),
        DOUBLE(Double.SIZE);

        // width of the field in bytes
        private final int size;

        FieldType(int bitSize)
        {
            this.size = bitSize / Byte.SIZE;
        }

        /**
         * @return the width of this field type in bytes
         */
        public int getSize()
        {
            return size;
        }
    }

    // "<start>" or "<start>:<end>", both non-negative decimal byte offsets
    private static final Pattern MAPPING_PATTERN = Pattern.compile("(\\d+)(?::(\\d+))?");
    private static final Set<Type> SUPPORTED_PRIMITIVE_TYPES = ImmutableSet.of(
            BIGINT, INTEGER, SMALLINT, TINYINT, DOUBLE, REAL, BOOLEAN);

    public static final String NAME = "raw";

    private final List<ColumnMapping> columnMappings;
    private final ByteBuffer buffer;

    /**
     * Validates the column handles and allocates the row buffer.
     *
     * @param session the connector session, passed through to the superclass
     * @param columnHandles the columns to encode, in buffer order
     * @throws IllegalArgumentException if a column has an unsupported type, a format hint,
     *         a missing or malformed mapping, an invalid data format, or if the mappings
     *         leave gaps or overlap
     * @throws IndexOutOfBoundsException if a non-varchar column's mapping length differs
     *         from the size of its field type
     */
    public RawRowEncoder(ConnectorSession session, List<EncoderColumnHandle> columnHandles)
    {
        super(session, columnHandles);

        for (EncoderColumnHandle handle : this.columnHandles) {
            checkArgument(isSupportedType(handle.getType()), "Unsupported column type '%s' for column '%s'", handle.getType().getDisplayName(), handle.getName());
            checkArgument(handle.getFormatHint() == null, "Unexpected format hint '%s' defined for column '%s'", handle.getFormatHint(), handle.getName());
        }

        // parse column mappings from column handles
        this.columnMappings = this.columnHandles.stream().map(ColumnMapping::new).collect(toImmutableList());

        // fixed-width columns must span exactly the width of their field type;
        // varchar columns may have any length
        for (ColumnMapping mapping : this.columnMappings) {
            int expectedLength = mapping.getFieldType().getSize();
            if (mapping.getLength() != expectedLength && !(mapping.getType() instanceof VarcharType)) {
                throw new IndexOutOfBoundsException(format(
                        "Mapping length '%s' is not equal to expected length '%s' for column '%s'",
                        mapping.getLength(),
                        expectedLength,
                        mapping.getName()));
            }
        }

        // check that column mappings don't overlap and that there are no gaps
        int position = 0;
        for (ColumnMapping mapping : this.columnMappings) {
            checkArgument(mapping.getStart() == position,
                    "Start mapping '%s' for column '%s' does not equal expected mapping '%s'",
                    mapping.getStart(),
                    mapping.getName(),
                    position);
            checkArgument(mapping.getEnd() > mapping.getStart(),
                    "End mapping '%s' for column '%s' is less than or equal to start '%s'",
                    mapping.getEnd(),
                    mapping.getName(),
                    mapping.getStart());
            position += mapping.getLength();
        }

        // the buffer holds exactly one encoded row
        this.buffer = ByteBuffer.allocate(position);
    }

    /**
     * Parsed and validated layout information for a single column: its name, type,
     * field type and the half-open byte range {@code [start, end)} it occupies.
     */
    private static class ColumnMapping
    {
        private final String name;
        private final Type type;
        private final FieldType fieldType;
        private final int start;
        private final int end;

        /**
         * @param columnHandle the column to derive the mapping from
         * @throws IllegalArgumentException if the data format is unknown or not allowed for
         *         the column type, or if the mapping is missing or malformed
         */
        public ColumnMapping(EncoderColumnHandle columnHandle)
        {
            this.name = columnHandle.getName();
            this.type = columnHandle.getType();

            this.fieldType = parseFieldType(columnHandle.getDataFormat(), this.name);
            checkFieldType(this.name, this.type, this.fieldType);

            Optional<String> mapping = Optional.ofNullable(columnHandle.getMapping());
            if (!mapping.isPresent()) {
                throw new IllegalArgumentException(format("No mapping defined for column '%s'", this.name));
            }

            Matcher mappingMatcher = MAPPING_PATTERN.matcher(mapping.get());
            if (!mappingMatcher.matches()) {
                throw new IllegalArgumentException(format("Invalid mapping for column '%s'", this.name));
            }

            this.start = parseOffset(mappingMatcher.group(1), "start", this.name);
            // without an explicit end the column spans exactly one field of its field type
            String explicitEnd = mappingMatcher.group(2);
            this.end = (explicitEnd != null)
                    ? parseOffset(explicitEnd, "end", this.name)
                    : this.start + this.fieldType.getSize();
        }

        /**
         * Parses a decimal offset; digits that do not fit into an {@code int} are reported
         * as an {@link IllegalArgumentException}.
         */
        private static int parseOffset(String group, String offsetName, String columnName)
        {
            try {
                return parseInt(group);
            }
            catch (NumberFormatException e) {
                throw new IllegalArgumentException(format("Unable to parse '%s' offset for column '%s'", offsetName, columnName), e);
            }
        }

        /**
         * Resolves the case-insensitive {@code dataFormat} to a {@link FieldType};
         * a {@code null} or empty data format defaults to {@link FieldType#BYTE}.
         */
        private static FieldType parseFieldType(String dataFormat, String columnName)
        {
            if (dataFormat == null || dataFormat.isEmpty()) {
                return FieldType.BYTE;
            }
            try {
                return FieldType.valueOf(dataFormat.toUpperCase(Locale.ENGLISH));
            }
            catch (IllegalArgumentException e) {
                // keep the original exception as the cause for easier diagnosis
                throw new IllegalArgumentException(format("Invalid dataFormat '%s' for column '%s'", dataFormat, columnName), e);
            }
        }

        /**
         * Verifies that the declared field type is one of those allowed for the column type.
         *
         * <p>NOTE(review): REAL is accepted as a supported column type by the enclosing
         * class but has no branch here, so any field type passes for REAL columns -
         * confirm whether it should be restricted (e.g. to FLOAT).
         */
        private static void checkFieldType(String columnName, Type columnType, FieldType fieldType)
        {
            if (columnType == BIGINT) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE, FieldType.SHORT, FieldType.INT, FieldType.LONG);
            }
            else if (columnType == INTEGER) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE, FieldType.SHORT, FieldType.INT);
            }
            else if (columnType == SMALLINT) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE, FieldType.SHORT);
            }
            else if (columnType == TINYINT) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE);
            }
            else if (columnType == BOOLEAN) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE, FieldType.SHORT, FieldType.INT, FieldType.LONG);
            }
            else if (columnType == DOUBLE) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.DOUBLE, FieldType.FLOAT);
            }
            else if (columnType instanceof VarcharType) {
                checkFieldTypeOneOf(fieldType, columnName, columnType, FieldType.BYTE);
            }
        }

        /**
         * @throws IllegalArgumentException if {@code declaredFieldType} is not among
         *         {@code allowedFieldTypes}
         */
        private static void checkFieldTypeOneOf(FieldType declaredFieldType, String columnName, Type columnType, FieldType... allowedFieldTypes)
        {
            checkArgument(Arrays.asList(allowedFieldTypes).contains(declaredFieldType),
                    "Wrong dataformat '%s' specified for column '%s'; %s type implies use of %s",
                    declaredFieldType.name(),
                    columnName,
                    columnType,
                    Joiner.on("/").join(allowedFieldTypes));
        }

        public String getName()
        {
            return name;
        }

        public Type getType()
        {
            return type;
        }

        /**
         * @return the inclusive start offset of the column in the row buffer
         */
        public int getStart()
        {
            return start;
        }

        /**
         * @return the exclusive end offset of the column in the row buffer
         */
        public int getEnd()
        {
            return end;
        }

        public FieldType getFieldType()
        {
            return fieldType;
        }

        /**
         * @return the number of bytes the column occupies ({@code end - start})
         */
        public int getLength()
        {
            return end - start;
        }
    }

    /**
     * @return {@code true} for any varchar type and for the supported primitive types
     */
    private static boolean isSupportedType(Type type)
    {
        return type instanceof VarcharType || SUPPORTED_PRIMITIVE_TYPES.contains(type);
    }

    @Override
    protected void appendLong(long value)
    {
        buffer.putLong(value);
    }

    @Override
    protected void appendInt(int value)
    {
        buffer.putInt(value);
    }

    @Override
    protected void appendShort(short value)
    {
        buffer.putShort(value);
    }

    @Override
    protected void appendByte(byte value)
    {
        buffer.put(value);
    }

    @Override
    protected void appendDouble(double value)
    {
        buffer.putDouble(value);
    }

    @Override
    protected void appendFloat(float value)
    {
        buffer.putFloat(value);
    }

    /**
     * Writes a boolean as a single byte: 1 for {@code true}, 0 for {@code false}.
     */
    @Override
    protected void appendBoolean(boolean value)
    {
        buffer.put((byte) (value ? 1 : 0));
    }

    /**
     * Writes the UTF-8 bytes of {@code value}; the encoded length must match the mapping
     * length of the current column exactly (no padding or truncation is applied).
     *
     * @throws IllegalArgumentException if the encoded length differs from the mapping length
     */
    @Override
    protected void appendString(String value)
    {
        byte[] valueBytes = value.getBytes(StandardCharsets.UTF_8);
        int expectedLength = columnMappings.get(currentColumnIndex).getLength();
        // template form: the message is only built when the check fails (this runs per row)
        checkArgument(valueBytes.length == expectedLength,
                "length '%s' of message '%s' for column '%s' does not equal expected length '%s'",
                valueBytes.length,
                value,
                columnHandles.get(currentColumnIndex).getName(),
                expectedLength);
        buffer.put(valueBytes, 0, valueBytes.length);
    }

    /**
     * Writes the remaining bytes of {@code value} (from its position to its limit); their
     * count must match the mapping length of the current column exactly. The position of
     * {@code value} itself is left untouched.
     *
     * @throws IllegalArgumentException if the number of remaining bytes differs from the
     *         mapping length
     */
    @Override
    protected void appendByteBuffer(ByteBuffer value)
    {
        // use remaining() instead of array(): the backing array may be larger than the
        // readable window (slices, wrap with offset) and is unavailable for read-only or
        // direct buffers
        int actualLength = value.remaining();
        int expectedLength = columnMappings.get(currentColumnIndex).getLength();
        checkArgument(actualLength == expectedLength,
                "length '%s' of message for column '%s' does not equal expected length '%s'",
                actualLength,
                columnHandles.get(currentColumnIndex).getName(),
                expectedLength);
        // duplicate so the caller's position/limit are not modified by the bulk put
        buffer.put(value.duplicate());
    }

    /**
     * Returns the encoded row and resets the encoder for the next row.
     *
     * <p>NOTE: the returned array is the encoder's internal backing array, not a copy;
     * its contents are overwritten when the next row is appended.
     *
     * @return the bytes of the encoded row
     * @throws IllegalArgumentException if not all columns have been appended
     */
    @Override
    public byte[] toByteArray()
    {
        // make sure entire row has been updated with new values
        if (currentColumnIndex != columnHandles.size()) {
            throw new IllegalArgumentException(format("Missing %d columns", columnHandles.size() - currentColumnIndex));
        }

        resetColumnIndex(); // reset currentColumnIndex to prepare for next row
        buffer.clear(); // set buffer position back to 0 to prepare for next row, this method does not affect the backing byte array
        return buffer.array();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy