All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.orc.stream.ValueStreams Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.stream;

import com.facebook.presto.orc.StreamId;
import com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind;
import com.facebook.presto.orc.metadata.OrcType.OrcTypeKind;

import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT_V2;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DWRF_DIRECT;
import static com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.DECIMAL;
import static com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.INT;
import static com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.IN_DICTIONARY;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_GROUP_DICTIONARY;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_GROUP_DICTIONARY_LENGTH;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.SECONDARY;
import static java.lang.String.format;

public final class ValueStreams
{
    private ValueStreams()
    {
    }

    public static ValueStream createValueStreams(
            StreamId streamId,
            OrcInputStream inputStream,
            OrcTypeKind type,
            ColumnEncodingKind encoding,
            boolean usesVInt)
    {
        if (streamId.getStreamKind() == PRESENT) {
            return new BooleanStream(inputStream);
        }

        // dictionary length and data streams are unsigned int streams
        if ((encoding == DICTIONARY || encoding == DICTIONARY_V2) && (streamId.getStreamKind() == LENGTH || streamId.getStreamKind() == DATA)) {
            return createLongStream(inputStream, encoding, INT, false, usesVInt);
        }

        if (streamId.getStreamKind() == DATA) {
            switch (type) {
                case BOOLEAN:
                    return new BooleanStream(inputStream);
                case BYTE:
                    return new ByteStream(inputStream);
                case SHORT:
                case INT:
                case LONG:
                case DATE:
                    return createLongStream(inputStream, encoding, type, true, usesVInt);
                case FLOAT:
                    return new FloatStream(inputStream);
                case DOUBLE:
                    return new DoubleStream(inputStream);
                case STRING:
                case VARCHAR:
                case BINARY:
                    return new ByteArrayStream(inputStream);
                case TIMESTAMP:
                    return createLongStream(inputStream, encoding, type, true, usesVInt);
                case DECIMAL:
                    return new DecimalStream(inputStream);
            }
        }

        // length stream of a direct encoded string or binary column
        if (streamId.getStreamKind() == LENGTH) {
            switch (type) {
                case STRING:
                case VARCHAR:
                case BINARY:
                case MAP:
                case LIST:
                    return createLongStream(inputStream, encoding, type, false, usesVInt);
            }
        }

        // length stream of a the row group dictionary
        if (streamId.getStreamKind() == ROW_GROUP_DICTIONARY_LENGTH) {
            switch (type) {
                case STRING:
                case VARCHAR:
                case BINARY:
                    return new RowGroupDictionaryLengthStream(inputStream, false);
            }
        }

        // row group dictionary
        if (streamId.getStreamKind() == ROW_GROUP_DICTIONARY) {
            switch (type) {
                case STRING:
                case VARCHAR:
                case BINARY:
                    return new ByteArrayStream(inputStream);
            }
        }

        // row group dictionary
        if (streamId.getStreamKind() == IN_DICTIONARY) {
            return new BooleanStream(inputStream);
        }

        // length (nanos) of a timestamp column
        if (type == TIMESTAMP && streamId.getStreamKind() == SECONDARY) {
            return createLongStream(inputStream, encoding, type, false, usesVInt);
        }

        // scale of a decimal column
        if (type == DECIMAL && streamId.getStreamKind() == SECONDARY) {
            // specification (https://orc.apache.org/docs/encodings.html) says scale stream is unsigned,
            // however Hive writer stores scale as signed integer (org.apache.hadoop.hive.ql.io.orc.WriterImpl.DecimalTreeWriter)
            // BUG link: https://issues.apache.org/jira/browse/HIVE-13229
            return createLongStream(inputStream, encoding, type, true, usesVInt);
        }

        if (streamId.getStreamKind() == DICTIONARY_DATA) {
            switch (type) {
                case SHORT:
                case INT:
                case LONG:
                    return createLongStream(inputStream, DWRF_DIRECT, INT, true, usesVInt);
                case STRING:
                case VARCHAR:
                case CHAR:
                case BINARY:
                    return new ByteArrayStream(inputStream);
            }
        }

        throw new IllegalArgumentException(format("Unsupported column type %s for stream %s with encoding %s", type, streamId, encoding));
    }

    private static ValueStream createLongStream(
            OrcInputStream inputStream,
            ColumnEncodingKind encoding,
            OrcTypeKind type,
            boolean signed,
            boolean usesVInt)
    {
        if (encoding == DIRECT_V2 || encoding == DICTIONARY_V2) {
            return new LongStreamV2(inputStream, signed, false);
        }
        else if (encoding == DIRECT || encoding == DICTIONARY) {
            return new LongStreamV1(inputStream, signed);
        }
        else if (encoding == DWRF_DIRECT) {
            return new LongStreamDwrf(inputStream, type, signed, usesVInt);
        }
        else {
            throw new IllegalArgumentException("Unsupported encoding for long stream: " + encoding);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy