/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.metastore.MetastoreUtil;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.facebook.presto.hive.rcfile.HdfsRcFileDataSource;
import com.facebook.presto.rcfile.RcFileDataSource;
import com.facebook.presto.rcfile.RcFileEncoding;
import com.facebook.presto.rcfile.binary.BinaryRcFileEncoding;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.joda.time.DateTimeZone;

import javax.inject.Inject;

import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.function.Supplier;

import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED;
import static com.facebook.presto.hive.HiveType.toHiveTypes;
import static com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES;

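/**
 * Creates optimized RCFile writers for Hive tables whose storage format is
 * {@code RCFileOutputFormat} with either the binary or text columnar SerDe.
 * Returns {@link Optional#empty()} for any other format so a different
 * writer factory can handle the table.
 */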
public class RcFileFileWriterFactory
        implements HiveFileWriterFactory
{
    private final DateTimeZone hiveStorageTimeZone;
    private final HdfsEnvironment hdfsEnvironment;
    private final TypeManager typeManager;
    private final NodeVersion nodeVersion;
    private final FileFormatDataSourceStats stats;

    @Inject
    public RcFileFileWriterFactory(
            HdfsEnvironment hdfsEnvironment,
            TypeManager typeManager,
            NodeVersion nodeVersion,
            HiveClientConfig hiveClientConfig,
            FileFormatDataSourceStats stats)
    {
        this(hdfsEnvironment, typeManager, nodeVersion, requireNonNull(hiveClientConfig, "hiveClientConfig is null").getDateTimeZone(), stats);
    }

    public RcFileFileWriterFactory(
            HdfsEnvironment hdfsEnvironment,
            TypeManager typeManager,
            NodeVersion nodeVersion,
            DateTimeZone hiveStorageTimeZone,
            FileFormatDataSourceStats stats)
    {
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
        this.hiveStorageTimeZone = requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
        this.stats = requireNonNull(stats, "stats is null");
    }

    @Override
    public Optional<HiveFileWriter> createFileWriter(
            Path path,
            List<String> inputColumnNames,
            StorageFormat storageFormat,
            Properties schema,
            JobConf configuration,
            ConnectorSession session,
            Optional<EncryptionInformation> encryptionInformation)
    {
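        // The optimized RCFile writer is opt-in per session; when disabled, let another factory handle the write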
        if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
            return Optional.empty();
        }

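        // Only handle tables written with Hive's RCFileOutputFormat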
        if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
            return Optional.empty();
        }

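        // Pick the value encoding from the table's SerDe: LazyBinaryColumnarSerDe maps to the binary
        // RCFile encoding, ColumnarSerDe to the text encoding; any other SerDe is not supported here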
        RcFileEncoding rcFileEncoding;
        if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
            rcFileEncoding = new BinaryRcFileEncoding();
        }
        else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
            rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
        }
        else {
            return Optional.empty();
        }

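        // Compression codec, if any, comes from the job configuration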
        Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

        // existing tables and partitions may have columns in a different order than the writer is providing, so build
        // an index to rearrange columns in the proper order
        List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
        List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
                .map(hiveType -> hiveType.getType(typeManager))
                .collect(toList());

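        // Columns present in the file schema but missing from the input map to index -1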
        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();

        try {
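            // Create the target file through the HDFS environment so the write runs as the session user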
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
            OutputStream outputStream = fileSystem.create(path);

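            // When validation is enabled, the writer re-reads the finished file to verify its contents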
            Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
            if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
                validationInputFactory = Optional.of(() -> {
                    try {
                        return new HdfsRcFileDataSource(
                                path.toString(),
                                fileSystem.open(path),
                                fileSystem.getFileStatus(path).getLen(),
                                stats);
                    }
                    catch (IOException e) {
                        throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                    }
                });
            }

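            // On failure, the rollback action deletes the partially written file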
            Callable<Void> rollbackAction = () -> {
                fileSystem.delete(path, false);
                return null;
            };

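            // Record the writer version and query id in the file metadata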
            return Optional.of(new RcFileFileWriter(
                    outputStream,
                    rollbackAction,
                    rcFileEncoding,
                    fileColumnTypes,
                    codecName,
                    fileInputColumnIndexes,
                    ImmutableMap.<String, String>builder()
                            .put(MetastoreUtil.PRESTO_VERSION_NAME, nodeVersion.toString())
                            .put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId())
                            .build(),
                    validationInputFactory));
        }
        catch (Exception e) {
            throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
        }
    }
}