/*
 *  Copyright (c) 2016 Uber Technologies, Inc. ([email protected])
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.common.util;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;

import com.uber.hoodie.avro.model.HoodieCleanMetadata;
import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata;
import com.uber.hoodie.avro.model.HoodieRollbackMetadata;
import com.uber.hoodie.avro.model.HoodieRollbackPartitionMetadata;
import com.uber.hoodie.avro.model.HoodieSavepointMetadata;
import com.uber.hoodie.avro.model.HoodieSavepointPartitionMetadata;
import com.uber.hoodie.common.HoodieCleanStat;
import com.uber.hoodie.common.HoodieRollbackStat;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.exception.HoodieIOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;

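/**
 * Helpers for working with Avro-encoded Hoodie metadata: loading {@link HoodieRecord}s from
 * Avro delta files, converting clean/rollback/savepoint stats into their Avro metadata models,
 * and round-tripping those models through Avro's data-file format as byte arrays.
 *
 * <p>A minimal round-trip sketch (the {@code cleanStats} list is assumed to be built by the
 * caller):
 * <pre>
 *   HoodieCleanMetadata metadata =
 *       AvroUtils.convertCleanMetadata("20180101000000", Optional.of(100L), cleanStats);
 *   byte[] bytes = AvroUtils.serializeCleanMetadata(metadata).get();
 *   HoodieCleanMetadata roundTripped = AvroUtils.deserializeHoodieCleanMetadata(bytes);
 * </pre>
 */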
public class AvroUtils {

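    /**
     * Reads every record from each of the given Avro delta files, in order, and returns them as
     * a single flattened list. See {@link #loadFromFile(FileSystem, String, Schema)} for the
     * per-file behavior.
     */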
    public static List<HoodieRecord<HoodieAvroPayload>> loadFromFiles(FileSystem fs,
        List<String> deltaFilePaths, Schema expectedSchema) {
        List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
        deltaFilePaths.forEach(s -> {
            List<HoodieRecord<HoodieAvroPayload>> records = loadFromFile(fs, s, expectedSchema);
            loadedRecords.addAll(records);
        });
        return loadedRecords;
    }

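    /**
     * Reads all records from a single Avro delta file, decoding each against
     * {@code expectedSchema} (the reader schema, to account for schema evolution) and wrapping
     * it in a {@link HoodieRecord} keyed by the record-key and partition-path metadata fields.
     *
     * @throws HoodieIOException if the file cannot be read
     */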
    public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs,
        String deltaFilePath, Schema expectedSchema) {
        List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
        Path path = new Path(deltaFilePath);
        try {
            SeekableInput input = new FsInput(path, fs.getConf());
            GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
            // Set the expected schema to be the current schema to account for schema evolution
            reader.setExpected(expectedSchema);

            FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
            for (GenericRecord deltaRecord : fileReader) {
                String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                String partitionPath =
                    deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
                loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
                    new HoodieAvroPayload(Optional.of(deltaRecord))));
            }
            fileReader.close(); // also closes underlying FsInput
        } catch (IOException e) {
            throw new HoodieIOException("Could not read avro records from path " + deltaFilePath,
                e);
        }
        return loadedRecords;
    }

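    /**
     * Rolls a list of per-partition {@link HoodieCleanStat}s up into a single
     * {@link HoodieCleanMetadata}, tracking the total number of files deleted and the earliest
     * commit to retain (identical across partitions). A missing duration is recorded as -1.
     */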
    public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
        Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
        ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
            ImmutableMap.builder();
        int totalDeleted = 0;
        String earliestCommitToRetain = null;
        for (HoodieCleanStat stat : cleanStats) {
            HoodieCleanPartitionMetadata metadata =
                new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
                    stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
                    stat.getFailedDeleteFiles());
            partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
            totalDeleted += stat.getSuccessDeleteFiles().size();
            if (earliestCommitToRetain == null) {
                // This will be the same for all partitions
                earliestCommitToRetain = stat.getEarliestCommitToRetain();
            }
        }
        return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
            totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
    }

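    /**
     * Rolls a list of per-partition {@link HoodieRollbackStat}s up into a single
     * {@link HoodieRollbackMetadata} covering the given commits. A missing duration is recorded
     * as -1.
     */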
    public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
        Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
        ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
            ImmutableMap.builder();
        int totalDeleted = 0;
        for (HoodieRollbackStat stat : stats) {
            HoodieRollbackPartitionMetadata metadata =
                new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
                    stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
            partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
            totalDeleted += stat.getSuccessDeleteFiles().size();
        }
        return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
            totalDeleted, commits, partitionMetadataBuilder.build());
    }

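    /**
     * Builds a {@link HoodieSavepointMetadata} from a map of partition path to the files that
     * must be preserved for that partition, stamped with the current wall-clock time.
     */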
    public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
        Map<String, List<String>> latestFiles) {
        ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
            ImmutableMap.builder();
        for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
            HoodieSavepointPartitionMetadata metadata =
                new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
            partitionMetadataBuilder.put(stat.getKey(), metadata);
        }
        return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
            partitionMetadataBuilder.build());
    }


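    /**
     * Serializes clean metadata to Avro data-file bytes.
     */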
    public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
        throws IOException {
        return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
    }

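    /**
     * Serializes savepoint metadata to Avro data-file bytes.
     */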
    public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
        throws IOException {
        return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
    }

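    /**
     * Serializes rollback metadata to Avro data-file bytes.
     */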
    public static Optional<byte[]> serializeRollbackMetadata(
        HoodieRollbackMetadata rollbackMetadata) throws IOException {
        return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
    }

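    /**
     * Serializes any Avro specific record to the Avro data-file format (schema header plus data
     * blocks) in memory and returns the resulting bytes. The output always contains exactly one
     * record.
     */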
    public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
        Class<T> clazz) throws IOException {
        DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
        DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        fileWriter.create(metadata.getSchema(), baos);
        fileWriter.append(metadata);
        fileWriter.flush();
        fileWriter.close(); // close after flush; a no-op for the backing ByteArrayOutputStream
        return Optional.of(baos.toByteArray());
    }

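    /**
     * Deserializes clean metadata previously written by {@link #serializeCleanMetadata}.
     */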
    public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
        throws IOException {
        return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
    }

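    /**
     * Deserializes savepoint metadata previously written by {@link #serializeSavepointMetadata}.
     */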
    public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
        throws IOException {
        return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
    }

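    /**
     * Reads the first (and, for metadata written by this class, only) record from Avro data-file
     * bytes, failing if the payload contains no records.
     */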
    public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
        Class<T> clazz) throws IOException {
        DatumReader<T> reader = new SpecificDatumReader<>(clazz);
        FileReader<T> fileReader =
            DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
        Preconditions
            .checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
        return fileReader.next();
    }

}