// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// com.expedia.dsp.data.squeeze.models.CompactionCriteria Maven / Gradle / Ivy

/**
 * Copyright (C) 2018 Expedia Group
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.expedia.dsp.data.squeeze.models;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.EnumUtils;

import java.util.Map;

/**
 * Criteria for compaction.
 *
 * @author Yashraj R. Sontakke
 */
@Getter
@Slf4j
public class CompactionCriteria {

    /**
     * the source path for compaction
     */
    private String sourcePath;
    /**
     * the target path for compaction
     */
    private String targetPath;
    /**
     * threshold in bytes for compaction. If file size is greater then no compaction on file, file is just copied to
     * target directory else file is compacted. Optional parameter, if not provided, defaults to threshold from config
     * resources file (128 MB).
     */
    private Long thresholdInBytes;

    /**
     * Max reducers for the map reduce job.
     */
    private Long maxReducers;

    /**
     * File type to initiate valid Map Reduce job.
     * Its hard to determine the file types for few formats.
     */
    private String fileType;

    /**
     * The schema path which should be used for compaction
     */
    private String schemaPath;


    /**
     * Constructor
     *
     * @param options map of various options keyed by name
     */
    public CompactionCriteria(final Map options) throws NumberFormatException {
        this.sourcePath = options.get("sourcePath");
        this.targetPath = options.get("targetPath");
        try {
            if (options.get("thresholdInBytes") != null) {
                this.thresholdInBytes = Long.parseLong(options.get("thresholdInBytes"));
            }

        } catch (NumberFormatException e) {
            log.error("Error converting threshold from string to long {}", e.getMessage());
            throw e;
        }

        try {
            if (options.get("maxReducers") != null) {
                this.maxReducers = Long.parseLong(options.get("maxReducers"));
            }
        } catch (NumberFormatException e) {
            log.error("Error converting maxReducers from string to long {}", e.getMessage());
            throw e;
        }
        if (options.get("fileType") != null) {

            if (EnumUtils.isValidEnum(FileType.class, options.get("fileType"))) {
                this.fileType = options.get("fileType");
            } else {
                log.error("ERROR: Unsupported file type {}, we currently support only TEXT, SEQ, ORC and AVRO", options.get("fileType"));
                throw new IllegalArgumentException();
            }
        }
        if (options.get("schemaPath") != null) {
            this.schemaPath = options.get("schemaPath");
        }

        if (this.fileType != null && this.fileType.equals("AVRO") && this.schemaPath == null) {
            log.error("ERROR: For file type AVRO schema path is mandatory.");
            throw new IllegalArgumentException();
        }

    }

    /**
     * Constructor
     *
     * @param sourcePath       the source path
     * @param targetPath       the target path
     * @param thresholdInBytes threshold in bytes for compaction. If file size is greater then no compaction on file,
     *                         file is just copied to target directory else file is compacted. Optional parameter, if
     *                         not provided, defaults to threshold from config resources file (128 MB).
     * @param maxReducers      max reducers for the map reduce job
     */
    public CompactionCriteria(final String sourcePath, final String targetPath, final Long thresholdInBytes,
                              final Long maxReducers) {
        this.sourcePath = sourcePath;
        this.targetPath = targetPath;
        this.thresholdInBytes = thresholdInBytes;
        this.maxReducers = maxReducers;
    }

    /**
     * Constructor
     *
     * @param sourcePath       the source path
     * @param targetPath       the target path
     * @param thresholdInBytes threshold in bytes for compaction. If file size is greater then no compaction on file,
     *                         file is just copied to target directory else file is compacted. Optional parameter, if
     *                         not provided, defaults to threshold from config resources file (128 MB).
     * @param maxReducers      max reducers for the map reduce job
     * @param fileType         type of the file we are compacting (mandatory for AVRO)
     * @param schemaPath       Path to schema when file type is AVRO.
     */
    public CompactionCriteria(final String sourcePath, final String targetPath, final Long thresholdInBytes,
                              final Long maxReducers, final String fileType, final String schemaPath) {
        this.sourcePath = sourcePath;
        this.targetPath = targetPath;
        this.thresholdInBytes = thresholdInBytes;
        this.maxReducers = maxReducers;
        this.fileType = fileType;
        this.schemaPath = schemaPath;
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy