gobblin.hive.HiveRegistrationUnitComparator Maven / Gradle / Ivy
/*
 * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */
package gobblin.hive;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import com.google.common.base.Optional;
import gobblin.annotation.Alpha;
import gobblin.configuration.State;
/**
 * A comparator between an existing {@link HiveRegistrationUnit} and a new {@link HiveRegistrationUnit}. It is
 * used to determine whether the existing {@link HiveRegistrationUnit} should be altered to match the new
 * {@link HiveRegistrationUnit}.
 *
 * 
 *   Since altering a Hive table/partition is relatively expensive, when registering a new table/partition, if the
 *   table/partition exists, it is usually beneficial to check whether the existing table/partition needs to be
 *   altered before altering it.
 * 
 *
 * 
 *   This class does not implement {@link java.util.Comparator} and does not conform to the contract of
 *   {@link java.util.Comparator}.
 * 
 *
 * 
 *   Sample usage:
 *
 *   
 {@code
 *     HiveRegistrationUnitComparator> comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
 *     boolean needToUpdate = comparator.compareInputFormat().compareOutputFormat().compareNumBuckets()
 *      .compareIsCompressed().compareRawLocation().result();
 *     }}
 *   
 *
 *   Or to compare all fields:
 *
 *    {@code
 *     HiveRegistrationUnitComparator> comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
 *     boolean needToUpdate = comparator.compareAll().result();
 *     }}
 *   
 * 
 *
 * @author ziliu
 */
@Alpha
public class HiveRegistrationUnitComparator> {
  protected final HiveRegistrationUnit existingUnit;
  protected final HiveRegistrationUnit newUnit;
  protected boolean result = false;
  public HiveRegistrationUnitComparator(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit) {
    this.existingUnit = existingUnit;
    this.newUnit = newUnit;
  }
  /**
   * Compare the raw locations (without schema and authority).
   *
   * 
   *   This is useful since existing tables/partitions in the Hive metastore have absolute paths in the location
   *   property, but the new table/partition may have a raw path.
   * 
   */
  @SuppressWarnings("unchecked")
  public T compareRawLocation() {
    if (!this.result) {
      this.result |= (!new Path(this.existingUnit.getLocation().get()).toUri().getRawPath()
          .equals(new Path(this.newUnit.getLocation().get()).toUri().getRawPath()));
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareInputFormat() {
    if (!this.result) {
      compare(this.existingUnit.getInputFormat(), this.newUnit.getInputFormat());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareOutputFormat() {
    if (!this.result) {
      compare(this.existingUnit.getOutputFormat(), this.newUnit.getOutputFormat());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareIsCompressed() {
    if (!this.result) {
      compare(this.existingUnit.getIsCompressed(), this.newUnit.getIsCompressed());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareNumBuckets() {
    if (!this.result) {
      compare(this.existingUnit.getNumBuckets(), this.newUnit.getNumBuckets());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareBucketCols() {
    if (!this.result) {
      compare(this.existingUnit.getBucketColumns(), this.newUnit.getBucketColumns());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareIsStoredAsSubDirs() {
    if (!this.result) {
      compare(this.existingUnit.getIsStoredAsSubDirs(), this.newUnit.getIsStoredAsSubDirs());
    }
    return (T) this;
  }
  @SuppressWarnings("unchecked")
  public T compareParameters() {
    if (!this.result) {
      checkExistingIsSuperstate(this.existingUnit.getProps(), this.newUnit.getProps());
      checkExistingIsSuperstate(this.existingUnit.getStorageProps(), this.newUnit.getStorageProps());
      checkExistingIsSuperstate(this.existingUnit.getSerDeProps(), this.newUnit.getSerDeProps());
    }
    return (T) this;
  }
  /**
   * Compare all parameters.
   */
  @SuppressWarnings("unchecked")
  public T compareAll() {
    this.compareInputFormat().compareOutputFormat().compareIsCompressed().compareIsStoredAsSubDirs().compareNumBuckets()
        .compareBucketCols().compareRawLocation().compareParameters();
    return (T) this;
  }
  /**
   * Compare an existing value and a new value, and set {@link #result} accordingly.
   *
   * 
   *   This method returns false if newValue is absent (i.e., the existing value doesn't need to be updated).
   *   This is because when adding a table/partition to Hive, Hive automatically sets default values for
   *   some of the unspecified parameters. Therefore existingValue being present and newValue being absent
   *   doesn't mean the existing value needs to be updated.
   * 
   */
  protected  void compare(Optional existingValue, Optional newValue) {
    boolean different;
    if (!newValue.isPresent()) {
      different = false;
    } else {
      different = !existingValue.isPresent() || !existingValue.get().equals(newValue.get());
    }
    this.result |= different;
  }
  /**
   * Compare an existing state and a new {@link State} to ensure that the existing {@link State} contains all entries in the new
   * {@link State}, and update {@link #result} accordingly.
   */
  protected void checkExistingIsSuperstate(State existingState, State newState) {
    checkExistingIsSuperset(existingState.getProperties().entrySet(), newState.getProperties().entrySet());
  }
  /**
   * Compare an existing state and a new {@link Set} to ensure that the existing {@link Set} contains all entries in the new
   * {@link Set}, and update {@link #result} accordingly.
   */
  protected  void checkExistingIsSuperset(Set existingSet, Set newSet) {
    this.result |= !existingSet.containsAll(newSet);
  }
  /**
   * Get the result of comparison.
   * @return true if the existing {@link HiveRegistrationUnit} needs to be altered, false otherwise.
   */
  public boolean result() {
    boolean resultCopy = this.result;
    this.result = false;
    return resultCopy;
  }
}