// org.apache.gobblin.hive.HiveRegistrationUnitComparator Maven / Gradle / Ivy
// Show all versions of gobblin-hive-registration Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gobblin.hive;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import com.google.common.base.Optional;
import org.apache.gobblin.annotation.Alpha;
import org.apache.gobblin.configuration.State;
/**
* A comparator between an existing {@link HiveRegistrationUnit} and a new {@link HiveRegistrationUnit}. It is
* used to determine whether the existing {@link HiveRegistrationUnit} should be altered to match the new
* {@link HiveRegistrationUnit}.
*
*
* Since altering a Hive table/partition is relatively expensive, when registering a new table/partition, if the
* table/partition exists, it is usually beneficial to check whether the existing table/partition needs to be
* altered before altering it.
*
*
*
* This class does not implement {@link java.util.Comparator} and does not conform to the contract of
* {@link java.util.Comparator}.
*
*
*
* Sample usage:
*
*
{@code
* HiveRegistrationUnitComparator> comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
* boolean needToUpdate = comparator.compareInputFormat().compareOutputFormat().compareNumBuckets()
* .compareIsCompressed().compareRawLocation().result();
* }}
*
*
* Or to compare all fields:
*
* {@code
* HiveRegistrationUnitComparator> comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
* boolean needToUpdate = comparator.compareAll().result();
* }}
*
*
*
* @author Ziyang Liu
*/
@Alpha
public class HiveRegistrationUnitComparator> {
protected final HiveRegistrationUnit existingUnit;
protected final HiveRegistrationUnit newUnit;
protected boolean result = false;
public HiveRegistrationUnitComparator(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit) {
this.existingUnit = existingUnit;
this.newUnit = newUnit;
}
/**
* Compare the raw locations (without schema and authority).
*
*
* This is useful since existing tables/partitions in the Hive metastore have absolute paths in the location
* property, but the new table/partition may have a raw path.
*
*/
@SuppressWarnings("unchecked")
public T compareRawLocation() {
if (!this.result) {
this.result |= (!new Path(this.existingUnit.getLocation().get()).toUri().getRawPath()
.equals(new Path(this.newUnit.getLocation().get()).toUri().getRawPath()));
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareInputFormat() {
if (!this.result) {
compare(this.existingUnit.getInputFormat(), this.newUnit.getInputFormat());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareOutputFormat() {
if (!this.result) {
compare(this.existingUnit.getOutputFormat(), this.newUnit.getOutputFormat());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareIsCompressed() {
if (!this.result) {
compare(this.existingUnit.getIsCompressed(), this.newUnit.getIsCompressed());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareNumBuckets() {
if (!this.result) {
compare(this.existingUnit.getNumBuckets(), this.newUnit.getNumBuckets());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareBucketCols() {
if (!this.result) {
compare(this.existingUnit.getBucketColumns(), this.newUnit.getBucketColumns());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareIsStoredAsSubDirs() {
if (!this.result) {
compare(this.existingUnit.getIsStoredAsSubDirs(), this.newUnit.getIsStoredAsSubDirs());
}
return (T) this;
}
@SuppressWarnings("unchecked")
public T compareParameters() {
if (!this.result) {
checkExistingIsSuperstate(this.existingUnit.getProps(), this.newUnit.getProps());
checkExistingIsSuperstate(this.existingUnit.getStorageProps(), this.newUnit.getStorageProps());
checkExistingIsSuperstate(this.existingUnit.getSerDeProps(), this.newUnit.getSerDeProps());
}
return (T) this;
}
/**
* Compare all parameters.
*/
@SuppressWarnings("unchecked")
public T compareAll() {
this.compareInputFormat().compareOutputFormat().compareIsCompressed().compareIsStoredAsSubDirs().compareNumBuckets()
.compareBucketCols().compareRawLocation().compareParameters();
return (T) this;
}
/**
* Compare an existing value and a new value, and set {@link #result} accordingly.
*
*
* This method returns false if newValue is absent (i.e., the existing value doesn't need to be updated).
* This is because when adding a table/partition to Hive, Hive automatically sets default values for
* some of the unspecified parameters. Therefore existingValue being present and newValue being absent
* doesn't mean the existing value needs to be updated.
*
*/
protected void compare(Optional existingValue, Optional newValue) {
boolean different;
if (!newValue.isPresent()) {
different = false;
} else {
different = !existingValue.isPresent() || !existingValue.get().equals(newValue.get());
}
this.result |= different;
}
/**
* Compare an existing state and a new {@link State} to ensure that the existing {@link State} contains all entries in the new
* {@link State}, and update {@link #result} accordingly.
*/
protected void checkExistingIsSuperstate(State existingState, State newState) {
checkExistingIsSuperset(existingState.getProperties().entrySet(), newState.getProperties().entrySet());
}
/**
* Compare an existing state and a new {@link Set} to ensure that the existing {@link Set} contains all entries in the new
* {@link Set}, and update {@link #result} accordingly.
*/
protected void checkExistingIsSuperset(Set existingSet, Set newSet) {
this.result |= !existingSet.containsAll(newSet);
}
/**
* Get the result of comparison.
* @return true if the existing {@link HiveRegistrationUnit} needs to be altered, false otherwise.
*/
public boolean result() {
boolean resultCopy = this.result;
this.result = false;
return resultCopy;
}
}