All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.gobblin.hive.HiveRegistrationUnitComparator Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.hive;

import java.util.Set;

import org.apache.hadoop.fs.Path;

import com.google.common.base.Optional;

import org.apache.gobblin.annotation.Alpha;
import org.apache.gobblin.configuration.State;


/**
 * A comparator between an existing {@link HiveRegistrationUnit} and a new {@link HiveRegistrationUnit}. It is
 * used to determine whether the existing {@link HiveRegistrationUnit} should be altered to match the new
 * {@link HiveRegistrationUnit}.
 *
 * 

* Since altering a Hive table/partition is relatively expensive, when registering a new table/partition, if the * table/partition exists, it is usually beneficial to check whether the existing table/partition needs to be * altered before altering it. *

* *

* This class does not implement {@link java.util.Comparator} and does not conform to the contract of * {@link java.util.Comparator}. *

* *

* Sample usage: * *

 {@code
 *     HiveRegistrationUnitComparator comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
 *     boolean needToUpdate = comparator.compareInputFormat().compareOutputFormat().compareNumBuckets()
 *      .compareIsCompressed().compareRawLocation().result();
 *     }}
 *   
* * Or to compare all fields: * *
 {@code
 *     HiveRegistrationUnitComparator comparator = new HiveRegistrationUnitComparator<>(existingTable, newTable);
 *     boolean needToUpdate = comparator.compareAll().result();
 *     }}
 *   
*

* * @author Ziyang Liu */ @Alpha public class HiveRegistrationUnitComparator> { protected final HiveRegistrationUnit existingUnit; protected final HiveRegistrationUnit newUnit; protected boolean result = false; public HiveRegistrationUnitComparator(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit) { this.existingUnit = existingUnit; this.newUnit = newUnit; } /** * Compare the raw locations (without schema and authority). * *

* This is useful since existing tables/partitions in the Hive metastore have absolute paths in the location * property, but the new table/partition may have a raw path. *

*/ @SuppressWarnings("unchecked") public T compareRawLocation() { if (!this.result) { this.result |= (!new Path(this.existingUnit.getLocation().get()).toUri().getRawPath() .equals(new Path(this.newUnit.getLocation().get()).toUri().getRawPath())); } return (T) this; } @SuppressWarnings("unchecked") public T compareInputFormat() { if (!this.result) { compare(this.existingUnit.getInputFormat(), this.newUnit.getInputFormat()); } return (T) this; } @SuppressWarnings("unchecked") public T compareOutputFormat() { if (!this.result) { compare(this.existingUnit.getOutputFormat(), this.newUnit.getOutputFormat()); } return (T) this; } @SuppressWarnings("unchecked") public T compareIsCompressed() { if (!this.result) { compare(this.existingUnit.getIsCompressed(), this.newUnit.getIsCompressed()); } return (T) this; } @SuppressWarnings("unchecked") public T compareNumBuckets() { if (!this.result) { compare(this.existingUnit.getNumBuckets(), this.newUnit.getNumBuckets()); } return (T) this; } @SuppressWarnings("unchecked") public T compareBucketCols() { if (!this.result) { compare(this.existingUnit.getBucketColumns(), this.newUnit.getBucketColumns()); } return (T) this; } @SuppressWarnings("unchecked") public T compareIsStoredAsSubDirs() { if (!this.result) { compare(this.existingUnit.getIsStoredAsSubDirs(), this.newUnit.getIsStoredAsSubDirs()); } return (T) this; } @SuppressWarnings("unchecked") public T compareParameters() { if (!this.result) { checkExistingIsSuperstate(this.existingUnit.getProps(), this.newUnit.getProps()); checkExistingIsSuperstate(this.existingUnit.getStorageProps(), this.newUnit.getStorageProps()); checkExistingIsSuperstate(this.existingUnit.getSerDeProps(), this.newUnit.getSerDeProps()); } return (T) this; } /** * Compare all parameters. */ @SuppressWarnings("unchecked") public T compareAll() { this.compareInputFormat().compareOutputFormat().compareIsCompressed().compareIsStoredAsSubDirs().compareNumBuckets() .compareBucketCols().compareRawLocation().compareParameters(); return (T) this; } /** * Compare an existing value and a new value, and set {@link #result} accordingly. * *

* This method returns false if newValue is absent (i.e., the existing value doesn't need to be updated). * This is because when adding a table/partition to Hive, Hive automatically sets default values for * some of the unspecified parameters. Therefore existingValue being present and newValue being absent * doesn't mean the existing value needs to be updated. *

*/ protected void compare(Optional existingValue, Optional newValue) { boolean different; if (!newValue.isPresent()) { different = false; } else { different = !existingValue.isPresent() || !existingValue.get().equals(newValue.get()); } this.result |= different; } /** * Compare an existing state and a new {@link State} to ensure that the existing {@link State} contains all entries in the new * {@link State}, and update {@link #result} accordingly. */ protected void checkExistingIsSuperstate(State existingState, State newState) { checkExistingIsSuperset(existingState.getProperties().entrySet(), newState.getProperties().entrySet()); } /** * Compare an existing state and a new {@link Set} to ensure that the existing {@link Set} contains all entries in the new * {@link Set}, and update {@link #result} accordingly. */ protected void checkExistingIsSuperset(Set existingSet, Set newSet) { this.result |= !existingSet.containsAll(newSet); } /** * Get the result of comparison. * @return true if the existing {@link HiveRegistrationUnit} needs to be altered, false otherwise. */ public boolean result() { boolean resultCopy = this.result; this.result = false; return resultCopy; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy