All downloads are free. The search and download functionality uses the official Maven repository.

com.hotels.bdp.circustrain.tool.vacuum.PartitionedTablePathResolver Maven / Gradle / Ivy

/**
 * Copyright (C) 2016-2018 Expedia Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hotels.bdp.circustrain.tool.vacuum;

import static com.hotels.hcommon.hive.metastore.util.LocationUtils.locationAsPath;

import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hotels.hcommon.hive.metastore.iterator.PartitionIterator;

/**
 * {@link TablePathResolver} for a partitioned table.
 * <p>
 * On construction it reads the table's base location and the location of one of its partitions, and derives a glob
 * path under the base location with one {@code *} segment per directory level separating the two.
 * {@link #getMetastorePaths(short, int)} then collects the locations of all partitions registered in the metastore.
 */
class PartitionedTablePathResolver implements TablePathResolver {

  private static final Logger LOG = LoggerFactory.getLogger(PartitionedTablePathResolver.class);

  private final Path tableBaseLocation;
  private final Path globPath;
  private final IMetaStoreClient metastore;
  private final Table table;

  /**
   * Resolves the base location of {@code table} and builds the glob path from a sample partition.
   *
   * @param metastore client used to list the table's partitions, both here and in
   *          {@link #getMetastorePaths(short, int)}
   * @param table the partitioned table to resolve paths for
   * @throws ConfigurationException if the metastore returns no partitions for the table
   * @throws NoSuchObjectException declared for the metastore partition listing
   * @throws MetaException declared for the metastore partition listing
   * @throws TException declared for the metastore partition listing
   */
  PartitionedTablePathResolver(IMetaStoreClient metastore, Table table)
      throws NoSuchObjectException, MetaException, TException {
    this.metastore = metastore;
    this.table = table;
    LOG.debug("Table '{}' is partitioned", Warehouse.getQualifiedName(table));
    tableBaseLocation = locationAsPath(table);
    // A single partition is enough to measure how deep partition folders sit below the table folder.
    List<Partition> onePartition = metastore.listPartitions(table.getDbName(), table.getTableName(), (short) 1);
    if (onePartition.isEmpty()) {
      LOG.warn("Table '{}' has no partitions, perhaps you can simply delete: {}.", Warehouse.getQualifiedName(table),
          tableBaseLocation);
      throw new ConfigurationException();
    }
    Path partitionLocation = locationAsPath(onePartition.get(0));
    // One "*" per directory level between the table base and the sampled partition, joined by "/".
    // NOTE(review): if the sampled partition is not deeper than the table base location, 'branches' is <= 0 and
    // the glob suffix is empty - confirm how callers expect that case to be handled.
    int branches = partitionLocation.depth() - tableBaseLocation.depth();
    String globSuffix = StringUtils.repeat("*", "/", branches);
    globPath = new Path(tableBaseLocation, globSuffix);
  }

  /**
   * @return the glob path computed at construction time: the table base location followed by one {@code *} segment
   *         per partition directory level
   */
  @Override
  public Path getGlobPath() {
    return globPath;
  }

  /**
   * @return the table's location as read from the metastore {@link Table} at construction time
   */
  @Override
  public Path getTableBaseLocation() {
    return tableBaseLocation;
  }

  /**
   * Iterates over the table's partitions and returns their normalised locations.
   *
   * @param batchSize passed to the {@link PartitionIterator}; presumably the number of partitions fetched per
   *          metastore request - confirm against {@code PartitionIterator}
   * @param expectedPathCount initial capacity of the returned set
   * @return the distinct normalised partition locations
   * @throws ConfigurationException if a partition location does not start with the table base location (compared
   *           case-insensitively)
   * @throws NoSuchObjectException declared for the metastore partition iteration
   * @throws MetaException declared for the metastore partition iteration
   * @throws TException declared for the metastore partition iteration
   */
  @Override
  public Set<Path> getMetastorePaths(short batchSize, int expectedPathCount)
    throws NoSuchObjectException, MetaException, TException {
    Set<Path> metastorePaths = new HashSet<>(expectedPathCount);
    // Lower-case once, with a fixed locale, so the check does not depend on the JVM's default locale.
    String lowerCaseBaseLocation = tableBaseLocation.toString().toLowerCase(Locale.ROOT);
    PartitionIterator partitionIterator = new PartitionIterator(metastore, table, batchSize);
    while (partitionIterator.hasNext()) {
      Partition partition = partitionIterator.next();
      Path location = PathUtils.normalise(locationAsPath(partition));
      // NOTE(review): this is a plain string-prefix test, so a sibling directory whose name merely begins with the
      // table directory's name also passes; the base location is also not run through PathUtils.normalise.
      if (!location.toString().toLowerCase(Locale.ROOT).startsWith(lowerCaseBaseLocation)) {
        LOG.error("Check your configuration: '{}' does not appear to be part of '{}'.", location, tableBaseLocation);
        throw new ConfigurationException();
      }
      metastorePaths.add(location);
    }
    return metastorePaths;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy