All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.gobblin.compliance.retention.HivePartitionVersionRetentionReaper Maven / Gradle / Ivy

Go to download

A distributed data integration framework for streaming and batch data ecosystems.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gobblin.compliance.retention;

import java.io.IOException;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;

import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.compliance.ComplianceConfigurationKeys;
import org.apache.gobblin.compliance.HivePartitionVersion;
import org.apache.gobblin.compliance.HiveProxyQueryExecutor;
import org.apache.gobblin.compliance.purger.HivePurgerQueryTemplate;
import org.apache.gobblin.compliance.utils.PartitionUtils;
import org.apache.gobblin.compliance.utils.ProxyUtils;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.data.management.retention.dataset.CleanableDataset;
import org.apache.gobblin.data.management.version.DatasetVersion;
import org.apache.gobblin.util.HadoopUtils;

import static org.apache.gobblin.compliance.purger.HivePurgerQueryTemplate.getDropPartitionQuery;
import static org.apache.gobblin.compliance.purger.HivePurgerQueryTemplate.getUseDbQuery;


/**
 * Class to move/clean backups/staging partitions.
 *
 * <p>For a single Hive partition version, {@link #clean()} either deletes its data (staging versions) or moves
 * its data under the configured backup directory and registers it in the backup database (backup versions),
 * and then drops the version's partition metadata.
 *
 * @author adsharma
 */
@Slf4j
public class HivePartitionVersionRetentionReaper extends HivePartitionVersionRetentionRunner {
  // Assigned at the start of every clean() call; all file system operations of this class go through it.
  private FileSystem versionOwnerFs;

  private final boolean simulate;
  private final Optional<String> versionOwner;
  private final Optional<String> backUpOwner;

  /**
   * Reads the version owner from the given version and the backup owner and simulate flag from the state.
   *
   * @param dataset dataset the version belongs to, forwarded to the parent runner
   * @param version version to reap; it is cast to {@link HivePartitionVersion} to read its owner
   * @param nonDeletableVersionLocations forwarded unchanged to the parent runner
   * @param state job state; must contain {@link ComplianceConfigurationKeys#BACKUP_OWNER}
   * @throws IllegalArgumentException if {@link ComplianceConfigurationKeys#BACKUP_OWNER} is not set
   */
  public HivePartitionVersionRetentionReaper(CleanableDataset dataset, DatasetVersion version,
      List nonDeletableVersionLocations, State state) {
    super(dataset, version, nonDeletableVersionLocations, state);
    this.versionOwner = ((HivePartitionVersion) this.datasetVersion).getOwner();
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_OWNER),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_OWNER);
    this.backUpOwner = Optional.fromNullable(this.state.getProp(ComplianceConfigurationKeys.BACKUP_OWNER));
    this.simulate = this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
        ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE);
  }

  /**
   * Cleans the version. The cases below are evaluated in this order and the first match wins:
   * <ol>
   *   <li>The version location does not exist: only the partition metadata is dropped.</li>
   *   <li>The version location equals the dataset location (case-insensitive string comparison): the data is
   *       left untouched and the partition metadata is dropped.</li>
   *   <li>Simulate is set to true: nothing is changed and the method returns.</li>
   *   <li>The version name contains {@link ComplianceConfigurationKeys#STAGING}: its data is deleted
   *       recursively and the partition metadata is dropped.</li>
   *   <li>The version name contains {@link ComplianceConfigurationKeys#BACKUP}: it is registered in the backup
   *       db, its data is moved to the backup dir and the current partition metadata is dropped.</li>
   *   <li>Otherwise: only the partition metadata is dropped.</li>
   * </ol>
   * The query executor is closed when the method exits, on every path.
   *
   * <p>NOTE(review): because the simulate check is third, a simulated run still drops metadata in cases 1
   * and 2 - confirm this is intended.
   *
   * @throws IOException if a file system operation or a Hive query fails, including a delete, mkdirs or
   *         rename that reports failure through its return value
   */
  @Override
  public void clean()
      throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    Path versionLocation = version.getLocation();
    Path datasetLocation = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    String completeName = version.datasetURN();
    State state = new State(this.state);

    this.versionOwnerFs = ProxyUtils.getOwnerFs(state, this.versionOwner);

    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils
        .getQueryExecutor(state, this.versionOwner, this.backUpOwner)) {

      if (!this.versionOwnerFs.exists(versionLocation)) {
        log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version  {}", completeName);
      } else if (datasetLocation.toString().equalsIgnoreCase(versionLocation.toString())) {
        log.info(
            "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version {}",
            completeName);
      } else if (this.simulate) {
        log.info("Simulate is set to true. Won't move the version {}", completeName);
        return;
      } else if (completeName.contains(ComplianceConfigurationKeys.STAGING)) {
        deleteVersionData(versionLocation, completeName);
      } else if (completeName.contains(ComplianceConfigurationKeys.BACKUP)) {
        moveVersionToBackup(queryExecutor, versionLocation);
      }
      executeDropVersionQueries(queryExecutor);
    }
  }

  // These methods are not implemented by this class
  @Override
  public void preCleanAction() {

  }

  @Override
  public void postCleanAction() {

  }

  /**
   * Recursively deletes the data of a staging version.
   *
   * @throws IOException if the delete reports failure, so the caller does not go on to drop the metadata of
   *         data that is still on disk
   */
  private void deleteVersionData(Path versionLocation, String completeName)
      throws IOException {
    log.info("Deleting data from version {}", completeName);
    if (!this.versionOwnerFs.delete(versionLocation, true)) {
      throw new IOException("Failed to delete data at " + versionLocation + " for version " + completeName);
    }
  }

  /**
   * Registers a backup version in the backup db, moves its data to the new location and sets permissions on
   * the parent of the new location.
   *
   * @throws IOException if a query fails, or if creating the target dir or moving the data reports failure
   */
  private void moveVersionToBackup(HiveProxyQueryExecutor queryExecutor, Path versionLocation)
      throws IOException {
    // Resolved once and before any query runs, so a missing BACKUP_DIR fails before the backup db is touched.
    Path newVersionLocation = getNewVersionLocation();
    Path newVersionLocationParent = newVersionLocation.getParent();

    executeAlterQueries(queryExecutor);

    log.info("Creating new dir {}", newVersionLocationParent);
    if (!this.versionOwnerFs.mkdirs(newVersionLocationParent)) {
      throw new IOException("Failed to create dir " + newVersionLocationParent);
    }

    log.info("Moving data from {} to {}", versionLocation, newVersionLocation);
    fsMove(versionLocation, newVersionLocation);

    // All actions for user and group, none for others.
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
    HadoopUtils
        .setPermissions(newVersionLocationParent, this.versionOwner, this.backUpOwner, this.versionOwnerFs,
            permission);
  }

  /**
   * Runs, as the backup owner, the queries that switch to the backup db, create the backup table and add the
   * version's partition to it, pointing at the new version location.
   *
   * @throws IllegalArgumentException if a required property ({@link ComplianceConfigurationKeys#BACKUP_DB},
   *         {@link ComplianceConfigurationKeys#TRASH_DIR} or {@link ComplianceConfigurationKeys#BACKUP_DIR})
   *         is not set
   * @throws IOException wrapping any {@link SQLException} raised by the query executor
   */
  private void executeAlterQueries(HiveProxyQueryExecutor queryExecutor)
      throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String partitionSpecString = PartitionUtils.getPartitionSpecString(version.getSpec());
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_DB),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_DB);
    String backUpDb = this.state.getProp(ComplianceConfigurationKeys.BACKUP_DB);
    String backUpTableName = getVersionTableName(version);
    try {
      queryExecutor.executeQuery(HivePurgerQueryTemplate.getUseDbQuery(backUpDb), this.backUpOwner);
      queryExecutor.executeQuery(HivePurgerQueryTemplate
          .getCreateTableQuery(backUpDb + "." + backUpTableName, version.getDbName(), version.getTableName(),
              getBackUpTableLocation(version)), this.backUpOwner);
      // The partition's file format is only passed on when explicitly enabled in the configuration.
      Optional fileFormat = Optional.absent();
      if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.SPECIFY_PARTITION_FORMAT,
          ComplianceConfigurationKeys.DEFAULT_SPECIFY_PARTITION_FORMAT)) {
        fileFormat = version.getFileFormat();
      }
      queryExecutor.executeQuery(HivePurgerQueryTemplate
          .getAddPartitionQuery(backUpTableName, partitionSpecString, fileFormat,
              Optional.fromNullable(getNewVersionLocation().toString())), this.backUpOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  /**
   * Runs, as the version owner, the queries that switch to the version's db and drop the version's partition.
   *
   * @throws IOException wrapping any {@link SQLException} raised by the query executor
   */
  private void executeDropVersionQueries(HiveProxyQueryExecutor queryExecutor)
      throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String partitionSpec = PartitionUtils.getPartitionSpecString(version.getSpec());
    try {
      queryExecutor.executeQuery(getUseDbQuery(version.getDbName()), this.versionOwner);
      queryExecutor.executeQuery(getDropPartitionQuery(version.getTableName(), partitionSpec), this.versionOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  /** Returns the name used for the backup table, which is the version's own table name. */
  private String getVersionTableName(HivePartitionVersion version) {
    return version.getTableName();
  }

  /**
   * Returns the backup table location: the configured trash dir joined with the version's table name by '/'.
   *
   * @throws IllegalArgumentException if {@link ComplianceConfigurationKeys#TRASH_DIR} is not set
   */
  private String getBackUpTableLocation(HivePartitionVersion version) {
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.TRASH_DIR),
        "Missing required property " + ComplianceConfigurationKeys.TRASH_DIR);
    return StringUtils
        .join(Arrays.asList(this.state.getProp(ComplianceConfigurationKeys.TRASH_DIR), getVersionTableName(version)),
            '/');
  }

  /**
   * Builds the location the version's data is moved to: the configured backup dir, followed by the version's
   * current location without scheme and authority, followed by the version's timestamp. When the last
   * component of the current location is itself a Unix timestamp, that component is left out by using the
   * location's parent instead.
   *
   * @throws IllegalArgumentException if {@link ComplianceConfigurationKeys#BACKUP_DIR} is not set
   */
  private Path getNewVersionLocation() {
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_DIR),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_DIR);
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    if (PartitionUtils.isUnixTimeStamp(version.getLocation().getName())) {
      return new Path(StringUtils.join(Arrays.asList(this.state.getProp(ComplianceConfigurationKeys.BACKUP_DIR),
          Path.getPathWithoutSchemeAndAuthority(version.getLocation().getParent()).toString(), version.getTimeStamp()), '/'));
    } else {
      return new Path(StringUtils.join(Arrays.asList(this.state.getProp(ComplianceConfigurationKeys.BACKUP_DIR),
          Path.getPathWithoutSchemeAndAuthority(version.getLocation()).toString(), version.getTimeStamp()), '/'));
    }
  }

  /**
   * Moves the version's data. If the name of {@code from} is a Unix timestamp, {@code from} itself is renamed
   * to the parent of {@code to}; otherwise every regular file directly under {@code from} is renamed to
   * {@code to}, and sub-directories are skipped.
   *
   * <p>NOTE(review): both branches appear to rely on the rename target already existing as a directory -
   * confirm against the file system in use.
   *
   * @throws IOException if listing fails or any rename reports failure
   */
  private void fsMove(Path from, Path to)
      throws IOException {
    if (PartitionUtils.isUnixTimeStamp(from.getName())) {
      renameOrThrow(from, to.getParent());
    } else {
      for (FileStatus fileStatus : this.versionOwnerFs.listStatus(from)) {
        if (fileStatus.isFile()) {
          renameOrThrow(fileStatus.getPath(), to);
        }
      }
    }
  }

  /**
   * Renames {@code source} to {@code destination}, turning a {@code false} result into an exception so that a
   * failed move cannot be followed by dropping the version's metadata.
   *
   * @throws IOException if the rename throws or returns {@code false}
   */
  private void renameOrThrow(Path source, Path destination)
      throws IOException {
    if (!this.versionOwnerFs.rename(source, destination)) {
      throw new IOException("Failed to move " + source + " to " + destination);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy