All downloads are free. The search and download features use the official Maven repository.

org.apache.gobblin.hive.policy.HiveSnapshotRegistrationPolicy Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.hive.policy;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import org.apache.gobblin.annotation.Alpha;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.hive.HiveTable;
import org.apache.gobblin.hive.spec.HiveSpec;
import org.apache.gobblin.hive.spec.SimpleHiveSpec;


/**
 * A {@link org.apache.gobblin.hive.policy.HiveRegistrationPolicy} for registering snapshots.
 *
 * <p>
 *   Given a root directory that holds zero or more snapshot sub-directories, this policy derives the
 *   {@link HiveTable}s to register from the latest snapshot only (see {@link #getLatestSnapshot(Path)}).
 * </p>
 *
 * @author Ziyang Liu
 */
@Alpha
public class HiveSnapshotRegistrationPolicy extends HiveRegistrationPolicyBase {

  /** Property key holding an optional regular expression that snapshot paths must fully match. */
  public static final String SNAPSHOT_PATH_PATTERN = "snapshot.path.pattern";

  /** Compiled {@link #SNAPSHOT_PATH_PATTERN}, or absent when the property is not set. */
  protected final Optional<Pattern> snapshotPathPattern;

  /**
   * Creates the policy and compiles {@link #SNAPSHOT_PATH_PATTERN} if it is present in {@code props}.
   *
   * @param props job/task configuration
   * @throws IOException propagated from the superclass constructor
   * @throws java.util.regex.PatternSyntaxException if the configured pattern is not a valid regular expression
   */
  protected HiveSnapshotRegistrationPolicy(State props) throws IOException {
    super(props);
    this.snapshotPathPattern = props.contains(SNAPSHOT_PATH_PATTERN)
        ? Optional.of(Pattern.compile(props.getProp(SNAPSHOT_PATH_PATTERN)))
        : Optional.<Pattern> absent();
  }

  /**
   * Builds one {@link HiveSpec} per table returned by {@link #getTables(Path)}.
   *
   * @param path The root directory of snapshots. This directory may contain zero or more snapshots.
   * @return the specs to register; an empty collection if no table was found
   * @throws IOException if listing the snapshots or building the tables fails
   */
  @Override
  public Collection<HiveSpec> getHiveSpecs(Path path) throws IOException {
    List<HiveTable> tables = getTables(path);

    if (tables.isEmpty()) {
      return ImmutableList.<HiveSpec> of();
    }
    Collection<HiveSpec> specs = Lists.newArrayList();
    for (HiveTable table : tables) {
      // The spec is keyed on the snapshot root (path); the table itself was built from the latest snapshot.
      specs.add(new SimpleHiveSpec.Builder<>(path).withTable(table).withPartition(getPartition(path, table)).build());
    }
    return specs;
  }

  /**
   * Get {@link HiveTable}s using the latest snapshot (returned by {@link #getLatestSnapshot(Path)}).
   *
   * @param path the root directory of snapshots
   * @return the tables for the latest snapshot, or an empty list if there is no snapshot
   * @throws IOException if listing the snapshots or building the tables fails
   */
  @Override
  protected List<HiveTable> getTables(Path path) throws IOException {
    Path latestSnapshot = getLatestSnapshot(path);
    if (latestSnapshot == null) {
      return ImmutableList.<HiveTable> of();
    }

    return super.getTables(latestSnapshot);
  }

  /**
   * Get the latest snapshot in the given {@link Path}.
   *
   * <p>
   *   The latest snapshot is a sub-directory of the input {@link Path} that has the largest folder
   *   name lexicographically (as ordered by {@link String#compareTo(String)}). If property
   *   {@link #SNAPSHOT_PATH_PATTERN} is set, only those sub-directories whose full path matches the
   *   given pattern are considered.
   * </p>
   *
   * @param path the root directory of snapshots
   * @return the path of the latest snapshot, or {@code null} if no sub-directory qualifies
   * @throws IOException if the directory listing fails
   */
  protected Path getLatestSnapshot(Path path) throws IOException {
    FileStatus[] statuses = this.fs.listStatus(path, new PathFilter() {

      @Override
      public boolean accept(Path p) {
        try {
          if (!HiveSnapshotRegistrationPolicy.this.fs.isDirectory(p)) {
            return false;
          }
        } catch (IOException e) {
          // PathFilter#accept cannot declare checked exceptions, so rethrow unchecked.
          throw Throwables.propagate(e);
        }
        return !HiveSnapshotRegistrationPolicy.this.snapshotPathPattern.isPresent()
            || HiveSnapshotRegistrationPolicy.this.snapshotPathPattern.get().matcher(p.toString()).matches();
      }
    });

    // Defensive: treat a null listing the same as an empty one rather than failing with an NPE.
    if (statuses == null || statuses.length == 0) {
      return null;
    }

    // Sort by folder name in descending order so that the latest snapshot ends up first.
    Arrays.sort(statuses, new Comparator<FileStatus>() {

      @Override
      public int compare(FileStatus o1, FileStatus o2) {
        return o2.getPath().getName().compareTo(o1.getPath().getName());
      }
    });

    return statuses[0].getPath();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy