/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.gobblin.compliance.purger;

import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.thrift.TException;

import com.google.common.base.Splitter;

import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.compliance.ComplianceConfigurationKeys;
import org.apache.gobblin.compliance.ComplianceEvents;
import org.apache.gobblin.compliance.HivePartitionDataset;
import org.apache.gobblin.compliance.utils.DatasetUtils;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.instrumented.Instrumented;
import org.apache.gobblin.metrics.MetricContext;
import org.apache.gobblin.metrics.event.EventSubmitter;
import org.apache.gobblin.publisher.DataPublisher;
import org.apache.gobblin.source.workunit.WorkUnit;
import org.apache.gobblin.util.HostUtils;

/**
 * A {@link DataPublisher} that marks each SUCCESSFUL {@link WorkUnitState} as COMMITTED and every
 * other {@link WorkUnitState} as FAILED, submitting a purge event for each work unit.
 *
 * @author adsharma
 */
@Slf4j
public class HivePurgerPublisher extends DataPublisher {
  protected MetricContext metricContext;
  protected EventSubmitter eventSubmitter;
  public HiveMetaStoreClient client;

  public HivePurgerPublisher(State state) throws Exception {
    super(state);
    this.metricContext = Instrumented.getMetricContext(state, this.getClass());
    this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, ComplianceEvents.NAMESPACE).build();
    initHiveMetastoreClient();
  }
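
  /**
   * Creates the {@link HiveMetaStoreClient} used to look up purged partitions. If a super-user
   * keytab location is configured, the client is created inside a Kerberos login for that user;
   * otherwise it is created with the current credentials.
   */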
  public void initHiveMetastoreClient() throws Exception {
    if (this.state.contains(ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION)) {
      String superUser = this.state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SUPER_USER);
      String realm = this.state.getProp(ConfigurationKeys.KERBEROS_REALM);
      String keytabLocation = this.state.getProp(ConfigurationKeys.SUPER_USER_KEY_TAB_LOCATION);
      log.info("Establishing MetastoreClient connection using " + keytabLocation);
      UserGroupInformation.loginUserFromKeytab(HostUtils.getPrincipalUsingHostname(superUser, realm), keytabLocation);
      UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
      loginUser.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws TException {
          HivePurgerPublisher.this.client = new HiveMetaStoreClient(new HiveConf());
          return null;
        }
      });
    } else {
      HivePurgerPublisher.this.client = new HiveMetaStoreClient(new HiveConf());
    }
  }
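
  /**
   * No-op; the metastore client and the event submitter are already set up in the constructor.
   */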
  public void initialize() {
  }
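
  /**
   * Marks each SUCCESSFUL {@link WorkUnitState} as COMMITTED and every other state as FAILED,
   * and submits the corresponding purge event for each work unit.
   */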
  @Override
  public void publishData(Collection<? extends WorkUnitState> states) {
    for (WorkUnitState state : states) {
      if (state.getWorkingState() == WorkUnitState.WorkingState.SUCCESSFUL) {
        state.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        submitEvent(state, ComplianceEvents.Purger.WORKUNIT_COMMITTED);
      } else {
        state.setWorkingState(WorkUnitState.WorkingState.FAILED);
        submitEvent(state, ComplianceEvents.Purger.WORKUNIT_FAILED);
      }
    }
  }
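
  /**
   * Builds the event metadata for a purged partition (records and bytes read and written, records
   * purged, and the partition identity) and submits it under the given event name. No event is
   * submitted if the partition name is malformed or the partition cannot be fetched from the metastore.
   */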
  private void submitEvent(WorkUnitState state, String name) {
    WorkUnit workUnit = state.getWorkunit();
    Map<String, String> metadata = new HashMap<>();
    String recordsRead = state.getProp(ComplianceConfigurationKeys.NUM_ROWS);
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_RECORDSREAD, recordsRead);
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_BYTESREAD,
        getDataSize(workUnit.getProp(ComplianceConfigurationKeys.RAW_DATA_SIZE),
            workUnit.getProp(ComplianceConfigurationKeys.TOTAL_SIZE)));
    String partitionNameProp = workUnit.getProp(ComplianceConfigurationKeys.PARTITION_NAME);
    Splitter AT_SPLITTER = Splitter.on("@").omitEmptyStrings().trimResults();
    List<String> namesList = AT_SPLITTER.splitToList(partitionNameProp);
    if (namesList.size() != 3) {
      log.warn("Not submitting event. Invalid partition name: " + partitionNameProp);
      return;
    }
    String dbName = namesList.get(0), tableName = namesList.get(1), partitionName = namesList.get(2);
    org.apache.hadoop.hive.metastore.api.Partition apiPartition = null;
    Partition qlPartition = null;
    try {
      Table table = new Table(this.client.getTable(dbName, tableName));
      apiPartition = this.client.getPartition(dbName, tableName, partitionName);
      qlPartition = new Partition(table, apiPartition);
    } catch (Exception e) {
      log.warn("Not submitting event. Failed to resolve partition '" + partitionName + "'", e);
      return;
    }
    HivePartitionDataset hivePartitionDataset = new HivePartitionDataset(qlPartition);
    String recordsWritten = DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.NUM_ROWS,
        ComplianceConfigurationKeys.DEFAULT_NUM_ROWS);
    String recordsPurged = Long.toString(Long.parseLong(recordsRead) - Long.parseLong(recordsWritten));
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_RECORDSWRITTEN, recordsWritten);
    metadata.put(ComplianceConfigurationKeys.WORKUNIT_BYTESWRITTEN, getDataSize(
        DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.RAW_DATA_SIZE,
            ComplianceConfigurationKeys.DEFAULT_RAW_DATA_SIZE),
        DatasetUtils.getProperty(hivePartitionDataset, ComplianceConfigurationKeys.TOTAL_SIZE,
            ComplianceConfigurationKeys.DEFAULT_TOTAL_SIZE)));
    metadata.put(DatasetMetrics.DATABASE_NAME, hivePartitionDataset.getDbName());
    metadata.put(DatasetMetrics.TABLE_NAME, hivePartitionDataset.getTableName());
    metadata.put(DatasetMetrics.PARTITION_NAME, hivePartitionDataset.getName());
    metadata.put(DatasetMetrics.RECORDS_PURGED, recordsPurged);
    this.eventSubmitter.submit(name, metadata);
  }
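
  /**
   * Returns the total data size when it is positive, falling back to the raw data size otherwise.
   */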
  private String getDataSize(String rawDataSize, String totalDataSize) {
    long rawDataSizeVal = Long.parseLong(rawDataSize);
    long totalDataSizeVal = Long.parseLong(totalDataSize);
    long dataSize = totalDataSizeVal;
    if (totalDataSizeVal <= 0) {
      dataSize = rawDataSizeVal;
    }
    return Long.toString(dataSize);
  }

  public void publishMetadata(Collection<? extends WorkUnitState> states) {
  }

  @Override
  public void close() {
  }
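
  /**
   * Metadata keys used when reporting purge metrics for a Hive partition.
   */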
  public static class DatasetMetrics {
    public static final String DATABASE_NAME = "HiveDatabaseName";
    public static final String TABLE_NAME = "HiveTableName";
    public static final String PARTITION_NAME = "HivePartitionName";
    public static final String RECORDS_PURGED = "RecordsPurged";
  }
}