org.elasticsearch.hadoop.hive.EsHiveOutputFormat Maven / Gradle / Ivy
The newest version!
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.hive;
import java.io.IOException;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Progressable;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.HadoopSettingsManager;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.mr.EsOutputFormat;
import org.elasticsearch.hadoop.mr.security.HadoopUserProvider;
import org.elasticsearch.hadoop.rest.InitializationUtils;
/**
* Hive specific OutputFormat.
*/
@SuppressWarnings("rawtypes")
public class EsHiveOutputFormat extends EsOutputFormat implements HiveOutputFormat {
static class EsHiveRecordWriter extends EsOutputFormat.EsRecordWriter implements RecordWriter {
private final Progressable progress;
public EsHiveRecordWriter(Configuration cfg, Progressable progress) {
super(cfg, progress);
this.progress = progress;
}
@Override
public void write(Writable w) throws IOException {
if (!initialized) {
initialized = true;
init();
}
if (w instanceof HiveBytesArrayWritable) {
HiveBytesArrayWritable hbaw = ((HiveBytesArrayWritable) w);
repository.writeProcessedToIndex(hbaw.getContent());
}
else {
// we could allow custom BAs
throw new EsHadoopIllegalArgumentException(String.format("Unexpected type; expected [%s], received [%s]", HiveBytesArrayWritable.class, w));
}
}
@Override
public void close(boolean abort) throws IOException {
// TODO: check whether a proper Reporter can be passed in
super.doClose(progress);
}
}
public EsHiveRecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) {
// force the table properties to be merged into the configuration
// NB: the properties are also available in HiveConstants#OUTPUT_TBL_PROPERTIES
Settings settings = HadoopSettingsManager.loadFrom(jc).merge(tableProperties);
Log log = LogFactory.getLog(getClass());
// NB: ESSerDe is already initialized at this stage but should still have a reference to the same cfg object
// NB: the value writer is not needed by Hive but it's set for consistency and debugging purposes
InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);
InitializationUtils.setUserProviderIfNotSet(settings, HadoopUserProvider.class, log);
// set write resource
settings.setResourceWrite(settings.getResourceWrite());
HiveUtils.init(settings, log);
return new EsHiveRecordWriter(jc, progress);
}
}