All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hotels.plunger.TapDataWriter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright (C) 2015 Expedia Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hotels.plunger;

import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

import cascading.cascade.Cascade;
import cascading.flow.Flow;
import cascading.flow.FlowElement;
import cascading.flow.FlowNode;
import cascading.flow.FlowStep;
import cascading.flow.FlowStepListener;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.flow.local.LocalFlowProcess;
import cascading.flow.planner.graph.ElementGraph;
import cascading.flow.planner.process.FlowNodeGraph;
import cascading.management.state.ClientState;
import cascading.pipe.Group;
import cascading.stats.CascadingStats;
import cascading.stats.FlowStepStats;
import cascading.stats.local.LocalStepStats;
import cascading.tap.Tap;
import cascading.tap.hadoop.util.Hadoop18TapUtil;
import cascading.tap.partition.BasePartitionTap;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;

/**
 * Writes a {@link Data} instance to a {@link Tap}. Currently works only with hadoop and local taps.
 * 

* WARNING: This is exceedingly brittle as it relies on cascading internals. */ public class TapDataWriter { private final Data data; TapDataWriter(Data data) { this.data = data; } /** Writes the {@link Tuple Tuples} provided in the {@link Data} instance to the supplied {@link Tap}. */ public Tap toTap(Tap tap) throws IOException { Class tapConfigClass = TapTypeUtil.getTapConfigClass(tap); if (Configuration.class.equals(tapConfigClass)) { if (tap instanceof BasePartitionTap) { writeToHadoopPartitionTap(tap); } else { writeToHadoopTap(tap); } } else if (Properties.class.equals(tapConfigClass)) { writeToLocalTap(tap); } else { throw new IllegalArgumentException("Unsupported tap type: " + tap.getClass()); } return tap; } /* WARNING: This is exceedingly brittle as it relies on cascading internals */ private void writeToHadoopTap(Tap tap) throws IOException { @SuppressWarnings("unchecked") Tap hadoopTap = (Tap) tap; JobConf conf = new JobConf(); HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf); hadoopTap.sinkConfInit(flowProcess, conf); TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess); for (TupleEntry tuple : data.asTupleEntryList()) { collector.add(tuple); } collector.close(); } /* WARNING: This is exceedingly brittle as it relies on cascading internals */ private void writeToHadoopPartitionTap(Tap tap) throws IOException { @SuppressWarnings("unchecked") BasePartitionTap hadoopTap = (BasePartitionTap) tap; JobConf conf = new JobConf(); // Avoids deletion of results when using a partition tap (close() will delete the _temporary before the copy has // been done if not in a flow) HadoopUtil.setIsInflow(conf); HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf); hadoopTap.sinkConfInit(flowProcess, conf); TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess); for (TupleEntry tuple : data.asTupleEntryList()) { collector.add(tuple); } collector.close(); // We need to clean up the '_temporary' folder BasePartitionTap partitionTap = hadoopTap; @SuppressWarnings("unchecked") String basePath = partitionTap.getParent().getFullIdentifier(flowProcess); deleteTemporaryPath(new Path(basePath), FileSystem.get(conf)); } private void deleteTemporaryPath(Path outputPath, FileSystem fileSystem) throws IOException { if (fileSystem.exists(outputPath)) { Path tmpDir = new Path(outputPath, Hadoop18TapUtil.TEMPORARY_PATH); if (fileSystem.exists(tmpDir)) { fileSystem.delete(tmpDir, true); } } } /* WARNING: This is exceedingly brittle as it relies on cascading internals */ private void writeToLocalTap(Tap tap) throws IOException { @SuppressWarnings("unchecked") Tap localTap = (Tap) tap; Properties conf = new Properties(); LocalFlowProcess flowProcess = new LocalFlowProcess(conf); flowProcess.setStepStats(new LocalStepStats(new NullFlowStep(), NullClientState.INSTANCE)); localTap.sinkConfInit(flowProcess, conf); TupleEntryCollector collector = localTap.openForWrite(flowProcess); for (TupleEntry tuple : data.asTupleEntryList()) { collector.add(tuple); } collector.close(); localTap.commitResource(conf); } private static class NullClientState extends ClientState { private static NullClientState INSTANCE = new NullClientState(); private NullClientState() { } @Override public void recordStats(@SuppressWarnings("rawtypes") CascadingStats stats) { } @Override public void recordFlowStep(@SuppressWarnings("rawtypes") FlowStep flowStep) { } @Override public void recordFlow(@SuppressWarnings("rawtypes") Flow flow) { } @Override public void recordCascade(Cascade cascade) { } @Override public void recordFlowNode(FlowNode flowNode) { } } @SuppressWarnings("rawtypes") private static class NullFlowStep implements FlowStep { @Override public Collection getGroups() { return null; } @Override public Set getSourceTaps() { return null; } @Override public Set getSinkTaps() { return null; } @Override public Set getSinkElements() { return null; } @Override public Set getSourceElements() { return null; } @Override public Map getTrapMap() { return null; } @Override public ElementGraph getElementGraph() { return null; } @Override public String getID() { return null; } @Override public int getOrdinal() { return 0; } @Override public String getName() { return null; } @Override public Flow getFlow() { return null; } @Override public String getFlowID() { return null; } @Override public String getFlowName() { return null; } @Override public Properties getConfig() { return null; } @Override public String getStepDisplayName() { return null; } @Override public int getSubmitPriority() { return 0; } @Override public void setSubmitPriority(int submitPriority) { } @Override public FlowNodeGraph getFlowNodeGraph() { return null; } @Override public int getNumFlowNodes() { return 0; } @Override public Group getGroup() { return null; } @Override public Tap getSink() { return null; } @Override public Set getSourceName(Tap source) { return null; } @Override public Set getSinkName(Tap sink) { return null; } @Override public Tap getSourceWith(String identifier) { return null; } @Override public Tap getSinkWith(String identifier) { return null; } @Override public Set getTraps() { return null; } @Override public Tap getTrap(String name) { return null; } @Override public boolean containsPipeNamed(String pipeName) { return false; } @Override public FlowStepStats getFlowStepStats() { return null; } @Override public boolean hasListeners() { return false; } @Override public void addListener(FlowStepListener flowStepListener) { } @Override public boolean removeListener(FlowStepListener flowStepListener) { return false; } @Override public Map getConfigAsProperties() { return null; } @Override public Map getProcessAnnotations() { return null; } @Override public void addProcessAnnotation(Enum annotation) { } @Override public void addProcessAnnotation(String key, String value) { } @Override public void setFlowStepStats(FlowStepStats flowStepStats) { } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy