
// edu.ucr.cs.bdlab.indexing.IndexRecordWriter Maven / Gradle / Ivy
/*
* Copyright 2018 University of California, Riverside
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.ucr.cs.bdlab.indexing;
import edu.ucr.cs.bdlab.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.geolite.IFeature;
import edu.ucr.cs.bdlab.io.FeatureWriter;
import edu.ucr.cs.bdlab.io.SpatialOutputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.Progressable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Vector;
/**
* A class that writes an index as a set of files, each representing one partition. The key-value pairs represent
* a partition ID and a feature. All features belonging to one partition are stored in one file. The format of the file
* can be configured through the parameter {@link SpatialOutputFormat#OutputFormat}.
*/
public class IndexRecordWriter extends RecordWriter {
private static final Log LOG = LogFactory.getLog(IndexRecordWriter.class);
/**Maximum number of active closing threads*/
private static final int MaxClosingThreads = Runtime.getRuntime().availableProcessors() * 2;
/**The class for a writer that writes the contents of each partition file*/
protected Class extends FeatureWriter> writerClass;
/**The metadata of the writer class*/
private final FeatureWriter.Metadata writerClassMetadata;
/**To continuously report the progress and avoid killing the job*/
private Progressable progress;
/**Job configuration*/
private Configuration conf;
/**The spatial partitioner used by the current job*/
private SpatialPartitioner partitioner;
/**The output file system*/
private FileSystem outFS;
/**The path to the directory where output files are written*/
private Path outPath;
/**A list of all threads that are closing partitions in the background*/
private Vector closingThreads = new Vector();
/**The master file contains information about all written partitions*/
private OutputStream masterFile;
/**List of errors that happened by background threads that close the partitions*/
private Vector listOfErrors = new Vector();
/**Whether records are replicated in the index to keep the partitions disjoint*/
private boolean disjoint;
/**The current partition ID which is being written. null means there is no partition is being written yet*/
private Integer currentOpeningPartitionID = null;
/**Parameters for writing features to a partition*/
private Path partitionPath = null;
private FeatureWriter writer = null;
private PartitionInfo partition;
public IndexRecordWriter(TaskAttemptContext task, Path outPath)
throws IOException {
this(task, Integer.toString(task.getTaskAttemptID().getTaskID().getId()), outPath);
}
public IndexRecordWriter(TaskAttemptContext task, String name, Path outPath)
throws IOException {
this(IndexerParams.readPartitionerFromHadoopConfiguration(task.getConfiguration()), name, outPath, task.getConfiguration());
this.progress = task;
}
/**
* Create a record writer for index files
* @param partitioner the partitioner used to partition records into files
* @param name A unique name added to the global index file which is used
* to prevent multiple reducers from writing separate files with
* the same name.
* @param outPath the path of the output
* @param conf the configuration of the enviornment
* @throws IOException if an error happens while creating the master file.
*/
public IndexRecordWriter(SpatialPartitioner partitioner, String name, Path outPath, Configuration conf)
throws IOException {
this.conf = conf;
this.disjoint = partitioner.isDisjoint();
this.outFS = outPath.getFileSystem(conf);
this.outPath = outPath;
this.partitioner = partitioner;
String globalIndexExtension = partitioner.getClass().getAnnotation(SpatialPartitioner.Metadata.class).extension();
Path masterFilePath = name == null ?
new Path(outPath, String.format("_master.%s", globalIndexExtension)) :
new Path(outPath, String.format("_master_%s.%s", name, globalIndexExtension));
this.masterFile = outFS.create(masterFilePath);
writerClass = SpatialOutputFormat.getConfiguredFeatureWriterClass(conf);
// Get writer class metadata if it is defined
this.writerClassMetadata = writerClass.getAnnotation(FeatureWriter.Metadata.class);
}
/**
* Initialize parameters for writing features to a partition
* @param partitionID
* @throws IOException
* @throws InstantiationException
* @throws IllegalAccessException
*/
private void initializePartition(Integer partitionID) throws IOException, InstantiationException, IllegalAccessException {
partitionPath = getPartitionPath(partitionID);
writer = writerClass.newInstance();
writer.initialize(partitionPath, conf);
partition = new PartitionInfo();
partition.filename = partitionPath.getName();
partition.partitionId = partitionID;
partition.setCoordinateDimension(partitioner.getCoordinateDimension());
}
/**
* Write a feature to current opening partition
* @param f
*/
private void writeFeature(IFeature f) {
partition.numFeatures++;
partition.merge(f.getGeometry());
try {
writer.write(null, f);
} catch (Exception e) {
throw new RuntimeException(String.format("Error writing the feature '%s'", f.toString()), e);
}
}
/**
* Writes the given feature in the given partitionID. If the given partition ID is already open, the given feature
* is appended to it. Otherwise, the current open partition is closed and a new file is created to the given
* partitionID.
* @param partitionID the ID of the partition to write to
* @param f the featre to write into the given partition
* @throws IOException if an error happens while writing the feature.
*/
@Override
public void write(Integer partitionID, IFeature f) throws IOException {
try {
if(currentOpeningPartitionID == null) {
currentOpeningPartitionID = partitionID;
initializePartition(currentOpeningPartitionID);
writeFeature(f);
} else {
if(partitionID.equals(currentOpeningPartitionID)) {
writeFeature(f);
} else {
// Close current partition
this.closePartition(partitionPath, partitionID, partition, writer);
// Initialize new partition to write
currentOpeningPartitionID = partitionID;
initializePartition(currentOpeningPartitionID);
writeFeature(f);
}
}
} catch (IllegalAccessException e) {
throw new IOException("Error writing to the output", e);
} catch (InstantiationException e) {
throw new IOException("Error writing to the output", e);
}
}
/**
* Closes a file that is currently open for a specific partition. Returns a background thread that will continue
* all close-related logic.
* @param id the partition ID to close
*
*/
private void closePartition(final Path partitionPath, final int id, final PartitionInfo partition, final FeatureWriter writer) {
Thread closeThread = new Thread() {
@Override
public void run() {
try {
writer.close(null);
partition.size = partitionPath.getFileSystem(conf).getFileStatus(partitionPath).getLen();
if (disjoint) {
// If data is replicated, we need to shrink down the size of the partition to keep partitions disjoint
EnvelopeNDLite partitionMBR = new EnvelopeNDLite();
partitioner.getPartitionMBR(id, partitionMBR);
partition.shrink(partitionMBR);
}
String partitionText = partition.toString();
synchronized (masterFile) {
// Write partition information to the master file
masterFile.write(partitionText.getBytes());
masterFile.write('\n');
}
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException("Error closing partition: "+id, e);
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
closingThreads.remove(Thread.currentThread());
// Start more background threads if needed
int numRunningThreads = 0;
try {
for (int i_thread = 0; i_thread < closingThreads.size() &&
numRunningThreads < MaxClosingThreads; i_thread++) {
Thread thread = closingThreads.elementAt(i_thread);
synchronized(thread) {
switch (thread.getState()) {
case NEW:
// Start the thread and fall through to increment the counter
thread.start();
case RUNNABLE:
case BLOCKED:
case WAITING:
case TIMED_WAITING:
// No need to start. Just increment number of threads
numRunningThreads++;
break;
case TERMINATED: // Do nothing.
// Should never happen as each thread removes itself from
// the list before completion
}
}
}
} catch (ArrayIndexOutOfBoundsException e) {
// No problem. The array of threads might have gone empty
}
}
}
};
closeThread.setUncaughtExceptionHandler((t, e) -> listOfErrors.add(e));
if (closingThreads.size() < MaxClosingThreads) {
// Start the thread in the background and make sure it started before
// adding it to the list of threads to avoid an exception when other
// thread tries to start it after it is in the queue
closeThread.start();
try {
while (closeThread.getState() == Thread.State.NEW) {
Thread.sleep(1000);
LOG.info("Waiting for thread #"+closeThread.getId()+" to start");
}
} catch (InterruptedException e) {}
}
closingThreads.add(closeThread);
}
/**
* Returns a unique name for a file to write the given partition
* @param id
* @return
* @throws IOException
*/
private Path getPartitionPath(int id) throws IOException {
String format = "part-%05d";
if (writerClassMetadata != null)
format += writerClassMetadata.extension();
Path partitionPath = new Path(outPath, String.format(format, id));
if (outFS.exists(partitionPath)) {
format = "part-%05d-%03d";
if (writerClassMetadata != null)
format += writerClassMetadata.extension();
int i = 0;
do {
partitionPath = new Path(outPath, String.format(format, id, ++i));
} while (outFS.exists(partitionPath));
}
return partitionPath;
}
@Override
public void close(TaskAttemptContext task) throws IOException {
if (currentOpeningPartitionID != null)
this.closePartition(partitionPath, currentOpeningPartitionID, partition, writer);
try {
if (task != null)
task.setStatus("Closing! "+closingThreads.size()+" remaining");
// Wait until all background threads are closed
try {
while (!closingThreads.isEmpty()) {
Thread thread;
synchronized (closingThreads) {
thread = closingThreads.isEmpty()? null : closingThreads.firstElement();
}
while (thread != null && thread.isAlive()) {
try {
thread.join(10000);
if (task != null)
task.progress();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
if (task != null)
task.setStatus("Closing! "+closingThreads.size()+" remaining");
if (thread != null && !thread.isAlive()) {
synchronized (closingThreads) {
closingThreads.remove(thread);
}
}
}
} catch (ArrayIndexOutOfBoundsException NoSuchElementException) {
// The array of threads has gone empty. Nothing to do
}
if (task != null)
task.setStatus("All closed");
// All threads are now closed. Check if errors happened
if (!listOfErrors.isEmpty()) {
for (Throwable t : listOfErrors)
LOG.error("Error in thread", t);
throw new RuntimeException("Encountered "+listOfErrors.size()+" errors in background thread", listOfErrors.firstElement());
}
} finally {
// Close the master file to ensure there are no open files
masterFile.close();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy