// edu.ucr.cs.bdlab.beast.indexing.MasterFileReader (Maven / Gradle / Ivy)
package edu.ucr.cs.bdlab.beast.indexing;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.io.CSVFeatureReader;
import edu.ucr.cs.bdlab.beast.io.FeatureReader;
import edu.ucr.cs.bdlab.beast.io.SpatialInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import java.io.IOException;
/**
 * Reads a tab-separated master file line-by-line and exposes each data line as a
 * {@link PartitionInfo} (value) together with an {@link EnvelopeND} built from it (key).
 *
 * <p>As parsed by {@link #nextKeyValue()}, every data line holds, in order: the partition ID,
 * the file name, the number of features, the size, one column that is discarded (the WKT),
 * then all minimum coordinates followed by all maximum coordinates. The line that starts at
 * byte offset zero is treated as a header and skipped.
 */
public class MasterFileReader extends FeatureReader {
  /** An underlying reader to read the input line-by-line */
  protected final LineRecordReader lineReader = new LineRecordReader();

  /** The field separator for master files */
  protected final static char FieldSeparator = '\t';

  /** Number of columns that precede the coordinates: ID, file name, feature count, size, WKT */
  private static final int NumNonCoordinateColumns = 5;

  /**
   * An optional attribute to filter the geometries in the input file. It is parsed from the
   * configuration in {@code initialize} but is not consulted anywhere else in this class.
   */
  private EnvelopeNDLite filterMBR;

  /** Number of dimensions in the envelope; zero means "not determined yet" */
  private int numDimensions;

  /** The key (and value) to return */
  private PartitionInfo partitionInfo;

  /**
   * Initializes the underlying line reader on the given split and parses the optional filter
   * range stored under {@link SpatialInputFormat#FilterMBR} as comma-separated numbers.
   * @param inputSplit the split of the master file to read
   * @param taskAttemptContext the context that provides the configuration
   * @throws IOException if the underlying line reader fails to initialize
   */
  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    lineReader.initialize(inputSplit, taskAttemptContext);
    // Forget the dimension count of any previously read input so that it is inferred again
    // from the first data line of this split
    this.numDimensions = 0;
    Configuration conf = taskAttemptContext.getConfiguration();
    String filterMBRStr = conf.get(SpatialInputFormat.FilterMBR);
    if (filterMBRStr != null) {
      String[] parts = filterMBRStr.split(",");
      double[] coordinates = new double[parts.length];
      for (int i = 0; i < parts.length; i++) {
        coordinates[i] = Double.parseDouble(parts[i]);
      }
      // Half of the values are passed as the number of dimensions of the envelope
      this.filterMBR = new EnvelopeNDLite(coordinates.length / 2, coordinates);
    }
  }

  /**
   * Initializes the reader on a split using a plain configuration by wrapping it in a
   * freshly created task attempt context.
   * @param split the split of the master file to read
   * @param conf environment configuration
   * @throws IOException if an error happens while initializing the reader
   */
  public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.initialize(split, new TaskAttemptContextImpl(conf, new TaskAttemptID()));
  }

  /**
   * Initialize the reader given a path to the master file.
   * The entire file is read as a single split.
   * @param masterFilePath path to the master file
   * @param conf environment configuration
   * @throws IOException if an error happens while reading the file
   */
  public void initialize(Path masterFilePath, Configuration conf) throws IOException {
    FileSystem fileSystem = masterFilePath.getFileSystem(conf);
    long length = fileSystem.getFileStatus(masterFilePath).getLen();
    this.initialize(new FileSplit(masterFilePath, 0, length, new String[0]), conf);
  }

  /**
   * Advances to the next data line and parses it into a new {@link PartitionInfo}.
   * @return {@code true} if a partition was read, {@code false} at the end of the input
   * @throws IOException if the underlying line reader fails
   * @throws NumberFormatException if a numeric column cannot be parsed
   */
  @Override
  public boolean nextKeyValue() throws IOException {
    if (!lineReader.nextKeyValue()) {
      return false;
    }
    if (lineReader.getCurrentKey().get() == 0) {
      // The line at offset zero is the header, skip it
      if (!lineReader.nextKeyValue()) {
        return false;
      }
    }
    Text line = lineReader.getCurrentValue();
    if (numDimensions == 0) {
      // Infer the dimensions from the first data line that this reader sees. This must not be
      // tied to the header line: a split that does not start at offset zero never sees the
      // header and would otherwise keep zero dimensions and parse no coordinates.
      numDimensions = inferNumDimensions(line);
    }

    partitionInfo = new PartitionInfo();
    partitionInfo.setCoordinateDimension(numDimensions);
    // Each call to nextField consumes the first remaining column of the line
    partitionInfo.partitionId = Integer.parseInt(nextField(line));
    partitionInfo.filename = nextField(line);
    partitionInfo.setNumFeatures(Long.parseLong(nextField(line)));
    partitionInfo.setSize(Long.parseLong(nextField(line)));
    // Drop the WKT column; its value is not needed
    nextField(line);
    for (int d = 0; d < numDimensions; d++) {
      partitionInfo.setMinCoord(d, Double.parseDouble(nextField(line)));
    }
    for (int d = 0; d < numDimensions; d++) {
      partitionInfo.setMaxCoord(d, Double.parseDouble(nextField(line)));
    }
    return true;
  }

  /**
   * Removes the first remaining field from the given line and returns it.
   * Relies on {@code CSVFeatureReader.deleteAttribute} modifying the line in place.
   * @param line the line to take the field from
   * @return the text of the removed field
   */
  private static String nextField(Text line) {
    return CSVFeatureReader.deleteAttribute(line, FieldSeparator, 0, CSVFeatureReader.DefaultQuoteCharacters);
  }

  /**
   * Computes the number of dimensions from the number of columns in a data line. After the
   * leading non-coordinate columns, the line has one minimum and one maximum per dimension.
   * @param line a complete data line that has not been consumed yet
   * @return the number of dimensions of the partition envelope
   */
  private static int inferNumDimensions(Text line) {
    int numColumns = 1;
    for (int i = 0; i < line.getLength(); i++) {
      if (line.charAt(i) == FieldSeparator) {
        numColumns++;
      }
    }
    return (numColumns - NumNonCoordinateColumns) / 2;
  }

  /**
   * Returns a new envelope created from the most recently read partition.
   * @return an envelope built from the current partition information
   */
  @Override
  public EnvelopeND getCurrentKey() {
    return new EnvelopeND(DefaultGeometryFactory, partitionInfo);
  }

  /**
   * Returns the most recently read partition. A new object is created for every line.
   * @return the current partition, or {@code null} if no line has been read yet
   */
  @Override
  public PartitionInfo getCurrentValue() {
    return partitionInfo;
  }

  /**
   * Reports the progress of the underlying line reader.
   * @return the progress as reported by the line reader
   * @throws IOException if the line reader fails to report its progress
   */
  @Override
  public float getProgress() throws IOException {
    return lineReader.getProgress();
  }

  /**
   * Closes the underlying line reader.
   * @throws IOException if the line reader fails to close
   */
  @Override
  public void close() throws IOException {
    lineReader.close();
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy