All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.indexing.MasterFileReader Maven / Gradle / Ivy

There is a newer version: 0.10.1-RC2
Show newest version
package edu.ucr.cs.bdlab.beast.indexing;

import edu.ucr.cs.bdlab.beast.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.io.CSVFeatureReader;
import edu.ucr.cs.bdlab.beast.io.FeatureReader;
import edu.ucr.cs.bdlab.beast.io.SpatialInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

import java.io.IOException;

/**
 * Reads a tab-separated master file line-by-line and exposes each data line as a
 * {@link PartitionInfo}. The first line of the file is treated as a header and skipped.
 * Each data line is consumed field-by-field in this order: partition ID, file name,
 * number of features, size, one geometry column that is discarded, then
 * {@code numDimensions} minimum coordinates followed by {@code numDimensions}
 * maximum coordinates.
 */
public class MasterFileReader extends FeatureReader {

  /**An underlying reader to read the input line-by-line*/
  protected final LineRecordReader lineReader = new LineRecordReader();

  /**The field separator for master files*/
  protected final static char FieldSeparator = '\t';

  /**
   * An optional attribute to filter the geometries in the input file.
   * NOTE(review): this value is parsed in {@code initialize} but is not read anywhere
   * else in this class; confirm whether filtering is expected to happen here.
   */
  private EnvelopeNDLite filterMBR;

  /**Number of dimensions in the envelope*/
  private int numDimensions;

  /**Whether {@link #numDimensions} has already been inferred from a data line of the current input*/
  private boolean dimensionsKnown;

  /**The key (and value) to return*/
  private PartitionInfo partitionInfo;

  /**
   * Initializes the underlying line reader and parses the optional filter MBR from the
   * configuration entry {@link SpatialInputFormat#FilterMBR}. The entry is a
   * comma-separated list of numbers; half of its length is used as the number of dimensions.
   * @param inputSplit the split to read
   * @param taskAttemptContext the context that provides the configuration
   * @throws IOException if the underlying line reader fails to initialize
   */
  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    lineReader.initialize(inputSplit, taskAttemptContext);
    // A (re-)initialized reader must infer the number of dimensions from its new input
    dimensionsKnown = false;
    Configuration conf = taskAttemptContext.getConfiguration();
    String filterMBRStr = conf.get(SpatialInputFormat.FilterMBR);
    if (filterMBRStr != null) {
      String[] parts = filterMBRStr.split(",");
      double[] coordinates = new double[parts.length];
      for (int i = 0; i < parts.length; i++) {
        coordinates[i] = Double.parseDouble(parts[i]);
      }
      this.filterMBR = new EnvelopeNDLite(coordinates.length / 2, coordinates);
    }
  }

  /**
   * Initializes the reader from a split and a configuration by wrapping the
   * configuration in a new task attempt context.
   * @param split the split to read
   * @param conf environment configuration
   * @throws IOException if an error happens while opening the split
   */
  public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.initialize(split, new TaskAttemptContextImpl(conf, new TaskAttemptID()));
  }

  /**
   * Initialize the reader given a path to the master file.
   * The whole file is read as a single split.
   * @param masterFilePath path to the master file
   * @param conf environment configuration
   * @throws IOException if an error happens while reading the file
   */
  public void initialize(Path masterFilePath, Configuration conf) throws IOException {
    FileSystem fileSystem = masterFilePath.getFileSystem(conf);
    long length = fileSystem.getFileStatus(masterFilePath).getLen();
    this.initialize(new FileSplit(masterFilePath, 0, length, new String[0]), conf);
  }

  /**
   * Advances to the next data line and parses it into a new {@link PartitionInfo}.
   * @return {@code true} if a partition was read, {@code false} at the end of the input
   * @throws IOException if the underlying line reader fails
   */
  @Override
  public boolean nextKeyValue() throws IOException {
    if (!lineReader.nextKeyValue()) {
      return false;
    }
    // A key of zero marks the first line of the file, which is the header; skip it
    if (lineReader.getCurrentKey().get() == 0 && !lineReader.nextKeyValue()) {
      return false;
    }
    Text line = lineReader.getCurrentValue();
    if (!dimensionsKnown) {
      // Infer the number of dimensions from the first data line this reader sees.
      // This is done regardless of whether the header was part of this split so that
      // a reader that starts in the middle of the file still parses the coordinates.
      // Five columns are not coordinates (ID, file name, feature count, size, geometry);
      // the remaining ones are split evenly between minimum and maximum coordinates.
      numDimensions = (countFields(line) - 5) / 2;
      dimensionsKnown = true;
    }
    partitionInfo = new PartitionInfo();
    partitionInfo.setCoordinateDimension(numDimensions);

    // Each call below consumes the leading field, so the order of calls is significant
    partitionInfo.partitionId = Integer.parseInt(popLeadingField(line));
    partitionInfo.filename = popLeadingField(line);
    partitionInfo.setNumFeatures(Long.parseLong(popLeadingField(line)));
    partitionInfo.setSize(Long.parseLong(popLeadingField(line)));
    // Drop the geometry (WKT) column; its value is not needed
    popLeadingField(line);
    for (int d = 0; d < numDimensions; d++) {
      partitionInfo.setMinCoord(d, Double.parseDouble(popLeadingField(line)));
    }
    for (int d = 0; d < numDimensions; d++) {
      partitionInfo.setMaxCoord(d, Double.parseDouble(popLeadingField(line)));
    }
    return true;
  }

  /**
   * Counts the fields in a line as the number of field separators plus one.
   * @param line the line to inspect; it is not modified
   * @return the number of separator-delimited fields in the line
   */
  private static int countFields(Text line) {
    int numFields = 1;
    for (int pos = 0; pos < line.getLength(); pos++) {
      if (line.charAt(pos) == FieldSeparator) {
        numFields++;
      }
    }
    return numFields;
  }

  /**
   * Extracts the field at index zero from the given line using
   * {@link CSVFeatureReader#deleteAttribute} with the master file separator and the
   * default quote characters. The callers rely on that method removing the returned
   * field from {@code line} so that successive calls return successive fields.
   * @param line the line to take the leading field from
   * @return the value of the leading field
   */
  private static String popLeadingField(Text line) {
    return CSVFeatureReader.deleteAttribute(line, FieldSeparator, 0, CSVFeatureReader.DefaultQuoteCharacters);
  }

  /**
   * Returns a new envelope constructed from the current partition.
   * @return a new {@link EnvelopeND} created from the current {@link PartitionInfo}
   */
  @Override
  public EnvelopeND getCurrentKey() {
    return new EnvelopeND(DefaultGeometryFactory, partitionInfo);
  }

  /**
   * Returns the partition parsed by the last successful call to {@link #nextKeyValue()}.
   * @return the current partition information
   */
  @Override
  public PartitionInfo getCurrentValue() {
    return partitionInfo;
  }

  /**
   * Reports the progress of the underlying line reader.
   * @return the progress as reported by the line reader
   * @throws IOException if the line reader fails to report its progress
   */
  @Override
  public float getProgress() throws IOException {
    return lineReader.getProgress();
  }

  /**
   * Closes the underlying line reader.
   * @throws IOException if the line reader fails to close
   */
  @Override
  public void close() throws IOException {
    lineReader.close();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy