All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.io.DBFWriter Maven / Gradle / Ivy

There is a newer version: 0.10.1-RC2
Show newest version
/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.io;

import edu.ucr.cs.bdlab.geolite.Feature;
import edu.ucr.cs.bdlab.geolite.IFeature;
import edu.ucr.cs.bdlab.util.IOUtil;
import edu.ucr.cs.bdlab.util.MathUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.LocalDateTime;

/**
 * A record writer that writes features in the DBF file format. It should be accompanied by a
 * {@link ShapefileGeometryWriter} to produce a complete shapefile.
 */
public class DBFWriter extends RecordWriter<Object, IFeature> {

  /**Number of bytes reserved for a field name in a field descriptor*/
  private static final int FIELD_NAME_LENGTH = 11;

  /**Minimum prefix length accepted by {@link File#createTempFile(String, String)}*/
  private static final int MIN_TEMP_PREFIX_LENGTH = 3;

  /**Path to the desired DBF file*/
  private Path dbfPath;

  /**Configuration of the job*/
  private Configuration conf;

  /**A temporary file for writing the records until all records have been written*/
  protected File tempDbfFile;

  /**The output stream that writes to the temporary DBF file*/
  protected DataOutputStream tempDbfOut;

  /**File header is updated while the features are written and is flushed to disk at the very end*/
  protected DBFReader.DBFHeader header;

  /**A temporary reusable standard feature object*/
  protected Feature feature;

  /**
   * Initializes the record writer to write to the given DBF file.
   * Records are first spooled to a local temporary file; the final DBF file is only created in
   * {@link #close(TaskAttemptContext)} because the header needs information (number of records, field widths)
   * that is not known until all records have been seen.
   * @param dbfPath the path of the DBF file to create upon closure
   * @param conf the configuration used to resolve the file system of {@code dbfPath}
   * @throws IOException if the temporary file cannot be created or opened
   */
  public void initialize(Path dbfPath, Configuration conf) throws IOException {
    this.dbfPath = dbfPath;
    this.conf = conf;

    // File.createTempFile rejects prefixes shorter than three characters, so pad short file names
    StringBuilder prefix = new StringBuilder(dbfPath.getName());
    while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
      prefix.append('_');
    }
    tempDbfFile = File.createTempFile(prefix.toString(), ".dbf.tmp");
    // Register the fallback cleanup before opening the stream so a failed open does not leave the file behind
    tempDbfFile.deleteOnExit();
    tempDbfOut = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempDbfFile)));

    feature = new Feature();
    header = new DBFReader.DBFHeader();
  }

  /**
   * Appends the attributes of one feature to the temporary file and widens the text/numeric field lengths
   * in the header so that they can hold the values of this feature.
   * The first feature written determines the field names and types.
   * @param key ignored
   * @param value the feature whose attribute values are written
   * @throws IOException if writing to the temporary file fails
   * @throws RuntimeException if the first feature has an attribute with a null value or an unsupported type
   */
  @Override
  public void write(Object key, IFeature value) throws IOException, InterruptedException {
    if (header.fieldDescriptors == null) {
      // First feature: derive the descriptors before touching any state so that a failure
      // (unsupported type) does not leave a half-initialized header or a stray header in the temp file
      DBFReader.FieldDescriptor[] descriptors = createFieldDescriptors(value);
      feature.copyAttributeMetadata(value);
      feature.writeHeader(tempDbfOut);
      header.fieldDescriptors = descriptors;
    }
    // Write the feature attribute values to the temporary file
    feature.copyAttributeValues(value);
    feature.writeValue(tempDbfOut);
    // Count the record only after it was spooled so that the count matches the temp file contents
    header.numRecords++;
    // Widen the fields that are stored as text so that the longest value fits
    for (int iAttr = 0; iAttr < value.getNumAttributes(); iAttr++) {
      int width = textWidth(value.getAttributeValue(iAttr));
      if (width >= 0) {
        DBFReader.FieldDescriptor descriptor = header.fieldDescriptors[iAttr];
        descriptor.fieldLength = (short) Math.max(descriptor.fieldLength, width);
      }
    }
  }

  /**
   * Writes the final DBF file (header, all spooled records, end-of-file marker) and removes the temporary file.
   * If no record was written, a file with no field descriptors and no records is produced.
   * This method is expected to be called once; the temporary file is deleted when it returns.
   * @param context not used
   * @throws IOException if reading the temporary file or writing the DBF file fails
   */
  @Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    try {
      tempDbfOut.close();
      if (header.fieldDescriptors == null) {
        // No feature was ever written; emit a header without fields instead of failing
        header.fieldDescriptors = new DBFReader.FieldDescriptor[0];
      }
      LocalDateTime now = LocalDateTime.now();
      header.version = 3;
      header.dateLastUpdatedYY = (short) (now.getYear() - 1900);
      header.dateLastUpdatedMM = (short) now.getMonthValue();
      header.dateLastUpdatedDD = (short) now.getDayOfMonth();

      // Calculate header size and record size
      header.headerSize = 32 /*Main header*/ +
          32 * header.fieldDescriptors.length /*field descriptors*/ +
          1 /*Header record terminator*/;
      header.recordSize = 1; // The marker byte written in front of every record
      for (DBFReader.FieldDescriptor field : header.fieldDescriptors) {
        header.recordSize += field.fieldLength;
      }

      FileSystem fileSystem = dbfPath.getFileSystem(conf);
      try (FSDataOutputStream dbfOut = fileSystem.create(dbfPath)) {
        writeHeader(dbfOut, header);
        if (header.numRecords > 0) {
          copyRecords(dbfOut);
        }
        // Write record terminator
        dbfOut.write(DBFReader.EOFMarker);
      }
    } finally {
      // Remove the spool file right away; deleteOnExit() registered in initialize() remains as a fallback
      if (tempDbfFile != null) {
        tempDbfFile.delete();
      }
    }
  }

  /**
   * Writes the main header, one descriptor per field, and the header terminator.
   * The main header and each field descriptor occupy 32 bytes.
   * @param out the stream to write the header to
   * @param header the header to write; its {@code fieldDescriptors} must not be null
   * @throws IOException if writing fails
   */
  protected void writeHeader(DataOutputStream out, DBFReader.DBFHeader header) throws IOException {
    out.write(header.version);
    out.write(header.dateLastUpdatedYY);
    out.write(header.dateLastUpdatedMM);
    out.write(header.dateLastUpdatedDD);
    IOUtil.writeIntLittleEndian(out, header.numRecords);
    IOUtil.writeShortLittleEndian(out, (short) header.headerSize);
    IOUtil.writeShortLittleEndian(out, (short) header.recordSize);
    // Skip 16 bytes
    out.writeLong(0);
    out.writeLong(0);
    out.write(0); // No special flags
    out.write(0); // Code page mark
    out.writeShort(0); // Reserved. Filled with zeros
    // Write field descriptors
    int fieldDisplacement = 0;
    for (DBFReader.FieldDescriptor descriptor : header.fieldDescriptors) {
      assert descriptor.fieldName.length == FIELD_NAME_LENGTH;
      out.write(descriptor.fieldName);
      out.write(descriptor.fieldType);
      // NOTE(review): writeInt emits big-endian while the other multi-byte header values above use the
      // little-endian helpers; kept as is -- confirm whether any reader relies on this value
      out.writeInt(fieldDisplacement); // Field displacement in record
      out.write(descriptor.fieldLength); // Only the low-order byte is written
      out.write(descriptor.decimalCount);
      out.write(0); // Field flags
      out.writeInt(0); // Value of autoincrement Next value
      out.write(0); // Value of autoincrement Step value
      out.writeLong(0); // Reserved (8-bytes)
      fieldDisplacement += descriptor.fieldLength;
    }
    out.write(0x0D); // Header record terminator
  }

  /**
   * Writes the attribute values of one feature as a fixed-width record body (without the leading record marker).
   * Strings are left-aligned and integers right-aligned, both padded with spaces up to the field length stored in
   * {@link #header}; floats and doubles are written as 4 and 8 little-endian bytes respectively, matching the
   * field lengths assigned when the descriptors were created.
   * @param out the stream to write the record to
   * @param feature the feature whose attribute values are written
   * @throws IOException if writing fails
   * @throws RuntimeException if a value is null or of an unsupported type
   */
  protected void writeRecord(DataOutputStream out, Feature feature) throws IOException {
    for (int iAttr = 0; iAttr < feature.getNumAttributes(); iAttr++) {
      Object value = feature.getAttributeValue(iAttr);
      int fieldLength = header.fieldDescriptors[iAttr].fieldLength;
      if (value instanceof String) {
        byte[] valBytes = ((String) value).getBytes();
        out.write(valBytes);
        // Append spaces
        writeSpaces(out, fieldLength - valBytes.length);
      } else if (value instanceof Integer || value instanceof Long) {
        // Pad according to the bytes that are actually written (including a minus sign, if any)
        byte[] valBytes = String.valueOf(value).getBytes();
        // Prepend spaces
        writeSpaces(out, fieldLength - valBytes.length);
        out.write(valBytes);
      } else if (value instanceof Float) {
        // NOTE(review): binary encoding chosen to match the 4-byte 'F' field declared for Float values;
        // confirm that DBFReader decodes 'F' fields the same way
        IOUtil.writeIntLittleEndian(out, Float.floatToIntBits((Float) value));
      } else if (value instanceof Double) {
        IOUtil.writeDoubleLittleEndian(out, (Double) value);
      } else {
        throw new RuntimeException("Unsupported value type: " + describeType(value));
      }
    }
  }

  /**Copies all spooled records from the temporary file to the given output, each prefixed by the record marker*/
  private void copyRecords(DataOutputStream dbfOut) throws IOException {
    try (DataInputStream tempDbfIn =
             new DataInputStream(new BufferedInputStream(new FileInputStream(tempDbfFile)))) {
      // Create a new feature to make sure the geometry in it is not reused outside this class.
      Feature f = new Feature();
      f.readHeader(tempDbfIn);
      for (int i = 0; i < header.numRecords; i++) {
        f.readValue(tempDbfIn);
        dbfOut.write(DBFReader.ValidRecordMarker);
        writeRecord(dbfOut, f);
      }
    }
  }

  /**Creates one field descriptor per attribute of the given feature, based on attribute names and value types*/
  private static DBFReader.FieldDescriptor[] createFieldDescriptors(IFeature value) {
    DBFReader.FieldDescriptor[] descriptors = new DBFReader.FieldDescriptor[value.getNumAttributes()];
    for (int iAttr = 0; iAttr < descriptors.length; iAttr++) {
      DBFReader.FieldDescriptor attr = new DBFReader.FieldDescriptor();
      // Fixed-size name buffer, initially filled with zeros
      attr.fieldName = new byte[FIELD_NAME_LENGTH];
      String name = value.getAttributeName(iAttr);
      if (name != null) {
        // Crop the name to the size of the buffer
        byte[] fullName = name.getBytes();
        System.arraycopy(fullName, 0, attr.fieldName, 0, Math.min(FIELD_NAME_LENGTH, fullName.length));
      }
      Object fieldValue = value.getAttributeValue(iAttr);
      if (fieldValue instanceof String) {
        attr.fieldType = 'C';
        attr.fieldLength = (short) textWidth(fieldValue);
      } else if (fieldValue instanceof Integer || fieldValue instanceof Long) {
        attr.fieldType = 'N';
        attr.fieldLength = (short) textWidth(fieldValue);
      } else if (fieldValue instanceof Float) {
        attr.fieldType = 'F';
        attr.fieldLength = 4; // 32-bit floating point
      } else if (fieldValue instanceof Double) {
        attr.fieldType = 'O';
        attr.fieldLength = 8; // 64-bit floating point
      } else {
        throw new RuntimeException("Unsupported attribute value type: " + describeType(fieldValue));
      }
      descriptors[iAttr] = attr;
    }
    return descriptors;
  }

  /**
   * Returns the number of bytes {@link #writeRecord(DataOutputStream, Feature)} emits for the given value
   * before padding, or -1 if the value is not stored as text (i.e., it is not a String, Integer, or Long).
   */
  private static int textWidth(Object fieldValue) {
    if (fieldValue instanceof String) {
      // Measure encoded bytes, not characters, because bytes are what end up in the record
      return ((String) fieldValue).getBytes().length;
    }
    if (fieldValue instanceof Integer || fieldValue instanceof Long) {
      return String.valueOf(fieldValue).getBytes().length;
    }
    return -1;
  }

  /**Writes {@code count} space characters; does nothing if {@code count} is not positive*/
  private static void writeSpaces(DataOutputStream out, int count) throws IOException {
    for (int i = 0; i < count; i++) {
      out.write(' ');
    }
  }

  /**Describes the runtime type of a value for error messages without failing on null*/
  private static String describeType(Object value) {
    return value == null ? "null" : String.valueOf(value.getClass());
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy