All downloads are free. Search and download functionality uses the official Maven repository.

edu.ucr.cs.bdlab.beast.geolite.Feature Maven / Gradle / Ivy

/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.geolite;

import edu.ucr.cs.bdlab.beast.util.BitArray;
import org.locationtech.jts.geom.Geometry;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import java.util.SimpleTimeZone;
import java.util.TimeZone;

/**
 * A standard feature implementation that balances the parsing time and access time.
 * <p>
 * Attribute values are kept in serialized form inside a single {@link ByteBuffer}. The values of the non-null
 * attributes are laid out one after another in attribute order; null attributes occupy no space and are tracked
 * by a bit array. Fixed-size values are stored in their binary form (4 bytes for integers, 8 bytes for longs,
 * doubles and timestamps, 1 byte for booleans). A string is stored as a 2-byte length followed by its UTF-8 bytes.
 * <p>
 * Values are read with absolute getters, so the buffer's {@code limit} marks the end of the stored values and its
 * {@code position} is the place where the next value is appended.
 * <p>
 * Limits: {@link #getAttributeOffset(int)} returns a {@code short} and string lengths are stored as a signed
 * 2-byte value. Hence, a value must start at an offset of at most {@link Short#MAX_VALUE} and a string must encode
 * to at most {@link Short#MAX_VALUE} bytes. Values that violate either limit are rejected when they are stored.
 */
public class Feature implements IFeature {

  /**The largest attribute count that fits the single unsigned byte written by {@link #writeHeader(DataOutput)}*/
  private static final int MAX_SERIALIZED_ATTRIBUTES = 255;

  /**The types of attributes. This helps in parsing them from the ByteBuffer*/
  protected FieldType[] attributeTypes;

  /**All attribute values*/
  protected ByteBuffer attributeValues;

  /**A bitmask that tells whether an attribute is null or not*/
  protected BitArray attributeExists;

  /**An optional list of attribute names*/
  protected String[] attributeNames;

  /**The geometry associated with this feature*/
  protected Geometry geometry;

  /**Maps each attribute name to its index for fast lookup. Built lazily; {@code null} means "not built yet".*/
  private Map<String, Integer> attributeNameToIndex;

  /**The time zone assigned to timestamp values returned by {@link #getAttributeValue(int)}*/
  public static final TimeZone UTC = new SimpleTimeZone(0, "UTC");

  /** Creates a feature with no geometry and no attributes. */
  public Feature() {}

  /**
   * Creates a feature with the given geometry and no attributes.
   * @param geometry the geometry of the new feature
   */
  public Feature(Geometry geometry) {
    this.geometry = geometry;
  }

  /**
   * Creates a feature that copies the attribute metadata, attribute values, and geometry of another feature.
   * @param f the feature to copy
   */
  public Feature(IFeature f) {
    this.copyAttributeMetadata(f);
    this.copyAttributeValues(f);
    this.setGeometry(f.getGeometry());
  }

  @Override
  public Geometry getGeometry() {
    return this.geometry;
  }

  /**
   * Sets the geometry of this feature.
   * @param geometry the new geometry
   */
  public void setGeometry(Geometry geometry) {
    this.geometry = geometry;
  }

  /**
   * Populate the metadata (attribute types and names) from another IFeature
   * @param feature the feature to copy its attribute metadata
   */
  public void copyAttributeMetadata(IFeature feature) {
    // A previously built name lookup describes the old attribute names; never carry it over
    this.attributeNameToIndex = null;
    int numAttributes = feature.getNumAttributes();
    if (numAttributes == 0) {
      attributeTypes = null;
      attributeValues = null;
      attributeNames = null;
      attributeExists = null;
      return;
    }
    if (feature instanceof Feature) {
      // An efficient method to copy attribute metadata from another feature
      Feature that = (Feature) feature;
      this.attributeTypes = Arrays.copyOf(that.attributeTypes, numAttributes);
      this.attributeNames = Arrays.copyOf(that.attributeNames, numAttributes);
      if (that.attributeNameToIndex != null)
        this.attributeNameToIndex = new HashMap<>(that.attributeNameToIndex);
      this.attributeExists = new BitArray(that.attributeExists);
    } else {
      // A generic method to copy metadata from another feature
      setNumAttribute(numAttributes);
      for (int iAttr = 0; iAttr < numAttributes; iAttr++) {
        this.attributeNames[iAttr] = feature.getAttributeName(iAttr);
        this.attributeTypes[iAttr] = feature.getAttributeType(iAttr);
      }
    }
  }

  /**
   * Populate the data of this feature (geometry and attribute values) from another feature.
   * The attribute metadata of this feature is expected to match the given feature already, e.g., by calling
   * {@link #copyAttributeMetadata(IFeature)} first.
   * @param feature the feature to copy its attribute values
   * @throws IllegalArgumentException if a string value encodes to more than {@link Short#MAX_VALUE} bytes
   * @throws IllegalStateException if a value would start beyond offset {@link Short#MAX_VALUE}
   */
  public void copyAttributeValues(IFeature feature) {
    assert feature.getNumAttributes() == this.getNumAttributes();
    // Set geometry
    this.geometry = feature.getGeometry();
    // Reuse the value buffer if there is one by rewinding the append position
    if (this.attributeValues != null)
      this.attributeValues.position(0);
    int numAttributes = this.getNumAttributes();
    for (int iAttr = 0; iAttr < numAttributes; iAttr++) {
      Object attrValue = feature.getAttributeValue(iAttr);
      // A null value only clears the corresponding bit; nothing is appended to the attribute values
      attributeExists.set(iAttr, attrValue != null);
      if (attrValue != null && !putValue(feature.getAttributeType(iAttr), attrValue))
        throw new RuntimeException("Unsupported type " + attrValue.getClass());
    }
    // Drop leftover bytes of the previous record when this one stored fewer (or no) values
    if (this.attributeValues != null)
      this.attributeValues.limit(this.attributeValues.position());
  }

  /**
   * Appends the binary form of one non-null value at the end of the attribute values.
   * All conversions and checks run before the buffer is touched so a rejected value leaves the buffer unchanged.
   * @param type the type that selects the binary form
   * @param value the non-null value to store
   * @return {@code true} if the value was stored, {@code false} if the type is not supported
   * @throws IllegalArgumentException if a string value encodes to more than {@link Short#MAX_VALUE} bytes
   * @throws IllegalStateException if the value would start beyond offset {@link Short#MAX_VALUE}
   */
  private boolean putValue(FieldType type, Object value) {
    // getAttributeOffset returns a short; a value that starts beyond that range could never be read back
    int start = attributeValues == null ? 0 : attributeValues.position();
    if (start > Short.MAX_VALUE)
      throw new IllegalStateException(String.format(
          "Cannot store an attribute value at offset %d; the maximum offset is %d", start, Short.MAX_VALUE));
    switch (type) {
      case StringType: {
        // Use a fixed charset so that the bytes mean the same on every machine
        byte[] strBytes = ((String) value).getBytes(StandardCharsets.UTF_8);
        // The length prefix is read back as a signed short
        if (strBytes.length > Short.MAX_VALUE)
          throw new IllegalArgumentException(String.format(
              "String attribute of %d bytes exceeds the maximum of %d bytes", strBytes.length, Short.MAX_VALUE));
        expandAttributeValues(2 + strBytes.length);
        attributeValues.putShort((short) strBytes.length);
        attributeValues.put(strBytes);
        return true;
      }
      case IntegerType: {
        int intValue = ((Number) value).intValue();
        expandAttributeValues(4);
        attributeValues.putInt(intValue);
        return true;
      }
      case LongType: {
        long longValue = ((Number) value).longValue();
        expandAttributeValues(8);
        attributeValues.putLong(longValue);
        return true;
      }
      case DoubleType: {
        double doubleValue = ((Number) value).doubleValue();
        expandAttributeValues(8);
        attributeValues.putDouble(doubleValue);
        return true;
      }
      case TimestampType: {
        // Milliseconds since the epoch do not depend on the time zone of the calendar
        long millis = ((Calendar) value).getTimeInMillis();
        expandAttributeValues(8);
        attributeValues.putLong(millis);
        return true;
      }
      case BooleanType: {
        byte flag = (byte) ((Boolean) value ? 1 : 0);
        expandAttributeValues(1);
        attributeValues.put(flag);
        return true;
      }
      default:
        return false;
    }
  }

  /**
   * Expand the byte buffer of attribute values with the given number of bytes.
   * The buffer position is kept and the limit is moved to position + numBytesToExpand.
   * @param numBytesToExpand the number of bytes to add to the attribute values
   */
  private void expandAttributeValues(int numBytesToExpand) {
    if (attributeValues == null) {
      // Allocate twice the requested size to leave room for following values
      attributeValues = ByteBuffer.allocate(numBytesToExpand * 2);
      attributeValues.limit(numBytesToExpand);
      return;
    }
    int currentPosition = attributeValues.position();
    int newSize = currentPosition + numBytesToExpand;
    if (newSize > attributeValues.capacity()) {
      // Grow to double the required size and carry over the existing bytes
      byte[] newArray = Arrays.copyOf(attributeValues.array(), newSize * 2);
      attributeValues = ByteBuffer.wrap(newArray);
      attributeValues.position(currentPosition);
    }
    // Expand the limit
    attributeValues.limit(newSize);
  }

  /**
   * Resizes the metadata arrays to the given number of attributes. If the number of attributes does not change,
   * nothing is modified. Otherwise, the types, names, and null bitmask are recreated and the name lookup is dropped.
   * @param numAttributes the desired number of attributes
   */
  private void setNumAttribute(int numAttributes) {
    if (numAttributes != getNumAttributes()) {
      attributeTypes = new FieldType[numAttributes];
      attributeNames = new String[numAttributes];
      attributeNameToIndex = null; // Invalidate the cache
      attributeExists = new BitArray(numAttributes);
    }
  }

  /**
   * Returns the offset of an attribute in the byte buffer array. The offset is found by skipping the stored
   * values of all preceding non-null attributes.
   * @param iAttr the index of the attribute to return its offset in the values array
   * @return the offset of the value of that given attribute in the array of values.
   */
  protected short getAttributeOffset(int iAttr) {
    // Accumulate in an int so that an out-of-range offset is detected below rather than wrapping midway
    int offset = 0;
    for (int iPrev = 0; iPrev < iAttr; iPrev++) {
      // Skip the attribute if it is null
      if (!attributeExists.get(iPrev))
        continue;
      int length = attributeTypes[iPrev].size;
      // Check for variable-size attributes
      if (length == -1) {
        length = attributeValues.getShort(offset);
        offset += 2;
      }
      offset += length;
    }
    assert offset >= 0 && offset <= Short.MAX_VALUE : String.format("Invalid offset %d", offset);
    assert offset < attributeValues.limit() :
        String.format("Offset %d is out of bound %d", offset, attributeValues.limit());
    return (short) offset;
  }

  @Override
  public FieldType getAttributeType(int i) {
    return attributeTypes[i];
  }

  /**
   * Returns the value of the attribute at the given index.
   * @param iAttr the index of the attribute
   * @return the decoded value, or {@code null} if the attribute is null or the index is not less than the number
   *   of attributes. Timestamps are returned as a {@link GregorianCalendar} in the {@link #UTC} time zone.
   */
  @Override
  public Object getAttributeValue(int iAttr) {
    if (iAttr >= getNumAttributes())
      return null;
    if (!attributeExists.get(iAttr))
      return null;
    int offset = getAttributeOffset(iAttr);
    switch (attributeTypes[iAttr]) {
      case StringType:
        short stringLength = attributeValues.getShort(offset);
        assert offset + 2 + stringLength <= attributeValues.limit() : "String too long "+stringLength;
        return new String(attributeValues.array(), offset + 2, stringLength, StandardCharsets.UTF_8);
      case IntegerType:
        return attributeValues.getInt(offset);
      case LongType:
        return attributeValues.getLong(offset);
      case DoubleType:
        return attributeValues.getDouble(offset);
      case TimestampType:
        GregorianCalendar c = new GregorianCalendar(UTC);
        c.setTimeInMillis(attributeValues.getLong(offset));
        return c;
      case BooleanType:
        return attributeValues.get(offset) == 1;
      default:
        throw new RuntimeException("Unsupported type " + attributeTypes[iAttr]);
    }
  }

  /**
   * Returns the value of the attribute with the given name. If several attributes share the name, the one with
   * the largest index is used.
   * @param name the name of the attribute
   * @return the value of that attribute, or {@code null} if the value is null or there is no such attribute
   */
  @Override
  public Object getAttributeValue(String name) {
    if (getNumAttributes() == 0)
      return null;
    if (attributeNameToIndex == null) {
      // Lazily initialize the attribute name to index map
      Map<String, Integer> nameToIndex = new HashMap<>();
      for (int iAttr = 0; iAttr < attributeNames.length; iAttr++)
        nameToIndex.put(attributeNames[iAttr], iAttr);
      this.attributeNameToIndex = nameToIndex;
    }
    Integer index = attributeNameToIndex.get(name);
    return index == null ? null : getAttributeValue(index.intValue());
  }

  @Override
  public int getNumAttributes() {
    return attributeTypes == null ? 0 : attributeTypes.length;
  }

  @Override
  public String getAttributeName(int iAttr) {
    return attributeNames[iAttr];
  }

  /**
   * Computes the storage size of this feature as the sum of the geometry size reported by
   * {@link GeometryHelper}, the number of value bytes plus four, one byte per attribute type, and the length of
   * each attribute name plus one. An unnamed attribute is counted with the placeholder name that
   * {@link #writeHeader(DataOutput)} writes for it.
   * @return the computed size in bytes
   */
  @Override
  public int getStorageSize() {
    int storageSize = GeometryHelper.getGeometryStorageSize(getGeometry());
    if (getNumAttributes() > 0) {
      // The value buffer is not allocated until the first non-null value is stored
      int numValueBytes = attributeValues == null ? 0 : attributeValues.limit();
      storageSize += numValueBytes + 4;
      storageSize += attributeTypes.length;
      for (int iAttr = 0; iAttr < attributeNames.length; iAttr++) {
        String name = attributeNames[iAttr];
        storageSize += (name == null ? ("attr#" + iAttr).length() : name.length()) + 1;
      }
    }
    return storageSize;
  }

  /**
   * Write only the header (field types, lengths, and names) with no values or geometries. This is useful when storing
   * a large set of features in a file where we can write the header once and use the method
   * {@link #writeValue(DataOutput, boolean)} to write only the values for each record
   * @param out the output to write the header to
   * @throws IOException if an error happens while writing to the given output or if the feature has more
   *   attributes than what fits in the one-byte attribute count
   */
  public void writeHeader(DataOutput out) throws IOException {
    int numAttributes = getNumAttributes();
    // The count is written as one byte and read back as an unsigned byte; a larger count would be truncated
    if (numAttributes > MAX_SERIALIZED_ATTRIBUTES)
      throw new IOException(String.format("Cannot write a header with %d attributes; the maximum is %d",
          numAttributes, MAX_SERIALIZED_ATTRIBUTES));
    out.writeByte(numAttributes);
    if (numAttributes > 0) {
      // Write attribute types
      for (FieldType f : attributeTypes)
        out.writeByte(f.ordinal());
      // Write attribute names; an unnamed attribute gets a placeholder name based on its index
      for (int iAttr = 0; iAttr < numAttributes; iAttr++) {
        String name = getAttributeName(iAttr);
        out.writeUTF(name == null ? "attr#" + iAttr : name);
      }
    }
  }

  /**
   * Reader the header back from the given input.
   * @param in the input to read the header from
   * @throws IOException if an error happens while reading the header
   */
  public void readHeader(DataInput in) throws IOException {
    int numAttributes = in.readUnsignedByte();
    setNumAttribute(numAttributes);
    if (numAttributes > 0) {
      // Types are stored as ordinals of FieldType
      FieldType[] allTypes = FieldType.values();
      for (int iAttr = 0; iAttr < numAttributes; iAttr++)
        attributeTypes[iAttr] = allTypes[in.readByte()];

      // Read attribute names
      for (int iAttr = 0; iAttr < attributeNames.length; iAttr++) {
        String name = in.readUTF();
        // Keep the name lookup only if every name is unchanged
        if (attributeNameToIndex != null && !name.equals(attributeNames[iAttr]))
          attributeNameToIndex = null; // Invalidate the cache
        attributeNames[iAttr] = name;
      }
    }
  }

  /**
   * Write the geometry and field values to the output. It is assumed that the field type is stored once using the
   * {@link #writeHeader(DataOutput)} method.
   * @param out the output to write the attribute values to
   * @param includeSRID whether to include the SRID in the serialized value or not
   * @throws IOException if an error happens while writing the attribute values
   */
  public void writeValue(DataOutput out, boolean includeSRID) throws IOException {
    if (getNumAttributes() > 0) {
      if (attributeValues != null) {
        // Write the value length
        out.writeInt(attributeValues.limit());
        // Write the values
        out.write(attributeValues.array(), 0, attributeValues.limit());
      } else {
        out.writeInt(0);
      }
      attributeExists.writeBitsMinimal(out);
    }
    // A missing geometry is written as an empty geometry
    new GeometryWriter().write(geometry == null? EmptyGeometry.instance : geometry, out, includeSRID);
  }

  /**
   * Read only the value part of the feature from the given {@link DataInput}.
   * The header must be already set, e.g., using {@link #readHeader(DataInput)}.
   * @param in the input to read the attribute values from
   * @param reader the geometry reader used to parse the geometry from the input
   * @throws IOException if an error happens while reading the values
   */
  public void readValue(DataInput in, GeometryReader reader) throws IOException {
    if (getNumAttributes() > 0) {
      // Read value length
      int valueSize = in.readInt();
      if (valueSize < 0)
        throw new IOException("Invalid size of attribute values " + valueSize);
      // Initialize the buffer to hold the values; reuse the existing one if it is big enough
      if (attributeValues == null || valueSize > attributeValues.capacity()) {
        attributeValues = ByteBuffer.allocate(valueSize);
      } else {
        attributeValues.clear();
        attributeValues.limit(valueSize);
      }

      // Read the values as a byte array
      in.readFully(attributeValues.array(), 0, valueSize);
      // Attributes appended later must go after the values just read, not on top of them
      attributeValues.position(valueSize);
      if (attributeExists == null)
        attributeExists = new BitArray(getNumAttributes());
      attributeExists.readBitsMinimal(in);
    }

    // Read the geometry value
    geometry = reader.parse(in);
  }

  /**
   * Read only the value part of the feature using {@link GeometryReader#DefaultInstance} to parse the geometry.
   * @param in the input to read the attribute values from
   * @throws IOException if an error happens while reading the values
   */
  public void readValue(DataInput in) throws IOException {
    this.readValue(in, GeometryReader.DefaultInstance);
  }

  /**
   * Write both the header (field types, names, and lengths) and value (geometry and attribute values) to the given
   * output. This is helpful when writing a single feature or when the feature is passed between machines in Spark RDD
   * or Hadoop MapReduce.
   * @param out the output to write both the header and values to
   * @throws IOException if an error happens while writing the feature data
   */
  @Override
  public void write(DataOutput out) throws IOException {
    writeHeader(out);
    writeValue(out, true);
  }

  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    this.write(out);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    readHeader(in);
    readValue(in);
  }

  @Override
  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    readFields(in);
  }

  /**
   * Appends an attribute whose type is detected from the given value.
   * A {@code null} value results in a {@code null} attribute type; {@link #writeHeader(DataOutput)} cannot write
   * such an attribute. Use {@link #appendAttribute(String, Object, FieldType)} to give null values a type.
   * @param name the name of the attribute to append
   * @param value the value of the attribute to append
   * @return this feature to allow chaining calls
   */
  public Feature appendAttribute(String name, Object value) {
    appendAttribute(name, value, detectFiledType(value));
    return this;
  }

  /**
   * Chooses the field type that stores the given value.
   * @param value the value to inspect
   * @return the matching field type or {@code null} if the value is {@code null}
   * @throws RuntimeException if the class of the value is not supported
   */
  protected FieldType detectFiledType(Object value) {
    if (value == null)
      return null;
    if (value instanceof String)
      return FieldType.StringType;
    if (value instanceof Integer || value instanceof Short || value instanceof Byte)
      return FieldType.IntegerType;
    if (value instanceof Long)
      return FieldType.LongType;
    if (value instanceof Double || value instanceof Float)
      return FieldType.DoubleType;
    if (value instanceof Calendar)
      return FieldType.TimestampType;
    if (value instanceof Boolean)
      return FieldType.BooleanType;
    throw new RuntimeException("Unsupported value type " + value.getClass());
  }

  /**
   * Add an attribute to the list of attributes.
   * The value is stored before any metadata is changed so that a rejected value leaves this feature unchanged.
   * @param name the name of the attribute to append
   * @param value the value of the attribute to append
   * @param fieldType the type of the field that should match the value. This can be helpful if the value is null.
   * @throws IllegalArgumentException if a string value encodes to more than {@link Short#MAX_VALUE} bytes
   * @throws IllegalStateException if the value would start beyond offset {@link Short#MAX_VALUE}
   */
  public void appendAttribute(String name, Object value, FieldType fieldType) {
    int newAttributeIndex = getNumAttributes();
    int newNumAttributes = newAttributeIndex + 1;

    // Store the value; a null value takes no space in the attribute values
    if (value != null && !putValue(fieldType, value))
      throw new RuntimeException("Unsupported value type " + fieldType);

    // Add attribute name
    attributeNames = expand(attributeNames, newNumAttributes);
    attributeNames[newAttributeIndex] = name;
    if (attributeNameToIndex != null)
      attributeNameToIndex.put(name, newAttributeIndex);

    // Add attribute type
    attributeTypes = attributeTypes == null
        ? new FieldType[newNumAttributes]
        : Arrays.copyOf(attributeTypes, newNumAttributes);
    attributeTypes[newAttributeIndex] = fieldType;

    // Mark whether the new attribute has a value
    if (attributeExists == null)
      attributeExists = new BitArray(newNumAttributes);
    attributeExists.resize(newNumAttributes);
    attributeExists.set(newAttributeIndex, value != null);
  }

  @Override
  public String toString() {
    return IFeature.toString(this);
  }

  // NOTE(review): equals is overridden without a matching hashCode. A consistent hashCode depends on what
  // IFeature.equals compares, which is not visible here. Avoid using features as hash keys until one is added.
  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof IFeature))
      return false;
    return IFeature.equals(this, (IFeature) obj);
  }

  /**
   * Removes all attributes (types, names, and values) from this feature. The geometry is not changed.
   */
  public void clearAttributes() {
    attributeTypes = null;
    attributeValues = null;
    attributeNames = null;
    // The null bitmask and the name lookup describe the removed attributes; drop them as well
    attributeExists = null;
    attributeNameToIndex = null;
  }

  /**
   * If the given array is null or smaller than the given size, a new array is created of the given size and initialized
   * to the values in the given array if not null.
   * @param array An existing array or {@code null}
   * @param newSize the new upper limit of the size
   * @return either the same array if it is not smaller than the given size or a newly created array
   */
  private static String[] expand(String[] array, int newSize) {
    if (array == null)
      return new String[newSize];
    return array.length < newSize ? Arrays.copyOf(array, newSize) : array;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy