![JAR search and dependency download from the Maven repository](/logo.png)
edu.ucr.cs.bdlab.beast.geolite.Feature Maven / Gradle / Ivy
/*
* Copyright 2018 University of California, Riverside
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.ucr.cs.bdlab.beast.geolite;
import edu.ucr.cs.bdlab.beast.util.BitArray;
import org.locationtech.jts.geom.Geometry;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.nio.ByteBuffer;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import java.util.SimpleTimeZone;
import java.util.TimeZone;
/**
* A standard feature implementation that balances the parsing time and access time.
*/
public class Feature implements IFeature {
/** The type of each attribute, by attribute index. Used to decode the values stored in the byte buffer. */
protected FieldType[] attributeTypes;
/** The encoded values of the attributes that are not null, stored one after another in attribute order. */
protected ByteBuffer attributeValues;
/** A bitmask with one bit per attribute; a set bit means that the attribute has a non-null value. */
protected BitArray attributeExists;
/** An optional list of attribute names, by attribute index. */
protected String[] attributeNames;
/** The geometry associated with this feature. */
protected Geometry geometry;
/**
 * Maps each attribute name to its index for fast lookup. The map is built lazily on the first lookup by name and
 * {@code null} means that it has to be (re)built. Indexes are stored as bytes.
 */
private Map<String, Byte> attributeNameToIndex;
/** A time zone with a zero offset that is used for the timestamp values returned by this feature. */
public static final TimeZone UTC = new SimpleTimeZone(0, "UTC");
/** Creates a feature that has neither a geometry nor attributes. */
public Feature() {
}
/**
 * Creates a feature that holds the given geometry and no attributes.
 * @param geometry the geometry of the new feature
 */
public Feature(Geometry geometry) {
  this.geometry = geometry;
}
/**
 * Creates a copy of another feature: first its attribute metadata, then its attribute values, and finally its
 * geometry.
 * @param f the feature to copy
 */
public Feature(IFeature f) {
  copyAttributeMetadata(f);
  copyAttributeValues(f);
  setGeometry(f.getGeometry());
}
/**
 * Returns the geometry of this feature.
 * @return the geometry currently stored in this feature, which can be {@code null} if none was set
 */
@Override
public Geometry getGeometry() {
  return geometry;
}
/**
 * Replaces the geometry of this feature. The attributes are not touched.
 * @param geometry the new geometry of this feature
 */
public void setGeometry(Geometry geometry) {
  this.geometry = geometry;
}
/**
 * Populates the metadata (attribute types and names) from another IFeature. The attribute values are not copied
 * by this method. The cached name-to-index map is always brought in sync with the new names: it is either copied
 * from the other feature or invalidated so that it gets rebuilt on the next lookup by name.
 * @param feature the feature to copy its attribute metadata
 */
public void copyAttributeMetadata(IFeature feature) {
  int numAttributes = feature.getNumAttributes();
  if (numAttributes == 0) {
    attributeTypes = null;
    attributeValues = null;
    attributeNames = null;
    attributeExists = null;
    // The old names are gone, so the lookup built for them must go as well
    attributeNameToIndex = null;
    return;
  }
  if (feature instanceof Feature) {
    // An efficient method to copy attribute metadata from another feature
    Feature that = (Feature) feature;
    this.attributeTypes = Arrays.copyOf(that.attributeTypes, numAttributes);
    this.attributeNames = Arrays.copyOf(that.attributeNames, numAttributes);
    // Either take a copy of the other lookup or drop ours; keeping the old one would map stale names
    if (that.attributeNameToIndex != null)
      this.attributeNameToIndex = new HashMap<>(that.attributeNameToIndex);
    else
      this.attributeNameToIndex = null;
    this.attributeExists = new BitArray(that.attributeExists);
  } else {
    // A generic method to copy metadata from another feature
    setNumAttribute(numAttributes);
    // setNumAttribute only invalidates the lookup when the number of attributes changes,
    // but the names can differ even when the count is the same
    attributeNameToIndex = null;
    for (int iAttr = 0; iAttr < numAttributes; iAttr++) {
      this.attributeNames[iAttr] = feature.getAttributeName(iAttr);
      this.attributeTypes[iAttr] = feature.getAttributeType(iAttr);
    }
  }
}
/**
 * Populates the data of this feature (geometry and attribute values) from another feature. The attribute metadata
 * of this feature must already be set, e.g., using {@link #copyAttributeMetadata(IFeature)}. Any values that were
 * previously stored in this feature are overwritten.
 * @param feature the feature to copy its attribute values
 * @throws IllegalArgumentException if the encoded form of a string value is longer than {@link Short#MAX_VALUE}
 *   bytes, which is the longest length that the two-byte length prefix can represent
 * @throws RuntimeException if the other feature reports an attribute type that is not supported
 */
public void copyAttributeValues(IFeature feature) {
  assert feature.getNumAttributes() == this.getNumAttributes();
  // Set geometry
  this.geometry = feature.getGeometry();
  // Set attribute values
  // Reset the value buffer if it is already in use. The limit is reset as well so that no stale bytes are
  // reported when none of the copied attributes has a value.
  if (this.attributeValues != null) {
    this.attributeValues.position(0);
    this.attributeValues.limit(0);
  }
  for (int iAttr = 0; iAttr < this.getNumAttributes(); iAttr++) {
    Object attrValue = feature.getAttributeValue(iAttr);
    // Mark whether the attribute has a value; nothing is appended to the buffer for null values
    attributeExists.set(iAttr, attrValue != null);
    if (attrValue == null)
      continue;
    switch (feature.getAttributeType(iAttr)) {
      case DoubleType:
        expandAttributeValues(8);
        attributeValues.putDouble(((Number) attrValue).doubleValue());
        break;
      case IntegerType:
        expandAttributeValues(4);
        attributeValues.putInt(((Number) attrValue).intValue());
        break;
      case LongType:
        expandAttributeValues(8);
        attributeValues.putLong(((Number) attrValue).longValue());
        break;
      case StringType:
        byte[] strBytes = ((String) attrValue).getBytes();
        // The length is stored in a signed short; a longer string would silently corrupt the buffer
        if (strBytes.length > Short.MAX_VALUE)
          throw new IllegalArgumentException(String.format(
              "String attribute #%d is too long (%d bytes). Maximum is %d bytes",
              iAttr, strBytes.length, Short.MAX_VALUE));
        expandAttributeValues(2 + strBytes.length);
        attributeValues.putShort((short) strBytes.length);
        attributeValues.put(strBytes);
        break;
      case TimestampType:
        expandAttributeValues(8);
        // The time in milliseconds since the epoch does not depend on the time zone of the calendar,
        // so any Calendar can be stored directly without converting it to UTC first
        attributeValues.putLong(((Calendar) attrValue).getTimeInMillis());
        break;
      case BooleanType:
        expandAttributeValues(1);
        attributeValues.put((byte)((Boolean)attrValue? 1 : 0));
        break;
      default:
        throw new RuntimeException("Unsupported type " + attrValue.getClass());
    }
    // Sanity check: the value read back from the buffer should print the same as the source value
    assert this.getAttributeValue(iAttr).toString().equals(feature.getAttributeValue(iAttr).toString()) :
        String.format("Error in attribute #%d. '%s' != '%s'", iAttr,
            this.getAttributeValue(iAttr), feature.getAttributeValue(iAttr));
  }
}
/**
 * Makes room for the given number of bytes at the current position of the value buffer. When the buffer does not
 * exist it is created with twice the requested size. When it is too small, its contents are moved to a larger
 * array of twice the required size. In all cases, the limit ends up right after the newly reserved bytes.
 * @param numBytesToExpand the number of bytes to add to the attribute values
 */
private void expandAttributeValues(int numBytesToExpand) {
  if (attributeValues != null) {
    final int writePosition = attributeValues.position();
    final int requiredSize = writePosition + numBytesToExpand;
    if (requiredSize > attributeValues.capacity()) {
      // Grow the backing array and continue writing from the same position
      byte[] grown = expand(attributeValues.array(), requiredSize * 2);
      attributeValues = ByteBuffer.wrap(grown);
      attributeValues.position(writePosition);
    }
    // Expand the limit to cover the bytes that are about to be written
    attributeValues.limit(requiredSize);
  } else {
    // First use: allocate the buffer and expose only the requested bytes
    attributeValues = ByteBuffer.allocate(numBytesToExpand * 2);
    attributeValues.limit(numBytesToExpand);
  }
}
/**
 * Resizes the attribute metadata to hold the given number of attributes. Nothing happens when the number of
 * attributes does not change. Otherwise, the types, names, and existence bits are replaced with new empty ones
 * and the cached name-to-index map is dropped.
 * @param numAttributes the new number of attributes
 */
private void setNumAttribute(int numAttributes) {
  if (numAttributes == getNumAttributes())
    return;
  attributeTypes = new FieldType[numAttributes];
  attributeNames = new String[numAttributes];
  // Invalidate the cache since the old names are no longer there
  attributeNameToIndex = null;
  attributeExists = new BitArray(numAttributes);
}
/**
 * Computes the offset of an attribute in the byte buffer by adding up the sizes of all the attributes that precede
 * it and have a value. An attribute type with a size of -1 is variable-size; its value is prefixed with a two-byte
 * length that is read from the buffer. The offset is accumulated in a {@code short}.
 * @param iAttr the index of the attribute to return its offset in the values array
 * @return the offset of the value of that given attribute in the array of values.
 */
protected short getAttributeOffset(int iAttr) {
  short offset = 0;
  int preceding = 0;
  while (preceding < iAttr) {
    // Attributes without a value occupy no space in the buffer
    if (attributeExists.get(preceding)) {
      int valueLength = attributeTypes[preceding].size;
      if (valueLength == -1) {
        // Variable-size attribute: the length comes first, followed by the value itself
        valueLength = attributeValues.getShort(offset);
        offset += 2;
      }
      offset += valueLength;
    }
    preceding++;
  }
  assert offset >= 0 : String.format("Invalid offset %d", offset);
  assert offset < attributeValues.limit() :
      String.format("Offset %d is out of bound %d", offset, attributeValues.limit());
  return offset;
}
/**
 * Returns the type of the attribute at the given index.
 * @param i the index of the attribute
 * @return the type stored for that attribute
 */
@Override
public FieldType getAttributeType(int i) {
  return this.attributeTypes[i];
}
/**
 * Decodes the value of the attribute at the given index from the byte buffer.
 * @param iAttr the index of the attribute
 * @return the decoded value, or {@code null} if the index is not less than the number of attributes or the
 *   attribute has no value. Timestamps are returned as a {@link GregorianCalendar} in the {@link #UTC} time zone.
 * @throws RuntimeException if the type of the attribute is not supported
 */
@Override
public Object getAttributeValue(int iAttr) {
  if (iAttr >= getNumAttributes() || !attributeExists.get(iAttr))
    return null;
  final int valueOffset = getAttributeOffset(iAttr);
  final FieldType type = attributeTypes[iAttr];
  switch (type) {
    case BooleanType:
      // Booleans take one byte; one means true
      return attributeValues.get(valueOffset) == 1;
    case IntegerType:
      return attributeValues.getInt(valueOffset);
    case LongType:
      return attributeValues.getLong(valueOffset);
    case DoubleType:
      return attributeValues.getDouble(valueOffset);
    case TimestampType: {
      // Timestamps are stored as milliseconds and returned as a calendar in UTC
      GregorianCalendar calendar = new GregorianCalendar(UTC);
      calendar.setTimeInMillis(attributeValues.getLong(valueOffset));
      return calendar;
    }
    case StringType: {
      // Strings are stored as a two-byte length followed by the bytes of the string
      short stringLength = attributeValues.getShort(valueOffset);
      assert valueOffset + 2 + stringLength <= attributeValues.limit() : "String too long "+stringLength;
      return new String(attributeValues.array(), valueOffset + 2, stringLength);
    }
    default:
      throw new RuntimeException("Unsupported type " + type);
  }
}
/**
 * Returns the value of the attribute with the given name. The name-to-index map is built on the first call and
 * reused afterwards. Indexes are kept in the map as bytes and decoded as unsigned values so that attribute
 * indexes above 127 are still found correctly.
 * @param name the name of the attribute to look up
 * @return the value of the attribute, or {@code null} if this feature has no attributes, no attribute has the
 *   given name, or the attribute has no value
 */
@Override
public Object getAttributeValue(String name) {
  if (getNumAttributes() == 0)
    return null;
  if (attributeNameToIndex == null) {
    // Lazily initialize the attribute name to index map.
    // The loop counter is an int; a byte counter would overflow after index 127 and never terminate normally.
    this.attributeNameToIndex = new HashMap<>();
    for (int iAttr = 0; iAttr < attributeNames.length; iAttr++)
      attributeNameToIndex.put(attributeNames[iAttr], (byte) iAttr);
  }
  // A single lookup is enough since the map never holds null indexes
  Object index = attributeNameToIndex.get(name);
  if (index == null)
    return null;
  // Undo the narrowing to a byte so that indexes 128..255 do not come back negative
  return getAttributeValue(((Number) index).intValue() & 0xFF);
}
/**
 * Returns the number of attributes in this feature.
 * @return the length of the attribute types array, or zero if that array is not set
 */
@Override
public int getNumAttributes() {
  if (attributeTypes == null)
    return 0;
  return attributeTypes.length;
}
/**
 * Returns the name of the attribute at the given index.
 * @param iAttr the index of the attribute
 * @return the name stored for that attribute, which can be {@code null} if no name was set
 */
@Override
public String getAttributeName(int iAttr) {
  return this.attributeNames[iAttr];
}
/**
 * Estimates the storage size of this feature. The estimate adds up the geometry storage size and, when the feature
 * has attributes, the number of value bytes plus four, one byte per attribute type, and the length of each
 * attribute name plus one. A missing value buffer counts as zero value bytes and a missing name is counted as the
 * placeholder name {@code "attr#<index>"} that {@link #writeHeader(DataOutput)} writes in its place.
 * @return the estimated size in bytes
 */
@Override
public int getStorageSize() {
  int storageSize = GeometryHelper.getGeometryStorageSize(getGeometry());
  if (getNumAttributes() > 0) {
    // The value buffer is only created once a non-null value is stored, so it can be null here
    int numValueBytes = attributeValues == null ? 0 : attributeValues.limit();
    storageSize += numValueBytes + 4;
    storageSize += attributeTypes.length;
    for (int iAttr = 0; iAttr < attributeNames.length; iAttr++) {
      String name = attributeNames[iAttr];
      // Names are optional; use the same placeholder that is written to the header
      if (name == null)
        name = "attr#" + iAttr;
      storageSize += name.length() + 1;
    }
  }
  return storageSize;
}
/**
 * Writes only the header (number of attributes, attribute types, and attribute names) with no values or
 * geometries. This is useful when storing many features in one file: the header is written once and
 * {@link #writeValue(DataOutput, boolean)} is used to write only the values of each record.
 * The number of attributes is written as a single byte and each type is written as its ordinal in a single byte.
 * An attribute with no name is written with the name {@code "attr#<index>"}.
 * @param out the output to write the header to
 * @throws IOException if an error happens while writing to the given output
 */
public void writeHeader(DataOutput out) throws IOException {
  final int numAttributes = getNumAttributes();
  // The number of attributes takes a single byte
  out.writeByte(numAttributes);
  if (numAttributes <= 0)
    return;
  // Attribute types, one byte each
  for (int iAttr = 0; iAttr < attributeTypes.length; iAttr++)
    out.writeByte(attributeTypes[iAttr].ordinal());
  // Attribute names, with a placeholder for the missing ones
  for (int iAttr = 0; iAttr < numAttributes; iAttr++) {
    String name = getAttributeName(iAttr);
    out.writeUTF(name == null ? "attr#" + iAttr : name);
  }
}
/**
 * Reads the header back from the given input in the same layout written by {@link #writeHeader(DataOutput)}:
 * the number of attributes as an unsigned byte, then one byte per attribute type, then the attribute names.
 * The cached name-to-index map is dropped if any of the names changes.
 * @param in the input to read the header from
 * @throws IOException if an error happens while reading the header
 */
public void readHeader(DataInput in) throws IOException {
  int numAttributes = in.readUnsignedByte();
  setNumAttribute(numAttributes);
  if (numAttributes <= 0)
    return;
  // Each type is stored as its ordinal
  FieldType[] allTypes = FieldType.values();
  for (int iAttr = 0; iAttr < getNumAttributes(); iAttr++)
    attributeTypes[iAttr] = allTypes[in.readByte()];
  // Read attribute names
  for (int iAttr = 0; iAttr < attributeNames.length; iAttr++) {
    String name = in.readUTF();
    // A changed name makes the cached lookup wrong; drop it so that it gets rebuilt
    if (!name.equals(attributeNames[iAttr]))
      attributeNameToIndex = null;
    attributeNames[iAttr] = name;
  }
}
/**
 * Writes the attribute values and the geometry to the output. It is assumed that the field types are stored once
 * using the {@link #writeHeader(DataOutput)} method. When the feature has attributes, the output starts with the
 * number of value bytes as an int, the value bytes, and the bitmask of existing attributes. The geometry always
 * comes last; a {@code null} geometry is written as {@code EmptyGeometry.instance}.
 * @param out the output to write the attribute values to
 * @param includeSRID whether to include the SRID in the serialized value or not
 * @throws IOException if an error happens while writing the attribute values
 */
public void writeValue(DataOutput out, boolean includeSRID) throws IOException {
  if (getNumAttributes() > 0) {
    if (attributeValues == null) {
      // No value was ever stored; write an empty value section
      out.writeInt(0);
    } else {
      // The value length followed by the values
      final int numValueBytes = attributeValues.limit();
      out.writeInt(numValueBytes);
      out.write(attributeValues.array(), 0, numValueBytes);
    }
    attributeExists.writeBitsMinimal(out);
  }
  new GeometryWriter().write(geometry == null? EmptyGeometry.instance : geometry, out, includeSRID);
}
/**
 * Reads only the value part of the feature from the given {@link DataInput}. The header must be already set,
 * e.g., using {@link #readHeader(DataInput)}. The value buffer is reused when it is large enough. After reading,
 * the position of the value buffer is placed right after the values so that attributes appended later are added
 * after the values that were read instead of overwriting them.
 * @param in the input to read the attribute values from
 * @param reader the geometry reader used to parse the geometry from the input
 * @throws IOException if an error happens while reading the values
 */
public void readValue(DataInput in, GeometryReader reader) throws IOException {
  if (getNumAttributes() > 0) {
    // Read value length
    int valueSize = in.readInt();
    // Initialize the buffer to hold the values
    if (attributeValues == null || valueSize > attributeValues.capacity())
      attributeValues = ByteBuffer.allocate(valueSize);
    else
      attributeValues.limit(valueSize);
    // Read the values as a byte array
    in.readFully(attributeValues.array(), 0, valueSize);
    // Reading into the backing array does not move the buffer position. Move it to the end of the data
    // because appending a new attribute writes at the current position.
    attributeValues.position(valueSize);
    if (attributeExists == null)
      attributeExists = new BitArray(getNumAttributes());
    attributeExists.readBitsMinimal(in);
  }
  // Read the geometry value
  geometry = reader.parse(in);
}
/**
 * Reads only the value part of the feature and parses the geometry with {@code GeometryReader.DefaultInstance}.
 * @param in the input to read the attribute values and geometry from
 * @throws IOException if an error happens while reading the values
 */
public void readValue(DataInput in) throws IOException {
  readValue(in, GeometryReader.DefaultInstance);
}
/**
 * Writes the header (attribute types and names) followed by the value (attribute values and geometry) to the given
 * output. The geometry is written with its SRID. This is helpful when writing a single feature or when the
 * feature is passed between machines in Spark RDD or Hadoop MapReduce.
 * @param out the output to write both the header and values to
 * @throws IOException if an error happens while writing the feature data
 */
@Override
public void write(DataOutput out) throws IOException {
  this.writeHeader(out);
  this.writeValue(out, true);
}
/**
 * Writes this feature to an object output by delegating to {@link #write(DataOutput)}.
 * @param out the output to write the feature to
 * @throws IOException if an error happens while writing the feature
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  write(out);
}
/**
 * Reads a complete feature, the header followed by the value, from the given input.
 * @param in the input to read the feature from
 * @throws IOException if an error happens while reading the feature
 */
@Override
public void readFields(DataInput in) throws IOException {
  this.readHeader(in);
  this.readValue(in);
}
/**
 * Reads this feature from an object input by delegating to {@link #readFields(DataInput)}.
 * @param in the input to read the feature from
 * @throws IOException if an error happens while reading the feature
 * @throws ClassNotFoundException declared by the overridden method; not thrown by the code in this method
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  this.readFields(in);
}
/**
 * Appends an attribute whose type is detected from the given value.
 * @param name the name of the attribute to append
 * @param value the value of the attribute to append
 * @return this feature to allow chaining calls
 */
public Feature appendAttribute(String name, Object value) {
  FieldType detectedType = detectFiledType(value);
  appendAttribute(name, value, detectedType);
  return this;
}
/**
 * Detects the attribute type that matches the class of the given value.
 * @param value the value to detect its type
 * @return the matching type, or {@code null} if the value is {@code null}
 * @throws RuntimeException if the class of the value does not match any of the supported types
 */
protected FieldType detectFiledType(Object value) {
  final FieldType detected;
  if (value == null) {
    detected = null;
  } else if (value instanceof String) {
    detected = FieldType.StringType;
  } else if (value instanceof Integer || value instanceof Short || value instanceof Byte) {
    // All the integer types that fit in an int are stored as integers
    detected = FieldType.IntegerType;
  } else if (value instanceof Long) {
    detected = FieldType.LongType;
  } else if (value instanceof Double || value instanceof Float) {
    // Floats are stored as doubles
    detected = FieldType.DoubleType;
  } else if (value instanceof Calendar) {
    detected = FieldType.TimestampType;
  } else if (value instanceof Boolean) {
    detected = FieldType.BooleanType;
  } else {
    throw new RuntimeException("Unsupported value type " + value.getClass());
  }
  return detected;
}
/**
 * Adds an attribute to the end of the list of attributes. The value is encoded at the current position of the
 * value buffer. A {@code null} value is only marked in the bitmask and takes no space in the buffer.
 * @param name the name of the attribute to append
 * @param value the value of the attribute to append
 * @param fieldType the type of the field that should match the value. This can be helpful if the value is null.
 * @throws IllegalArgumentException if the encoded form of a string value is longer than {@link Short#MAX_VALUE}
 *   bytes, which is the longest length that the two-byte length prefix can represent. The feature is not
 *   modified in this case.
 * @throws RuntimeException if the given field type is not supported
 */
public void appendAttribute(String name, Object value, FieldType fieldType) {
  // Encode and validate a string value before touching the metadata so that a rejected value leaves
  // this feature unchanged
  byte[] strBytes = null;
  if (value != null && fieldType == FieldType.StringType) {
    strBytes = ((String) value).getBytes();
    // The length is stored in a signed short; a longer string would silently corrupt the buffer
    if (strBytes.length > Short.MAX_VALUE)
      throw new IllegalArgumentException(String.format(
          "String attribute '%s' is too long (%d bytes). Maximum is %d bytes",
          name, strBytes.length, Short.MAX_VALUE));
  }
  int newAttributeIndex = getNumAttributes();
  int newNumAttributes = newAttributeIndex + 1;
  // Add attribute name
  attributeNames = expand(attributeNames, newNumAttributes);
  attributeNames[newNumAttributes - 1] = name;
  if (attributeNameToIndex != null)
    attributeNameToIndex.put(name, (byte) newAttributeIndex);
  // Set attribute type and value
  if (attributeTypes == null)
    attributeTypes = new FieldType[newNumAttributes];
  else
    attributeTypes = Arrays.copyOf(attributeTypes, newNumAttributes);
  if (attributeExists == null)
    attributeExists = new BitArray(newNumAttributes);
  attributeExists.resize(newNumAttributes);
  attributeExists.set(newAttributeIndex, value != null);
  attributeTypes[newAttributeIndex] = fieldType;
  if (value != null) {
    switch (fieldType) {
      case StringType:
        // A two-byte length followed by the bytes of the string
        expandAttributeValues(2 + strBytes.length);
        attributeValues.putShort((short) strBytes.length);
        attributeValues.put(strBytes);
        break;
      case IntegerType:
        expandAttributeValues(4);
        attributeValues.putInt(((Number) value).intValue());
        break;
      case LongType:
        expandAttributeValues(8);
        // Accept any number, like the integer and double types do, rather than only Long
        attributeValues.putLong(((Number) value).longValue());
        break;
      case DoubleType:
        expandAttributeValues(8);
        attributeValues.putDouble(((Number) value).doubleValue());
        break;
      case TimestampType:
        expandAttributeValues(8);
        attributeValues.putLong(((Calendar) value).getTimeInMillis());
        break;
      case BooleanType:
        expandAttributeValues(FieldType.BooleanType.size);
        attributeValues.put((byte)((Boolean)value ? 1 : 0));
        break;
      default:
        throw new RuntimeException("Unsupported value type " + fieldType);
    }
  }
}
/**
 * Returns a text representation of this feature as produced by {@code IFeature.toString(IFeature)}.
 * @return the text representation of this feature
 */
@Override
public String toString() {
  String text = IFeature.toString(this);
  return text;
}
/**
 * Compares this feature to another object. Only instances of {@link IFeature} can be equal to this feature and
 * the comparison itself is delegated to {@code IFeature.equals(IFeature, IFeature)}.
 * NOTE(review): this class does not override {@code hashCode()} alongside this method.
 * @param obj the object to compare to
 * @return {@code true} if the object is a feature that is equal to this one
 */
@Override
public boolean equals(Object obj) {
  return obj instanceof IFeature && IFeature.equals(this, (IFeature) obj);
}
/**
 * Removes all the attributes of this feature, i.e., their types, names, and values, and keeps the geometry.
 * The bitmask of existing attributes and the cached name-to-index map are cleared as well so that attributes
 * appended afterwards do not see leftovers of the removed ones, e.g., a lookup by the name of a removed
 * attribute that resolves to the index of a new one.
 */
public void clearAttributes() {
  attributeTypes = null;
  attributeValues = null;
  attributeNames = null;
  attributeExists = null;
  // Invalidate the cache; it is rebuilt on the next lookup by name
  attributeNameToIndex = null;
}
/**
 * Makes sure that a byte array has at least the given size. A {@code null} array is replaced with a new array
 * of the given size. An array that is shorter than the given size is replaced with a new array of the given size
 * that starts with the contents of the given array. Otherwise, the given array is returned as is.
 * @param array An existing array or {@code null}
 * @param newSize the minimum size of the returned array
 * @return either the same array if it is not smaller than the given size or a newly created array
 */
private static byte[] expand(byte[] array, int newSize) {
  if (array == null)
    return new byte[newSize];
  return array.length >= newSize ? array : Arrays.copyOf(array, newSize);
}
/**
 * Makes sure that a string array has at least the given size. A {@code null} array is replaced with a new array
 * of the given size. An array that is shorter than the given size is replaced with a new array of the given size
 * that starts with the contents of the given array. Otherwise, the given array is returned as is.
 * @param array An existing array or {@code null}
 * @param newSize the minimum size of the returned array
 * @return either the same array if it is not smaller than the given size or a newly created array
 */
private static String[] expand(String[] array, int newSize) {
  if (array == null)
    return new String[newSize];
  return array.length >= newSize ? array : Arrays.copyOf(array, newSize);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy